tesseract v5.3.3.20231005
intmatcher.h
Go to the documentation of this file.
1/******************************************************************************
2 ** Filename: intmatcher.h
3 ** Purpose: Interface to high level generic classifier routines.
4 ** Author: Robert Moss
5 **
6 ** (c) Copyright Hewlett-Packard Company, 1988.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 ******************************************************************************/
17#ifndef INTMATCHER_H
18#define INTMATCHER_H
19
20#include "intproto.h"
21#include "params.h"
22
23namespace tesseract {
24
25// Character fragments could be present in the trained templates
26// but turned on/off on a language-by-language basis or depending
27// on particular properties of the corpus (e.g. when we expect the
28// images to have low exposure).
29extern BOOL_VAR_H(disable_character_fragments);
30
31extern INT_VAR_H(classify_integer_matcher_multiplier);
32
33struct UnicharRating;
34
36 CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {}
37
38 float Rating;
40};
41
// Size parameters for the similarity-evidence lookup table.
// SE_TABLE_SIZE must equal 1 << SE_TABLE_BITS (2^9 == 512); change both together.
// SE_TABLE_SIZE dimensions IntegerMatcher::similarity_evidence_table_ below.
46#define SE_TABLE_BITS 9
47#define SE_TABLE_SIZE 512
48
53
// Member functions of the scratch-evidence structure (presumably ScratchEvidence,
// the type passed to the matcher helpers below). Its opening line and its data
// members feature_evidence_, sum_feature_evidence_ and proto_evidence_ are elided
// from this listing. Clear and ClearFeatureEvidence are defined in intmatcher.cpp.
// NOTE(review): the Clear* functions presumably reset those arrays for the given
// class template; NormalizeSums and UpdateSumOfProtoEvidences take a non-const
// template pointer although their names suggest read-only use -- confirm in the .cpp.
54 void Clear(const INT_CLASS_STRUCT *class_template);
55 void ClearFeatureEvidence(const INT_CLASS_STRUCT *class_template);
56 void NormalizeSums(INT_CLASS_STRUCT *ClassTemplate, int16_t NumFeatures);
57 void UpdateSumOfProtoEvidences(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask);
58};
59
61public:
62 // Integer Matcher Theta Fudge (0-255).
63 static const int kIntThetaFudge = 128;
64 // Bits in Similarity to Evidence Lookup (8-9).
65 static const int kEvidenceTableBits = 9;
66 // Integer Evidence Truncation Bits (8-14).
67 static const int kIntEvidenceTruncBits = 14;
68 // Similarity to Evidence Table Exponential Multiplier.
69 static const float kSEExponentialMultiplier;
70 // Center of Similarity Curve.
71 static const float kSimilarityCenter;
72
73 IntegerMatcher(tesseract::IntParam *classify_debug_level);
74
// Declaration only; the definition is in intmatcher.cpp.
// Takes a class template, proto/config bit masks, and NumFeatures entries of
// Features (read-only). Result is the only non-const output pointer, so the
// match outcome is presumably reported through it -- TODO confirm.
// NOTE(review): Debug and SeparateDebugWindows look like diagnostics-only
// switches; verify they do not affect the computed result.
75 void Match(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
76 int16_t NumFeatures, const INT_FEATURE_STRUCT *Features,
77 tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug,
78 bool SeparateDebugWindows);
79
80 // Applies the CN normalization factor to the given rating and returns
81 // the modified rating.
82 float ApplyCNCorrection(float rating, int blob_length, int normalization_factor,
83 int matcher_multiplier);
84
// Declaration only; the definition is in intmatcher.cpp.
// ProtoArray is a caller-supplied PROTO_ID buffer and the only output pointer.
// NOTE(review): the int return value is presumably the number of entries
// written to ProtoArray, selected using AdaptProtoThreshold -- confirm in the .cpp.
85 int FindGoodProtos(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
86 int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray,
87 int AdaptProtoThreshold, int Debug);
88
// Declaration only; the definition is in intmatcher.cpp.
// FeatureArray is a caller-supplied FEATURE_ID buffer and the only output pointer.
// NOTE(review): the int return value is presumably the number of entries
// written to FeatureArray, selected using AdaptFeatureThreshold -- confirm in the .cpp.
89 int FindBadFeatures(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
90 int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray,
91 int AdaptFeatureThreshold, int Debug);
92
93private:
94 int UpdateTablesForFeature(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
95 int FeatureNum, const INT_FEATURE_STRUCT *Feature,
96 ScratchEvidence *evidence, int Debug);
97
98 int FindBestMatch(INT_CLASS_STRUCT *ClassTemplate, const ScratchEvidence &tables,
100
101#ifndef GRAPHICS_DISABLED
102 void DebugFeatureProtoError(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
103 const ScratchEvidence &tables, int16_t NumFeatures, int Debug);
104
105 void DisplayProtoDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask,
106 const ScratchEvidence &tables, bool SeparateDebugWindows);
107
108 void DisplayFeatureDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
109 int16_t NumFeatures, const INT_FEATURE_STRUCT *Features,
110 int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows);
111#endif
112
113private:
// Debug-level parameter, held by pointer (not owned as far as this header shows).
// NOTE(review): presumably the pointer passed to the constructor -- confirm.
114 tesseract::IntParam *classify_debug_level_;
// Lookup table with SE_TABLE_SIZE one-byte entries.
115 uint8_t similarity_evidence_table_[SE_TABLE_SIZE];
// Mask and shift values used by the matcher.
// NOTE(review): presumably derived from kEvidenceTableBits and
// kIntEvidenceTruncBits when the object is constructed -- confirm in the .cpp.
116 uint32_t evidence_table_mask_;
117 uint32_t mult_trunc_shift_bits_;
118 uint32_t table_trunc_shift_bits_;
119 uint32_t evidence_mult_mask_;
120};
121
122} // namespace tesseract
123
124#endif
uint32_t * BIT_VECTOR
Definition: bitvec.h:28
#define MAX_NUM_PROTOS
Definition: intproto.h:48
#define MAX_PROTO_INDEX
Definition: intproto.h:44
#define MAX_NUM_CONFIGS
Definition: intproto.h:47
#define SE_TABLE_SIZE
Definition: intmatcher.h:47
BOOL_VAR_H(wordrec_display_splits)
int16_t PROTO_ID
Definition: matchdefs.h:40
INT_VAR_H(editor_image_xpos)
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:34
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:137
uint8_t FEATURE_ID
Definition: matchdefs.h:46
void NormalizeSums(INT_CLASS_STRUCT *ClassTemplate, int16_t NumFeatures)
void UpdateSumOfProtoEvidences(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask)
void ClearFeatureEvidence(const INT_CLASS_STRUCT *class_template)
Definition: intmatcher.cpp:702
uint8_t feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:50
uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]
Definition: intmatcher.h:52
void Clear(const INT_CLASS_STRUCT *class_template)
Definition: intmatcher.cpp:697
int sum_feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:51
static const int kIntThetaFudge
Definition: intmatcher.h:63
void Match(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:482
static const int kEvidenceTableBits
Definition: intmatcher.h:65
int FindBadFeatures(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:619
static const float kSEExponentialMultiplier
Definition: intmatcher.h:69
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
int FindGoodProtos(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:555
static const float kSimilarityCenter
Definition: intmatcher.h:71
static const int kIntEvidenceTruncBits
Definition: intmatcher.h:67
IntegerMatcher(tesseract::IntParam *classify_debug_level)
Definition: intmatcher.cpp:668