tesseract  4.00.00dev
intmatcher.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: intmatcher.c
3  ** Purpose: Generic high level classification routines.
4  ** Author: Robert Moss
5  ** History: Wed Feb 13 17:35:28 MST 1991, RWM, Created.
6  ** Mon Mar 11 16:33:02 MST 1991, RWM, Modified to add
7  ** support for adaptive matching.
8  ** (c) Copyright Hewlett-Packard Company, 1988.
9  ** Licensed under the Apache License, Version 2.0 (the "License");
10  ** you may not use this file except in compliance with the License.
11  ** You may obtain a copy of the License at
12  ** http://www.apache.org/licenses/LICENSE-2.0
13  ** Unless required by applicable law or agreed to in writing, software
14  ** distributed under the License is distributed on an "AS IS" BASIS,
15  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  ** See the License for the specific language governing permissions and
17  ** limitations under the License.
18  ******************************************************************************/
19 
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 /*----------------------------------------------------------------------------
26  Include Files and Type Defines
27 ----------------------------------------------------------------------------*/
28 #include "intmatcher.h"
29 
30 #include "fontinfo.h"
31 #include "intproto.h"
32 #include "callcpp.h"
33 #include "scrollview.h"
34 #include "float2int.h"
35 #include "globals.h"
36 #include "helpers.h"
37 #include "classify.h"
38 #include "shapetable.h"
39 #include <math.h>
40 
43 
44 /*----------------------------------------------------------------------------
45  Global Data Definitions and Declarations
46 ----------------------------------------------------------------------------*/
47 // Parameters of the sigmoid used to convert similarity to evidence in the
48 // similarity_evidence_table_ that is used to convert distance metric to an
49 // 8 bit evidence value in the secondary matcher. (See IntegerMatcher::Init).
51 const float IntegerMatcher::kSimilarityCenter = 0.0075;
52 
53 #define offset_table_entries \
54  255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \
55  0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \
56  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, \
57  0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, \
58  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \
59  0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \
60  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, \
61  0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, \
62  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \
63  0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \
64  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
65 
66 #define INTMATCHER_OFFSET_TABLE_SIZE 256
67 
68 #define next_table_entries \
69  0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e, \
70  0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, 0x10, 0x18, 0x18, 0x1a, \
71  0x18, 0x1c, 0x1c, 0x1e, 0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26, \
72  0x20, 0x28, 0x28, 0x2a, 0x28, 0x2c, 0x2c, 0x2e, 0x20, 0x30, 0x30, 0x32, \
73  0x30, 0x34, 0x34, 0x36, 0x30, 0x38, 0x38, 0x3a, 0x38, 0x3c, 0x3c, 0x3e, \
74  0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, 0x40, 0x48, 0x48, 0x4a, \
75  0x48, 0x4c, 0x4c, 0x4e, 0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56, \
76  0x50, 0x58, 0x58, 0x5a, 0x58, 0x5c, 0x5c, 0x5e, 0x40, 0x60, 0x60, 0x62, \
77  0x60, 0x64, 0x64, 0x66, 0x60, 0x68, 0x68, 0x6a, 0x68, 0x6c, 0x6c, 0x6e, \
78  0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, 0x70, 0x78, 0x78, 0x7a, \
79  0x78, 0x7c, 0x7c, 0x7e, 0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86, \
80  0x80, 0x88, 0x88, 0x8a, 0x88, 0x8c, 0x8c, 0x8e, 0x80, 0x90, 0x90, 0x92, \
81  0x90, 0x94, 0x94, 0x96, 0x90, 0x98, 0x98, 0x9a, 0x98, 0x9c, 0x9c, 0x9e, \
82  0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, 0xa0, 0xa8, 0xa8, 0xaa, \
83  0xa8, 0xac, 0xac, 0xae, 0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6, \
84  0xb0, 0xb8, 0xb8, 0xba, 0xb8, 0xbc, 0xbc, 0xbe, 0x80, 0xc0, 0xc0, 0xc2, \
85  0xc0, 0xc4, 0xc4, 0xc6, 0xc0, 0xc8, 0xc8, 0xca, 0xc8, 0xcc, 0xcc, 0xce, \
86  0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, 0xd0, 0xd8, 0xd8, 0xda, \
87  0xd8, 0xdc, 0xdc, 0xde, 0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6, \
88  0xe0, 0xe8, 0xe8, 0xea, 0xe8, 0xec, 0xec, 0xee, 0xe0, 0xf0, 0xf0, 0xf2, \
89  0xf0, 0xf4, 0xf4, 0xf6, 0xf0, 0xf8, 0xf8, 0xfa, 0xf8, 0xfc, 0xfc, 0xfe
90 
91 // See http://b/19318793 (#6) for a complete discussion. Merging arrays
92 // offset_table and next_table helps improve performance of PIE code.
93 static const uinT8 data_table[512] = {offset_table_entries, next_table_entries};
94 
95 static const uinT8* const offset_table = &data_table[0];
96 static const uinT8* const next_table =
97  &data_table[INTMATCHER_OFFSET_TABLE_SIZE];
98 
99 namespace tesseract {
100 
101 // Encapsulation of the intermediate data and computations made by the class
102 // pruner. The class pruner implements a simple linear classifier on binary
103 // features by heavily quantizing the feature space, and applying
104 // NUM_BITS_PER_CLASS (2)-bit weights to the features. Lack of resolution in
105 // weights is compensated by a non-constant bias that is dependent on the
106 // number of features present.
107 class ClassPruner {
108  public:
109  ClassPruner(int max_classes) {
110  // The unrolled loop in ComputeScores means that the array sizes need to
111  // be rounded up so that the array is big enough to accommodate the extra
112  // entries accessed by the unrolling. Each pruner word is of size
113  // BITS_PER_WERD and each entry is NUM_BITS_PER_CLASS, so there are
114  // BITS_PER_WERD / NUM_BITS_PER_CLASS entries.
115  // See ComputeScores.
116  max_classes_ = max_classes;
117  rounded_classes_ = RoundUp(
119  class_count_ = new int[rounded_classes_];
120  norm_count_ = new int[rounded_classes_];
121  sort_key_ = new int[rounded_classes_ + 1];
122  sort_index_ = new int[rounded_classes_ + 1];
123  for (int i = 0; i < rounded_classes_; i++) {
124  class_count_[i] = 0;
125  }
126  pruning_threshold_ = 0;
127  num_features_ = 0;
128  num_classes_ = 0;
129  }
130 
132  delete []class_count_;
133  delete []norm_count_;
134  delete []sort_key_;
135  delete []sort_index_;
136  }
137 
140  void ComputeScores(const INT_TEMPLATES_STRUCT* int_templates,
141  int num_features, const INT_FEATURE_STRUCT* features) {
142  num_features_ = num_features;
143  int num_pruners = int_templates->NumClassPruners;
144  for (int f = 0; f < num_features; ++f) {
145  const INT_FEATURE_STRUCT* feature = &features[f];
146  // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
147  int x = feature->X * NUM_CP_BUCKETS >> 8;
148  int y = feature->Y * NUM_CP_BUCKETS >> 8;
149  int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
150  int class_id = 0;
151  // Each CLASS_PRUNER_STRUCT only covers CLASSES_PER_CP(32) classes, so
152  // we need a collection of them, indexed by pruner_set.
153  for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
154  // Look up quantized feature in a 3-D array, an array of weights for
155  // each class.
156  const uinT32* pruner_word_ptr =
157  int_templates->ClassPruners[pruner_set]->p[x][y][theta];
158  for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
159  uinT32 pruner_word = *pruner_word_ptr++;
160  // This inner loop is unrolled to speed up the ClassPruner.
161  // Currently gcc would not unroll it unless it is set to O3
162  // level of optimization or -funroll-loops is specified.
163  /*
164  uinT32 class_mask = (1 << NUM_BITS_PER_CLASS) - 1;
165  for (int bit = 0; bit < BITS_PER_WERD/NUM_BITS_PER_CLASS; bit++) {
166  class_count_[class_id++] += pruner_word & class_mask;
167  pruner_word >>= NUM_BITS_PER_CLASS;
168  }
169  */
170  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
171  pruner_word >>= NUM_BITS_PER_CLASS;
172  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
173  pruner_word >>= NUM_BITS_PER_CLASS;
174  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
175  pruner_word >>= NUM_BITS_PER_CLASS;
176  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
177  pruner_word >>= NUM_BITS_PER_CLASS;
178  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
179  pruner_word >>= NUM_BITS_PER_CLASS;
180  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
181  pruner_word >>= NUM_BITS_PER_CLASS;
182  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
183  pruner_word >>= NUM_BITS_PER_CLASS;
184  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
185  pruner_word >>= NUM_BITS_PER_CLASS;
186  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
187  pruner_word >>= NUM_BITS_PER_CLASS;
188  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
189  pruner_word >>= NUM_BITS_PER_CLASS;
190  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
191  pruner_word >>= NUM_BITS_PER_CLASS;
192  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
193  pruner_word >>= NUM_BITS_PER_CLASS;
194  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
195  pruner_word >>= NUM_BITS_PER_CLASS;
196  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
197  pruner_word >>= NUM_BITS_PER_CLASS;
198  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
199  pruner_word >>= NUM_BITS_PER_CLASS;
200  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
201  }
202  }
203  }
204  }
205 
211  void AdjustForExpectedNumFeatures(const uinT16* expected_num_features,
212  int cutoff_strength) {
213  for (int class_id = 0; class_id < max_classes_; ++class_id) {
214  if (num_features_ < expected_num_features[class_id]) {
215  int deficit = expected_num_features[class_id] - num_features_;
216  class_count_[class_id] -= class_count_[class_id] * deficit /
217  (num_features_ * cutoff_strength + deficit);
218  }
219  }
220  }
221 
224  void DisableDisabledClasses(const UNICHARSET& unicharset) {
225  for (int class_id = 0; class_id < max_classes_; ++class_id) {
226  if (!unicharset.get_enabled(class_id))
227  class_count_[class_id] = 0; // This char is disabled!
228  }
229  }
230 
232  void DisableFragments(const UNICHARSET& unicharset) {
233  for (int class_id = 0; class_id < max_classes_; ++class_id) {
234  // Do not include character fragments in the class pruner
235  // results if disable_character_fragments is true.
236  if (unicharset.get_fragment(class_id)) {
237  class_count_[class_id] = 0;
238  }
239  }
240  }
241 
246  void NormalizeForXheight(int norm_multiplier,
247  const uinT8* normalization_factors) {
248  for (int class_id = 0; class_id < max_classes_; class_id++) {
249  norm_count_[class_id] = class_count_[class_id] -
250  ((norm_multiplier * normalization_factors[class_id]) >> 8);
251  }
252  }
253 
256  for (int class_id = 0; class_id < max_classes_; class_id++) {
257  norm_count_[class_id] = class_count_[class_id];
258  }
259  }
260 
264  void PruneAndSort(int pruning_factor, int keep_this,
265  bool max_of_non_fragments, const UNICHARSET& unicharset) {
266  int max_count = 0;
267  for (int c = 0; c < max_classes_; ++c) {
268  if (norm_count_[c] > max_count &&
269  // This additional check is added in order to ensure that
270  // the classifier will return at least one non-fragmented
271  // character match.
272  // TODO(daria): verify that this helps accuracy and does not
273  // hurt performance.
274  (!max_of_non_fragments || !unicharset.get_fragment(c))) {
275  max_count = norm_count_[c];
276  }
277  }
278  // Prune Classes.
279  pruning_threshold_ = (max_count * pruning_factor) >> 8;
280  // Select Classes.
281  if (pruning_threshold_ < 1)
282  pruning_threshold_ = 1;
283  num_classes_ = 0;
284  for (int class_id = 0; class_id < max_classes_; class_id++) {
285  if (norm_count_[class_id] >= pruning_threshold_ ||
286  class_id == keep_this) {
287  ++num_classes_;
288  sort_index_[num_classes_] = class_id;
289  sort_key_[num_classes_] = norm_count_[class_id];
290  }
291  }
292 
293  // Sort Classes using Heapsort Algorithm.
294  if (num_classes_ > 1)
295  HeapSort(num_classes_, sort_key_, sort_index_);
296  }
297 
300  void DebugMatch(const Classify& classify,
301  const INT_TEMPLATES_STRUCT* int_templates,
302  const INT_FEATURE_STRUCT* features) const {
303  int num_pruners = int_templates->NumClassPruners;
304  int max_num_classes = int_templates->NumClasses;
305  for (int f = 0; f < num_features_; ++f) {
306  const INT_FEATURE_STRUCT* feature = &features[f];
307  tprintf("F=%3d(%d,%d,%d),", f, feature->X, feature->Y, feature->Theta);
308  // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
309  int x = feature->X * NUM_CP_BUCKETS >> 8;
310  int y = feature->Y * NUM_CP_BUCKETS >> 8;
311  int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
312  int class_id = 0;
313  for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
314  // Look up quantized feature in a 3-D array, an array of weights for
315  // each class.
316  const uinT32* pruner_word_ptr =
317  int_templates->ClassPruners[pruner_set]->p[x][y][theta];
318  for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
319  uinT32 pruner_word = *pruner_word_ptr++;
320  for (int word_class = 0; word_class < 16 &&
321  class_id < max_num_classes; ++word_class, ++class_id) {
322  if (norm_count_[class_id] >= pruning_threshold_) {
323  tprintf(" %s=%d,",
324  classify.ClassIDToDebugStr(int_templates,
325  class_id, 0).string(),
326  pruner_word & CLASS_PRUNER_CLASS_MASK);
327  }
328  pruner_word >>= NUM_BITS_PER_CLASS;
329  }
330  }
331  tprintf("\n");
332  }
333  }
334  }
335 
337  void SummarizeResult(const Classify& classify,
338  const INT_TEMPLATES_STRUCT* int_templates,
339  const uinT16* expected_num_features,
340  int norm_multiplier,
341  const uinT8* normalization_factors) const {
342  tprintf("CP:%d classes, %d features:\n", num_classes_, num_features_);
343  for (int i = 0; i < num_classes_; ++i) {
344  int class_id = sort_index_[num_classes_ - i];
345  STRING class_string = classify.ClassIDToDebugStr(int_templates,
346  class_id, 0);
347  tprintf("%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n",
348  class_string.string(),
349  class_count_[class_id],
350  expected_num_features[class_id],
351  (norm_multiplier * normalization_factors[class_id]) >> 8,
352  sort_key_[num_classes_ - i],
353  100.0 - 100.0 * sort_key_[num_classes_ - i] /
354  (CLASS_PRUNER_CLASS_MASK * num_features_));
355  }
356  }
357 
361  CP_RESULT_STRUCT empty;
362  results->init_to_size(num_classes_, empty);
363  for (int c = 0; c < num_classes_; ++c) {
364  (*results)[c].Class = sort_index_[num_classes_ - c];
365  (*results)[c].Rating = 1.0 - sort_key_[num_classes_ - c] /
366  (static_cast<float>(CLASS_PRUNER_CLASS_MASK) * num_features_);
367  }
368  return num_classes_;
369  }
370 
371  private:
373  int *class_count_;
377  int *norm_count_;
379  int *sort_key_;
381  int *sort_index_;
383  int max_classes_;
385  int rounded_classes_;
387  int pruning_threshold_;
389  int num_features_;
391  int num_classes_;
392 };
393 
394 /*----------------------------------------------------------------------------
395  Public Code
396 ----------------------------------------------------------------------------*/
413  int num_features, int keep_this,
414  const INT_FEATURE_STRUCT* features,
415  const uinT8* normalization_factors,
416  const uinT16* expected_num_features,
418  ClassPruner pruner(int_templates->NumClasses);
419  // Compute initial match scores for all classes.
420  pruner.ComputeScores(int_templates, num_features, features);
421  // Adjust match scores for number of expected features.
422  pruner.AdjustForExpectedNumFeatures(expected_num_features,
423  classify_cp_cutoff_strength);
424  // Apply disabled classes in unicharset - only works without a shape_table.
425  if (shape_table_ == NULL)
426  pruner.DisableDisabledClasses(unicharset);
427  // If fragments are disabled, remove them, also only without a shape table.
428  if (disable_character_fragments && shape_table_ == NULL)
429  pruner.DisableFragments(unicharset);
430 
431  // If we have good x-heights, apply the given normalization factors.
432  if (normalization_factors != NULL) {
433  pruner.NormalizeForXheight(classify_class_pruner_multiplier,
434  normalization_factors);
435  } else {
436  pruner.NoNormalization();
437  }
438  // Do the actual pruning and sort the short-list.
439  pruner.PruneAndSort(classify_class_pruner_threshold, keep_this,
440  shape_table_ == NULL, unicharset);
441 
442  if (classify_debug_level > 2) {
443  pruner.DebugMatch(*this, int_templates, features);
444  }
445  if (classify_debug_level > 1) {
446  pruner.SummarizeResult(*this, int_templates, expected_num_features,
447  classify_class_pruner_multiplier,
448  normalization_factors);
449  }
450  // Convert to the expected output format.
451  return pruner.SetupResults(results);
452 }
453 
454 } // namespace tesseract
455 
475 void IntegerMatcher::Match(INT_CLASS ClassTemplate,
476  BIT_VECTOR ProtoMask,
477  BIT_VECTOR ConfigMask,
478  inT16 NumFeatures,
479  const INT_FEATURE_STRUCT* Features,
480  UnicharRating* Result,
481  int AdaptFeatureThreshold,
482  int Debug,
483  bool SeparateDebugWindows) {
484  ScratchEvidence *tables = new ScratchEvidence();
485  int Feature;
486 
487  if (MatchDebuggingOn (Debug))
488  cprintf ("Integer Matcher -------------------------------------------\n");
489 
490  tables->Clear(ClassTemplate);
491  Result->feature_misses = 0;
492 
493  for (Feature = 0; Feature < NumFeatures; Feature++) {
494  int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask,
495  Feature, &Features[Feature],
496  tables, Debug);
497  // Count features that were missed over all configs.
498  if (csum == 0)
499  ++Result->feature_misses;
500  }
501 
502 #ifndef GRAPHICS_DISABLED
503  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) {
504  DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
505  NumFeatures, Debug);
506  }
507 
508  if (DisplayProtoMatchesOn(Debug)) {
509  DisplayProtoDebugInfo(ClassTemplate, ProtoMask, ConfigMask,
510  *tables, SeparateDebugWindows);
511  }
512 
513  if (DisplayFeatureMatchesOn(Debug)) {
514  DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures,
515  Features, AdaptFeatureThreshold, Debug,
516  SeparateDebugWindows);
517  }
518 #endif
519 
520  tables->UpdateSumOfProtoEvidences(ClassTemplate, ConfigMask, NumFeatures);
521  tables->NormalizeSums(ClassTemplate, NumFeatures, NumFeatures);
522 
523  FindBestMatch(ClassTemplate, *tables, Result);
524 
525 #ifndef GRAPHICS_DISABLED
526  if (PrintMatchSummaryOn(Debug))
527  Result->Print();
528 
529  if (MatchDebuggingOn(Debug))
530  cprintf("Match Complete --------------------------------------------\n");
531 #endif
532 
533  delete tables;
534 }
535 
557  INT_CLASS ClassTemplate,
558  BIT_VECTOR ProtoMask,
559  BIT_VECTOR ConfigMask,
560  uinT16 BlobLength,
561  inT16 NumFeatures,
562  INT_FEATURE_ARRAY Features,
563  PROTO_ID *ProtoArray,
564  int AdaptProtoThreshold,
565  int Debug) {
566  ScratchEvidence *tables = new ScratchEvidence();
567  int NumGoodProtos = 0;
568 
569  /* DEBUG opening heading */
570  if (MatchDebuggingOn (Debug))
571  cprintf
572  ("Find Good Protos -------------------------------------------\n");
573 
574  tables->Clear(ClassTemplate);
575 
576  for (int Feature = 0; Feature < NumFeatures; Feature++)
577  UpdateTablesForFeature(
578  ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]),
579  tables, Debug);
580 
581 #ifndef GRAPHICS_DISABLED
582  if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug))
583  DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
584  NumFeatures, Debug);
585 #endif
586 
587  /* Average Proto Evidences & Find Good Protos */
588  for (int proto = 0; proto < ClassTemplate->NumProtos; proto++) {
589  /* Compute Average for Actual Proto */
590  int Temp = 0;
591  for (int i = 0; i < ClassTemplate->ProtoLengths[proto]; i++)
592  Temp += tables->proto_evidence_[proto][i];
593 
594  Temp /= ClassTemplate->ProtoLengths[proto];
595 
596  /* Find Good Protos */
597  if (Temp >= AdaptProtoThreshold) {
598  *ProtoArray = proto;
599  ProtoArray++;
600  NumGoodProtos++;
601  }
602  }
603 
604  if (MatchDebuggingOn (Debug))
605  cprintf ("Match Complete --------------------------------------------\n");
606  delete tables;
607 
608  return NumGoodProtos;
609 }
610 
627  INT_CLASS ClassTemplate,
628  BIT_VECTOR ProtoMask,
629  BIT_VECTOR ConfigMask,
630  uinT16 BlobLength,
631  inT16 NumFeatures,
632  INT_FEATURE_ARRAY Features,
633  FEATURE_ID *FeatureArray,
634  int AdaptFeatureThreshold,
635  int Debug) {
636  ScratchEvidence *tables = new ScratchEvidence();
637  int NumBadFeatures = 0;
638 
639  /* DEBUG opening heading */
640  if (MatchDebuggingOn(Debug))
641  cprintf("Find Bad Features -------------------------------------------\n");
642 
643  tables->Clear(ClassTemplate);
644 
645  for (int Feature = 0; Feature < NumFeatures; Feature++) {
646  UpdateTablesForFeature(
647  ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
648  tables, Debug);
649 
650  /* Find Best Evidence for Current Feature */
651  int best = 0;
652  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
653  if (tables->feature_evidence_[i] > best)
654  best = tables->feature_evidence_[i];
655 
656  /* Find Bad Features */
657  if (best < AdaptFeatureThreshold) {
658  *FeatureArray = Feature;
659  FeatureArray++;
660  NumBadFeatures++;
661  }
662  }
663 
664 #ifndef GRAPHICS_DISABLED
665  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug))
666  DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
667  NumFeatures, Debug);
668 #endif
669 
670  if (MatchDebuggingOn(Debug))
671  cprintf("Match Complete --------------------------------------------\n");
672 
673  delete tables;
674  return NumBadFeatures;
675 }
676 
677 
678 void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) {
679  classify_debug_level_ = classify_debug_level;
680 
681  /* Initialize table for evidence to similarity lookup */
682  for (int i = 0; i < SE_TABLE_SIZE; i++) {
683  uinT32 IntSimilarity = i << (27 - SE_TABLE_BITS);
684  double Similarity = ((double) IntSimilarity) / 65536.0 / 65536.0;
685  double evidence = Similarity / kSimilarityCenter;
686  evidence = 255.0 / (evidence * evidence + 1.0);
687 
688  if (kSEExponentialMultiplier > 0.0) {
689  double scale = 1.0 - exp(-kSEExponentialMultiplier) *
690  exp(kSEExponentialMultiplier * ((double) i / SE_TABLE_SIZE));
691  evidence *= ClipToRange(scale, 0.0, 1.0);
692  }
693 
694  similarity_evidence_table_[i] = (uinT8) (evidence + 0.5);
695  }
696 
697  /* Initialize evidence computation variables */
698  evidence_table_mask_ =
699  ((1 << kEvidenceTableBits) - 1) << (9 - kEvidenceTableBits);
700  mult_trunc_shift_bits_ = (14 - kIntEvidenceTruncBits);
701  table_trunc_shift_bits_ = (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1));
702  evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);
703 }
704 
705 /*----------------------------------------------------------------------------
706  Private Code
707 ----------------------------------------------------------------------------*/
708 void ScratchEvidence::Clear(const INT_CLASS class_template) {
709  memset(sum_feature_evidence_, 0,
710  class_template->NumConfigs * sizeof(sum_feature_evidence_[0]));
711  memset(proto_evidence_, 0,
712  class_template->NumProtos * sizeof(proto_evidence_[0]));
713 }
714 
716  memset(feature_evidence_, 0,
717  class_template->NumConfigs * sizeof(feature_evidence_[0]));
718 }
719 
726 void IMDebugConfiguration(int FeatureNum,
727  uinT16 ActualProtoNum,
728  uinT8 Evidence,
729  BIT_VECTOR ConfigMask,
730  uinT32 ConfigWord) {
731  cprintf ("F = %3d, P = %3d, E = %3d, Configs = ",
732  FeatureNum, (int) ActualProtoNum, (int) Evidence);
733  while (ConfigWord) {
734  if (ConfigWord & 1)
735  cprintf ("1");
736  else
737  cprintf ("0");
738  ConfigWord >>= 1;
739  }
740  cprintf ("\n");
741 }
742 
749 void IMDebugConfigurationSum(int FeatureNum,
750  uinT8 *FeatureEvidence,
751  inT32 ConfigCount) {
752  cprintf("F=%3d, C=", FeatureNum);
753  for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) {
754  cprintf("%4d", FeatureEvidence[ConfigNum]);
755  }
756  cprintf("\n");
757 }
758 
770 int IntegerMatcher::UpdateTablesForFeature(
771  INT_CLASS ClassTemplate,
772  BIT_VECTOR ProtoMask,
773  BIT_VECTOR ConfigMask,
774  int FeatureNum,
775  const INT_FEATURE_STRUCT* Feature,
776  ScratchEvidence *tables,
777  int Debug) {
778  uinT32 ConfigWord;
779  uinT32 ProtoWord;
780  uinT32 ProtoNum;
781  uinT32 ActualProtoNum;
782  uinT8 proto_byte;
783  inT32 proto_word_offset;
784  inT32 proto_offset;
785  uinT8 config_byte;
786  inT32 config_offset;
787  PROTO_SET ProtoSet;
788  uinT32 *ProtoPrunerPtr;
789  INT_PROTO Proto;
790  int ProtoSetIndex;
791  uinT8 Evidence;
792  uinT32 XFeatureAddress;
793  uinT32 YFeatureAddress;
794  uinT32 ThetaFeatureAddress;
795  uinT8* UINT8Pointer;
796  int ProtoIndex;
797  uinT8 Temp;
798  int* IntPointer;
799  int ConfigNum;
800  inT32 M3;
801  inT32 A3;
802  uinT32 A4;
803 
804  tables->ClearFeatureEvidence(ClassTemplate);
805 
806  /* Precompute Feature Address offset for Proto Pruning */
807  XFeatureAddress = ((Feature->X >> 2) << 1);
808  YFeatureAddress = (NUM_PP_BUCKETS << 1) + ((Feature->Y >> 2) << 1);
809  ThetaFeatureAddress = (NUM_PP_BUCKETS << 2) + ((Feature->Theta >> 2) << 1);
810 
811  for (ProtoSetIndex = 0, ActualProtoNum = 0;
812  ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) {
813  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
814  ProtoPrunerPtr = (uinT32 *) ((*ProtoSet).ProtoPruner);
815  for (ProtoNum = 0; ProtoNum < PROTOS_PER_PROTO_SET;
816  ProtoNum += (PROTOS_PER_PROTO_SET >> 1), ActualProtoNum +=
817  (PROTOS_PER_PROTO_SET >> 1), ProtoMask++, ProtoPrunerPtr++) {
818  /* Prune Protos of current Proto Set */
819  ProtoWord = *(ProtoPrunerPtr + XFeatureAddress);
820  ProtoWord &= *(ProtoPrunerPtr + YFeatureAddress);
821  ProtoWord &= *(ProtoPrunerPtr + ThetaFeatureAddress);
822  ProtoWord &= *ProtoMask;
823 
824  if (ProtoWord != 0) {
825  proto_byte = ProtoWord & 0xff;
826  ProtoWord >>= 8;
827  proto_word_offset = 0;
828  while (ProtoWord != 0 || proto_byte != 0) {
829  while (proto_byte == 0) {
830  proto_byte = ProtoWord & 0xff;
831  ProtoWord >>= 8;
832  proto_word_offset += 8;
833  }
834  proto_offset = offset_table[proto_byte] + proto_word_offset;
835  proto_byte = next_table[proto_byte];
836  Proto = &(ProtoSet->Protos[ProtoNum + proto_offset]);
837  ConfigWord = Proto->Configs[0];
838  A3 = (((Proto->A * (Feature->X - 128)) << 1)
839  - (Proto->B * (Feature->Y - 128)) + (Proto->C << 9));
840  M3 =
841  (((inT8) (Feature->Theta - Proto->Angle)) * kIntThetaFudge) << 1;
842 
843  if (A3 < 0)
844  A3 = ~A3;
845  if (M3 < 0)
846  M3 = ~M3;
847  A3 >>= mult_trunc_shift_bits_;
848  M3 >>= mult_trunc_shift_bits_;
849  if (static_cast<uint32_t>(A3) > evidence_mult_mask_)
850  A3 = evidence_mult_mask_;
851  if (static_cast<uint32_t>(M3) > evidence_mult_mask_)
852  M3 = evidence_mult_mask_;
853 
854  A4 = (A3 * A3) + (M3 * M3);
855  A4 >>= table_trunc_shift_bits_;
856  if (A4 > evidence_table_mask_)
857  Evidence = 0;
858  else
859  Evidence = similarity_evidence_table_[A4];
860 
861  if (PrintFeatureMatchesOn (Debug))
862  IMDebugConfiguration (FeatureNum,
863  ActualProtoNum + proto_offset,
864  Evidence, ConfigMask, ConfigWord);
865 
866  ConfigWord &= *ConfigMask;
867 
868  UINT8Pointer = tables->feature_evidence_ - 8;
869  config_byte = 0;
870  while (ConfigWord != 0 || config_byte != 0) {
871  while (config_byte == 0) {
872  config_byte = ConfigWord & 0xff;
873  ConfigWord >>= 8;
874  UINT8Pointer += 8;
875  }
876  config_offset = offset_table[config_byte];
877  config_byte = next_table[config_byte];
878  if (Evidence > UINT8Pointer[config_offset])
879  UINT8Pointer[config_offset] = Evidence;
880  }
881 
882  UINT8Pointer =
883  &(tables->proto_evidence_[ActualProtoNum + proto_offset][0]);
884  for (ProtoIndex =
885  ClassTemplate->ProtoLengths[ActualProtoNum + proto_offset];
886  ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) {
887  if (Evidence > *UINT8Pointer) {
888  Temp = *UINT8Pointer;
889  *UINT8Pointer = Evidence;
890  Evidence = Temp;
891  }
892  else if (Evidence == 0)
893  break;
894  }
895  }
896  }
897  }
898  }
899 
900  if (PrintFeatureMatchesOn(Debug)) {
901  IMDebugConfigurationSum(FeatureNum, tables->feature_evidence_,
902  ClassTemplate->NumConfigs);
903  }
904 
905  IntPointer = tables->sum_feature_evidence_;
906  UINT8Pointer = tables->feature_evidence_;
907  int SumOverConfigs = 0;
908  for (ConfigNum = ClassTemplate->NumConfigs; ConfigNum > 0; ConfigNum--) {
909  int evidence = *UINT8Pointer++;
910  SumOverConfigs += evidence;
911  *IntPointer++ += evidence;
912  }
913  return SumOverConfigs;
914 }
915 
922 #ifndef GRAPHICS_DISABLED
923 void IntegerMatcher::DebugFeatureProtoError(
924  INT_CLASS ClassTemplate,
925  BIT_VECTOR ProtoMask,
926  BIT_VECTOR ConfigMask,
927  const ScratchEvidence& tables,
928  inT16 NumFeatures,
929  int Debug) {
930  FLOAT32 ProtoConfigs[MAX_NUM_CONFIGS];
931  int ConfigNum;
932  uinT32 ConfigWord;
933  int ProtoSetIndex;
934  uinT16 ProtoNum;
935  uinT8 ProtoWordNum;
936  PROTO_SET ProtoSet;
937  uinT16 ActualProtoNum;
938 
939  if (PrintMatchSummaryOn(Debug)) {
940  cprintf("Configuration Mask:\n");
941  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
942  cprintf("%1d", (((*ConfigMask) >> ConfigNum) & 1));
943  cprintf("\n");
944 
945  cprintf("Feature Error for Configurations:\n");
946  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) {
947  cprintf(
948  " %5.1f",
949  100.0 * (1.0 -
950  (FLOAT32) tables.sum_feature_evidence_[ConfigNum]
951  / NumFeatures / 256.0));
952  }
953  cprintf("\n\n\n");
954  }
955 
956  if (PrintMatchSummaryOn (Debug)) {
957  cprintf ("Proto Mask:\n");
958  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
959  ProtoSetIndex++) {
960  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
961  for (ProtoWordNum = 0; ProtoWordNum < 2;
962  ProtoWordNum++, ProtoMask++) {
963  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
964  for (ProtoNum = 0;
965  ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1))
966  && (ActualProtoNum < ClassTemplate->NumProtos));
967  ProtoNum++, ActualProtoNum++)
968  cprintf ("%1d", (((*ProtoMask) >> ProtoNum) & 1));
969  cprintf ("\n");
970  }
971  }
972  cprintf ("\n");
973  }
974 
975  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
976  ProtoConfigs[i] = 0;
977 
978  if (PrintProtoMatchesOn (Debug)) {
979  cprintf ("Proto Evidence:\n");
980  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
981  ProtoSetIndex++) {
982  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
983  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
984  for (ProtoNum = 0;
985  ((ProtoNum < PROTOS_PER_PROTO_SET) &&
986  (ActualProtoNum < ClassTemplate->NumProtos));
987  ProtoNum++, ActualProtoNum++) {
988  cprintf ("P %3d =", ActualProtoNum);
989  int temp = 0;
990  for (int j = 0; j < ClassTemplate->ProtoLengths[ActualProtoNum]; j++) {
991  uinT8 data = tables.proto_evidence_[ActualProtoNum][j];
992  cprintf(" %d", data);
993  temp += data;
994  }
995 
996  cprintf(" = %6.4f%%\n",
997  temp / 256.0 / ClassTemplate->ProtoLengths[ActualProtoNum]);
998 
999  ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
1000  ConfigNum = 0;
1001  while (ConfigWord) {
1002  cprintf ("%5d", ConfigWord & 1 ? temp : 0);
1003  if (ConfigWord & 1)
1004  ProtoConfigs[ConfigNum] += temp;
1005  ConfigNum++;
1006  ConfigWord >>= 1;
1007  }
1008  cprintf("\n");
1009  }
1010  }
1011  }
1012 
1013  if (PrintMatchSummaryOn (Debug)) {
1014  cprintf ("Proto Error for Configurations:\n");
1015  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
1016  cprintf (" %5.1f",
1017  100.0 * (1.0 -
1018  ProtoConfigs[ConfigNum] /
1019  ClassTemplate->ConfigLengths[ConfigNum] / 256.0));
1020  cprintf ("\n\n");
1021  }
1022 
1023  if (PrintProtoMatchesOn (Debug)) {
1024  cprintf ("Proto Sum for Configurations:\n");
1025  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
1026  cprintf (" %4.1f", ProtoConfigs[ConfigNum] / 256.0);
1027  cprintf ("\n\n");
1028 
1029  cprintf ("Proto Length for Configurations:\n");
1030  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
1031  cprintf (" %4.1f",
1032  (float) ClassTemplate->ConfigLengths[ConfigNum]);
1033  cprintf ("\n\n");
1034  }
1035 
1036 }
1037 
// Displays every prototype of ClassTemplate that belongs to at least one
// configuration selected by ConfigMask, passing DisplayIntProto the proto's
// average evidence scaled by 1/255.
//
// ClassTemplate: class whose proto sets are walked.
// ProtoMask: not referenced anywhere in the visible body.
// ConfigMask: only its first word (*ConfigMask) is tested, against Configs[0].
// tables: source of the per-proto evidence values (proto_evidence_).
// SeparateDebugWindows: guards the (currently empty) block below.
1038 void IntegerMatcher::DisplayProtoDebugInfo(
1039  INT_CLASS ClassTemplate,
1040  BIT_VECTOR ProtoMask,
1041  BIT_VECTOR ConfigMask,
1042  const ScratchEvidence& tables,
1043  bool SeparateDebugWindows) {
1044  uinT16 ProtoNum;
1045  uinT16 ActualProtoNum;
1046  PROTO_SET ProtoSet;
1047  int ProtoSetIndex;
1048 
// NOTE(review): the body of this `if` is empty in this copy and the embedded
// listing numbers skip around it, so statements appear to have been lost when
// the listing was extracted - confirm against the real source file.
1050  if (SeparateDebugWindows) {
1053  }
1054 
1055 
// Outer loop: one pass per proto set. ActualProtoNum is the class-wide proto
// index; ProtoNum is the index inside the current set.
1056  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
1057  ProtoSetIndex++) {
1058  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
1059  ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET;
1060  for (ProtoNum = 0;
1061  ((ProtoNum < PROTOS_PER_PROTO_SET) &&
1062  (ActualProtoNum < ClassTemplate->NumProtos));
1063  ProtoNum++, ActualProtoNum++) {
1064  /* Compute Average for Actual Proto */
1065  int temp = 0;
1066  for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
1067  temp += tables.proto_evidence_[ActualProtoNum][i];
1068 
// Integer division: the average is truncated.
// NOTE(review): a ProtoLengths entry of 0 would divide by zero here - confirm
// that zero-length protos cannot occur.
1069  temp /= ClassTemplate->ProtoLengths[ActualProtoNum];
1070 
// Only protos sharing at least one set bit with the config mask are shown.
1071  if ((ProtoSet->Protos[ProtoNum]).Configs[0] & (*ConfigMask)) {
1072  DisplayIntProto(ClassTemplate, ActualProtoNum, temp / 255.0);
1073  }
1074  }
1075  }
1076 }
1077 
1078 
// Re-runs the evidence update for each of the NumFeatures entries in Features
// on a freshly cleared ScratchEvidence and displays each feature with a value
// derived from its best per-configuration evidence.
//
// ClassTemplate, ProtoMask, ConfigMask: forwarded to UpdateTablesForFeature.
// AdaptFeatureThreshold: cut-off used only when ClipMatchEvidenceOn(Debug).
// Debug: flag word tested with ClipMatchEvidenceOn.
// SeparateDebugWindows: guards the (currently empty) block below.
1079 void IntegerMatcher::DisplayFeatureDebugInfo(
1080  INT_CLASS ClassTemplate,
1081  BIT_VECTOR ProtoMask,
1082  BIT_VECTOR ConfigMask,
1083  inT16 NumFeatures,
1084  const INT_FEATURE_STRUCT* Features,
1085  int AdaptFeatureThreshold,
1086  int Debug,
1087  bool SeparateDebugWindows) {
// Heap-allocated scratch tables; released by the `delete` at the end.
1088  ScratchEvidence *tables = new ScratchEvidence();
1089 
1090  tables->Clear(ClassTemplate);
1091 
// NOTE(review): the body of this `if` is empty in this copy and the embedded
// listing numbers skip around it, so statements appear to have been lost when
// the listing was extracted - confirm against the real source file.
1093  if (SeparateDebugWindows) {
1096  }
1097 
1098  for (int Feature = 0; Feature < NumFeatures; Feature++) {
1099  UpdateTablesForFeature(
1100  ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
1101  tables, 0);
1102 
1103  /* Find Best Evidence for Current Feature */
// `best` starts at 0, so it stays 0 when no configuration has evidence.
1104  int best = 0;
1105  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
1106  if (tables->feature_evidence_[i] > best)
1107  best = tables->feature_evidence_[i];
1108 
1109  /* Update display for current feature */
// Clipped mode shows a binary 0.0 / 1.0 around the threshold; otherwise the
// best evidence is scaled by 1/255.
1110  if (ClipMatchEvidenceOn(Debug)) {
1111  if (best < AdaptFeatureThreshold)
1112  DisplayIntFeature(&Features[Feature], 0.0);
1113  else
1114  DisplayIntFeature(&Features[Feature], 1.0);
1115  } else {
1116  DisplayIntFeature(&Features[Feature], best / 255.0);
1117  }
1118  }
1119 
1120  delete tables;
1121 }
1122 #endif
1123 
1128  INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures) {
// NOTE(review): the first line of this signature (return type and qualified
// name) is missing from this copy of the listing; presumably this is
// ScratchEvidence::UpdateSumOfProtoEvidences - confirm against the real file.
//
// For every proto of ClassTemplate, totals that proto's evidence entries and
// adds the total to sum_feature_evidence_[k] for each configuration k whose
// bit is set both in the proto's Configs[0] and in *ConfigMask.
// NumFeatures is not used in the body.
1129 
1130  int *IntPointer;
1131  uinT32 ConfigWord;
1132  int ProtoSetIndex;
1133  uinT16 ProtoNum;
1134  PROTO_SET ProtoSet;
1135  int NumProtos;
1136  uinT16 ActualProtoNum;
1137 
1138  NumProtos = ClassTemplate->NumProtos;
1139 
// ActualProtoNum is the class-wide proto index; ProtoNum indexes within the
// current proto set.
1140  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
1141  ProtoSetIndex++) {
1142  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
1143  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
1144  for (ProtoNum = 0;
1145  ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < NumProtos));
1146  ProtoNum++, ActualProtoNum++) {
// Total evidence collected for this proto.
1147  int temp = 0;
1148  for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
1149  temp += proto_evidence_[ActualProtoNum] [i];
1150 
// Walk the masked config word bit by bit, least significant bit first;
// IntPointer advances in step so it always addresses the sum for the bit
// currently in position 0. The loop stops once no set bits remain.
1151  ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
1152  ConfigWord &= *ConfigMask;
1153  IntPointer = sum_feature_evidence_;
1154  while (ConfigWord) {
1155  if (ConfigWord & 1)
1156  *IntPointer += temp;
1157  IntPointer++;
1158  ConfigWord >>= 1;
1159  }
1160  }
1161  }
1162 }
1163 
1169  INT_CLASS ClassTemplate, inT16 NumFeatures, inT32 used_features) {
// NOTE(review): the first line of this signature (return type and qualified
// name) is missing from this copy of the listing; presumably this is
// ScratchEvidence::NormalizeSums - confirm against the real file.
//
// Rescales each configuration's accumulated sum in place: the sum is
// multiplied by 256 (<< 8) and integer-divided by
// NumFeatures + ConfigLengths[i]. used_features is not used in the body.
// NOTE(review): a configuration with ConfigLengths[i] == 0 combined with
// NumFeatures == 0 would divide by zero - confirm callers rule this out.
1170 
1171  for (int i = 0; i < ClassTemplate->NumConfigs; i++) {
1172  sum_feature_evidence_[i] = (sum_feature_evidence_[i] << 8) /
1173  (NumFeatures + ClassTemplate->ConfigLengths[i]);
1174  }
1175 }
1176 
1184 int IntegerMatcher::FindBestMatch(
1185  INT_CLASS class_template,
1186  const ScratchEvidence &tables,
1187  UnicharRating* result) {
1188  int best_match = 0;
1189  result->config = 0;
1190  result->fonts.truncate(0);
1191  result->fonts.reserve(class_template->NumConfigs);
1192 
1193  /* Find best match */
1194  for (int c = 0; c < class_template->NumConfigs; ++c) {
1195  int rating = tables.sum_feature_evidence_[c];
1196  if (*classify_debug_level_ > 2)
1197  tprintf("Config %d, rating=%d\n", c, rating);
1198  if (rating > best_match) {
1199  result->config = c;
1200  best_match = rating;
1201  }
1202  result->fonts.push_back(ScoredFont(c, rating));
1203  }
1204 
1205  // Compute confidence on a Probability scale.
1206  result->rating = best_match / 65536.0f;
1207 
1208  return best_match;
1209 }
1210 
1215 float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length,
1216  int normalization_factor,
1217  int matcher_multiplier) {
1218  return (rating * blob_length +
1219  matcher_multiplier * normalization_factor / 256.0) /
1220  (blob_length + matcher_multiplier);
1221 }
1222 
/**
 * Sorts the key array into ascending order with an in-place heap sort and
 * applies the same permutation to a companion array.
 *
 * Both arrays are addressed 1-based: elements [1..n] are sorted and element 0
 * is never touched. Equal keys may end up in any relative order.
 *
 * @param n number of elements to sort; values below 2 leave the arrays as is
 * @param ra keys, sorted ascending on return
 * @param rb values carried along with their keys
 */
void HeapSort(int n, int ra[], int rb[]) {
  // Nothing to sort. Without this guard the loop below never reaches its
  // exit condition and runs off the front of both arrays.
  if (n < 2)
    return;

  int l = (n >> 1) + 1;  // Heap-building cursor; counts down to 1.
  int ir = n;            // Last index still belonging to the heap.

  for (;;) {
    int rra;
    int rrb;
    if (l > 1) {
      // Heap-building phase: pick the next parent to sift down.
      --l;
      rra = ra[l];
      rrb = rb[l];
    } else {
      // Extraction phase: move the current maximum to the end of the heap
      // and sift the displaced tail element down from the root.
      rra = ra[ir];
      rrb = rb[ir];
      ra[ir] = ra[1];
      rb[ir] = rb[1];
      if (--ir == 1) {
        // Only one element remains; it is the smallest.
        ra[1] = rra;
        rb[1] = rrb;
        return;
      }
    }

    // Sift (rra, rrb) down from position l to its place in the heap.
    int i = l;
    int j = l << 1;
    while (j <= ir) {
      if (j < ir && ra[j] < ra[j + 1])
        ++j;  // Descend towards the larger child.
      if (!(rra < ra[j]))
        break;  // Heap property holds; the hole at i is the final slot.
      ra[i] = ra[j];
      rb[i] = rb[j];
      i = j;
      j <<= 1;
    }
    ra[i] = rra;
    rb[i] = rrb;
  }
}
#define DisplayProtoMatchesOn(D)
Definition: intproto.h:200
#define ClipMatchEvidenceOn(D)
Definition: intproto.h:203
#define NUM_BITS_PER_CLASS
Definition: intproto.h:54
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
int SetupResults(GenericVector< CP_RESULT_STRUCT > *results) const
Definition: intmatcher.cpp:360
void SummarizeResult(const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const uinT16 *expected_num_features, int norm_multiplier, const uinT8 *normalization_factors) const
Definition: intmatcher.cpp:337
void InitFeatureDisplayWindowIfReqd()
Definition: intproto.cpp:1849
#define NUM_CP_BUCKETS
Definition: intproto.h:52
uint32_t uinT32
Definition: host.h:39
#define PrintMatchSummaryOn(D)
Definition: intproto.h:198
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
uinT8 feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:70
void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, FLOAT32 Evidence)
Definition: intproto.cpp:644
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
void IMDebugConfigurationSum(int FeatureNum, uinT8 *FeatureEvidence, inT32 ConfigCount)
Definition: intmatcher.cpp:749
#define NUM_PP_BUCKETS
Definition: intproto.h:51
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
Definition: intproto.h:125
void ComputeScores(const INT_TEMPLATES_STRUCT *int_templates, int num_features, const INT_FEATURE_STRUCT *features)
Definition: intmatcher.cpp:140
int16_t inT16
Definition: host.h:36
void IMDebugConfiguration(int FeatureNum, uinT16 ActualProtoNum, uinT8 Evidence, BIT_VECTOR ConfigMask, uinT32 ConfigWord)
Definition: intmatcher.cpp:726
void Clear(const INT_CLASS class_template)
Definition: intmatcher.cpp:708
uinT16 NumProtos
Definition: intproto.h:108
PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]
Definition: intproto.h:111
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:122
GenericVector< ScoredFont > fonts
Definition: shapetable.h:88
#define PROTOS_PER_PROTO_SET
Definition: intproto.h:48
uinT8 proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]
Definition: intmatcher.h:72
void AdjustForExpectedNumFeatures(const uinT16 *expected_num_features, int cutoff_strength)
Definition: intmatcher.cpp:211
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:155
ClassPruner(int max_classes)
Definition: intmatcher.cpp:109
#define MatchDebuggingOn(D)
Definition: intproto.h:197
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
Definition: intmatcher.cpp:412
uinT8 NumProtoSets
Definition: intproto.h:109
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:877
uinT32 p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]
Definition: intproto.h:77
#define tprintf(...)
Definition: tprintf.h:31
#define SE_TABLE_BITS
Definition: intmatcher.h:66
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:733
STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
uint8_t uinT8
Definition: host.h:35
#define INTMATCHER_OFFSET_TABLE_SIZE
Definition: intmatcher.cpp:66
#define BITS_PER_WERD
Definition: intproto.h:44
#define WERDS_PER_CP_VECTOR
Definition: intproto.h:61
int8_t inT8
Definition: host.h:34
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:475
bool disable_character_fragments
void ClearFeatureEvidence(const INT_CLASS class_template)
Definition: intmatcher.cpp:715
int sum_feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:71
const char * string() const
Definition: strngs.cpp:198
void DisplayIntFeature(const INT_FEATURE_STRUCT *Feature, FLOAT32 Evidence)
Definition: intproto.cpp:623
void UpdateSumOfProtoEvidences(INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures)
uinT16 ConfigLengths[MAX_NUM_CONFIGS]
Definition: intproto.h:113
uinT8 * ProtoLengths
Definition: intproto.h:112
void InitIntMatchWindowIfReqd()
Definition: intproto.cpp:1817
INT_PROTO_STRUCT Protos[PROTOS_PER_PROTO_SET]
Definition: intproto.h:97
uinT32 Configs[WERDS_PER_CONFIG_VEC]
Definition: intproto.h:86
Definition: strngs.h:45
int32_t inT32
Definition: host.h:38
#define PrintProtoMatchesOn(D)
Definition: intproto.h:202
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
void NormalizeForXheight(int norm_multiplier, const uinT8 *normalization_factors)
Definition: intmatcher.cpp:246
#define offset_table_entries
Definition: intmatcher.cpp:53
void HeapSort(int n, register int ra[], register int rb[])
void DebugMatch(const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const INT_FEATURE_STRUCT *features) const
Definition: intmatcher.cpp:300
#define next_table_entries
Definition: intmatcher.cpp:68
void DisableDisabledClasses(const UNICHARSET &unicharset)
Definition: intmatcher.cpp:224
uinT8 FEATURE_ID
Definition: matchdefs.h:47
#define SE_TABLE_SIZE
Definition: intmatcher.h:67
#define DisplayFeatureMatchesOn(D)
Definition: intproto.h:199
float FLOAT32
Definition: host.h:42
#define PrintFeatureMatchesOn(D)
Definition: intproto.h:201
void DisableFragments(const UNICHARSET &unicharset)
Definition: intmatcher.cpp:232
uinT8 NumConfigs
Definition: intproto.h:110
static const float kSimilarityCenter
Definition: intmatcher.h:94
inT16 PROTO_ID
Definition: matchdefs.h:41
void InitProtoDisplayWindowIfReqd()
Definition: intproto.cpp:1838
uint16_t uinT16
Definition: host.h:37
void PruneAndSort(int pruning_factor, int keep_this, bool max_of_non_fragments, const UNICHARSET &unicharset)
Definition: intmatcher.cpp:264
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:556
void init_to_size(int size, T t)
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:626
int RoundUp(int n, int block_size)
Definition: helpers.h:116
void Init(tesseract::IntParam *classify_debug_level)
Definition: intmatcher.cpp:678
void NormalizeSums(INT_CLASS ClassTemplate, inT16 NumFeatures, inT32 used_features)
static const float kSEExponentialMultiplier
Definition: intmatcher.h:92
#define CLASS_PRUNER_CLASS_MASK
Definition: intproto.h:55