All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
intmatcher.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: intmatcher.cpp
3  ** Purpose: Generic high level classification routines.
4  ** Author: Robert Moss
5  ** History: Wed Feb 13 17:35:28 MST 1991, RWM, Created.
6  ** Mon Mar 11 16:33:02 MST 1991, RWM, Modified to add
7  ** support for adaptive matching.
8  ** (c) Copyright Hewlett-Packard Company, 1988.
9  ** Licensed under the Apache License, Version 2.0 (the "License");
10  ** you may not use this file except in compliance with the License.
11  ** You may obtain a copy of the License at
12  ** http://www.apache.org/licenses/LICENSE-2.0
13  ** Unless required by applicable law or agreed to in writing, software
14  ** distributed under the License is distributed on an "AS IS" BASIS,
15  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  ** See the License for the specific language governing permissions and
17  ** limitations under the License.
18  ******************************************************************************/
19 
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 /*----------------------------------------------------------------------------
26  Include Files and Type Defines
27 ----------------------------------------------------------------------------*/
28 #include "intmatcher.h"
29 
30 #include "fontinfo.h"
31 #include "intproto.h"
32 #include "callcpp.h"
33 #include "scrollview.h"
34 #include "float2int.h"
35 #include "globals.h"
36 #include "helpers.h"
37 #include "classify.h"
38 #include "shapetable.h"
39 #include <math.h>
40 
43 
44 /*----------------------------------------------------------------------------
45  Global Data Definitions and Declarations
46 ----------------------------------------------------------------------------*/
47 // Parameters of the sigmoid used to convert similarity to evidence in the
48 // similarity_evidence_table_ that is used to convert distance metric to an
49 // 8 bit evidence value in the secondary matcher. (See IntMatcher::Init).
// Scale of the similarity-to-evidence curve built in IntegerMatcher::Init:
// the table entry is 255 / ((similarity / kSimilarityCenter)^2 + 1), so a
// similarity equal to kSimilarityCenter yields half of the maximum evidence
// (before the optional exponential scaling applied in Init).
51 const float IntegerMatcher::kSimilarityCenter = 0.0075;
52 
53 #define offset_table_entries \
54  255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \
55  0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \
56  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, \
57  0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, \
58  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \
59  0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \
60  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, \
61  0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, \
62  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \
63  0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \
64  0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
65 
66 #define INTMATCHER_OFFSET_TABLE_SIZE 256
67 
68 #define next_table_entries \
69  0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e, \
70  0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, 0x10, 0x18, 0x18, 0x1a, \
71  0x18, 0x1c, 0x1c, 0x1e, 0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26, \
72  0x20, 0x28, 0x28, 0x2a, 0x28, 0x2c, 0x2c, 0x2e, 0x20, 0x30, 0x30, 0x32, \
73  0x30, 0x34, 0x34, 0x36, 0x30, 0x38, 0x38, 0x3a, 0x38, 0x3c, 0x3c, 0x3e, \
74  0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, 0x40, 0x48, 0x48, 0x4a, \
75  0x48, 0x4c, 0x4c, 0x4e, 0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56, \
76  0x50, 0x58, 0x58, 0x5a, 0x58, 0x5c, 0x5c, 0x5e, 0x40, 0x60, 0x60, 0x62, \
77  0x60, 0x64, 0x64, 0x66, 0x60, 0x68, 0x68, 0x6a, 0x68, 0x6c, 0x6c, 0x6e, \
78  0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, 0x70, 0x78, 0x78, 0x7a, \
79  0x78, 0x7c, 0x7c, 0x7e, 0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86, \
80  0x80, 0x88, 0x88, 0x8a, 0x88, 0x8c, 0x8c, 0x8e, 0x80, 0x90, 0x90, 0x92, \
81  0x90, 0x94, 0x94, 0x96, 0x90, 0x98, 0x98, 0x9a, 0x98, 0x9c, 0x9c, 0x9e, \
82  0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, 0xa0, 0xa8, 0xa8, 0xaa, \
83  0xa8, 0xac, 0xac, 0xae, 0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6, \
84  0xb0, 0xb8, 0xb8, 0xba, 0xb8, 0xbc, 0xbc, 0xbe, 0x80, 0xc0, 0xc0, 0xc2, \
85  0xc0, 0xc4, 0xc4, 0xc6, 0xc0, 0xc8, 0xc8, 0xca, 0xc8, 0xcc, 0xcc, 0xce, \
86  0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, 0xd0, 0xd8, 0xd8, 0xda, \
87  0xd8, 0xdc, 0xdc, 0xde, 0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6, \
88  0xe0, 0xe8, 0xe8, 0xea, 0xe8, 0xec, 0xec, 0xee, 0xe0, 0xf0, 0xf0, 0xf2, \
89  0xf0, 0xf4, 0xf4, 0xf6, 0xf0, 0xf8, 0xf8, 0xfa, 0xf8, 0xfc, 0xfc, 0xfe
90 
// Combined bit-scanning lookup table, indexed by a byte value:
//  - entries [0, 256) come from offset_table_entries and give the index of
//    the lowest set bit of the byte (255 for a zero byte);
//  - entries [256, 512) come from next_table_entries and give the byte with
//    its lowest set bit cleared.
// Together they let a caller walk the set bits of a byte from low to high.
91 // See http://b/19318793 (#6) for a complete discussion. Merging arrays
92 // offset_table and next_table helps improve performance of PIE code.
93 static const uinT8 data_table[512] = {offset_table_entries, next_table_entries};
94 
// Views into data_table: offset_table covers its first
// INTMATCHER_OFFSET_TABLE_SIZE entries and next_table the entries that
// follow them.
95 static const uinT8* const offset_table = &data_table[0];
96 static const uinT8* const next_table =
97  &data_table[INTMATCHER_OFFSET_TABLE_SIZE];
98 
99 namespace tesseract {
100 
101 // Encapsulation of the intermediate data and computations made by the class
102 // pruner. The class pruner implements a simple linear classifier on binary
103 // features by heavily quantizing the feature space, and applying
104 // NUM_BITS_PER_CLASS (2)-bit weights to the features. Lack of resolution in
105 // weights is compensated by a non-constant bias that is dependent on the
106 // number of features present.
107 class ClassPruner {
108  public:
109  ClassPruner(int max_classes) {
110  // The unrolled loop in ComputeScores means that the array sizes need to
111  // be rounded up so that the array is big enough to accommodate the extra
112  // entries accessed by the unrolling. Each pruner word is of size
113  // BITS_PER_WERD and each entry is NUM_BITS_PER_CLASS, so there are
114  // BITS_PER_WERD / NUM_BITS_PER_CLASS entries.
115  // See ComputeScores.
116  max_classes_ = max_classes;
117  rounded_classes_ = RoundUp(
119  class_count_ = new int[rounded_classes_];
120  norm_count_ = new int[rounded_classes_];
121  sort_key_ = new int[rounded_classes_ + 1];
122  sort_index_ = new int[rounded_classes_ + 1];
123  for (int i = 0; i < rounded_classes_; i++) {
124  class_count_[i] = 0;
125  }
126  pruning_threshold_ = 0;
127  num_features_ = 0;
128  num_classes_ = 0;
129  }
130 
132  delete []class_count_;
133  delete []norm_count_;
134  delete []sort_key_;
135  delete []sort_index_;
136  }
137 
 // Accumulates into class_count_, for every class slot, the sum of the
 // class-pruner weights looked up for each of the given features.
 // int_templates supplies NumClassPruners and the ClassPruners tables.
 // num_features is the number of entries in features; it is also stored in
 // num_features_ for use by the later stages.
 // NOTE(review): class_count_ is only zeroed in the constructor, so this
 // presumably expects to run once per ClassPruner instance - confirm.
140  void ComputeScores(const INT_TEMPLATES_STRUCT* int_templates,
141  int num_features, const INT_FEATURE_STRUCT* features) {
142  num_features_ = num_features;
143  int num_pruners = int_templates->NumClassPruners;
144  for (int f = 0; f < num_features; ++f) {
145  const INT_FEATURE_STRUCT* feature = &features[f];
146  // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
147  int x = feature->X * NUM_CP_BUCKETS >> 8;
148  int y = feature->Y * NUM_CP_BUCKETS >> 8;
149  int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
 // class_id runs continuously across all pruner sets and words, so it
 // indexes class_count_ directly.
150  int class_id = 0;
151  // Each CLASS_PRUNER_STRUCT only covers CLASSES_PER_CP(32) classes, so
152  // we need a collection of them, indexed by pruner_set.
153  for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
154  // Look up quantized feature in a 3-D array, an array of weights for
155  // each class.
156  const uinT32* pruner_word_ptr =
157  int_templates->ClassPruners[pruner_set]->p[x][y][theta];
158  for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
159  uinT32 pruner_word = *pruner_word_ptr++;
160  // This inner loop is unrolled to speed up the ClassPruner.
161  // Currently gcc would not unroll it unless it is set to O3
162  // level of optimization or -funroll-loops is specified.
163  /*
164  uinT32 class_mask = (1 << NUM_BITS_PER_CLASS) - 1;
165  for (int bit = 0; bit < BITS_PER_WERD/NUM_BITS_PER_CLASS; bit++) {
166  class_count_[class_id++] += pruner_word & class_mask;
167  pruner_word >>= NUM_BITS_PER_CLASS;
168  }
169  */
 // Sixteen hand-unrolled steps: each one adds the low
 // NUM_BITS_PER_CLASS bits of the word to the next class slot and then
 // shifts the word down to expose the following class's weight.
170  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
171  pruner_word >>= NUM_BITS_PER_CLASS;
172  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
173  pruner_word >>= NUM_BITS_PER_CLASS;
174  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
175  pruner_word >>= NUM_BITS_PER_CLASS;
176  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
177  pruner_word >>= NUM_BITS_PER_CLASS;
178  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
179  pruner_word >>= NUM_BITS_PER_CLASS;
180  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
181  pruner_word >>= NUM_BITS_PER_CLASS;
182  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
183  pruner_word >>= NUM_BITS_PER_CLASS;
184  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
185  pruner_word >>= NUM_BITS_PER_CLASS;
186  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
187  pruner_word >>= NUM_BITS_PER_CLASS;
188  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
189  pruner_word >>= NUM_BITS_PER_CLASS;
190  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
191  pruner_word >>= NUM_BITS_PER_CLASS;
192  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
193  pruner_word >>= NUM_BITS_PER_CLASS;
194  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
195  pruner_word >>= NUM_BITS_PER_CLASS;
196  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
197  pruner_word >>= NUM_BITS_PER_CLASS;
 // No shift after the last step: the word is not used again.
198  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
199  pruner_word >>= NUM_BITS_PER_CLASS;
200  class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
201  }
202  }
203  }
204  }
205 
211  void AdjustForExpectedNumFeatures(const uinT16* expected_num_features,
212  int cutoff_strength) {
213  for (int class_id = 0; class_id < max_classes_; ++class_id) {
214  if (num_features_ < expected_num_features[class_id]) {
215  int deficit = expected_num_features[class_id] - num_features_;
216  class_count_[class_id] -= class_count_[class_id] * deficit /
217  (num_features_ * cutoff_strength + deficit);
218  }
219  }
220  }
221 
224  void DisableDisabledClasses(const UNICHARSET& unicharset) {
225  for (int class_id = 0; class_id < max_classes_; ++class_id) {
226  if (!unicharset.get_enabled(class_id))
227  class_count_[class_id] = 0; // This char is disabled!
228  }
229  }
230 
232  void DisableFragments(const UNICHARSET& unicharset) {
233  for (int class_id = 0; class_id < max_classes_; ++class_id) {
234  // Do not include character fragments in the class pruner
235  // results if disable_character_fragments is true.
236  if (unicharset.get_fragment(class_id)) {
237  class_count_[class_id] = 0;
238  }
239  }
240  }
241 
246  void NormalizeForXheight(int norm_multiplier,
247  const uinT8* normalization_factors) {
248  for (int class_id = 0; class_id < max_classes_; class_id++) {
249  norm_count_[class_id] = class_count_[class_id] -
250  ((norm_multiplier * normalization_factors[class_id]) >> 8);
251  }
252  }
253 
256  for (int class_id = 0; class_id < max_classes_; class_id++) {
257  norm_count_[class_id] = class_count_[class_id];
258  }
259  }
260 
264  void PruneAndSort(int pruning_factor, int keep_this,
265  bool max_of_non_fragments, const UNICHARSET& unicharset) {
266  int max_count = 0;
267  for (int c = 0; c < max_classes_; ++c) {
268  if (norm_count_[c] > max_count &&
269  // This additional check is added in order to ensure that
270  // the classifier will return at least one non-fragmented
271  // character match.
272  // TODO(daria): verify that this helps accuracy and does not
273  // hurt performance.
274  (!max_of_non_fragments || !unicharset.get_fragment(c))) {
275  max_count = norm_count_[c];
276  }
277  }
278  // Prune Classes.
279  pruning_threshold_ = (max_count * pruning_factor) >> 8;
280  // Select Classes.
281  if (pruning_threshold_ < 1)
282  pruning_threshold_ = 1;
283  num_classes_ = 0;
284  for (int class_id = 0; class_id < max_classes_; class_id++) {
285  if (norm_count_[class_id] >= pruning_threshold_ ||
286  class_id == keep_this) {
287  ++num_classes_;
288  sort_index_[num_classes_] = class_id;
289  sort_key_[num_classes_] = norm_count_[class_id];
290  }
291  }
292 
293  // Sort Classes using Heapsort Algorithm.
294  if (num_classes_ > 1)
295  HeapSort(num_classes_, sort_key_, sort_index_);
296  }
297 
299  void DebugMatch(const Classify& classify,
300  const INT_TEMPLATES_STRUCT* int_templates,
301  const INT_FEATURE_STRUCT* features) const {
302  int num_pruners = int_templates->NumClassPruners;
303  int max_num_classes = int_templates->NumClasses;
304  for (int f = 0; f < num_features_; ++f) {
305  const INT_FEATURE_STRUCT* feature = &features[f];
306  tprintf("F=%3d(%d,%d,%d),", f, feature->X, feature->Y, feature->Theta);
307  // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
308  int x = feature->X * NUM_CP_BUCKETS >> 8;
309  int y = feature->Y * NUM_CP_BUCKETS >> 8;
310  int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
311  int class_id = 0;
312  for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
313  // Look up quantized feature in a 3-D array, an array of weights for
314  // each class.
315  const uinT32* pruner_word_ptr =
316  int_templates->ClassPruners[pruner_set]->p[x][y][theta];
317  for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
318  uinT32 pruner_word = *pruner_word_ptr++;
319  for (int word_class = 0; word_class < 16 &&
320  class_id < max_num_classes; ++word_class, ++class_id) {
321  if (norm_count_[class_id] >= pruning_threshold_) {
322  tprintf(" %s=%d,",
323  classify.ClassIDToDebugStr(int_templates,
324  class_id, 0).string(),
325  pruner_word & CLASS_PRUNER_CLASS_MASK);
326  }
327  pruner_word >>= NUM_BITS_PER_CLASS;
328  }
329  }
330  tprintf("\n");
331  }
332  }
333  }
334 
336  void SummarizeResult(const Classify& classify,
337  const INT_TEMPLATES_STRUCT* int_templates,
338  const uinT16* expected_num_features,
339  int norm_multiplier,
340  const uinT8* normalization_factors) const {
341  tprintf("CP:%d classes, %d features:\n", num_classes_, num_features_);
342  for (int i = 0; i < num_classes_; ++i) {
343  int class_id = sort_index_[num_classes_ - i];
344  STRING class_string = classify.ClassIDToDebugStr(int_templates,
345  class_id, 0);
346  tprintf("%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n",
347  class_string.string(),
348  class_count_[class_id],
349  expected_num_features[class_id],
350  (norm_multiplier * normalization_factors[class_id]) >> 8,
351  sort_key_[num_classes_ - i],
352  100.0 - 100.0 * sort_key_[num_classes_ - i] /
353  (CLASS_PRUNER_CLASS_MASK * num_features_));
354  }
355  }
356 
360  CP_RESULT_STRUCT empty;
361  results->init_to_size(num_classes_, empty);
362  for (int c = 0; c < num_classes_; ++c) {
363  (*results)[c].Class = sort_index_[num_classes_ - c];
364  (*results)[c].Rating = 1.0 - sort_key_[num_classes_ - c] /
365  (static_cast<float>(CLASS_PRUNER_CLASS_MASK) * num_features_);
366  }
367  return num_classes_;
368  }
369 
370  private:
372  int *class_count_;
375  int *norm_count_;
377  int *sort_key_;
379  int *sort_index_;
381  int max_classes_;
383  int rounded_classes_;
385  int pruning_threshold_;
387  int num_features_;
389  int num_classes_;
390 };
391 
392 /*----------------------------------------------------------------------------
393  Public Code
394 ----------------------------------------------------------------------------*/
410  int num_features, int keep_this,
411  const INT_FEATURE_STRUCT* features,
412  const uinT8* normalization_factors,
413  const uinT16* expected_num_features,
415  ClassPruner pruner(int_templates->NumClasses);
416  // Compute initial match scores for all classes.
417  pruner.ComputeScores(int_templates, num_features, features);
418  // Adjust match scores for number of expected features.
419  pruner.AdjustForExpectedNumFeatures(expected_num_features,
421  // Apply disabled classes in unicharset - only works without a shape_table.
422  if (shape_table_ == NULL)
424  // If fragments are disabled, remove them, also only without a shape table.
427 
428  // If we have good x-heights, apply the given normalization factors.
429  if (normalization_factors != NULL) {
431  normalization_factors);
432  } else {
433  pruner.NoNormalization();
434  }
435  // Do the actual pruning and sort the short-list.
438 
439  if (classify_debug_level > 2) {
440  pruner.DebugMatch(*this, int_templates, features);
441  }
442  if (classify_debug_level > 1) {
443  pruner.SummarizeResult(*this, int_templates, expected_num_features,
445  normalization_factors);
446  }
447  // Convert to the expected output format.
448  return pruner.SetupResults(results);
449 }
450 
451 } // namespace tesseract
452 
472 void IntegerMatcher::Match(INT_CLASS ClassTemplate,
473  BIT_VECTOR ProtoMask,
474  BIT_VECTOR ConfigMask,
475  inT16 NumFeatures,
476  const INT_FEATURE_STRUCT* Features,
477  UnicharRating* Result,
478  int AdaptFeatureThreshold,
479  int Debug,
480  bool SeparateDebugWindows) {
481  ScratchEvidence *tables = new ScratchEvidence();
482  int Feature;
483  int BestMatch;
484 
485  if (MatchDebuggingOn (Debug))
486  cprintf ("Integer Matcher -------------------------------------------\n");
487 
488  tables->Clear(ClassTemplate);
489  Result->feature_misses = 0;
490 
491  for (Feature = 0; Feature < NumFeatures; Feature++) {
492  int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask,
493  Feature, &Features[Feature],
494  tables, Debug);
495  // Count features that were missed over all configs.
496  if (csum == 0)
497  ++Result->feature_misses;
498  }
499 
500 #ifndef GRAPHICS_DISABLED
501  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) {
502  DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
503  NumFeatures, Debug);
504  }
505 
506  if (DisplayProtoMatchesOn(Debug)) {
507  DisplayProtoDebugInfo(ClassTemplate, ProtoMask, ConfigMask,
508  *tables, SeparateDebugWindows);
509  }
510 
511  if (DisplayFeatureMatchesOn(Debug)) {
512  DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures,
513  Features, AdaptFeatureThreshold, Debug,
514  SeparateDebugWindows);
515  }
516 #endif
517 
518  tables->UpdateSumOfProtoEvidences(ClassTemplate, ConfigMask, NumFeatures);
519  tables->NormalizeSums(ClassTemplate, NumFeatures, NumFeatures);
520 
521  BestMatch = FindBestMatch(ClassTemplate, *tables, Result);
522 
523 #ifndef GRAPHICS_DISABLED
524  if (PrintMatchSummaryOn(Debug))
525  Result->Print();
526 
527  if (MatchDebuggingOn(Debug))
528  cprintf("Match Complete --------------------------------------------\n");
529 #endif
530 
531  delete tables;
532 }
533 
555  INT_CLASS ClassTemplate,
556  BIT_VECTOR ProtoMask,
557  BIT_VECTOR ConfigMask,
558  uinT16 BlobLength,
559  inT16 NumFeatures,
560  INT_FEATURE_ARRAY Features,
561  PROTO_ID *ProtoArray,
562  int AdaptProtoThreshold,
563  int Debug) {
564  ScratchEvidence *tables = new ScratchEvidence();
565  int NumGoodProtos = 0;
566 
567  /* DEBUG opening heading */
568  if (MatchDebuggingOn (Debug))
569  cprintf
570  ("Find Good Protos -------------------------------------------\n");
571 
572  tables->Clear(ClassTemplate);
573 
574  for (int Feature = 0; Feature < NumFeatures; Feature++)
575  UpdateTablesForFeature(
576  ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]),
577  tables, Debug);
578 
579 #ifndef GRAPHICS_DISABLED
580  if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug))
581  DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
582  NumFeatures, Debug);
583 #endif
584 
585  /* Average Proto Evidences & Find Good Protos */
586  for (int proto = 0; proto < ClassTemplate->NumProtos; proto++) {
587  /* Compute Average for Actual Proto */
588  int Temp = 0;
589  for (int i = 0; i < ClassTemplate->ProtoLengths[proto]; i++)
590  Temp += tables->proto_evidence_[proto][i];
591 
592  Temp /= ClassTemplate->ProtoLengths[proto];
593 
594  /* Find Good Protos */
595  if (Temp >= AdaptProtoThreshold) {
596  *ProtoArray = proto;
597  ProtoArray++;
598  NumGoodProtos++;
599  }
600  }
601 
602  if (MatchDebuggingOn (Debug))
603  cprintf ("Match Complete --------------------------------------------\n");
604  delete tables;
605 
606  return NumGoodProtos;
607 }
608 
609 
626  INT_CLASS ClassTemplate,
627  BIT_VECTOR ProtoMask,
628  BIT_VECTOR ConfigMask,
629  uinT16 BlobLength,
630  inT16 NumFeatures,
631  INT_FEATURE_ARRAY Features,
632  FEATURE_ID *FeatureArray,
633  int AdaptFeatureThreshold,
634  int Debug) {
635  ScratchEvidence *tables = new ScratchEvidence();
636  int NumBadFeatures = 0;
637 
638  /* DEBUG opening heading */
639  if (MatchDebuggingOn(Debug))
640  cprintf("Find Bad Features -------------------------------------------\n");
641 
642  tables->Clear(ClassTemplate);
643 
644  for (int Feature = 0; Feature < NumFeatures; Feature++) {
645  UpdateTablesForFeature(
646  ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
647  tables, Debug);
648 
649  /* Find Best Evidence for Current Feature */
650  int best = 0;
651  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
652  if (tables->feature_evidence_[i] > best)
653  best = tables->feature_evidence_[i];
654 
655  /* Find Bad Features */
656  if (best < AdaptFeatureThreshold) {
657  *FeatureArray = Feature;
658  FeatureArray++;
659  NumBadFeatures++;
660  }
661  }
662 
663 #ifndef GRAPHICS_DISABLED
664  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug))
665  DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
666  NumFeatures, Debug);
667 #endif
668 
669  if (MatchDebuggingOn(Debug))
670  cprintf("Match Complete --------------------------------------------\n");
671 
672  delete tables;
673  return NumBadFeatures;
674 }
675 
676 
677 void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) {
678  classify_debug_level_ = classify_debug_level;
679 
680  /* Initialize table for evidence to similarity lookup */
681  for (int i = 0; i < SE_TABLE_SIZE; i++) {
682  uinT32 IntSimilarity = i << (27 - SE_TABLE_BITS);
683  double Similarity = ((double) IntSimilarity) / 65536.0 / 65536.0;
684  double evidence = Similarity / kSimilarityCenter;
685  evidence = 255.0 / (evidence * evidence + 1.0);
686 
687  if (kSEExponentialMultiplier > 0.0) {
688  double scale = 1.0 - exp(-kSEExponentialMultiplier) *
689  exp(kSEExponentialMultiplier * ((double) i / SE_TABLE_SIZE));
690  evidence *= ClipToRange(scale, 0.0, 1.0);
691  }
692 
693  similarity_evidence_table_[i] = (uinT8) (evidence + 0.5);
694  }
695 
696  /* Initialize evidence computation variables */
697  evidence_table_mask_ =
698  ((1 << kEvidenceTableBits) - 1) << (9 - kEvidenceTableBits);
699  mult_trunc_shift_bits_ = (14 - kIntEvidenceTruncBits);
700  table_trunc_shift_bits_ = (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1));
701  evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);
702 }
703 
704 
705 /*----------------------------------------------------------------------------
706  Private Code
707 ----------------------------------------------------------------------------*/
708 void ScratchEvidence::Clear(const INT_CLASS class_template) {
709  memset(sum_feature_evidence_, 0,
710  class_template->NumConfigs * sizeof(sum_feature_evidence_[0]));
711  memset(proto_evidence_, 0,
712  class_template->NumProtos * sizeof(proto_evidence_[0]));
713 }
714 
716  memset(feature_evidence_, 0,
717  class_template->NumConfigs * sizeof(feature_evidence_[0]));
718 }
719 
720 
721 
728 void IMDebugConfiguration(int FeatureNum,
729  uinT16 ActualProtoNum,
730  uinT8 Evidence,
731  BIT_VECTOR ConfigMask,
732  uinT32 ConfigWord) {
733  cprintf ("F = %3d, P = %3d, E = %3d, Configs = ",
734  FeatureNum, (int) ActualProtoNum, (int) Evidence);
735  while (ConfigWord) {
736  if (ConfigWord & 1)
737  cprintf ("1");
738  else
739  cprintf ("0");
740  ConfigWord >>= 1;
741  }
742  cprintf ("\n");
743 }
744 
745 
752 void IMDebugConfigurationSum(int FeatureNum,
753  uinT8 *FeatureEvidence,
754  inT32 ConfigCount) {
755  cprintf("F=%3d, C=", FeatureNum);
756  for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) {
757  cprintf("%4d", FeatureEvidence[ConfigNum]);
758  }
759  cprintf("\n");
760 }
761 
773 int IntegerMatcher::UpdateTablesForFeature(
774  INT_CLASS ClassTemplate,
775  BIT_VECTOR ProtoMask,
776  BIT_VECTOR ConfigMask,
777  int FeatureNum,
778  const INT_FEATURE_STRUCT* Feature,
779  ScratchEvidence *tables,
780  int Debug) {
781  register uinT32 ConfigWord;
782  register uinT32 ProtoWord;
783  register uinT32 ProtoNum;
784  register uinT32 ActualProtoNum;
785  uinT8 proto_byte;
786  inT32 proto_word_offset;
787  inT32 proto_offset;
788  uinT8 config_byte;
789  inT32 config_offset;
790  PROTO_SET ProtoSet;
791  uinT32 *ProtoPrunerPtr;
792  INT_PROTO Proto;
793  int ProtoSetIndex;
794  uinT8 Evidence;
795  uinT32 XFeatureAddress;
796  uinT32 YFeatureAddress;
797  uinT32 ThetaFeatureAddress;
798  register uinT8 *UINT8Pointer;
799  register int ProtoIndex;
800  uinT8 Temp;
801  register int *IntPointer;
802  int ConfigNum;
803  register inT32 M3;
804  register inT32 A3;
805  register uinT32 A4;
806 
807  tables->ClearFeatureEvidence(ClassTemplate);
808 
809  /* Precompute Feature Address offset for Proto Pruning */
810  XFeatureAddress = ((Feature->X >> 2) << 1);
811  YFeatureAddress = (NUM_PP_BUCKETS << 1) + ((Feature->Y >> 2) << 1);
812  ThetaFeatureAddress = (NUM_PP_BUCKETS << 2) + ((Feature->Theta >> 2) << 1);
813 
814  for (ProtoSetIndex = 0, ActualProtoNum = 0;
815  ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) {
816  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
817  ProtoPrunerPtr = (uinT32 *) ((*ProtoSet).ProtoPruner);
818  for (ProtoNum = 0; ProtoNum < PROTOS_PER_PROTO_SET;
819  ProtoNum += (PROTOS_PER_PROTO_SET >> 1), ActualProtoNum +=
820  (PROTOS_PER_PROTO_SET >> 1), ProtoMask++, ProtoPrunerPtr++) {
821  /* Prune Protos of current Proto Set */
822  ProtoWord = *(ProtoPrunerPtr + XFeatureAddress);
823  ProtoWord &= *(ProtoPrunerPtr + YFeatureAddress);
824  ProtoWord &= *(ProtoPrunerPtr + ThetaFeatureAddress);
825  ProtoWord &= *ProtoMask;
826 
827  if (ProtoWord != 0) {
828  proto_byte = ProtoWord & 0xff;
829  ProtoWord >>= 8;
830  proto_word_offset = 0;
831  while (ProtoWord != 0 || proto_byte != 0) {
832  while (proto_byte == 0) {
833  proto_byte = ProtoWord & 0xff;
834  ProtoWord >>= 8;
835  proto_word_offset += 8;
836  }
837  proto_offset = offset_table[proto_byte] + proto_word_offset;
838  proto_byte = next_table[proto_byte];
839  Proto = &(ProtoSet->Protos[ProtoNum + proto_offset]);
840  ConfigWord = Proto->Configs[0];
841  A3 = (((Proto->A * (Feature->X - 128)) << 1)
842  - (Proto->B * (Feature->Y - 128)) + (Proto->C << 9));
843  M3 =
844  (((inT8) (Feature->Theta - Proto->Angle)) * kIntThetaFudge) << 1;
845 
846  if (A3 < 0)
847  A3 = ~A3;
848  if (M3 < 0)
849  M3 = ~M3;
850  A3 >>= mult_trunc_shift_bits_;
851  M3 >>= mult_trunc_shift_bits_;
852  if (A3 > evidence_mult_mask_)
853  A3 = evidence_mult_mask_;
854  if (M3 > evidence_mult_mask_)
855  M3 = evidence_mult_mask_;
856 
857  A4 = (A3 * A3) + (M3 * M3);
858  A4 >>= table_trunc_shift_bits_;
859  if (A4 > evidence_table_mask_)
860  Evidence = 0;
861  else
862  Evidence = similarity_evidence_table_[A4];
863 
864  if (PrintFeatureMatchesOn (Debug))
865  IMDebugConfiguration (FeatureNum,
866  ActualProtoNum + proto_offset,
867  Evidence, ConfigMask, ConfigWord);
868 
869  ConfigWord &= *ConfigMask;
870 
871  UINT8Pointer = tables->feature_evidence_ - 8;
872  config_byte = 0;
873  while (ConfigWord != 0 || config_byte != 0) {
874  while (config_byte == 0) {
875  config_byte = ConfigWord & 0xff;
876  ConfigWord >>= 8;
877  UINT8Pointer += 8;
878  }
879  config_offset = offset_table[config_byte];
880  config_byte = next_table[config_byte];
881  if (Evidence > UINT8Pointer[config_offset])
882  UINT8Pointer[config_offset] = Evidence;
883  }
884 
885  UINT8Pointer =
886  &(tables->proto_evidence_[ActualProtoNum + proto_offset][0]);
887  for (ProtoIndex =
888  ClassTemplate->ProtoLengths[ActualProtoNum + proto_offset];
889  ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) {
890  if (Evidence > *UINT8Pointer) {
891  Temp = *UINT8Pointer;
892  *UINT8Pointer = Evidence;
893  Evidence = Temp;
894  }
895  else if (Evidence == 0)
896  break;
897  }
898  }
899  }
900  }
901  }
902 
903  if (PrintFeatureMatchesOn(Debug)) {
904  IMDebugConfigurationSum(FeatureNum, tables->feature_evidence_,
905  ClassTemplate->NumConfigs);
906  }
907 
908  IntPointer = tables->sum_feature_evidence_;
909  UINT8Pointer = tables->feature_evidence_;
910  int SumOverConfigs = 0;
911  for (ConfigNum = ClassTemplate->NumConfigs; ConfigNum > 0; ConfigNum--) {
912  int evidence = *UINT8Pointer++;
913  SumOverConfigs += evidence;
914  *IntPointer++ += evidence;
915  }
916  return SumOverConfigs;
917 }
918 
919 
926 #ifndef GRAPHICS_DISABLED
927 void IntegerMatcher::DebugFeatureProtoError(
928  INT_CLASS ClassTemplate,
929  BIT_VECTOR ProtoMask,
930  BIT_VECTOR ConfigMask,
931  const ScratchEvidence& tables,
932  inT16 NumFeatures,
933  int Debug) {
934  FLOAT32 ProtoConfigs[MAX_NUM_CONFIGS];
935  int ConfigNum;
936  uinT32 ConfigWord;
937  int ProtoSetIndex;
938  uinT16 ProtoNum;
939  uinT8 ProtoWordNum;
940  PROTO_SET ProtoSet;
941  uinT16 ActualProtoNum;
942 
943  if (PrintMatchSummaryOn(Debug)) {
944  cprintf("Configuration Mask:\n");
945  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
946  cprintf("%1d", (((*ConfigMask) >> ConfigNum) & 1));
947  cprintf("\n");
948 
949  cprintf("Feature Error for Configurations:\n");
950  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) {
951  cprintf(
952  " %5.1f",
953  100.0 * (1.0 -
954  (FLOAT32) tables.sum_feature_evidence_[ConfigNum]
955  / NumFeatures / 256.0));
956  }
957  cprintf("\n\n\n");
958  }
959 
960  if (PrintMatchSummaryOn (Debug)) {
961  cprintf ("Proto Mask:\n");
962  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
963  ProtoSetIndex++) {
964  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
965  for (ProtoWordNum = 0; ProtoWordNum < 2;
966  ProtoWordNum++, ProtoMask++) {
967  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
968  for (ProtoNum = 0;
969  ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1))
970  && (ActualProtoNum < ClassTemplate->NumProtos));
971  ProtoNum++, ActualProtoNum++)
972  cprintf ("%1d", (((*ProtoMask) >> ProtoNum) & 1));
973  cprintf ("\n");
974  }
975  }
976  cprintf ("\n");
977  }
978 
979  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
980  ProtoConfigs[i] = 0;
981 
982  if (PrintProtoMatchesOn (Debug)) {
983  cprintf ("Proto Evidence:\n");
984  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
985  ProtoSetIndex++) {
986  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
987  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
988  for (ProtoNum = 0;
989  ((ProtoNum < PROTOS_PER_PROTO_SET) &&
990  (ActualProtoNum < ClassTemplate->NumProtos));
991  ProtoNum++, ActualProtoNum++) {
992  cprintf ("P %3d =", ActualProtoNum);
993  int temp = 0;
994  for (int j = 0; j < ClassTemplate->ProtoLengths[ActualProtoNum]; j++) {
995  uinT8 data = tables.proto_evidence_[ActualProtoNum][j];
996  cprintf(" %d", data);
997  temp += data;
998  }
999 
1000  cprintf(" = %6.4f%%\n",
1001  temp / 256.0 / ClassTemplate->ProtoLengths[ActualProtoNum]);
1002 
1003  ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
1004  ConfigNum = 0;
1005  while (ConfigWord) {
1006  cprintf ("%5d", ConfigWord & 1 ? temp : 0);
1007  if (ConfigWord & 1)
1008  ProtoConfigs[ConfigNum] += temp;
1009  ConfigNum++;
1010  ConfigWord >>= 1;
1011  }
1012  cprintf("\n");
1013  }
1014  }
1015  }
1016 
1017  if (PrintMatchSummaryOn (Debug)) {
1018  cprintf ("Proto Error for Configurations:\n");
1019  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
1020  cprintf (" %5.1f",
1021  100.0 * (1.0 -
1022  ProtoConfigs[ConfigNum] /
1023  ClassTemplate->ConfigLengths[ConfigNum] / 256.0));
1024  cprintf ("\n\n");
1025  }
1026 
1027  if (PrintProtoMatchesOn (Debug)) {
1028  cprintf ("Proto Sum for Configurations:\n");
1029  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
1030  cprintf (" %4.1f", ProtoConfigs[ConfigNum] / 256.0);
1031  cprintf ("\n\n");
1032 
1033  cprintf ("Proto Length for Configurations:\n");
1034  for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
1035  cprintf (" %4.1f",
1036  (float) ClassTemplate->ConfigLengths[ConfigNum]);
1037  cprintf ("\n\n");
1038  }
1039 
1040 }
1041 
/**
 * Displays each prototype of ClassTemplate that is used by a configuration
 * selected in ConfigMask, together with the prototype's average evidence.
 *
 * For every prototype the entries of tables.proto_evidence_ are summed over
 * ClassTemplate->ProtoLengths[proto] positions and divided (integer
 * division) by that length; the quotient / 255.0 is passed to
 * DisplayIntProto().
 *
 * @param ClassTemplate        Class whose proto sets are walked.
 * @param ProtoMask            Not referenced in the lines shown here.
 * @param ConfigMask           Only its first word is read; a prototype is
 *                             displayed when its Configs[0] word shares a
 *                             set bit with it.
 * @param tables               Supplies the per-prototype evidence values.
 * @param SeparateDebugWindows Tested once; see the note inside the body.
 */
1042 void IntegerMatcher::DisplayProtoDebugInfo(
1043  INT_CLASS ClassTemplate,
1044  BIT_VECTOR ProtoMask,
1045  BIT_VECTOR ConfigMask,
1046  const ScratchEvidence& tables,
1047  bool SeparateDebugWindows) {
1048  uinT16 ProtoNum;
1049  uinT16 ActualProtoNum;
1050  PROTO_SET ProtoSet;
1051  int ProtoSetIndex;
1052 
      // NOTE(review): the branch below is empty in this listing, and the
      // listing's own numbering jumps 1052 -> 1054 -> 1057, so statements
      // before and inside the branch appear to be missing. Confirm against
      // the original file before relying on this text.
1054  if (SeparateDebugWindows) {
1057  }
1058 
1059 
      // ProtoNum indexes inside the current proto set; ActualProtoNum is the
      // class-wide prototype index and also bounds the walk via NumProtos.
1060  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
1061  ProtoSetIndex++) {
1062  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
1063  ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET;
1064  for (ProtoNum = 0;
1065  ((ProtoNum < PROTOS_PER_PROTO_SET) &&
1066  (ActualProtoNum < ClassTemplate->NumProtos));
1067  ProtoNum++, ActualProtoNum++) {
1068  /* Compute Average for Actual Proto */
1069  int temp = 0;
1070  for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
1071  temp += tables.proto_evidence_[ActualProtoNum][i];
1072 
      // Integer average of the summed evidence.
      // NOTE(review): assumes ProtoLengths[ActualProtoNum] is never 0,
      // otherwise this divides by zero -- confirm.
1073  temp /= ClassTemplate->ProtoLengths[ActualProtoNum];
1074 
      // Skip prototypes that belong to no configuration enabled in the
      // first word of ConfigMask.
1075  if ((ProtoSet->Protos[ProtoNum]).Configs[0] & (*ConfigMask)) {
1076  DisplayIntProto(ClassTemplate, ActualProtoNum, temp / 255.0);
1077  }
1078  }
1079  }
1080 }
1081 
1082 
/**
 * Displays every feature of a blob with an evidence value derived from a
 * freshly computed scratch table.
 *
 * A ScratchEvidence object is heap-allocated and cleared for ClassTemplate.
 * For each feature, UpdateTablesForFeature() is called and the largest entry
 * of feature_evidence_[0 .. NumConfigs) is taken as that feature's "best"
 * value, which is then handed to DisplayIntFeature():
 *   - when ClipMatchEvidenceOn(Debug) holds: 0.0 if best is below
 *     AdaptFeatureThreshold, otherwise 1.0;
 *   - otherwise: best / 255.0.
 * The scratch object is deleted before returning.
 *
 * @param ClassTemplate         Class the features are matched against.
 * @param ProtoMask             Forwarded to UpdateTablesForFeature().
 * @param ConfigMask            Forwarded to UpdateTablesForFeature().
 * @param NumFeatures           Number of entries read from Features.
 * @param Features              Array of features to evaluate and display.
 * @param AdaptFeatureThreshold Cut-off used only in clipped mode.
 * @param Debug                 Flag word examined by ClipMatchEvidenceOn().
 * @param SeparateDebugWindows  Tested once; see the note inside the body.
 */
1083 void IntegerMatcher::DisplayFeatureDebugInfo(
1084  INT_CLASS ClassTemplate,
1085  BIT_VECTOR ProtoMask,
1086  BIT_VECTOR ConfigMask,
1087  inT16 NumFeatures,
1088  const INT_FEATURE_STRUCT* Features,
1089  int AdaptFeatureThreshold,
1090  int Debug,
1091  bool SeparateDebugWindows) {
1092  ScratchEvidence *tables = new ScratchEvidence();
1093 
1094  tables->Clear(ClassTemplate);
1095 
      // NOTE(review): the branch below is empty in this listing, and the
      // listing's own numbering jumps 1095 -> 1097 -> 1100, so statements
      // before and inside the branch appear to be missing. Confirm against
      // the original file before relying on this text.
1097  if (SeparateDebugWindows) {
1100  }
1101 
1102  for (int Feature = 0; Feature < NumFeatures; Feature++) {
      // The return value is ignored and the final argument is the literal 0.
      // Presumably this call refreshes tables->feature_evidence_ for the
      // current feature -- confirm against UpdateTablesForFeature().
1103  UpdateTablesForFeature(
1104  ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
1105  tables, 0);
1106 
1107  /* Find Best Evidence for Current Feature */
1108  int best = 0;
1109  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
1110  if (tables->feature_evidence_[i] > best)
1111  best = tables->feature_evidence_[i];
1112 
1113  /* Update display for current feature */
      // Clipped mode shows a binary pass/fail value; otherwise the best
      // evidence is scaled by 1/255.
1114  if (ClipMatchEvidenceOn(Debug)) {
1115  if (best < AdaptFeatureThreshold)
1116  DisplayIntFeature(&Features[Feature], 0.0);
1117  else
1118  DisplayIntFeature(&Features[Feature], 1.0);
1119  } else {
1120  DisplayIntFeature(&Features[Feature], best / 255.0);
1121  }
1122  }
1123 
1124  delete tables;
1125 }
1126 #endif
1127 
1132  INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures) {
      // NOTE(review): the line carrying the return type and function name is
      // absent from this listing (its numbering resumes at 1132). The
      // parameter list matches UpdateSumOfProtoEvidences -- confirm against
      // the original file.
      //
      // Adds each prototype's total evidence to the running sum of every
      // configuration that uses the prototype and is enabled in ConfigMask:
      //   - a prototype's total is the sum of proto_evidence_[proto][i] over
      //     ClassTemplate->ProtoLengths[proto] positions;
      //   - the configurations are the set bits of
      //     (Configs[0] & *ConfigMask), bit k mapping to
      //     sum_feature_evidence_[k].
      // NumFeatures is not referenced in the body. Only the first word of
      // ConfigMask and of each prototype's Configs array is read.
1133 
1134  int *IntPointer;
1135  uinT32 ConfigWord;
1136  int ProtoSetIndex;
1137  uinT16 ProtoNum;
1138  PROTO_SET ProtoSet;
1139  int NumProtos;
1140  uinT16 ActualProtoNum;
1141 
1142  NumProtos = ClassTemplate->NumProtos;
1143 
      // ProtoNum indexes inside the current proto set; ActualProtoNum is the
      // class-wide prototype index, bounded by NumProtos.
1144  for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
1145  ProtoSetIndex++) {
1146  ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
1147  ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
1148  for (ProtoNum = 0;
1149  ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < NumProtos));
1150  ProtoNum++, ActualProtoNum++) {
      // Total evidence accumulated for this prototype.
1151  int temp = 0;
1152  for (int i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
1153  temp += proto_evidence_[ActualProtoNum] [i];
1154 
1155  ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
1156  ConfigWord &= *ConfigMask;
      // Walk the remaining bits from least significant upwards; IntPointer
      // advances once per bit so it always addresses the matching config sum.
      // The loop stops as soon as no set bits remain.
1157  IntPointer = sum_feature_evidence_;
1158  while (ConfigWord) {
1159  if (ConfigWord & 1)
1160  *IntPointer += temp;
1161  IntPointer++;
1162  ConfigWord >>= 1;
1163  }
1164  }
1165  }
1166 }
1167 
1168 
1169 
1175  INT_CLASS ClassTemplate, inT16 NumFeatures, inT32 used_features) {
      // NOTE(review): the line carrying the return type and function name is
      // absent from this listing (its numbering resumes at 1175). The
      // parameter list matches NormalizeSums -- confirm against the original
      // file.
      // used_features is not referenced in the lines shown here.
1176 
      // One pass per configuration of the class.
1177  for (int i = 0; i < ClassTemplate->NumConfigs; i++) {
      // NOTE(review): listing line 1178 is missing, so what follows looks
      // like the tail of a longer statement. As shown it only evaluates
      // NumFeatures + ConfigLengths[i] and discards the result. Restore the
      // missing line from the original file before relying on this loop.
1179  (NumFeatures + ClassTemplate->ConfigLengths[i]);
1180  }
1181 }
1182 
1183 
1191 int IntegerMatcher::FindBestMatch(
1192  INT_CLASS class_template,
1193  const ScratchEvidence &tables,
1194  UnicharRating* result) {
1195  int best_match = 0;
1196  result->config = 0;
1197  result->fonts.truncate(0);
1198  result->fonts.reserve(class_template->NumConfigs);
1199 
1200  /* Find best match */
1201  for (int c = 0; c < class_template->NumConfigs; ++c) {
1202  int rating = tables.sum_feature_evidence_[c];
1203  if (*classify_debug_level_ > 2)
1204  tprintf("Config %d, rating=%d\n", c, rating);
1205  if (rating > best_match) {
1206  result->config = c;
1207  best_match = rating;
1208  }
1209  result->fonts.push_back(ScoredFont(c, rating));
1210  }
1211 
1212  // Compute confidence on a Probability scale.
1213  result->rating = best_match / 65536.0f;
1214 
1215  return best_match;
1216 }
1217 
1222 float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length,
1223  int normalization_factor,
1224  int matcher_multiplier) {
1225  return (rating * blob_length +
1226  matcher_multiplier * normalization_factor / 256.0) /
1227  (blob_length + matcher_multiplier);
1228 }
1229 
/**
 * Sorts the key array ra into ascending order with an in-place heap sort and
 * applies every move to rb as well, so rb[k] stays paired with ra[k].
 *
 * Both arrays are addressed 1-based: elements [1..n] are sorted and element
 * [0] is never read or written.
 *
 * @param n  Number of elements to sort. For n < 2 there is nothing to do and
 *           the arrays are left untouched.
 * @param ra Key array, indexed [1..n].
 * @param rb Companion array, indexed [1..n], permuted in step with ra.
 */
void HeapSort(int n, int ra[], int rb[]) {
  // The main loop only terminates when the heap shrinks to exactly one
  // element, which never happens for n < 2; without this guard it would walk
  // off the front of both arrays.
  if (n < 2)
    return;

  int build_index = (n >> 1) + 1;  // next subtree root to heapify, plus one
  int heap_end = n;                // last index still inside the heap

  for (;;) {
    int key;
    int tag;
    if (build_index > 1) {
      // Heap construction: pick up the next internal node to sift down.
      --build_index;
      key = ra[build_index];
      tag = rb[build_index];
    } else {
      // Extraction: move the current maximum to the end of the heap and
      // re-insert the element that used to live there.
      key = ra[heap_end];
      tag = rb[heap_end];
      ra[heap_end] = ra[1];
      rb[heap_end] = rb[1];
      if (--heap_end == 1) {
        ra[1] = key;
        rb[1] = tag;
        return;
      }
    }

    // Sift (key, tag) down from build_index until both children are smaller
    // or equal, or the end of the heap is reached.
    int hole = build_index;
    int child = build_index << 1;
    while (child <= heap_end) {
      if (child < heap_end && ra[child] < ra[child + 1])
        ++child;  // use the larger of the two children
      if (!(key < ra[child]))
        break;
      ra[hole] = ra[child];
      rb[hole] = rb[child];
      hole = child;
      child += child;
    }
    ra[hole] = key;
    rb[hole] = tag;
  }
}
STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
void SummarizeResult(const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const uinT16 *expected_num_features, int norm_multiplier, const uinT8 *normalization_factors) const
Definition: intmatcher.cpp:336
#define NUM_CP_BUCKETS
Definition: intproto.h:52
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
Definition: intproto.h:125
void DisplayIntFeature(const INT_FEATURE_STRUCT *Feature, FLOAT32 Evidence)
Definition: intproto.cpp:630
void NormalizeSums(INT_CLASS ClassTemplate, inT16 NumFeatures, inT32 used_features)
float FLOAT32
Definition: host.h:111
inT16 PROTO_ID
Definition: matchdefs.h:41
void HeapSort(int n, register int ra[], register int rb[])
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
INT_PROTO_STRUCT Protos[PROTOS_PER_PROTO_SET]
Definition: intproto.h:97
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:625
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
void DebugMatch(const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const INT_FEATURE_STRUCT *features) const
Definition: intmatcher.cpp:299
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
#define tprintf(...)
Definition: tprintf.h:31
#define INTMATCHER_OFFSET_TABLE_SIZE
Definition: intmatcher.cpp:66
UNICHARSET unicharset
Definition: ccutil.h:72
void InitIntMatchWindowIfReqd()
Definition: intproto.cpp:1895
void InitFeatureDisplayWindowIfReqd()
Definition: intproto.cpp:1927
#define PrintFeatureMatchesOn(D)
Definition: intproto.h:201
#define PrintMatchSummaryOn(D)
Definition: intproto.h:198
void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, FLOAT32 Evidence)
Definition: intproto.cpp:652
#define SE_TABLE_SIZE
Definition: intmatcher.h:67
uinT8 proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]
Definition: intmatcher.h:72
uinT16 ConfigLengths[MAX_NUM_CONFIGS]
Definition: intproto.h:113
void IMDebugConfiguration(int FeatureNum, uinT16 ActualProtoNum, uinT8 Evidence, BIT_VECTOR ConfigMask, uinT32 ConfigWord)
Definition: intmatcher.cpp:728
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:115
#define WERDS_PER_CP_VECTOR
Definition: intproto.h:61
PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]
Definition: intproto.h:111
void NormalizeForXheight(int norm_multiplier, const uinT8 *normalization_factors)
Definition: intmatcher.cpp:246
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:682
void IMDebugConfigurationSum(int FeatureNum, uinT8 *FeatureEvidence, inT32 ConfigCount)
Definition: intmatcher.cpp:752
#define BITS_PER_WERD
Definition: intproto.h:44
ShapeTable * shape_table_
Definition: classify.h:512
void InitProtoDisplayWindowIfReqd()
Definition: intproto.cpp:1916
ClassPruner(int max_classes)
Definition: intmatcher.cpp:109
void ClearFeatureEvidence(const INT_CLASS class_template)
Definition: intmatcher.cpp:715
unsigned int uinT32
Definition: host.h:103
GenericVector< ScoredFont > fonts
Definition: shapetable.h:88
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:472
#define PrintProtoMatchesOn(D)
Definition: intproto.h:202
void UpdateSumOfProtoEvidences(INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures)
int classify_class_pruner_multiplier
Definition: classify.h:465
void init_to_size(int size, T t)
int classify_class_pruner_threshold
Definition: classify.h:463
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:155
int sum_feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:71
#define next_table_entries
Definition: intmatcher.cpp:68
void Init(tesseract::IntParam *classify_debug_level)
Definition: intmatcher.cpp:677
static const int kIntThetaFudge
Definition: intmatcher.h:86
void DisableDisabledClasses(const UNICHARSET &unicharset)
Definition: intmatcher.cpp:224
uinT8 FEATURE_ID
Definition: matchdefs.h:47
void Clear(const INT_CLASS class_template)
Definition: intmatcher.cpp:708
#define NUM_PP_BUCKETS
Definition: intproto.h:51
static const float kSEExponentialMultiplier
Definition: intmatcher.h:92
void ComputeScores(const INT_TEMPLATES_STRUCT *int_templates, int num_features, const INT_FEATURE_STRUCT *features)
Definition: intmatcher.cpp:140
#define ClipMatchEvidenceOn(D)
Definition: intproto.h:203
static const int kEvidenceTableBits
Definition: intmatcher.h:88
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
Definition: intmatcher.cpp:409
#define MatchDebuggingOn(D)
Definition: intproto.h:197
void AdjustForExpectedNumFeatures(const uinT16 *expected_num_features, int cutoff_strength)
Definition: intmatcher.cpp:211
uinT8 NumProtoSets
Definition: intproto.h:109
#define DisplayFeatureMatchesOn(D)
Definition: intproto.h:199
#define PROTOS_PER_PROTO_SET
Definition: intproto.h:48
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:826
int RoundUp(int n, int block_size)
Definition: helpers.h:109
uinT8 feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:70
#define NUM_BITS_PER_CLASS
Definition: intproto.h:54
int SetupResults(GenericVector< CP_RESULT_STRUCT > *results) const
Definition: intmatcher.cpp:359
static const float kSimilarityCenter
Definition: intmatcher.h:94
uinT8 NumConfigs
Definition: intproto.h:110
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:554
void Print() const
Definition: shapetable.h:49
uinT32 p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]
Definition: intproto.h:77
bool disable_character_fragments
Definition: classify.h:450
#define DisplayProtoMatchesOn(D)
Definition: intproto.h:200
void PruneAndSort(int pruning_factor, int keep_this, bool max_of_non_fragments, const UNICHARSET &unicharset)
Definition: intmatcher.cpp:264
Definition: strngs.h:44
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
void DisableFragments(const UNICHARSET &unicharset)
Definition: intmatcher.cpp:232
#define NULL
Definition: host.h:144
SIGNED char inT8
Definition: host.h:98
uinT8 * ProtoLengths
Definition: intproto.h:112
const char * string() const
Definition: strngs.cpp:193
int classify_cp_cutoff_strength
Definition: classify.h:467
#define SE_TABLE_BITS
Definition: intmatcher.h:66
#define offset_table_entries
Definition: intmatcher.cpp:53
uinT16 NumProtos
Definition: intproto.h:108
static const int kIntEvidenceTruncBits
Definition: intmatcher.h:90
uinT32 Configs[WERDS_PER_CONFIG_VEC]
Definition: intproto.h:86
unsigned short uinT16
Definition: host.h:101
short inT16
Definition: host.h:100
int inT32
Definition: host.h:102
#define CLASS_PRUNER_CLASS_MASK
Definition: intproto.h:55
unsigned char uinT8
Definition: host.h:99