tesseract v5.3.3.20231005
tesseract::IntegerMatcher Class Reference

#include <intmatcher.h>

Public Member Functions

 IntegerMatcher (tesseract::IntParam *classify_debug_level)
 
void Match (INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
 
float ApplyCNCorrection (float rating, int blob_length, int normalization_factor, int matcher_multiplier)
 
int FindGoodProtos (INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
 
int FindBadFeatures (INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
 

Static Public Attributes

static const int kIntThetaFudge = 128
 
static const int kEvidenceTableBits = 9
 
static const int kIntEvidenceTruncBits = 14
 
static const float kSEExponentialMultiplier = 0.0f
 
static const float kSimilarityCenter = 0.0075f
 

Detailed Description

Definition at line 60 of file intmatcher.h.

Constructor & Destructor Documentation

◆ IntegerMatcher()

tesseract::IntegerMatcher::IntegerMatcher ( tesseract::IntParam *classify_debug_level)

Definition at line 668 of file intmatcher.cpp.

669 : classify_debug_level_(classify_debug_level) {
670 /* Initialize table for evidence to similarity lookup */
671 for (int i = 0; i < SE_TABLE_SIZE; i++) {
672 uint32_t IntSimilarity = i << (27 - SE_TABLE_BITS);
673 double Similarity = (static_cast<double>(IntSimilarity)) / 65536.0 / 65536.0;
674 double evidence = Similarity / kSimilarityCenter;
675 evidence = 255.0 / (evidence * evidence + 1.0);
676
677 if (kSEExponentialMultiplier > 0.0) {
678 double scale =
679 1.0 - std::exp(-kSEExponentialMultiplier) *
680 exp(kSEExponentialMultiplier * (static_cast<double>(i) / SE_TABLE_SIZE));
681 evidence *= ClipToRange(scale, 0.0, 1.0);
682 }
683
684 similarity_evidence_table_[i] = static_cast<uint8_t>(evidence + 0.5);
685 }
686
687 /* Initialize evidence computation variables */
688 evidence_table_mask_ = ((1 << kEvidenceTableBits) - 1) << (9 - kEvidenceTableBits);
689 mult_trunc_shift_bits_ = (14 - kIntEvidenceTruncBits);
690 table_trunc_shift_bits_ = (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1));
691 evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);
692}
#define SE_TABLE_SIZE
Definition: intmatcher.h:47
#define SE_TABLE_BITS
Definition: intmatcher.h:46
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:105
static const int kEvidenceTableBits
Definition: intmatcher.h:65
static const float kSEExponentialMultiplier
Definition: intmatcher.h:69
static const float kSimilarityCenter
Definition: intmatcher.h:71
static const int kIntEvidenceTruncBits
Definition: intmatcher.h:67

Member Function Documentation

◆ ApplyCNCorrection()

float tesseract::IntegerMatcher::ApplyCNCorrection ( float  rating,
int  blob_length,
int  normalization_factor,
int  matcher_multiplier 
)

Applies the CN normalization factor to the given rating and returns the modified rating.

Definition at line 1156 of file intmatcher.cpp.

1157 {
1158 int divisor = blob_length + matcher_multiplier;
1159 return divisor == 0
1160 ? 1.0f
1161 : (rating * blob_length + matcher_multiplier * normalization_factor / 256.0f) /
1162 divisor;
1163}

◆ FindBadFeatures()

int tesseract::IntegerMatcher::FindBadFeatures ( INT_CLASS_STRUCT *ClassTemplate,
BIT_VECTOR  ProtoMask,
BIT_VECTOR  ConfigMask,
int16_t  NumFeatures,
INT_FEATURE_ARRAY  Features,
FEATURE_ID *FeatureArray,
int  AdaptFeatureThreshold,
int  Debug 
)

FindBadFeatures finds all features whose maximum feature-evidence is less than AdaptFeatureThreshold. The list is ordered by increasing feature number.

Parameters
ClassTemplate - Prototypes & tables for a class
ProtoMask - AND Mask for proto word
ConfigMask - AND Mask for config word
NumFeatures - Number of features in blob
Features - Array of features
FeatureArray - Array of bad features
AdaptFeatureThreshold - Threshold for bad features
Debug - Debugger flag: 1=debugger on
Returns
Number of bad features in FeatureArray.

Definition at line 619 of file intmatcher.cpp.

622 {
623 auto *tables = new ScratchEvidence();
624 int NumBadFeatures = 0;
625
626 /* DEBUG opening heading */
627 if (MatchDebuggingOn(Debug)) {
628 tprintf("Find Bad Features -------------------------------------------\n");
629 }
630
631 tables->Clear(ClassTemplate);
632
633 for (int Feature = 0; Feature < NumFeatures; Feature++) {
634 UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
635 tables, Debug);
636
637 /* Find Best Evidence for Current Feature */
638 int best = 0;
639 assert(ClassTemplate->NumConfigs < MAX_NUM_CONFIGS);
640 for (int i = 0; i < MAX_NUM_CONFIGS && i < ClassTemplate->NumConfigs; i++) {
641 if (tables->feature_evidence_[i] > best) {
642 best = tables->feature_evidence_[i];
643 }
644 }
645
646 /* Find Bad Features */
647 if (best < AdaptFeatureThreshold) {
648 *FeatureArray = Feature;
649 FeatureArray++;
650 NumBadFeatures++;
651 }
652 }
653
654#ifndef GRAPHICS_DISABLED
655 if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) {
656 DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables, NumFeatures, Debug);
657 }
658#endif
659
660 if (MatchDebuggingOn(Debug)) {
661 tprintf("Match Complete --------------------------------------------\n");
662 }
663
664 delete tables;
665 return NumBadFeatures;
666}
#define MatchDebuggingOn(D)
Definition: intproto.h:172
#define PrintMatchSummaryOn(D)
Definition: intproto.h:173
#define MAX_NUM_CONFIGS
Definition: intproto.h:47
#define PrintProtoMatchesOn(D)
Definition: intproto.h:177
void tprintf(const char *format,...)
Definition: tprintf.cpp:41

◆ FindGoodProtos()

int tesseract::IntegerMatcher::FindGoodProtos ( INT_CLASS_STRUCT *ClassTemplate,
BIT_VECTOR  ProtoMask,
BIT_VECTOR  ConfigMask,
int16_t  NumFeatures,
INT_FEATURE_ARRAY  Features,
PROTO_ID *ProtoArray,
int  AdaptProtoThreshold,
int  Debug 
)

FindGoodProtos finds all protos whose normalized proto-evidence exceeds AdaptProtoThreshold. The list is ordered by increasing proto id number.

Globals:

  • local_matcher_multiplier_ Normalization factor multiplier

Parameters
ClassTemplate - Prototypes & tables for a class
ProtoMask - AND Mask for proto word
ConfigMask - AND Mask for config word
NumFeatures - Number of features in blob
Features - Array of features
ProtoArray - Array of good protos
AdaptProtoThreshold - Threshold for good protos
Debug - Debugger flag: 1=debugger on
Returns
Number of good protos in ProtoArray.

Definition at line 555 of file intmatcher.cpp.

558 {
559 auto *tables = new ScratchEvidence();
560 int NumGoodProtos = 0;
561
562 /* DEBUG opening heading */
563 if (MatchDebuggingOn(Debug)) {
564 tprintf("Find Good Protos -------------------------------------------\n");
565 }
566
567 tables->Clear(ClassTemplate);
568
569 for (int Feature = 0; Feature < NumFeatures; Feature++) {
570 UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]),
571 tables, Debug);
572 }
573
574#ifndef GRAPHICS_DISABLED
575 if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) {
576 DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables, NumFeatures, Debug);
577 }
578#endif
579
580 /* Average Proto Evidences & Find Good Protos */
581 for (int proto = 0; proto < ClassTemplate->NumProtos; proto++) {
582 /* Compute Average for Actual Proto */
583 int Temp = 0;
584 for (uint8_t i = 0; i < MAX_PROTO_INDEX && i < ClassTemplate->ProtoLengths[proto]; i++) {
585 Temp += tables->proto_evidence_[proto][i];
586 }
587
588 Temp /= ClassTemplate->ProtoLengths[proto];
589
590 /* Find Good Protos */
591 if (Temp >= AdaptProtoThreshold) {
592 *ProtoArray = proto;
593 ProtoArray++;
594 NumGoodProtos++;
595 }
596 }
597
598 if (MatchDebuggingOn(Debug)) {
599 tprintf("Match Complete --------------------------------------------\n");
600 }
601 delete tables;
602
603 return NumGoodProtos;
604}
#define MAX_PROTO_INDEX
Definition: intproto.h:44

◆ Match()

void tesseract::IntegerMatcher::Match ( INT_CLASS_STRUCT *ClassTemplate,
BIT_VECTOR  ProtoMask,
BIT_VECTOR  ConfigMask,
int16_t  NumFeatures,
const INT_FEATURE_STRUCT *Features,
tesseract::UnicharRating *Result,
int  AdaptFeatureThreshold,
int  Debug,
bool  SeparateDebugWindows 
)

IntegerMatcher returns the best configuration and rating for a single class. The class matched against is determined by the uniqueness of the ClassTemplate parameter. The best rating and its associated configuration are returned.

Globals:

  • local_matcher_multiplier_ Normalization factor multiplier

Parameters
ClassTemplate - Prototypes & tables for a class
NumFeatures - Number of features in blob
Features - Array of features
NormalizationFactor - Fudge factor from blob normalization process (documented here, but not a parameter of the signature shown above)
Result - Class rating & configuration: (0.0 -> 1.0), 0=bad, 1=good
Debug - Debugger flag: 1=debugger on

Definition at line 482 of file intmatcher.cpp.

485 {
486 auto *tables = new ScratchEvidence();
487 int Feature;
488
489 if (MatchDebuggingOn(Debug)) {
490 tprintf("Integer Matcher -------------------------------------------\n");
491 }
492
493 tables->Clear(ClassTemplate);
494 Result->feature_misses = 0;
495
496 for (Feature = 0; Feature < NumFeatures; Feature++) {
497 int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask, Feature,
498 &Features[Feature], tables, Debug);
499 // Count features that were missed over all configs.
500 if (csum == 0) {
501 ++Result->feature_misses;
502 }
503 }
504
505#ifndef GRAPHICS_DISABLED
506 if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) {
507 DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables, NumFeatures, Debug);
508 }
509
510 if (DisplayProtoMatchesOn(Debug)) {
511 DisplayProtoDebugInfo(ClassTemplate, ConfigMask, *tables, SeparateDebugWindows);
512 }
513
514 if (DisplayFeatureMatchesOn(Debug)) {
515 DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures, Features,
516 AdaptFeatureThreshold, Debug, SeparateDebugWindows);
517 }
518#endif
519
520 tables->UpdateSumOfProtoEvidences(ClassTemplate, ConfigMask);
521 tables->NormalizeSums(ClassTemplate, NumFeatures);
522
523 FindBestMatch(ClassTemplate, *tables, Result);
524
525#ifndef GRAPHICS_DISABLED
526 if (PrintMatchSummaryOn(Debug)) {
527 Result->Print();
528 }
529
530 if (MatchDebuggingOn(Debug)) {
531 tprintf("Match Complete --------------------------------------------\n");
532 }
533#endif
534
535 delete tables;
536}
#define DisplayFeatureMatchesOn(D)
Definition: intproto.h:174
#define DisplayProtoMatchesOn(D)
Definition: intproto.h:175

Member Data Documentation

◆ kEvidenceTableBits

const int tesseract::IntegerMatcher::kEvidenceTableBits = 9
static

Definition at line 65 of file intmatcher.h.

◆ kIntEvidenceTruncBits

const int tesseract::IntegerMatcher::kIntEvidenceTruncBits = 14
static

Definition at line 67 of file intmatcher.h.

◆ kIntThetaFudge

const int tesseract::IntegerMatcher::kIntThetaFudge = 128
static

Definition at line 63 of file intmatcher.h.

◆ kSEExponentialMultiplier

const float tesseract::IntegerMatcher::kSEExponentialMultiplier = 0.0f
static

Definition at line 69 of file intmatcher.h.

◆ kSimilarityCenter

const float tesseract::IntegerMatcher::kSimilarityCenter = 0.0075f
static

Definition at line 71 of file intmatcher.h.


The documentation for this class was generated from the following files: