All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tesseract::Classify Class Reference

#include <classify.h>

Inheritance diagram for tesseract::Classify:
tesseract::CCStruct tesseract::CUtil tesseract::CCUtil tesseract::Wordrec tesseract::Tesseract

Public Member Functions

 Classify ()
 
virtual ~Classify ()
 
Dict & getDict ()
 
const ShapeTable * shape_table () const
 
void SetStaticClassifier (ShapeClassifier *static_classifier)
 
void AddLargeSpeckleTo (int blob_length, BLOB_CHOICE_LIST *choices)
 
bool LargeSpeckle (const TBLOB &blob)
 
ADAPT_TEMPLATES NewAdaptedTemplates (bool InitFromUnicharset)
 
int GetFontinfoId (ADAPT_CLASS Class, uinT8 ConfigId)
 
int PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
 
void ReadNewCutoffs (FILE *CutoffFile, bool swap, inT64 end_offset, CLASS_CUTOFF_ARRAY Cutoffs)
 
void PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates)
 
void WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates)
 
ADAPT_TEMPLATES ReadAdaptedTemplates (FILE *File)
 
FLOAT32 ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, BOOL8 DebugMatch)
 
void FreeNormProtos ()
 
NORM_PROTOS * ReadNormProtos (FILE *File, inT64 end_offset)
 
void ConvertProto (PROTO Proto, int ProtoId, INT_CLASS Class)
 
INT_TEMPLATES CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset)
 
void LearnWord (const char *fontname, WERD_RES *word)
 
void LearnPieces (const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
 
void InitAdaptiveClassifier (bool load_pre_trained_templates)
 
void InitAdaptedClass (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
 
void AmbigClassifier (const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
 
void MasterMatcher (INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
 
void ExpandShapesAndApplyCorrections (ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uinT8 *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
 
double ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uinT8 *cn_factors)
 
void ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
 
void AddNewResult (const UnicharRating &new_result, ADAPT_RESULTS *results)
 
int GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
 
void DebugAdaptiveClassifier (TBLOB *Blob, ADAPT_RESULTS *Results)
 
PROTO_ID MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
 
int MakeNewTemporaryConfig (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
 
void MakePermanent (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
 
void PrintAdaptiveMatchResults (const ADAPT_RESULTS &results)
 
void RemoveExtraPuncs (ADAPT_RESULTS *Results)
 
void RemoveBadMatches (ADAPT_RESULTS *Results)
 
void SetAdaptiveThreshold (FLOAT32 Threshold)
 
void ShowBestMatchFor (int shape_id, const INT_FEATURE_STRUCT *features, int num_features)
 
STRING ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
 
int ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const
 
int ShapeIDToClassID (int shape_id) const
 
UNICHAR_ID * BaselineClassifier (TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
 
int CharNormClassifier (TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
 
int CharNormTrainingSample (bool pruner_only, int keep_this, const TrainingSample &sample, GenericVector< UnicharRating > *results)
 
UNICHAR_ID * GetAmbiguities (TBLOB *Blob, CLASS_ID CorrectClass)
 
void DoAdaptiveMatch (TBLOB *Blob, ADAPT_RESULTS *Results)
 
void AdaptToChar (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold, ADAPT_TEMPLATES adaptive_templates)
 
void DisplayAdaptedChar (TBLOB *blob, INT_CLASS_STRUCT *int_class)
 
bool AdaptableWord (WERD_RES *word)
 
void EndAdaptiveClassifier ()
 
void SettupPass1 ()
 
void SettupPass2 ()
 
void AdaptiveClassifier (TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
 
void ClassifyAsNoise (ADAPT_RESULTS *Results)
 
void ResetAdaptiveClassifierInternal ()
 
void SwitchAdaptiveClassifier ()
 
void StartBackupAdaptiveClassifier ()
 
int GetCharNormFeature (const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uinT8 *pruner_norm_array, uinT8 *char_norm_array)
 
void ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
 
bool TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG &config)
 
void UpdateAmbigsGroup (CLASS_ID class_id, TBLOB *Blob)
 
bool AdaptiveClassifierIsFull () const
 
bool AdaptiveClassifierIsEmpty () const
 
bool LooksLikeGarbage (TBLOB *blob)
 
void RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
 
void ClearCharNormArray (uinT8 *char_norm_array)
 
void ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uinT8 *char_norm_array)
 
void ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
 
INT_TEMPLATES ReadIntTemplates (FILE *File)
 
void WriteIntTemplates (FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
 
CLASS_ID GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id)
 
void ShowMatchDisplay ()
 
UnicityTable< FontInfo > & get_fontinfo_table ()
 
const UnicityTable< FontInfo > & get_fontinfo_table () const
 
UnicityTable< FontSet > & get_fontset_table ()
 
void NormalizeOutlines (LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale)
 
FEATURE_SET ExtractOutlineFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractPicoFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractIntCNFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
FEATURE_SET ExtractIntGeoFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
void LearnBlob (const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
 
bool WriteTRFile (const STRING &filename)
 
- Public Member Functions inherited from tesseract::CCStruct
 CCStruct ()
 
 ~CCStruct ()
 
- Public Member Functions inherited from tesseract::CUtil
 CUtil ()
 
 ~CUtil ()
 
void read_variables (const char *filename, bool global_only)
 
- Public Member Functions inherited from tesseract::CCUtil
 CCUtil ()
 
virtual ~CCUtil ()
 
void main_setup (const char *argv0, const char *basename)
 CCUtil::main_setup - set location of tessdata and name of image. More...
 
ParamsVectors * params ()
 

Static Public Member Functions

static void SetupBLCNDenorms (const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
 
static void ExtractFeatures (const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
 

Public Attributes

bool allow_blob_division = true
 
bool prioritize_division = FALSE
 
int tessedit_single_match = FALSE
 
bool classify_enable_learning = true
 
int classify_debug_level = 0
 
int classify_norm_method = character
 
double classify_char_norm_range = 0.2
 
double classify_min_norm_scale_x = 0.0
 
double classify_max_norm_scale_x = 0.325
 
double classify_min_norm_scale_y = 0.0
 
double classify_max_norm_scale_y = 0.325
 
double classify_max_rating_ratio = 1.5
 
double classify_max_certainty_margin = 5.5
 
bool tess_cn_matching = 0
 
bool tess_bn_matching = 0
 
bool classify_enable_adaptive_matcher = 1
 
bool classify_use_pre_adapted_templates = 0
 
bool classify_save_adapted_templates = 0
 
bool classify_enable_adaptive_debugger = 0
 
bool classify_nonlinear_norm = 0
 
int matcher_debug_level = 0
 
int matcher_debug_flags = 0
 
int classify_learning_debug_level = 0
 
double matcher_good_threshold = 0.125
 
double matcher_reliable_adaptive_result = 0.0
 
double matcher_perfect_threshold = 0.02
 
double matcher_bad_match_pad = 0.15
 
double matcher_rating_margin = 0.1
 
double matcher_avg_noise_size = 12.0
 
int matcher_permanent_classes_min = 1
 
int matcher_min_examples_for_prototyping = 3
 
int matcher_sufficient_examples_for_prototyping = 5
 
double matcher_clustering_max_angle_delta = 0.015
 
double classify_misfit_junk_penalty = 0.0
 
double rating_scale = 1.5
 
double certainty_scale = 20.0
 
double tessedit_class_miss_scale = 0.00390625
 
double classify_adapted_pruning_factor = 2.5
 
double classify_adapted_pruning_threshold = -1.0
 
int classify_adapt_proto_threshold = 230
 
int classify_adapt_feature_threshold = 230
 
bool disable_character_fragments = TRUE
 
double classify_character_fragments_garbage_certainty_threshold = -3.0
 
bool classify_debug_character_fragments = FALSE
 
bool matcher_debug_separate_windows = FALSE
 
char * classify_learn_debug_str = ""
 
int classify_class_pruner_threshold = 229
 
int classify_class_pruner_multiplier = 15
 
int classify_cp_cutoff_strength = 7
 
int classify_integer_matcher_multiplier = 10
 
INT_TEMPLATES PreTrainedTemplates
 
ADAPT_TEMPLATES AdaptedTemplates
 
ADAPT_TEMPLATES BackupAdaptedTemplates
 
BIT_VECTOR AllProtosOn
 
BIT_VECTOR AllConfigsOn
 
BIT_VECTOR AllConfigsOff
 
BIT_VECTOR TempProtoMask
 
bool EnableLearning
 
NORM_PROTOS * NormProtos
 
UnicityTable< FontInfo > fontinfo_table_
 
UnicityTable< FontSet > fontset_table_
 
int il1_adaption_test = 0
 
bool classify_bln_numeric_mode = 0
 
double speckle_large_max_size = 0.30
 
double speckle_rating_penalty = 10.0
 
- Public Attributes inherited from tesseract::CCUtil
STRING datadir
 
STRING imagebasename
 
STRING lang
 
STRING language_data_path_prefix
 
TessdataManager tessdata_manager
 
UNICHARSET unicharset
 
UnicharAmbigs unichar_ambigs
 
STRING imagefile
 
STRING directory
 
char * m_data_sub_dir = "tessdata/"
 
int ambigs_debug_level = 0
 
bool use_definite_ambigs_for_classifier = 0
 
bool use_ambigs_for_adaption = 0
 

Protected Attributes

IntegerMatcher im_
 
FEATURE_DEFS_STRUCT feature_defs_
 
ShapeTable * shape_table_
 

Additional Inherited Members

- Static Public Attributes inherited from tesseract::CCStruct
static const double kDescenderFraction = 0.25
 
static const double kXHeightFraction = 0.5
 
static const double kAscenderFraction = 0.25
 
static const double kXHeightCapRatio
 

Detailed Description

Definition at line 61 of file classify.h.

Constructor & Destructor Documentation

tesseract::Classify::Classify ( )

Definition at line 35 of file classify.cpp.

36  : BOOL_MEMBER(allow_blob_division, true, "Use divisible blobs chopping",
37  this->params()),
39  "Prioritize blob division over chopping", this->params()),
40  INT_MEMBER(tessedit_single_match, FALSE, "Top choice only from CP",
41  this->params()),
42  BOOL_MEMBER(classify_enable_learning, true, "Enable adaptive classifier",
43  this->params()),
44  INT_MEMBER(classify_debug_level, 0, "Classify debug level",
45  this->params()),
46  INT_MEMBER(classify_norm_method, character, "Normalization Method ...",
47  this->params()),
49  "Character Normalization Range ...", this->params()),
50  double_MEMBER(classify_min_norm_scale_x, 0.0, "Min char x-norm scale ...",
51  this->params()), /* PREV DEFAULT 0.1 */
53  "Max char x-norm scale ...",
54  this->params()), /* PREV DEFAULT 0.3 */
55  double_MEMBER(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ...",
56  this->params()), /* PREV DEFAULT 0.1 */
58  "Max char y-norm scale ...",
59  this->params()), /* PREV DEFAULT 0.3 */
61  "Veto ratio between classifier ratings", this->params()),
63  "Veto difference between classifier certainties",
64  this->params()),
65  BOOL_MEMBER(tess_cn_matching, 0, "Character Normalized Matching",
66  this->params()),
67  BOOL_MEMBER(tess_bn_matching, 0, "Baseline Normalized Matching",
68  this->params()),
70  "Enable adaptive classifier", this->params()),
72  "Use pre-adapted classifier templates", this->params()),
74  "Save adapted templates to a file", this->params()),
75  BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger",
76  this->params()),
78  "Non-linear stroke-density normalization", this->params()),
79  INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params()),
80  INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params()),
81  INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ",
82  this->params()),
83  double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)",
84  this->params()),
85  double_MEMBER(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)",
86  this->params()),
87  double_MEMBER(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)",
88  this->params()),
89  double_MEMBER(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)",
90  this->params()),
91  double_MEMBER(matcher_rating_margin, 0.1, "New template margin (0-1)",
92  this->params()),
93  double_MEMBER(matcher_avg_noise_size, 12.0, "Avg. noise blob length",
94  this->params()),
95  INT_MEMBER(matcher_permanent_classes_min, 1, "Min # of permanent classes",
96  this->params()),
98  "Reliable Config Threshold", this->params()),
100  "Enable adaption even if the ambiguities have not been seen",
101  this->params()),
103  "Maximum angle delta for prototype clustering",
104  this->params()),
106  "Penalty to apply when a non-alnum is vertically out of "
107  "its expected textline position",
108  this->params()),
109  double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params()),
110  double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor",
111  this->params()),
113  "Scale factor for features not used", this->params()),
116  "Prune poor adapted results this much worse than best result",
117  this->params()),
119  "Threshold at which classify_adapted_pruning_factor starts",
120  this->params()),
122  "Threshold for good protos during adaptive 0-255",
123  this->params()),
125  "Threshold for good features during adaptive 0-255",
126  this->params()),
128  "Do not include character fragments in the"
129  " results of the classifier",
130  this->params()),
132  -3.0,
133  "Exclude fragments that do not look like whole"
134  " characters from training and adaption",
135  this->params()),
137  "Bring up graphical debugging windows for fragments training",
138  this->params()),
140  "Use two different windows for debugging the matching: "
141  "One for the protos and one for the features.",
142  this->params()),
143  STRING_MEMBER(classify_learn_debug_str, "", "Class str to debug learning",
144  this->params()),
146  "Class Pruner Threshold 0-255", this->params()),
148  "Class Pruner Multiplier 0-255: ", this->params()),
150  "Class Pruner CutoffStrength: ", this->params()),
152  "Integer Matcher Multiplier 0-255: ", this->params()),
153  EnableLearning(true),
154  INT_MEMBER(il1_adaption_test, 0, "Dont adapt to i/I at beginning of word",
155  this->params()),
157  "Assume the input is numbers [0-9].", this->params()),
158  double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size",
159  this->params()),
161  "Penalty to add to worst rating for noise", this->params()),
163  dict_(this),
164  static_classifier_(NULL) {
165  fontinfo_table_.set_compare_callback(
167  fontinfo_table_.set_clear_callback(
169  fontset_table_.set_compare_callback(
171  fontset_table_.set_clear_callback(
176  AllProtosOn = NULL;
177  AllConfigsOn = NULL;
180  NormProtos = NULL;
181 
182  NumAdaptationsFailed = 0;
183 
184  learn_debug_win_ = NULL;
185  learn_fragmented_word_debug_win_ = NULL;
186  learn_fragments_debug_win_ = NULL;
187 
188  CharNormCutoffs = new uinT16[MAX_NUM_CLASSES];
189  BaselineCutoffs = new uinT16[MAX_NUM_CLASSES];
190 }
bool matcher_debug_separate_windows
Definition: classify.h:458
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:477
#define STRING_MEMBER(name, val, comment, vec)
Definition: params.h:307
int classify_integer_matcher_multiplier
Definition: classify.h:469
bool classify_bln_numeric_mode
Definition: classify.h:500
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
double classify_min_norm_scale_y
Definition: classify.h:399
bool classify_enable_adaptive_matcher
Definition: classify.h:409
double matcher_reliable_adaptive_result
Definition: classify.h:421
double tessedit_class_miss_scale
Definition: classify.h:439
double matcher_good_threshold
Definition: classify.h:420
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
bool prioritize_division
Definition: classify.h:387
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:304
bool classify_save_adapted_templates
Definition: classify.h:413
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:453
double classify_adapted_pruning_factor
Definition: classify.h:441
int matcher_min_examples_for_prototyping
Definition: classify.h:428
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:488
bool CompareFontInfo(const FontInfo &fi1, const FontInfo &fi2)
Definition: fontinfo.cpp:120
bool CompareFontSet(const FontSet &fs1, const FontSet &fs2)
Definition: fontinfo.cpp:128
double speckle_rating_penalty
Definition: classify.h:503
BIT_VECTOR AllProtosOn
Definition: classify.h:480
int classify_learning_debug_level
Definition: classify.h:419
double matcher_perfect_threshold
Definition: classify.h:422
double matcher_rating_margin
Definition: classify.h:424
bool classify_nonlinear_norm
Definition: classify.h:416
double speckle_large_max_size
Definition: classify.h:501
BIT_VECTOR AllConfigsOff
Definition: classify.h:482
ShapeTable * shape_table_
Definition: classify.h:512
double classify_max_norm_scale_x
Definition: classify.h:398
int classify_adapt_proto_threshold
Definition: classify.h:445
int matcher_permanent_classes_min
Definition: classify.h:426
double certainty_scale
Definition: classify.h:437
char * classify_learn_debug_str
Definition: classify.h:459
int classify_class_pruner_multiplier
Definition: classify.h:465
void FontSetDeleteCallback(FontSet fs)
Definition: fontinfo.cpp:146
int classify_class_pruner_threshold
Definition: classify.h:463
bool classify_use_pre_adapted_templates
Definition: classify.h:411
double matcher_avg_noise_size
Definition: classify.h:425
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:301
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
double matcher_bad_match_pad
Definition: classify.h:423
double classify_max_norm_scale_y
Definition: classify.h:400
void FontInfoDeleteCallback(FontInfo f)
Definition: fontinfo.cpp:139
bool classify_debug_character_fragments
Definition: classify.h:455
ParamsVectors * params()
Definition: ccutil.h:65
double classify_max_rating_ratio
Definition: classify.h:402
double classify_char_norm_range
Definition: classify.h:396
double classify_min_norm_scale_x
Definition: classify.h:397
#define FALSE
Definition: capi.h:29
int matcher_sufficient_examples_for_prototyping
Definition: classify.h:430
double classify_adapted_pruning_threshold
Definition: classify.h:443
double classify_max_certainty_margin
Definition: classify.h:404
#define TRUE
Definition: capi.h:28
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:310
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
bool disable_character_fragments
Definition: classify.h:450
bool classify_enable_learning
Definition: classify.h:389
#define NULL
Definition: host.h:144
BIT_VECTOR TempProtoMask
Definition: classify.h:483
bool allow_blob_division
Definition: classify.h:382
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
bool classify_enable_adaptive_debugger
Definition: classify.h:414
double classify_misfit_junk_penalty
Definition: classify.h:435
double matcher_clustering_max_angle_delta
Definition: classify.h:432
int classify_cp_cutoff_strength
Definition: classify.h:467
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
int classify_adapt_feature_threshold
Definition: classify.h:447
unsigned short uinT16
Definition: host.h:101
NORM_PROTOS * NormProtos
Definition: classify.h:486
tesseract::Classify::~Classify ( )
virtual

Definition at line 192 of file classify.cpp.

192  {
194  delete learn_debug_win_;
195  delete learn_fragmented_word_debug_win_;
196  delete learn_fragments_debug_win_;
197  delete[] CharNormCutoffs;
198  delete[] BaselineCutoffs;
199 }
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:456

Member Function Documentation

bool tesseract::Classify::AdaptableWord ( WERD_RES * word)

Return TRUE if the specified word is acceptable for adaptation.

Globals: none

Parameters
word: current word
Returns
TRUE or FALSE
Note
Exceptions: none
History: Thu May 30 14:25:06 1991, DSJ, Created.

Definition at line 850 of file adaptmatch.cpp.

850  {
851  if (word->best_choice == NULL) return false;
852  int BestChoiceLength = word->best_choice->length();
853  float adaptable_score =
855  return // rules that apply in general - simplest to compute first
856  BestChoiceLength > 0 &&
857  BestChoiceLength == word->rebuild_word->NumBlobs() &&
858  BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE &&
859  // This basically ensures that the word is at least a dictionary match
860  // (freq word, user word, system dawg word, etc).
861  // Since all the other adjustments will make adjust factor higher
862  // than higher than adaptable_score=1.1+0.05=1.15
863  // Since these are other flags that ensure that the word is dict word,
864  // this check could be at times redundant.
865  word->best_choice->adjust_factor() <= adaptable_score &&
866  // Make sure that alternative choices are not dictionary words.
867  word->AlternativeChoiceAdjustmentsWorseThan(adaptable_score);
868 }
int length() const
Definition: ratngs.h:300
WERD_CHOICE * best_choice
Definition: pageres.h:219
double segment_penalty_dict_case_ok
Definition: dict.h:574
#define ADAPTABLE_WERD_ADJUSTMENT
Definition: adaptmatch.cpp:73
#define MAX_ADAPTABLE_WERD_SIZE
Definition: adaptmatch.cpp:71
int NumBlobs() const
Definition: blobs.h:425
TWERD * rebuild_word
Definition: pageres.h:244
Dict & getDict()
Definition: classify.h:65
bool AlternativeChoiceAdjustmentsWorseThan(float threshold) const
Definition: pageres.cpp:430
float adjust_factor() const
Definition: ratngs.h:303
#define NULL
Definition: host.h:144
void tesseract::Classify::AdaptiveClassifier ( TBLOB * Blob,
BLOB_CHOICE_LIST *  Choices 
)

This routine calls the adaptive matcher which returns (in an array) the class id of each class matched.

It also returns the number of classes matched. For each class matched it places the best rating found for that class into the Ratings array.

Bad matches are then removed so that they don't need to be sorted. The remaining good matches are then sorted and converted to choices.

This routine also performs some simple speckle filtering.

Note
Exceptions: none
History: Mon Mar 11 10:00:58 1991, DSJ, Created.
Parameters
Blob: blob to be classified
[out] Choices: List of choices found by the adaptive matcher. Filled on return with the choices found by the class pruner and the ratings therefrom. Also contains the detailed results of the integer matcher.

Definition at line 185 of file adaptmatch.cpp.

185  {
186  assert(Choices != NULL);
187  ADAPT_RESULTS *Results = new ADAPT_RESULTS;
188  Results->Initialize();
189 
191 
192  DoAdaptiveMatch(Blob, Results);
193 
194  RemoveBadMatches(Results);
196  RemoveExtraPuncs(Results);
197  Results->ComputeBest();
198  ConvertMatchesToChoices(Blob->denorm(), Blob->bounding_box(), Results,
199  Choices);
200 
201  // TODO(rays) Move to before ConvertMatchesToChoices!
202  if (LargeSpeckle(*Blob) || Choices->length() == 0)
203  AddLargeSpeckleTo(Results->BlobLength, Choices);
204 
205  if (matcher_debug_level >= 1) {
206  tprintf("AD Matches = ");
207  PrintAdaptiveMatchResults(*Results);
208  }
209 
210 #ifndef GRAPHICS_DISABLED
212  DebugAdaptiveClassifier(Blob, Results);
213 #endif
214 
215  delete Results;
216 } /* AdaptiveClassifier */
void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results)
void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices)
Definition: classify.cpp:212
inT32 BlobLength
Definition: adaptmatch.cpp:83
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
#define tprintf(...)
Definition: tprintf.h:31
void DebugAdaptiveClassifier(TBLOB *Blob, ADAPT_RESULTS *Results)
bool LargeSpeckle(const TBLOB &blob)
Definition: classify.cpp:235
#define ASSERT_HOST(x)
Definition: errcode.h:84
void ComputeBest()
Definition: adaptmatch.cpp:99
static int SortDescendingRating(const void *t1, const void *t2)
Definition: shapetable.h:56
void RemoveExtraPuncs(ADAPT_RESULTS *Results)
void RemoveBadMatches(ADAPT_RESULTS *Results)
void ConvertMatchesToChoices(const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
const DENORM & denorm() const
Definition: blobs.h:340
void Initialize()
Definition: adaptmatch.cpp:93
void PrintAdaptiveMatchResults(const ADAPT_RESULTS &results)
#define NULL
Definition: host.h:144
TBOX bounding_box() const
Definition: blobs.cpp:482
bool classify_enable_adaptive_debugger
Definition: classify.h:414
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
bool tesseract::Classify::AdaptiveClassifierIsEmpty ( ) const
inline

Definition at line 285 of file classify.h.

285  {
286  return AdaptedTemplates->NumPermClasses == 0;
287  }
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
bool tesseract::Classify::AdaptiveClassifierIsFull ( ) const
inline

Definition at line 284 of file classify.h.

284 { return NumAdaptationsFailed > 0; }
void tesseract::Classify::AdaptToChar ( TBLOB * Blob,
CLASS_ID  ClassId,
int  FontinfoId,
FLOAT32  Threshold,
ADAPT_TEMPLATES  adaptive_templates 
)
Parameters
Blob: blob to add to templates for ClassId
ClassId: class to add blob to
FontinfoId: font information from pre-trained templates
Threshold: minimum match rating to existing template
adaptive_templates: current set of adapted templates

Globals:

  • AllProtosOn dummy mask to match against all protos
  • AllConfigsOn dummy mask to match against all configs
Returns
none
Note
Exceptions: none
History: Thu Mar 14 09:36:03 1991, DSJ, Created.

Definition at line 886 of file adaptmatch.cpp.

888  {
889  int NumFeatures;
890  INT_FEATURE_ARRAY IntFeatures;
891  UnicharRating int_result;
892  INT_CLASS IClass;
893  ADAPT_CLASS Class;
894  TEMP_CONFIG TempConfig;
895  FEATURE_SET FloatFeatures;
896  int NewTempConfigId;
897 
898  if (!LegalClassId (ClassId))
899  return;
900 
901  int_result.unichar_id = ClassId;
902  Class = adaptive_templates->Class[ClassId];
903  assert(Class != NULL);
904  if (IsEmptyAdaptedClass(Class)) {
905  InitAdaptedClass(Blob, ClassId, FontinfoId, Class, adaptive_templates);
906  } else {
907  IClass = ClassForClassId(adaptive_templates->Templates, ClassId);
908 
909  NumFeatures = GetAdaptiveFeatures(Blob, IntFeatures, &FloatFeatures);
910  if (NumFeatures <= 0)
911  return;
912 
913  // Only match configs with the matching font.
914  BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS);
915  for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) {
916  if (GetFontinfoId(Class, cfg) == FontinfoId) {
917  SET_BIT(MatchingFontConfigs, cfg);
918  } else {
919  reset_bit(MatchingFontConfigs, cfg);
920  }
921  }
922  im_.Match(IClass, AllProtosOn, MatchingFontConfigs,
923  NumFeatures, IntFeatures,
926  FreeBitVector(MatchingFontConfigs);
927 
928  SetAdaptiveThreshold(Threshold);
929 
930  if (1.0f - int_result.rating <= Threshold) {
931  if (ConfigIsPermanent(Class, int_result.config)) {
933  tprintf("Found good match to perm config %d = %4.1f%%.\n",
934  int_result.config, int_result.rating * 100.0);
935  FreeFeatureSet(FloatFeatures);
936  return;
937  }
938 
939  TempConfig = TempConfigFor(Class, int_result.config);
940  IncreaseConfidence(TempConfig);
941  if (TempConfig->NumTimesSeen > Class->MaxNumTimesSeen) {
942  Class->MaxNumTimesSeen = TempConfig->NumTimesSeen;
943  }
945  tprintf("Increasing reliability of temp config %d to %d.\n",
946  int_result.config, TempConfig->NumTimesSeen);
947 
948  if (TempConfigReliable(ClassId, TempConfig)) {
949  MakePermanent(adaptive_templates, ClassId, int_result.config, Blob);
950  UpdateAmbigsGroup(ClassId, Blob);
951  }
952  } else {
954  tprintf("Found poor match to temp config %d = %4.1f%%.\n",
955  int_result.config, int_result.rating * 100.0);
957  DisplayAdaptedChar(Blob, IClass);
958  }
959  NewTempConfigId =
960  MakeNewTemporaryConfig(adaptive_templates, ClassId, FontinfoId,
961  NumFeatures, IntFeatures, FloatFeatures);
962  if (NewTempConfigId >= 0 &&
963  TempConfigReliable(ClassId, TempConfigFor(Class, NewTempConfigId))) {
964  MakePermanent(adaptive_templates, ClassId, NewTempConfigId, Blob);
965  UpdateAmbigsGroup(ClassId, Blob);
966  }
967 
968 #ifndef GRAPHICS_DISABLED
970  DisplayAdaptedChar(Blob, IClass);
971  }
972 #endif
973  }
974  FreeFeatureSet(FloatFeatures);
975  }
976 } /* AdaptToChar */
bool matcher_debug_separate_windows
Definition: classify.h:458
void FreeBitVector(BIT_VECTOR BitVector)
Definition: bitvec.cpp:55
#define reset_bit(array, bit)
Definition: bitvec.h:59
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
#define tprintf(...)
Definition: tprintf.h:31
BIT_VECTOR NewBitVector(int NumBits)
Definition: bitvec.cpp:90
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:90
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:93
BIT_VECTOR AllProtosOn
Definition: classify.h:480
int classify_learning_debug_level
Definition: classify.h:419
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
#define LegalClassId(c)
Definition: intproto.h:179
uinT8 MaxNumTimesSeen
Definition: adaptive.h:66
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:472
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:155
void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob)
void SetAdaptiveThreshold(FLOAT32 Threshold)
#define NO_DEBUG
Definition: adaptmatch.cpp:70
#define SET_BIT(array, bit)
Definition: bitvec.h:57
uinT8 NumTimesSeen
Definition: adaptive.h:41
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId)
Definition: adaptive.cpp:190
void DisplayAdaptedChar(TBLOB *blob, INT_CLASS_STRUCT *int_class)
Definition: adaptmatch.cpp:978
#define ClassForClassId(T, c)
Definition: intproto.h:181
INT_TEMPLATES Templates
Definition: adaptive.h:77
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
int GetAdaptiveFeatures(TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
Definition: adaptmatch.cpp:812
IntegerMatcher im_
Definition: classify.h:503
void InitAdaptedClass(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
Definition: adaptmatch.cpp:717
#define IncreaseConfidence(TempConfig)
Definition: adaptive.h:108
uinT8 NumConfigs
Definition: intproto.h:110
#define NULL
Definition: host.h:144
int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
#define MAX_NUM_PROTOS
Definition: intproto.h:47
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:78
int classify_adapt_feature_threshold
Definition: classify.h:447
void tesseract::Classify::AddLargeSpeckleTo ( int  blob_length,
BLOB_CHOICE_LIST *  choices 
)

Definition at line 212 of file classify.cpp.

212  {
213  BLOB_CHOICE_IT bc_it(choices);
214  // If there is no classifier result, we will use the worst possible certainty
215  // and corresponding rating.
216  float certainty = -getDict().certainty_scale;
217  float rating = rating_scale * blob_length;
218  if (!choices->empty() && blob_length > 0) {
219  bc_it.move_to_last();
220  BLOB_CHOICE* worst_choice = bc_it.data();
221  // Add speckle_rating_penalty to worst rating, matching old value.
222  rating = worst_choice->rating() + speckle_rating_penalty;
223  // Compute the rating to correspond to the certainty. (Used to be kept
224  // the same, but that messes up the language model search.)
225  certainty = -rating * getDict().certainty_scale /
226  (rating_scale * blob_length);
227  }
228  BLOB_CHOICE* blob_choice = new BLOB_CHOICE(UNICHAR_SPACE, rating, certainty,
229  -1, 0.0f, MAX_FLOAT32, 0,
231  bc_it.add_to_end(blob_choice);
232 }
double speckle_rating_penalty
Definition: classify.h:503
float rating() const
Definition: ratngs.h:79
Dict & getDict()
Definition: classify.h:65
#define MAX_FLOAT32
Definition: host.h:124
double certainty_scale
Definition: dict.h:601
void tesseract::Classify::AddNewResult ( const UnicharRating new_result,
ADAPT_RESULTS results 
)

This routine adds the result of a classification into Results. If the new rating is much worse than the current best rating, it is not entered into results because it would end up being stripped later anyway. If the new rating is better than the old rating for the class, it replaces the old rating. If this is the first rating for the class, the class is added to the list of matched classes in Results. If the new rating is better than the best so far, it becomes the best so far.

Globals:

Parameters
new_result: new result to add
[out] results: results to add new result to
Note
Exceptions: none
History: Tue Mar 12 18:19:29 1991, DSJ, Created.

Definition at line 1029 of file adaptmatch.cpp.

1030  {
1031  int old_match = FindScoredUnichar(new_result.unichar_id, *results);
1032 
1033  if (new_result.rating + matcher_bad_match_pad < results->best_rating ||
1034  (old_match < results->match.size() &&
1035  new_result.rating <= results->match[old_match].rating))
1036  return; // New one not good enough.
1037 
1038  if (!unicharset.get_fragment(new_result.unichar_id))
1039  results->HasNonfragment = true;
1040 
1041  if (old_match < results->match.size()) {
1042  results->match[old_match].rating = new_result.rating;
1043  } else {
1044  results->match.push_back(new_result);
1045  }
1046 
1047  if (new_result.rating > results->best_rating &&
1048  // Ensure that fragments do not affect best rating, class and config.
1049  // This is needed so that at least one non-fragmented character is
1050  // always present in the results.
1051  // TODO(daria): verify that this helps accuracy and does not
1052  // hurt performance.
1053  !unicharset.get_fragment(new_result.unichar_id)) {
1054  results->best_match_index = old_match;
1055  results->best_rating = new_result.rating;
1056  results->best_unichar_id = new_result.unichar_id;
1057  }
1058 } /* AddNewResult */
int push_back(T object)
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
UNICHARSET unicharset
Definition: ccutil.h:72
UNICHAR_ID best_unichar_id
Definition: adaptmatch.cpp:85
int best_match_index
Definition: adaptmatch.cpp:86
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:682
bool HasNonfragment
Definition: adaptmatch.cpp:84
FLOAT32 best_rating
Definition: adaptmatch.cpp:87
void tesseract::Classify::AmbigClassifier ( const GenericVector< INT_FEATURE_STRUCT > &  int_features,
const INT_FX_RESULT_STRUCT fx_info,
const TBLOB blob,
INT_TEMPLATES  templates,
ADAPT_CLASS classes,
UNICHAR_ID ambiguities,
ADAPT_RESULTS results 
)

This routine is identical to CharNormClassifier() except that it does no class pruning. It simply matches the unknown blob against the classes listed in Ambiguities.

Globals:

Parameters
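blob: blob to be classified
templates: built-in templates to classify against
classes: adapted class templates
ambiguities: array of unichar id's to match against
[out] results: place to put match results
int_features: integer features that are matched against each ambiguity class
fx_info: feature extraction info passed to GetCharNormFeature()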
Note
Exceptions: none
History: Tue Mar 12 19:40:36 1991, DSJ, Created.

Definition at line 1083 of file adaptmatch.cpp.

1090  {
1091  if (int_features.empty()) return;
1092  uinT8* CharNormArray = new uinT8[unicharset.size()];
1093  UnicharRating int_result;
1094 
1095  results->BlobLength = GetCharNormFeature(fx_info, templates, NULL,
1096  CharNormArray);
1097  bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1098  if (debug)
1099  tprintf("AM Matches = ");
1100 
1101  int top = blob->bounding_box().top();
1102  int bottom = blob->bounding_box().bottom();
1103  while (*ambiguities >= 0) {
1104  CLASS_ID class_id = *ambiguities;
1105 
1106  int_result.unichar_id = class_id;
1107  im_.Match(ClassForClassId(templates, class_id),
1109  int_features.size(), &int_features[0],
1110  &int_result,
1113 
1114  ExpandShapesAndApplyCorrections(NULL, debug, class_id, bottom, top, 0,
1115  results->BlobLength,
1117  CharNormArray, &int_result, results);
1118  ambiguities++;
1119  }
1120  delete [] CharNormArray;
1121 } /* AmbigClassifier */
bool matcher_debug_separate_windows
Definition: classify.h:458
int size() const
Definition: genericvector.h:72
int classify_integer_matcher_multiplier
Definition: classify.h:469
inT32 BlobLength
Definition: adaptmatch.cpp:83
#define tprintf(...)
Definition: tprintf.h:31
UNICHARSET unicharset
Definition: ccutil.h:72
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
BIT_VECTOR AllProtosOn
Definition: classify.h:480
int GetCharNormFeature(const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uinT8 *pruner_norm_array, uinT8 *char_norm_array)
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:472
#define NO_DEBUG
Definition: adaptmatch.cpp:70
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uinT8 *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
inT16 bottom() const
Definition: rect.h:61
#define ClassForClassId(T, c)
Definition: intproto.h:181
bool empty() const
Definition: genericvector.h:84
IntegerMatcher im_
Definition: classify.h:503
#define NULL
Definition: host.h:144
TBOX bounding_box() const
Definition: blobs.cpp:482
int size() const
Definition: unicharset.h:297
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
inT16 top() const
Definition: rect.h:54
int classify_adapt_feature_threshold
Definition: classify.h:447
unsigned char uinT8
Definition: host.h:99
UNICHAR_ID * tesseract::Classify::BaselineClassifier ( TBLOB Blob,
const GenericVector< INT_FEATURE_STRUCT > &  int_features,
const INT_FX_RESULT_STRUCT fx_info,
ADAPT_TEMPLATES  Templates,
ADAPT_RESULTS Results 
)

This routine extracts baseline normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.

Globals:

  • BaselineCutoffs: expected num features for each class
Parameters
Blob: blob to be classified
Templates: current set of adapted templates
Results: place to put match results
int_features: integer features passed to the class pruner and the master matcher
fx_info
Returns
Array of possible ambiguous chars that should be checked.
Note
Exceptions: none
History: Tue Mar 12 19:38:03 1991, DSJ, Created.

Definition at line 1305 of file adaptmatch.cpp.

1308  {
1309  if (int_features.empty()) return NULL;
1310  uinT8* CharNormArray = new uinT8[unicharset.size()];
1311  ClearCharNormArray(CharNormArray);
1312 
1314  PruneClasses(Templates->Templates, int_features.size(), -1, &int_features[0],
1315  CharNormArray, BaselineCutoffs, &Results->CPResults);
1316 
1317  if (matcher_debug_level >= 2 || classify_debug_level > 1)
1318  tprintf("BL Matches = ");
1319 
1320  MasterMatcher(Templates->Templates, int_features.size(), &int_features[0],
1321  CharNormArray,
1322  Templates->Class, matcher_debug_flags, 0,
1323  Blob->bounding_box(), Results->CPResults, Results);
1324 
1325  delete [] CharNormArray;
1326  CLASS_ID ClassId = Results->best_unichar_id;
1327  if (ClassId == INVALID_UNICHAR_ID || Results->best_match_index < 0)
1328  return NULL;
1329 
1330  return Templates->Class[ClassId]->
1331  Config[Results->match[Results->best_match_index].config].Perm->Ambigs;
1332 } /* BaselineClassifier */
void ClearCharNormArray(uinT8 *char_norm_array)
Definition: float2int.cpp:48
int size() const
Definition: genericvector.h:72
inT32 BlobLength
Definition: adaptmatch.cpp:83
GenericVector< CP_RESULT_STRUCT > CPResults
Definition: adaptmatch.cpp:89
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
#define tprintf(...)
Definition: tprintf.h:31
UNICHARSET unicharset
Definition: ccutil.h:72
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
UNICHAR_ID best_unichar_id
Definition: adaptmatch.cpp:85
int best_match_index
Definition: adaptmatch.cpp:86
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
INT_TEMPLATES Templates
Definition: adaptive.h:77
bool empty() const
Definition: genericvector.h:84
CLUSTERCONFIG Config
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
Definition: intmatcher.cpp:409
int IntCastRounded(double x)
Definition: helpers.h:172
void MasterMatcher(INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
#define NULL
Definition: host.h:144
TBOX bounding_box() const
Definition: blobs.cpp:482
int size() const
Definition: unicharset.h:297
const double kStandardFeatureLength
Definition: intfx.h:46
unsigned char uinT8
Definition: host.h:99
int tesseract::Classify::CharNormClassifier ( TBLOB blob,
const TrainingSample sample,
ADAPT_RESULTS adapt_results 
)

This routine extracts character normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.

Parameters
blob: blob to be classified
sample: sample of the unknown character, passed to the static classifier
adapt_results: place to put match results

Globals:

  • CharNormCutoffs: expected num features for each class
  • AllProtosOn: mask that enables all protos
  • AllConfigsOn: mask that enables all configs
Note
Exceptions: none
History: Tue Mar 12 16:02:52 1991, DSJ, Created.

Definition at line 1354 of file adaptmatch.cpp.

1356  {
1357  // This is the length that is used for scaling ratings vs certainty.
1358  adapt_results->BlobLength =
1359  IntCastRounded(sample.outline_length() / kStandardFeatureLength);
1360  GenericVector<UnicharRating> unichar_results;
1361  static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0,
1362  -1, &unichar_results);
1363  // Convert results to the format used internally by AdaptiveClassifier.
1364  for (int r = 0; r < unichar_results.size(); ++r) {
1365  AddNewResult(unichar_results[r], adapt_results);
1366  }
1367  return sample.num_features();
1368 } /* CharNormClassifier */
virtual int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this, GenericVector< UnicharRating > *results)
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
inT32 BlobLength
Definition: adaptmatch.cpp:83
Pix * pix() const
Definition: normalis.h:248
const DENORM & denorm() const
Definition: blobs.h:340
Definition: cluster.h:32
int IntCastRounded(double x)
Definition: helpers.h:172
const double kStandardFeatureLength
Definition: intfx.h:46
int tesseract::Classify::CharNormTrainingSample ( bool  pruner_only,
int  keep_this,
const TrainingSample sample,
GenericVector< UnicharRating > *  results 
)

Definition at line 1372 of file adaptmatch.cpp.

1375  {
1376  results->clear();
1377  ADAPT_RESULTS* adapt_results = new ADAPT_RESULTS();
1378  adapt_results->Initialize();
1379  // Compute the bounding box of the features.
1380  int num_features = sample.num_features();
1381  // Only the top and bottom of the blob_box are used by MasterMatcher, so
1382  // fabricate right and left using top and bottom.
1383  TBOX blob_box(sample.geo_feature(GeoBottom), sample.geo_feature(GeoBottom),
1384  sample.geo_feature(GeoTop), sample.geo_feature(GeoTop));
1385  // Compute the char_norm_array from the saved cn_feature.
1386  FEATURE norm_feature = sample.GetCNFeature();
1387  uinT8* char_norm_array = new uinT8[unicharset.size()];
1388  int num_pruner_classes = MAX(unicharset.size(),
1390  uinT8* pruner_norm_array = new uinT8[num_pruner_classes];
1391  adapt_results->BlobLength =
1392  static_cast<int>(ActualOutlineLength(norm_feature) * 20 + 0.5);
1393  ComputeCharNormArrays(norm_feature, PreTrainedTemplates, char_norm_array,
1394  pruner_norm_array);
1395 
1396  PruneClasses(PreTrainedTemplates, num_features, keep_this, sample.features(),
1397  pruner_norm_array,
1398  shape_table_ != NULL ? &shapetable_cutoffs_[0] : CharNormCutoffs,
1399  &adapt_results->CPResults);
1400  delete [] pruner_norm_array;
1401  if (keep_this >= 0) {
1402  adapt_results->CPResults[0].Class = keep_this;
1403  adapt_results->CPResults.truncate(1);
1404  }
1405  if (pruner_only) {
1406  // Convert pruner results to output format.
1407  for (int i = 0; i < adapt_results->CPResults.size(); ++i) {
1408  int class_id = adapt_results->CPResults[i].Class;
1409  results->push_back(
1410  UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating));
1411  }
1412  } else {
1413  MasterMatcher(PreTrainedTemplates, num_features, sample.features(),
1414  char_norm_array,
1417  blob_box, adapt_results->CPResults, adapt_results);
1418  // Convert master matcher results to output format.
1419  for (int i = 0; i < adapt_results->match.size(); i++) {
1420  results->push_back(adapt_results->match[i]);
1421  }
1423  }
1424  delete [] char_norm_array;
1425  delete adapt_results;
1426  return num_features;
1427 } /* CharNormTrainingSample */
int size() const
Definition: genericvector.h:72
void truncate(int size)
int classify_integer_matcher_multiplier
Definition: classify.h:469
#define MAX(x, y)
Definition: ndminx.h:24
int push_back(T object)
inT32 BlobLength
Definition: adaptmatch.cpp:83
GenericVector< CP_RESULT_STRUCT > CPResults
Definition: adaptmatch.cpp:89
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
UNICHARSET unicharset
Definition: ccutil.h:72
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
FLOAT32 ActualOutlineLength(FEATURE Feature)
Definition: normfeat.cpp:32
ShapeTable * shape_table_
Definition: classify.h:512
static int SortDescendingRating(const void *t1, const void *t2)
Definition: shapetable.h:56
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
Definition: intmatcher.cpp:409
void Initialize()
Definition: adaptmatch.cpp:93
Definition: cluster.h:32
Definition: rect.h:30
void MasterMatcher(INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
#define NULL
Definition: host.h:144
int size() const
Definition: unicharset.h:297
unsigned char uinT8
Definition: host.h:99
int tesseract::Classify::ClassAndConfigIDToFontOrShapeID ( int  class_id,
int  int_result_config 
) const

Definition at line 2283 of file adaptmatch.cpp.

2284  {
2285  int font_set_id = PreTrainedTemplates->Class[class_id]->font_set_id;
2286  // Older inttemps have no font_ids.
2287  if (font_set_id < 0)
2288  return kBlankFontinfoId;
2289  const FontSet &fs = fontset_table_.get(font_set_id);
2290  ASSERT_HOST(int_result_config >= 0 && int_result_config < fs.size);
2291  return fs.configs[int_result_config];
2292 }
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
#define ASSERT_HOST(x)
Definition: errcode.h:84
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
STRING tesseract::Classify::ClassIDToDebugStr ( const INT_TEMPLATES_STRUCT templates,
int  class_id,
int  config_id 
) const

Definition at line 2270 of file adaptmatch.cpp.

2271  {
2272  STRING class_string;
2273  if (templates == PreTrainedTemplates && shape_table_ != NULL) {
2274  int shape_id = ClassAndConfigIDToFontOrShapeID(class_id, config_id);
2275  class_string = shape_table_->DebugStr(shape_id);
2276  } else {
2277  class_string = unicharset.debug_str(class_id);
2278  }
2279  return class_string;
2280 }
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:318
UNICHARSET unicharset
Definition: ccutil.h:72
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
ShapeTable * shape_table_
Definition: classify.h:512
STRING DebugStr(int shape_id) const
Definition: shapetable.cpp:291
Definition: strngs.h:44
#define NULL
Definition: host.h:144
void tesseract::Classify::ClassifyAsNoise ( ADAPT_RESULTS results)

This routine computes a rating which reflects the likelihood that the blob being classified is a noise blob. NOTE: assumes that the blob length has already been computed and placed into Results.

Parameters
results: results to add noise classification to

Globals:

  • matcher_avg_noise_size: avg. length of a noise blob
Note
Exceptions: none
History: Tue Mar 12 18:36:52 1991, DSJ, Created.

Definition at line 1445 of file adaptmatch.cpp.

1445  {
1446  float rating = results->BlobLength / matcher_avg_noise_size;
1447  rating *= rating;
1448  rating /= 1.0 + rating;
1449 
1450  AddNewResult(UnicharRating(UNICHAR_SPACE, 1.0f - rating), results);
1451 } /* ClassifyAsNoise */
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
inT32 BlobLength
Definition: adaptmatch.cpp:83
double matcher_avg_noise_size
Definition: classify.h:425
void tesseract::Classify::ClearCharNormArray ( uinT8 char_norm_array)

For each class in the unicharset, clears the corresponding entry in char_norm_array. char_norm_array is indexed by unichar_id.

Globals:

  • none
Parameters
char_norm_array: array to be cleared
Note
Exceptions: none
History: Wed Feb 20 11:20:54 1991, DSJ, Created.

Definition at line 48 of file float2int.cpp.

48  {
49  memset(char_norm_array, 0, sizeof(*char_norm_array) * unicharset.size());
50 } /* ClearCharNormArray */
UNICHARSET unicharset
Definition: ccutil.h:72
int size() const
Definition: unicharset.h:297
void tesseract::Classify::ComputeCharNormArrays ( FEATURE_STRUCT norm_feature,
INT_TEMPLATES_STRUCT templates,
uinT8 char_norm_array,
uinT8 pruner_array 
)

Definition at line 1747 of file adaptmatch.cpp.

1750  {
1751  ComputeIntCharNormArray(*norm_feature, char_norm_array);
1752  if (pruner_array != NULL) {
1753  if (shape_table_ == NULL) {
1754  ComputeIntCharNormArray(*norm_feature, pruner_array);
1755  } else {
1756  memset(pruner_array, MAX_UINT8,
1757  templates->NumClasses * sizeof(pruner_array[0]));
1758  // Each entry in the pruner norm array is the MIN of all the entries of
1759  // the corresponding unichars in the CharNormArray.
1760  for (int id = 0; id < templates->NumClasses; ++id) {
1761  int font_set_id = templates->Class[id]->font_set_id;
1762  const FontSet &fs = fontset_table_.get(font_set_id);
1763  for (int config = 0; config < fs.size; ++config) {
1764  const Shape& shape = shape_table_->GetShape(fs.configs[config]);
1765  for (int c = 0; c < shape.size(); ++c) {
1766  if (char_norm_array[shape[c].unichar_id] < pruner_array[id])
1767  pruner_array[id] = char_norm_array[shape[c].unichar_id];
1768  }
1769  }
1770  }
1771  }
1772  }
1773  FreeFeature(norm_feature);
1774 }
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
void ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, uinT8 *char_norm_array)
Definition: float2int.cpp:69
ShapeTable * shape_table_
Definition: classify.h:512
#define MAX_UINT8
Definition: host.h:121
void FreeFeature(FEATURE Feature)
Definition: ocrfeatures.cpp:60
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
#define NULL
Definition: host.h:144
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:323
double tesseract::Classify::ComputeCorrectedRating ( bool  debug,
int  unichar_id,
double  cp_rating,
double  im_rating,
int  feature_misses,
int  bottom,
int  top,
int  blob_length,
int  matcher_multiplier,
const uinT8 cn_factors 
)

Definition at line 1240 of file adaptmatch.cpp.

1245  {
1246  // Compute class feature corrections.
1247  double cn_corrected = im_.ApplyCNCorrection(1.0 - im_rating, blob_length,
1248  cn_factors[unichar_id],
1249  matcher_multiplier);
1250  double miss_penalty = tessedit_class_miss_scale * feature_misses;
1251  double vertical_penalty = 0.0;
1252  // Penalize non-alnums for being vertical misfits.
1253  if (!unicharset.get_isalpha(unichar_id) &&
1254  !unicharset.get_isdigit(unichar_id) &&
1255  cn_factors[unichar_id] != 0 && classify_misfit_junk_penalty > 0.0) {
1256  int min_bottom, max_bottom, min_top, max_top;
1257  unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom,
1258  &min_top, &max_top);
1259  if (debug) {
1260  tprintf("top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n",
1261  top, min_top, max_top, bottom, min_bottom, max_bottom);
1262  }
1263  if (top < min_top || top > max_top ||
1264  bottom < min_bottom || bottom > max_bottom) {
1265  vertical_penalty = classify_misfit_junk_penalty;
1266  }
1267  }
1268  double result = 1.0 - (cn_corrected + miss_penalty + vertical_penalty);
1269  if (result < WORST_POSSIBLE_RATING)
1270  result = WORST_POSSIBLE_RATING;
1271  if (debug) {
1272  tprintf("%s: %2.1f%%(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n",
1273  unicharset.id_to_unichar(unichar_id),
1274  result * 100.0,
1275  cp_rating * 100.0,
1276  (1.0 - im_rating) * 100.0,
1277  (cn_corrected - (1.0 - im_rating)) * 100.0,
1278  cn_factors[unichar_id],
1279  miss_penalty * 100.0,
1280  vertical_penalty * 100.0);
1281  }
1282  return result;
1283 }
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
#define tprintf(...)
Definition: tprintf.h:31
double tessedit_class_miss_scale
Definition: classify.h:439
UNICHARSET unicharset
Definition: ccutil.h:72
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:470
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
Definition: unicharset.h:526
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:449
IntegerMatcher im_
Definition: classify.h:503
#define WORST_POSSIBLE_RATING
Definition: adaptmatch.cpp:77
double classify_misfit_junk_penalty
Definition: classify.h:435
void tesseract::Classify::ComputeIntCharNormArray ( const FEATURE_STRUCT norm_feature,
uinT8 char_norm_array 
)

For each class in unicharset, computes the match between norm_feature and the normalization protos for that class. Converts this number to the range from 0 - 255 and stores it into char_norm_array. CharNormArray is indexed by unichar_id.

Globals:

  • PreTrainedTemplates: current set of built-in templates
Parameters
norm_feature: character normalization feature
[out] char_norm_array: place to put results of size unicharset.size()
Note
Exceptions: none
History: Wed Feb 20 11:20:54 1991, DSJ, Created.

Definition at line 69 of file float2int.cpp.

70  {
71  for (int i = 0; i < unicharset.size(); i++) {
72  if (i < PreTrainedTemplates->NumClasses) {
73  int norm_adjust = static_cast<int>(INT_CHAR_NORM_RANGE *
74  ComputeNormMatch(i, norm_feature, FALSE));
75  char_norm_array[i] = ClipToRange(norm_adjust, 0, MAX_INT_CHAR_NORM);
76  } else {
77  // Classes with no templates (eg. ambigs & ligatures) default
78  // to worst match.
79  char_norm_array[i] = MAX_INT_CHAR_NORM;
80  }
81  }
82 } /* ComputeIntCharNormArray */
#define INT_CHAR_NORM_RANGE
Definition: intproto.h:133
UNICHARSET unicharset
Definition: ccutil.h:72
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:115
#define MAX_INT_CHAR_NORM
Definition: float2int.cpp:28
FLOAT32 ComputeNormMatch(CLASS_ID ClassId, const FEATURE_STRUCT &feature, BOOL8 DebugMatch)
Definition: normmatch.cpp:88
#define FALSE
Definition: capi.h:29
int size() const
Definition: unicharset.h:297
void tesseract::Classify::ComputeIntFeatures ( FEATURE_SET  Features,
INT_FEATURE_ARRAY  IntFeatures 
)

This routine converts each floating point pico-feature in Features into integer format and saves it into IntFeatures.

Globals:

  • none
Parameters
Features: floating point pico-features to be converted
[out] IntFeatures: array to put converted features into
Note
Exceptions: none
History: Wed Feb 20 10:58:45 1991, DSJ, Created.

Definition at line 100 of file float2int.cpp.

101  {
102  int Fid;
103  FEATURE Feature;
104  FLOAT32 YShift;
105 
107  YShift = BASELINE_Y_SHIFT;
108  else
109  YShift = Y_SHIFT;
110 
111  for (Fid = 0; Fid < Features->NumFeatures; Fid++) {
112  Feature = Features->Features[Fid];
113 
114  IntFeatures[Fid].X =
116  IntFeatures[Fid].Y =
117  Bucket8For(Feature->Params[PicoFeatY], YShift, INT_FEAT_RANGE);
118  IntFeatures[Fid].Theta = CircBucketFor(Feature->Params[PicoFeatDir],
120  IntFeatures[Fid].CP_misses = 0;
121  }
122 } /* ComputeIntFeatures */
#define X_SHIFT
Definition: intproto.h:40
#define ANGLE_SHIFT
Definition: intproto.h:39
float FLOAT32
Definition: host.h:111
uinT8 Bucket8For(FLOAT32 param, FLOAT32 offset, int num_buckets)
Definition: intproto.cpp:441
#define Y_SHIFT
Definition: intproto.h:41
FEATURE Features[1]
Definition: ocrfeatures.h:72
uinT8 CircBucketFor(FLOAT32 param, FLOAT32 offset, int num_buckets)
Definition: intproto.cpp:455
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
#define INT_FEAT_RANGE
Definition: float2int.h:27
#define BASELINE_Y_SHIFT
Definition: float2int.h:28
FLOAT32 tesseract::Classify::ComputeNormMatch ( CLASS_ID  ClassId,
const FEATURE_STRUCT feature,
BOOL8  DebugMatch 
)

This routine compares feature against each character normalization proto for ClassId and returns the match rating of the best match.

Parameters
ClassId: id of class to match against
feature: character normalization feature
DebugMatch: controls dump of debug info

Globals: NormProtos - character normalization prototypes

Returns
Best match rating for Feature against protos of ClassId.
Note
Exceptions: none
History: Wed Dec 19 16:56:12 1990, DSJ, Created.

Definition at line 88 of file normmatch.cpp.

90  {
91  LIST Protos;
92  FLOAT32 BestMatch;
93  FLOAT32 Match;
94  FLOAT32 Delta;
95  PROTOTYPE *Proto;
96  int ProtoId;
97 
98  if (ClassId >= NormProtos->NumProtos) {
99  ClassId = NO_CLASS;
100  }
101 
102  /* handle requests for classification as noise */
103  if (ClassId == NO_CLASS) {
104  /* kludge - clean up constants and make into control knobs later */
105  Match = (feature.Params[CharNormLength] *
106  feature.Params[CharNormLength] * 500.0 +
107  feature.Params[CharNormRx] *
108  feature.Params[CharNormRx] * 8000.0 +
109  feature.Params[CharNormRy] *
110  feature.Params[CharNormRy] * 8000.0);
111  return (1.0 - NormEvidenceOf (Match));
112  }
113 
114  BestMatch = MAX_FLOAT32;
115  Protos = NormProtos->Protos[ClassId];
116 
117  if (DebugMatch) {
118  tprintf("\nChar norm for class %s\n", unicharset.id_to_unichar(ClassId));
119  }
120 
121  ProtoId = 0;
122  iterate(Protos) {
123  Proto = (PROTOTYPE *) first_node (Protos);
124  Delta = feature.Params[CharNormY] - Proto->Mean[CharNormY];
125  Match = Delta * Delta * Proto->Weight.Elliptical[CharNormY];
126  if (DebugMatch) {
127  tprintf("YMiddle: Proto=%g, Delta=%g, Var=%g, Dist=%g\n",
128  Proto->Mean[CharNormY], Delta,
129  Proto->Weight.Elliptical[CharNormY], Match);
130  }
131  Delta = feature.Params[CharNormRx] - Proto->Mean[CharNormRx];
132  Match += Delta * Delta * Proto->Weight.Elliptical[CharNormRx];
133  if (DebugMatch) {
134  tprintf("Height: Proto=%g, Delta=%g, Var=%g, Dist=%g\n",
135  Proto->Mean[CharNormRx], Delta,
136  Proto->Weight.Elliptical[CharNormRx], Match);
137  }
138  // Ry is width! See intfx.cpp.
139  Delta = feature.Params[CharNormRy] - Proto->Mean[CharNormRy];
140  if (DebugMatch) {
141  tprintf("Width: Proto=%g, Delta=%g, Var=%g\n",
142  Proto->Mean[CharNormRy], Delta,
143  Proto->Weight.Elliptical[CharNormRy]);
144  }
145  Delta = Delta * Delta * Proto->Weight.Elliptical[CharNormRy];
146  Delta *= kWidthErrorWeighting;
147  Match += Delta;
148  if (DebugMatch) {
149  tprintf("Total Dist=%g, scaled=%g, sigmoid=%g, penalty=%g\n",
150  Match, Match / classify_norm_adj_midpoint,
151  NormEvidenceOf(Match), 256 * (1 - NormEvidenceOf(Match)));
152  }
153 
154  if (Match < BestMatch)
155  BestMatch = Match;
156 
157  ProtoId++;
158  }
159  return 1.0 - NormEvidenceOf(BestMatch);
160 } /* ComputeNormMatch */
float FLOAT32
Definition: host.h:111
#define tprintf(...)
Definition: tprintf.h:31
UNICHARSET unicharset
Definition: ccutil.h:72
double classify_norm_adj_midpoint
Definition: normmatch.cpp:63
FLOAT32 * Mean
Definition: cluster.h:78
LIST * Protos
Definition: normmatch.cpp:42
FLOATUNION Weight
Definition: cluster.h:83
#define NO_CLASS
Definition: matchdefs.h:36
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
FLOAT32 * Elliptical
Definition: cluster.h:64
#define first_node(l)
Definition: oldlist.h:139
#define iterate(l)
Definition: oldlist.h:159
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
double NormEvidenceOf(register double NormAdj)
Definition: normmatch.cpp:184
#define MAX_FLOAT32
Definition: host.h:124
const double kWidthErrorWeighting
Definition: normmatch.cpp:66
NORM_PROTOS * NormProtos
Definition: classify.h:486
void tesseract::Classify::ConvertMatchesToChoices ( const DENORM denorm,
const TBOX box,
ADAPT_RESULTS Results,
BLOB_CHOICE_LIST *  Choices 
)

The function converts the given match ratings to the list of blob choices with ratings and certainties (used by the context checkers). If character fragments are present in the results, this function also makes sure that there is at least one non-fragmented classification included. For each classification result check the unicharset for "definite" ambiguities and modify the resulting Choices accordingly.

Definition at line 1459 of file adaptmatch.cpp.

1461  {
1462  assert(Choices != NULL);
1463  FLOAT32 Rating;
1464  FLOAT32 Certainty;
1465  BLOB_CHOICE_IT temp_it;
1466  bool contains_nonfrag = false;
1467  temp_it.set_to_list(Choices);
1468  int choices_length = 0;
1469  // With no shape_table_ maintain the previous MAX_MATCHES as the maximum
1470  // number of returned results, but with a shape_table_ we want to have room
1471  // for at least the biggest shape (which might contain hundreds of Indic
1472  // grapheme fragments) and more, so use double the size of the biggest shape
1473  // if that is more than the default.
1474  int max_matches = MAX_MATCHES;
1475  if (shape_table_ != NULL) {
1476  max_matches = shape_table_->MaxNumUnichars() * 2;
1477  if (max_matches < MAX_MATCHES)
1478  max_matches = MAX_MATCHES;
1479  }
1480 
1481  float best_certainty = -MAX_FLOAT32;
1482  for (int i = 0; i < Results->match.size(); i++) {
1483  const UnicharRating& result = Results->match[i];
1484  bool adapted = result.adapted;
1485  bool current_is_frag = (unicharset.get_fragment(result.unichar_id) != NULL);
1486  if (temp_it.length()+1 == max_matches &&
1487  !contains_nonfrag && current_is_frag) {
1488  continue; // look for a non-fragmented character to fill the
1489  // last spot in Choices if only fragments are present
1490  }
1491  // BlobLength can never be legally 0, this means recognition failed.
1492  // But we must return a classification result because some invoking
1493  // functions (chopper/permuter) do not anticipate a null blob choice.
1494  // So we need to assign a poor, but not infinitely bad score.
1495  if (Results->BlobLength == 0) {
1496  Certainty = -20;
1497  Rating = 100; // should be -certainty * real_blob_length
1498  } else {
1499  Rating = Certainty = (1.0f - result.rating);
1500  Rating *= rating_scale * Results->BlobLength;
1501  Certainty *= -(getDict().certainty_scale);
1502  }
1503  // Adapted results, by their very nature, should have good certainty.
1504  // Those that don't are at best misleading, and often lead to errors,
1505  // so don't accept adapted results that are too far behind the best result,
1506  // whether adapted or static.
1507  // TODO(rays) find some way of automatically tuning these constants.
1508  if (Certainty > best_certainty) {
1509  best_certainty = MIN(Certainty, classify_adapted_pruning_threshold);
1510  } else if (adapted &&
1511  Certainty / classify_adapted_pruning_factor < best_certainty) {
1512  continue; // Don't accept bad adapted results.
1513  }
1514 
1515  float min_xheight, max_xheight, yshift;
1516  denorm.XHeightRange(result.unichar_id, unicharset, box,
1517  &min_xheight, &max_xheight, &yshift);
1518  BLOB_CHOICE* choice =
1519  new BLOB_CHOICE(result.unichar_id, Rating, Certainty,
1521  min_xheight, max_xheight, yshift,
1522  adapted ? BCC_ADAPTED_CLASSIFIER
1524  choice->set_fonts(result.fonts);
1525  temp_it.add_to_end(choice);
1526  contains_nonfrag |= !current_is_frag; // update contains_nonfrag
1527  choices_length++;
1528  if (choices_length >= max_matches) break;
1529  }
1530  Results->match.truncate(choices_length);
1531 } // ConvertMatchesToChoices
int size() const
Definition: genericvector.h:72
void truncate(int size)
float FLOAT32
Definition: host.h:111
inT32 BlobLength
Definition: adaptmatch.cpp:83
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
#define MIN(x, y)
Definition: ndminx.h:28
UNICHARSET unicharset
Definition: ccutil.h:72
double classify_adapted_pruning_factor
Definition: classify.h:441
int MaxNumUnichars() const
Definition: shapetable.cpp:465
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:682
ShapeTable * shape_table_
Definition: classify.h:512
GenericVector< ScoredFont > fonts
Definition: shapetable.h:88
int get_script(UNICHAR_ID unichar_id) const
Definition: unicharset.h:611
Dict & getDict()
Definition: classify.h:65
double classify_adapted_pruning_threshold
Definition: classify.h:443
void set_fonts(const GenericVector< tesseract::ScoredFont > &fonts)
Definition: ratngs.h:94
#define MAX_FLOAT32
Definition: host.h:124
#define MAX_MATCHES
Definition: adaptmatch.cpp:68
void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht, float *max_xht, float *yshift) const
Definition: normalis.cpp:428
#define NULL
Definition: host.h:144
double certainty_scale
Definition: dict.h:601
void tesseract::Classify::ConvertProto ( PROTO  Proto,
int  ProtoId,
INT_CLASS  Class 
)

This routine converts Proto to integer format and installs it as ProtoId in Class.

Parameters
Proto: floating-point proto to be converted to integer format
ProtoId: id of proto
Class: integer class to add converted proto to
Returns
none
Note
Globals: none
Exceptions: none
History: Fri Feb 8 11:22:43 1991, DSJ, Created.

Definition at line 522 of file intproto.cpp.

522  {
523  INT_PROTO P;
524  FLOAT32 Param;
525 
526  assert(ProtoId < Class->NumProtos);
527 
528  P = ProtoForProtoId(Class, ProtoId);
529 
530  Param = Proto->A * 128;
531  P->A = TruncateParam(Param, -128, 127, NULL);
532 
533  Param = -Proto->B * 256;
534  P->B = TruncateParam(Param, 0, 255, NULL);
535 
536  Param = Proto->C * 128;
537  P->C = TruncateParam(Param, -128, 127, NULL);
538 
539  Param = Proto->Angle * 256;
540  if (Param < 0 || Param >= 256)
541  P->Angle = 0;
542  else
543  P->Angle = (uinT8) Param;
544 
545  /* round proto length to nearest integer number of pico-features */
546  Param = (Proto->Length / GetPicoFeatureLength()) + 0.5;
547  Class->ProtoLengths[ProtoId] = TruncateParam(Param, 1, 255, NULL);
549  cprintf("Converted ffeat to (A=%d,B=%d,C=%d,L=%d)",
550  P->A, P->B, P->C, Class->ProtoLengths[ProtoId]);
551 } /* ConvertProto */
float FLOAT32
Definition: host.h:111
int classify_learning_debug_level
Definition: classify.h:419
#define ProtoForProtoId(C, P)
Definition: intproto.h:171
FLOAT32 Angle
Definition: protos.h:49
#define GetPicoFeatureLength()
Definition: picofeat.h:59
FLOAT32 B
Definition: protos.h:45
int TruncateParam(FLOAT32 Param, int Min, int Max, char *Id)
Definition: intproto.cpp:1874
FLOAT32 Length
Definition: protos.h:50
FLOAT32 C
Definition: protos.h:46
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
#define NULL
Definition: host.h:144
uinT8 * ProtoLengths
Definition: intproto.h:112
FLOAT32 A
Definition: protos.h:44
unsigned char uinT8
Definition: host.h:99
INT_TEMPLATES tesseract::Classify::CreateIntTemplates ( CLASSES  FloatProtos,
const UNICHARSET target_unicharset 
)

This routine converts from the old floating point format to the new integer format.

Parameters
FloatProtos: prototypes in old floating-point format
target_unicharset: the UNICHARSET to use
Returns
New set of training templates in integer format.
Note
Globals: none
Exceptions: none
History: Thu Feb 7 14:40:42 1991, DSJ, Created.

Definition at line 564 of file intproto.cpp.

566  {
567  INT_TEMPLATES IntTemplates;
568  CLASS_TYPE FClass;
569  INT_CLASS IClass;
570  int ClassId;
571  int ProtoId;
572  int ConfigId;
573 
574  IntTemplates = NewIntTemplates();
575 
576  for (ClassId = 0; ClassId < target_unicharset.size(); ClassId++) {
577  FClass = &(FloatProtos[ClassId]);
578  if (FClass->NumProtos == 0 && FClass->NumConfigs == 0 &&
579  strcmp(target_unicharset.id_to_unichar(ClassId), " ") != 0) {
580  cprintf("Warning: no protos/configs for %s in CreateIntTemplates()\n",
581  target_unicharset.id_to_unichar(ClassId));
582  }
583  assert(UnusedClassIdIn(IntTemplates, ClassId));
584  IClass = NewIntClass(FClass->NumProtos, FClass->NumConfigs);
585  FontSet fs;
586  fs.size = FClass->font_set.size();
587  fs.configs = new int[fs.size];
588  for (int i = 0; i < fs.size; ++i) {
589  fs.configs[i] = FClass->font_set.get(i);
590  }
591  if (this->fontset_table_.contains(fs)) {
592  IClass->font_set_id = this->fontset_table_.get_id(fs);
593  delete[] fs.configs;
594  } else {
595  IClass->font_set_id = this->fontset_table_.push_back(fs);
596  }
597  AddIntClass(IntTemplates, ClassId, IClass);
598 
599  for (ProtoId = 0; ProtoId < FClass->NumProtos; ProtoId++) {
600  AddIntProto(IClass);
601  ConvertProto(ProtoIn(FClass, ProtoId), ProtoId, IClass);
602  AddProtoToProtoPruner(ProtoIn(FClass, ProtoId), ProtoId, IClass,
604  AddProtoToClassPruner(ProtoIn(FClass, ProtoId), ClassId, IntTemplates);
605  }
606 
607  for (ConfigId = 0; ConfigId < FClass->NumConfigs; ConfigId++) {
608  AddIntConfig(IClass);
609  ConvertConfig(FClass->Configurations[ConfigId], ConfigId, IClass);
610  }
611  }
612  return (IntTemplates);
613 } /* CreateIntTemplates */
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
Definition: intproto.cpp:389
#define ProtoIn(Class, Pid)
Definition: protos.h:123
inT16 NumConfigs
Definition: protos.h:62
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
Definition: intproto.cpp:522
void AddProtoToClassPruner(PROTO Proto, CLASS_ID ClassId, INT_TEMPLATES Templates)
Definition: intproto.cpp:346
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs)
Definition: intproto.cpp:672
UnicityTableEqEq< int > font_set
Definition: protos.h:65
inT16 NumProtos
Definition: protos.h:59
int AddIntConfig(INT_CLASS Class)
Definition: intproto.cpp:272
int classify_learning_debug_level
Definition: classify.h:419
int AddIntProto(INT_CLASS Class)
Definition: intproto.cpp:295
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
Definition: intproto.cpp:493
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:732
void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class)
Definition: intproto.cpp:240
#define UnusedClassIdIn(T, c)
Definition: intproto.h:180
const T & get(int id) const
Return the object from an id.
int size() const
Return the size used.
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
int size() const
Definition: unicharset.h:297
CONFIGS Configurations
Definition: protos.h:64
void tesseract::Classify::DebugAdaptiveClassifier ( TBLOB blob,
ADAPT_RESULTS Results 
)
Parameters
blob: blob whose classification is being debugged
Results: results of match being debugged

Globals: none

Note
Exceptions: none
History: Wed Mar 13 16:44:41 1991, DSJ, Created.

Definition at line 1546 of file adaptmatch.cpp.

1547  {
1548  if (static_classifier_ == NULL) return;
1549  INT_FX_RESULT_STRUCT fx_info;
1551  TrainingSample* sample =
1552  BlobToTrainingSample(*blob, false, &fx_info, &bl_features);
1553  if (sample == NULL) return;
1554  static_classifier_->DebugDisplay(*sample, blob->denorm().pix(),
1555  Results->best_unichar_id);
1556 } /* DebugAdaptiveClassifier */
virtual void DebugDisplay(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id)
UNICHAR_ID best_unichar_id
Definition: adaptmatch.cpp:85
Pix * pix() const
Definition: normalis.h:248
const DENORM & denorm() const
Definition: blobs.h:340
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
Definition: cluster.h:32
#define NULL
Definition: host.h:144
void tesseract::Classify::DisplayAdaptedChar ( TBLOB blob,
INT_CLASS_STRUCT int_class 
)

Definition at line 978 of file adaptmatch.cpp.

978  {
979 #ifndef GRAPHICS_DISABLED
980  INT_FX_RESULT_STRUCT fx_info;
982  TrainingSample* sample =
984  &bl_features);
985  if (sample == NULL) return;
986 
987  UnicharRating int_result;
988  im_.Match(int_class, AllProtosOn, AllConfigsOn,
989  bl_features.size(), &bl_features[0],
992  tprintf("Best match to temp config %d = %4.1f%%.\n",
993  int_result.config, int_result.rating * 100.0);
995  uinT32 ConfigMask;
996  ConfigMask = 1 << int_result.config;
998  im_.Match(int_class, AllProtosOn, (BIT_VECTOR)&ConfigMask,
999  bl_features.size(), &bl_features[0],
1000  &int_result, classify_adapt_feature_threshold,
1001  6 | 0x19, matcher_debug_separate_windows);
1003  }
1004 #endif
1005 }
bool matcher_debug_separate_windows
Definition: classify.h:458
int size() const
Definition: genericvector.h:72
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
#define tprintf(...)
Definition: tprintf.h:31
BIT_VECTOR AllProtosOn
Definition: classify.h:480
int classify_learning_debug_level
Definition: classify.h:419
bool classify_nonlinear_norm
Definition: classify.h:416
unsigned int uinT32
Definition: host.h:103
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:472
#define NO_DEBUG
Definition: adaptmatch.cpp:70
void UpdateMatchDisplay()
Definition: intproto.cpp:473
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
Definition: cluster.h:32
IntegerMatcher im_
Definition: classify.h:503
#define NULL
Definition: host.h:144
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
int classify_adapt_feature_threshold
Definition: classify.h:447
void tesseract::Classify::DoAdaptiveMatch ( TBLOB Blob,
ADAPT_RESULTS Results 
)

This routine performs an adaptive classification. If we have not yet adapted to enough classes, a simple classification to the pre-trained templates is performed. Otherwise, we match the blob against the adapted templates. If the adapted templates do not match well, we try a match against the pre-trained templates. If an adapted template match is found, we do a match to any pre-trained templates which could be ambiguous. The results from all of these classifications are merged together into Results.

Parameters
Blob: blob to be classified
Results: place to put match results

Globals:

  • PreTrainedTemplates: built-in training templates
  • AdaptedTemplates: templates adapted for this page
  • matcher_reliable_adaptive_result: rating limit for a great match
Note
Exceptions: none
History: Tue Mar 12 08:50:11 1991, DSJ, Created.

Definition at line 1582 of file adaptmatch.cpp.

1582  {
1583  UNICHAR_ID *Ambiguities;
1584 
1585  INT_FX_RESULT_STRUCT fx_info;
1587  TrainingSample* sample =
1589  &bl_features);
1590  if (sample == NULL) return;
1591 
1593  tess_cn_matching) {
1594  CharNormClassifier(Blob, *sample, Results);
1595  } else {
1596  Ambiguities = BaselineClassifier(Blob, bl_features, fx_info,
1597  AdaptedTemplates, Results);
1598  if ((!Results->match.empty() &&
1599  MarginalMatch(Results->best_rating,
1601  !tess_bn_matching) ||
1602  Results->match.empty()) {
1603  CharNormClassifier(Blob, *sample, Results);
1604  } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
1605  AmbigClassifier(bl_features, fx_info, Blob,
1608  Ambiguities,
1609  Results);
1610  }
1611  }
1612 
1613  // Force the blob to be classified as noise
1614  // if the results contain only fragments.
1615  // TODO(daria): verify that this is better than
1616  // just adding a NULL classification.
1617  if (!Results->HasNonfragment || Results->match.empty())
1618  ClassifyAsNoise(Results);
1619  delete sample;
1620 } /* DoAdaptiveMatch */
int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
double matcher_reliable_adaptive_result
Definition: classify.h:421
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
bool classify_nonlinear_norm
Definition: classify.h:416
bool HasNonfragment
Definition: adaptmatch.cpp:84
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
int matcher_permanent_classes_min
Definition: classify.h:426
void AmbigClassifier(const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
FLOAT32 best_rating
Definition: adaptmatch.cpp:87
int UNICHAR_ID
Definition: unichar.h:33
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
bool empty() const
Definition: genericvector.h:84
Definition: cluster.h:32
bool MarginalMatch(float confidence, float matcher_great_threshold)
Definition: adaptmatch.cpp:122
#define NULL
Definition: host.h:144
UNICHAR_ID * BaselineClassifier(TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
void ClassifyAsNoise(ADAPT_RESULTS *Results)
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
void tesseract::Classify::EndAdaptiveClassifier ( )

This routine performs cleanup operations on the adaptive classifier. It should be called before the program is terminated. Its main function is to save the adapted templates to a file.

Globals:

Note
Exceptions: none
History: Tue Mar 19 14:37:06 1991, DSJ, Created.

Definition at line 456 of file adaptmatch.cpp.

456  {
457  STRING Filename;
458  FILE *File;
459 
460  if (AdaptedTemplates != NULL &&
462  Filename = imagefile + ADAPT_TEMPLATE_SUFFIX;
463  File = fopen (Filename.string(), "wb");
464  if (File == NULL)
465  cprintf ("Unable to save adapted templates to %s!\n", Filename.string());
466  else {
467  cprintf ("\nSaving adapted templates to %s ...", Filename.string());
468  fflush(stdout);
470  cprintf ("\n");
471  fclose(File);
472  }
473  }
474 
475  if (AdaptedTemplates != NULL) {
478  }
479  if (BackupAdaptedTemplates != NULL) {
482  }
483 
484  if (PreTrainedTemplates != NULL) {
487  }
489  FreeNormProtos();
490  if (AllProtosOn != NULL) {
495  AllProtosOn = NULL;
496  AllConfigsOn = NULL;
499  }
500  delete shape_table_;
501  shape_table_ = NULL;
502  if (static_classifier_ != NULL) {
503  delete static_classifier_;
504  static_classifier_ = NULL;
505  }
506 } /* EndAdaptiveClassifier */
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:477
void FreeBitVector(BIT_VECTOR BitVector)
Definition: bitvec.cpp:55
void EndDangerousAmbigs()
Definition: stopper.cpp:368
bool classify_enable_adaptive_matcher
Definition: classify.h:409
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
bool classify_save_adapted_templates
Definition: classify.h:413
STRING imagefile
Definition: ccutil.h:74
BIT_VECTOR AllProtosOn
Definition: classify.h:480
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:505
BIT_VECTOR AllConfigsOff
Definition: classify.h:482
ShapeTable * shape_table_
Definition: classify.h:512
Dict & getDict()
Definition: classify.h:65
#define ADAPT_TEMPLATE_SUFFIX
Definition: adaptmatch.cpp:66
void free_int_templates(INT_TEMPLATES templates)
Definition: intproto.cpp:748
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
Definition: strngs.h:44
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
#define NULL
Definition: host.h:144
BIT_VECTOR TempProtoMask
Definition: classify.h:483
const char * string() const
Definition: strngs.cpp:193
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
void tesseract::Classify::ExpandShapesAndApplyCorrections ( ADAPT_CLASS classes,
bool  debug,
int  class_id,
int  bottom,
int  top,
float  cp_rating,
int  blob_length,
int  matcher_multiplier,
const uinT8 cn_factors,
UnicharRating int_result,
ADAPT_RESULTS final_results 
)

Definition at line 1166 of file adaptmatch.cpp.

1170  {
1171  if (classes != NULL) {
1172  // Adapted result. Convert configs to fontinfo_ids.
1173  int_result->adapted = true;
1174  for (int f = 0; f < int_result->fonts.size(); ++f) {
1175  int_result->fonts[f].fontinfo_id =
1176  GetFontinfoId(classes[class_id], int_result->fonts[f].fontinfo_id);
1177  }
1178  } else {
1179  // Pre-trained result. Map fonts using font_sets_.
1180  int_result->adapted = false;
1181  for (int f = 0; f < int_result->fonts.size(); ++f) {
1182  int_result->fonts[f].fontinfo_id =
1184  int_result->fonts[f].fontinfo_id);
1185  }
1186  if (shape_table_ != NULL) {
1187  // Two possible cases:
1188  // 1. Flat shapetable. All unichar-ids of the shapes referenced by
1189  // int_result->fonts are the same. In this case build a new vector of
1190  // mapped fonts and replace the fonts in int_result.
1191  // 2. Multi-unichar shapetable. Variable unichars in the shapes referenced
1192  // by int_result. In this case, build a vector of UnicharRating to
1193  // gather together different font-ids for each unichar. Also covers case1.
1194  GenericVector<UnicharRating> mapped_results;
1195  for (int f = 0; f < int_result->fonts.size(); ++f) {
1196  int shape_id = int_result->fonts[f].fontinfo_id;
1197  const Shape& shape = shape_table_->GetShape(shape_id);
1198  for (int c = 0; c < shape.size(); ++c) {
1199  int unichar_id = shape[c].unichar_id;
1200  if (!unicharset.get_enabled(unichar_id)) continue;
1201  // Find the mapped_result for unichar_id.
1202  int r = 0;
1203  for (r = 0; r < mapped_results.size() &&
1204  mapped_results[r].unichar_id != unichar_id; ++r) {}
1205  if (r == mapped_results.size()) {
1206  mapped_results.push_back(*int_result);
1207  mapped_results[r].unichar_id = unichar_id;
1208  mapped_results[r].fonts.truncate(0);
1209  }
1210  for (int i = 0; i < shape[c].font_ids.size(); ++i) {
1211  mapped_results[r].fonts.push_back(
1212  ScoredFont(shape[c].font_ids[i], int_result->fonts[f].score));
1213  }
1214  }
1215  }
1216  for (int m = 0; m < mapped_results.size(); ++m) {
1217  mapped_results[m].rating =
1218  ComputeCorrectedRating(debug, mapped_results[m].unichar_id,
1219  cp_rating, int_result->rating,
1220  int_result->feature_misses, bottom, top,
1221  blob_length, matcher_multiplier, cn_factors);
1222  AddNewResult(mapped_results[m], final_results);
1223  }
1224  return;
1225  }
1226  }
1227  if (unicharset.get_enabled(class_id)) {
1228  int_result->rating = ComputeCorrectedRating(debug, class_id, cp_rating,
1229  int_result->rating,
1230  int_result->feature_misses,
1231  bottom, top, blob_length,
1232  matcher_multiplier, cn_factors);
1233  AddNewResult(*int_result, final_results);
1234  }
1235 }
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
int size() const
Definition: genericvector.h:72
void truncate(int size)
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
int push_back(T object)
UNICHARSET unicharset
Definition: ccutil.h:72
ShapeTable * shape_table_
Definition: classify.h:512
double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uinT8 *cn_factors)
GenericVector< ScoredFont > fonts
Definition: shapetable.h:88
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId)
Definition: adaptive.cpp:190
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:826
#define NULL
Definition: host.h:144
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:323
void tesseract::Classify::ExtractFeatures ( const TBLOB blob,
bool  nonlinear_norm,
GenericVector< INT_FEATURE_STRUCT > *  bl_features,
GenericVector< INT_FEATURE_STRUCT > *  cn_features,
INT_FX_RESULT_STRUCT results,
GenericVector< int > *  outline_cn_counts 
)
static

Definition at line 445 of file intfx.cpp.

450  {
451  DENORM bl_denorm, cn_denorm;
452  tesseract::Classify::SetupBLCNDenorms(blob, nonlinear_norm,
453  &bl_denorm, &cn_denorm, results);
454  if (outline_cn_counts != NULL)
455  outline_cn_counts->truncate(0);
456  // Iterate the outlines.
457  for (TESSLINE* ol = blob.outlines; ol != NULL; ol = ol->next) {
458  // Iterate the polygon.
459  EDGEPT* loop_pt = ol->FindBestStartPt();
460  EDGEPT* pt = loop_pt;
461  if (pt == NULL) continue;
462  do {
463  if (pt->IsHidden()) continue;
464  // Find a run of equal src_outline.
465  EDGEPT* last_pt = pt;
466  do {
467  last_pt = last_pt->next;
468  } while (last_pt != loop_pt && !last_pt->IsHidden() &&
469  last_pt->src_outline == pt->src_outline);
470  last_pt = last_pt->prev;
471  // Until the adaptive classifier can be weaned off polygon segments,
472  // we have to force extraction from the polygon for the bl_features.
473  ExtractFeaturesFromRun(pt, last_pt, bl_denorm, kStandardFeatureLength,
474  true, bl_features);
475  ExtractFeaturesFromRun(pt, last_pt, cn_denorm, kStandardFeatureLength,
476  false, cn_features);
477  pt = last_pt;
478  } while ((pt = pt->next) != loop_pt);
479  if (outline_cn_counts != NULL)
480  outline_cn_counts->push_back(cn_features->size());
481  }
482  results->NumBL = bl_features->size();
483  results->NumCN = cn_features->size();
484  results->YBottom = blob.bounding_box().bottom();
485  results->YTop = blob.bounding_box().top();
486  results->Width = blob.bounding_box().width();
487 }
int size() const
Definition: genericvector.h:72
void truncate(int size)
int push_back(T object)
EDGEPT * prev
Definition: blobs.h:170
EDGEPT * next
Definition: blobs.h:169
bool IsHidden() const
Definition: blobs.h:153
static void SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
Definition: intfx.cpp:133
inT16 bottom() const
Definition: rect.h:61
C_OUTLINE * src_outline
Definition: blobs.h:171
inT16 width() const
Definition: rect.h:111
Definition: blobs.h:76
#define NULL
Definition: host.h:144
TBOX bounding_box() const
Definition: blobs.cpp:482
TESSLINE * outlines
Definition: blobs.h:377
const double kStandardFeatureLength
Definition: intfx.h:46
inT16 top() const
Definition: rect.h:54
FEATURE_SET tesseract::Classify::ExtractIntCNFeatures ( const TBLOB blob,
const INT_FX_RESULT_STRUCT fx_info 
)
Parameters
blob: blob to extract features from
fx_info
Returns
Integer character-normalized features for blob.
Note
Exceptions: none
History: 8/8/2011, rays, Created.

Definition at line 230 of file picofeat.cpp.

231  {
232  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
235  blob, false, &local_fx_info, &bl_features);
236  if (sample == NULL) return NULL;
237 
238  int num_features = sample->num_features();
239  const INT_FEATURE_STRUCT* features = sample->features();
240  FEATURE_SET feature_set = NewFeatureSet(num_features);
241  for (int f = 0; f < num_features; ++f) {
242  FEATURE feature = NewFeature(&IntFeatDesc);
243 
244  feature->Params[IntX] = features[f].X;
245  feature->Params[IntY] = features[f].Y;
246  feature->Params[IntDir] = features[f].Theta;
247  AddFeature(feature_set, feature);
248  }
249  delete sample;
250 
251  return feature_set;
252 } /* ExtractIntCNFeatures */
const INT_FEATURE_STRUCT * features() const
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:96
FEATURE_SET NewFeatureSet(int NumFeatures)
Definition: picofeat.h:29
const FEATURE_DESC_STRUCT IntFeatDesc
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature)
Definition: ocrfeatures.cpp:44
Definition: cluster.h:32
Definition: picofeat.h:30
#define NULL
Definition: host.h:144
FEATURE_SET tesseract::Classify::ExtractIntGeoFeatures ( const TBLOB blob,
const INT_FX_RESULT_STRUCT fx_info 
)
Parameters
blob: blob to extract features from
fx_info
Returns
Geometric (top/bottom/width) features for blob.
Note
Exceptions: none
History: 8/8/2011, rays, Created.

Definition at line 262 of file picofeat.cpp.

263  {
264  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
267  blob, false, &local_fx_info, &bl_features);
268  if (sample == NULL) return NULL;
269 
270  FEATURE_SET feature_set = NewFeatureSet(1);
271  FEATURE feature = NewFeature(&IntFeatDesc);
272 
273  feature->Params[GeoBottom] = sample->geo_feature(GeoBottom);
274  feature->Params[GeoTop] = sample->geo_feature(GeoTop);
275  feature->Params[GeoWidth] = sample->geo_feature(GeoWidth);
276  AddFeature(feature_set, feature);
277  delete sample;
278 
279  return feature_set;
280 } /* ExtractIntGeoFeatures */
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:96
FEATURE_SET NewFeatureSet(int NumFeatures)
const FEATURE_DESC_STRUCT IntFeatDesc
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature)
Definition: ocrfeatures.cpp:44
Definition: cluster.h:32
#define NULL
Definition: host.h:144
int geo_feature(int index) const
FEATURE_SET tesseract::Classify::ExtractOutlineFeatures ( TBLOB Blob)

Convert each segment in the outline to a feature and return the features.

Parameters
Blobblob to extract pico-features from
Returns
Outline-features for Blob.
Note
Globals: none
Exceptions: none
History:
  • 11/13/90, DSJ, Created.
  • 05/24/91, DSJ, Updated for either char or baseline normalize.

Definition at line 47 of file outfeat.cpp.

47  {
48  LIST Outlines;
49  LIST RemainingOutlines;
50  MFOUTLINE Outline;
51  FEATURE_SET FeatureSet;
52  FLOAT32 XScale, YScale;
53 
54  FeatureSet = NewFeatureSet (MAX_OUTLINE_FEATURES);
55  if (Blob == NULL)
56  return (FeatureSet);
57 
58  Outlines = ConvertBlob (Blob);
59 
60  NormalizeOutlines(Outlines, &XScale, &YScale);
61  RemainingOutlines = Outlines;
62  iterate(RemainingOutlines) {
63  Outline = (MFOUTLINE) first_node (RemainingOutlines);
64  ConvertToOutlineFeatures(Outline, FeatureSet);
65  }
67  NormalizeOutlineX(FeatureSet);
68  FreeOutlines(Outlines);
69  return (FeatureSet);
70 } /* ExtractOutlineFeatures */
float FLOAT32
Definition: host.h:111
FEATURE_SET NewFeatureSet(int NumFeatures)
#define MAX_OUTLINE_FEATURES
Definition: outfeat.h:35
void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: outfeat.cpp:122
void NormalizeOutlineX(FEATURE_SET FeatureSet)
Definition: outfeat.cpp:163
LIST ConvertBlob(TBLOB *blob)
Definition: mfoutline.cpp:39
void FreeOutlines(LIST Outlines)
Definition: mfoutline.cpp:178
#define first_node(l)
Definition: oldlist.h:139
#define iterate(l)
Definition: oldlist.h:159
#define NULL
Definition: host.h:144
LIST MFOUTLINE
Definition: mfoutline.h:33
void NormalizeOutlines(LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale)
Definition: mfoutline.cpp:300
FEATURE_SET tesseract::Classify::ExtractPicoFeatures ( TBLOB Blob)

Operation: Dummy for now.

Globals:

  • classify_norm_method: normalization method currently specified
Parameters
Blob: blob to extract pico-features from
Returns
Pico-features for Blob.
Note
Exceptions: none
History: 9/4/90, DSJ, Created.

Definition at line 67 of file picofeat.cpp.

67  {
68  LIST Outlines;
69  LIST RemainingOutlines;
70  MFOUTLINE Outline;
71  FEATURE_SET FeatureSet;
72  FLOAT32 XScale, YScale;
73 
74  FeatureSet = NewFeatureSet(MAX_PICO_FEATURES);
75  Outlines = ConvertBlob(Blob);
76  NormalizeOutlines(Outlines, &XScale, &YScale);
77  RemainingOutlines = Outlines;
78  iterate(RemainingOutlines) {
79  Outline = (MFOUTLINE) first_node (RemainingOutlines);
80  ConvertToPicoFeatures2(Outline, FeatureSet);
81  }
83  NormalizePicoX(FeatureSet);
84  FreeOutlines(Outlines);
85  return (FeatureSet);
86 
87 } /* ExtractPicoFeatures */
float FLOAT32
Definition: host.h:111
void NormalizePicoX(FEATURE_SET FeatureSet)
Definition: picofeat.cpp:204
FEATURE_SET NewFeatureSet(int NumFeatures)
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:163
#define MAX_PICO_FEATURES
Definition: picofeat.h:47
LIST ConvertBlob(TBLOB *blob)
Definition: mfoutline.cpp:39
void FreeOutlines(LIST Outlines)
Definition: mfoutline.cpp:178
#define first_node(l)
Definition: oldlist.h:139
#define iterate(l)
Definition: oldlist.h:159
LIST MFOUTLINE
Definition: mfoutline.h:33
void NormalizeOutlines(LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale)
Definition: mfoutline.cpp:300
void tesseract::Classify::FreeNormProtos ( )

Definition at line 162 of file normmatch.cpp.

162  {
163  if (NormProtos != NULL) {
164  for (int i = 0; i < NormProtos->NumProtos; i++)
168  Efree(NormProtos);
169  NormProtos = NULL;
170  }
171 }
PARAM_DESC * ParamDesc
Definition: normmatch.cpp:41
LIST * Protos
Definition: normmatch.cpp:42
void FreeProtoList(LIST *ProtoList)
Definition: cluster.cpp:571
void Efree(void *ptr)
Definition: emalloc.cpp:79
#define NULL
Definition: host.h:144
NORM_PROTOS * NormProtos
Definition: classify.h:486
UnicityTable<FontInfo>& tesseract::Classify::get_fontinfo_table ( )
inline

Definition at line 345 of file classify.h.

345  {
346  return fontinfo_table_;
347  }
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:488
const UnicityTable<FontInfo>& tesseract::Classify::get_fontinfo_table ( ) const
inline

Definition at line 348 of file classify.h.

348  {
349  return fontinfo_table_;
350  }
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:488
UnicityTable<FontSet>& tesseract::Classify::get_fontset_table ( )
inline

Definition at line 351 of file classify.h.

351  {
352  return fontset_table_;
353  }
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
int tesseract::Classify::GetAdaptiveFeatures ( TBLOB Blob,
INT_FEATURE_ARRAY  IntFeatures,
FEATURE_SET FloatFeatures 
)

This routine sets up the feature extractor to extract baseline normalized pico-features.

The extracted pico-features are converted to integer form and placed in IntFeatures. The original floating-pt. features are returned in FloatFeatures.

Globals: none

Parameters
Blob: blob to extract features from
[out] IntFeatures: array to fill with integer features
[out] FloatFeatures: place to return actual floating-pt features
Returns
Number of pico-features returned (0 if an error occurred)
Note
Exceptions: none
History: Tue Mar 12 17:55:18 1991, DSJ, Created.

Definition at line 812 of file adaptmatch.cpp.

814  {
815  FEATURE_SET Features;
816  int NumFeatures;
817 
818  classify_norm_method.set_value(baseline);
819  Features = ExtractPicoFeatures(Blob);
820 
821  NumFeatures = Features->NumFeatures;
822  if (NumFeatures > UNLIKELY_NUM_FEAT) {
823  FreeFeatureSet(Features);
824  return 0;
825  }
826 
827  ComputeIntFeatures(Features, IntFeatures);
828  *FloatFeatures = Features;
829 
830  return NumFeatures;
831 } /* GetAdaptiveFeatures */
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
Definition: picofeat.cpp:67
#define UNLIKELY_NUM_FEAT
Definition: adaptmatch.cpp:69
void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
Definition: float2int.cpp:100
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:78
UNICHAR_ID * tesseract::Classify::GetAmbiguities ( TBLOB Blob,
CLASS_ID  CorrectClass 
)

This routine matches blob to the built-in templates to find out if there are any classes other than the correct class which are potential ambiguities.

Parameters
Blob: blob to get classification ambiguities for
CorrectClass: correct class for Blob

Globals:

  • CurrentRatings used by qsort compare routine
  • PreTrainedTemplates built-in templates
Returns
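Newly allocated array of UNICHAR_IDs for all possibly ambiguous classes, terminated by -1 (it holds only the -1 terminator when the correct class is the sole match). Returns NULL if no training sample could be built from the blob.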
Note
Exceptions: none
History: Fri Mar 15 08:08:22 1991, DSJ, Created.

Definition at line 1639 of file adaptmatch.cpp.

1640  {
1641  ADAPT_RESULTS *Results = new ADAPT_RESULTS();
1642  UNICHAR_ID *Ambiguities;
1643  int i;
1644 
1645  Results->Initialize();
1646  INT_FX_RESULT_STRUCT fx_info;
1648  TrainingSample* sample =
1650  &bl_features);
1651  if (sample == NULL) {
1652  delete Results;
1653  return NULL;
1654  }
1655 
1656  CharNormClassifier(Blob, *sample, Results);
1657  delete sample;
1658  RemoveBadMatches(Results);
1660 
1661  /* copy the class id's into an string of ambiguities - don't copy if
1662  the correct class is the only class id matched */
1663  Ambiguities = new UNICHAR_ID[Results->match.size() + 1];
1664  if (Results->match.size() > 1 ||
1665  (Results->match.size() == 1 &&
1666  Results->match[0].unichar_id != CorrectClass)) {
1667  for (i = 0; i < Results->match.size(); i++)
1668  Ambiguities[i] = Results->match[i].unichar_id;
1669  Ambiguities[i] = -1;
1670  } else {
1671  Ambiguities[0] = -1;
1672  }
1673 
1674  delete Results;
1675  return Ambiguities;
1676 } /* GetAmbiguities */
int size() const
Definition: genericvector.h:72
int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
bool classify_nonlinear_norm
Definition: classify.h:416
static int SortDescendingRating(const void *t1, const void *t2)
Definition: shapetable.h:56
void RemoveBadMatches(ADAPT_RESULTS *Results)
int UNICHAR_ID
Definition: unichar.h:33
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
void Initialize()
Definition: adaptmatch.cpp:93
Definition: cluster.h:32
#define NULL
Definition: host.h:144
int tesseract::Classify::GetCharNormFeature ( const INT_FX_RESULT_STRUCT fx_info,
INT_TEMPLATES  templates,
uinT8 pruner_norm_array,
uinT8 char_norm_array 
)

This routine calls the integer (Hardware) feature extractor if it has not been called before for this blob.

The results from the feature extractor are placed into globals so that they can be used in other routines without re-extracting the features.

It then copies the char norm features into the IntFeatures array provided by the caller.

Parameters
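fx_info: feature extraction results for the blob (its Ymean, Length, Rx and Ry fields are used to build the char norm feature)
templates: used to compute char norm adjustments
pruner_norm_array: array of factors from blob normalization process
char_norm_array: array to fill with dummy char norm adjustments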
Returns
Number of features extracted or 0 if an error occurred.
Note
Exceptions: none
History: Tue May 28 10:40:52 1991, DSJ, Created.

Definition at line 1727 of file adaptmatch.cpp.

1730  {
1731  FEATURE norm_feature = NewFeature(&CharNormDesc);
1732  float baseline = kBlnBaselineOffset;
1733  float scale = MF_SCALE_FACTOR;
1734  norm_feature->Params[CharNormY] = (fx_info.Ymean - baseline) * scale;
1735  norm_feature->Params[CharNormLength] =
1736  fx_info.Length * scale / LENGTH_COMPRESSION;
1737  norm_feature->Params[CharNormRx] = fx_info.Rx * scale;
1738  norm_feature->Params[CharNormRy] = fx_info.Ry * scale;
1739  // Deletes norm_feature.
1740  ComputeCharNormArrays(norm_feature, templates, char_norm_array,
1741  pruner_norm_array);
1742  return IntCastRounded(fx_info.Length / kStandardFeatureLength);
1743 } /* GetCharNormFeature */
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:96
#define LENGTH_COMPRESSION
Definition: normfeat.h:26
const FEATURE_DESC_STRUCT CharNormDesc
const int kBlnBaselineOffset
Definition: normalis.h:29
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
int IntCastRounded(double x)
Definition: helpers.h:172
const double kStandardFeatureLength
Definition: intfx.h:46
#define MF_SCALE_FACTOR
Definition: mfoutline.h:63
CLASS_ID tesseract::Classify::GetClassToDebug ( const char *  Prompt,
bool *  adaptive_on,
bool *  pretrained_on,
int *  shape_id 
)

This routine prompts the user with Prompt and waits for the user to enter something in the debug window.

Parameters
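Prompt: prompt to print while waiting for input from window
[out] adaptive_on: set to true when the popup command selects the adaptive classifier (alone or together with the static one), false otherwise
[out] pretrained_on: set to true when the popup command selects the pre-trained (static) classifier or a shape index, false when only the adaptive classifier is selected
[out] shape_id: set to the shape index entered for a shape-index command, or to -1 when the adaptive classifier is selected or no shape table is loaded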
Returns
Character entered in the debug window.
Note
Globals: none
Exceptions: none
History: Thu Mar 21 16:55:13 1991, DSJ, Created.

Definition at line 1405 of file intproto.cpp.

1406  {
1407  tprintf("%s\n", Prompt);
1408  SVEvent* ev;
1409  SVEventType ev_type;
1410  int unichar_id = INVALID_UNICHAR_ID;
1411  // Wait until a click or popup event.
1412  do {
1414  ev_type = ev->type;
1415  if (ev_type == SVET_POPUP) {
1416  if (ev->command_id == IDA_SHAPE_INDEX) {
1417  if (shape_table_ != NULL) {
1418  *shape_id = atoi(ev->parameter);
1419  *adaptive_on = false;
1420  *pretrained_on = true;
1421  if (*shape_id >= 0 && *shape_id < shape_table_->NumShapes()) {
1422  int font_id;
1423  shape_table_->GetFirstUnicharAndFont(*shape_id, &unichar_id,
1424  &font_id);
1425  tprintf("Shape %d, first unichar=%d, font=%d\n",
1426  *shape_id, unichar_id, font_id);
1427  return unichar_id;
1428  }
1429  tprintf("Shape index '%s' not found in shape table\n", ev->parameter);
1430  } else {
1431  tprintf("No shape table loaded!\n");
1432  }
1433  } else {
1435  unichar_id = unicharset.unichar_to_id(ev->parameter);
1436  if (ev->command_id == IDA_ADAPTIVE) {
1437  *adaptive_on = true;
1438  *pretrained_on = false;
1439  *shape_id = -1;
1440  } else if (ev->command_id == IDA_STATIC) {
1441  *adaptive_on = false;
1442  *pretrained_on = true;
1443  } else {
1444  *adaptive_on = true;
1445  *pretrained_on = true;
1446  }
1447  if (ev->command_id == IDA_ADAPTIVE || shape_table_ == NULL) {
1448  *shape_id = -1;
1449  return unichar_id;
1450  }
1451  for (int s = 0; s < shape_table_->NumShapes(); ++s) {
1452  if (shape_table_->GetShape(s).ContainsUnichar(unichar_id)) {
1453  tprintf("%s\n", shape_table_->DebugStr(s).string());
1454  }
1455  }
1456  } else {
1457  tprintf("Char class '%s' not found in unicharset",
1458  ev->parameter);
1459  }
1460  }
1461  }
1462  delete ev;
1463  } while (ev_type != SVET_CLICK);
1464  return 0;
1465 } /* GetClassToDebug */
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
#define tprintf(...)
Definition: tprintf.h:31
UNICHARSET unicharset
Definition: ccutil.h:72
int command_id
Definition: scrollview.h:70
bool ContainsUnichar(int unichar_id) const
Definition: shapetable.cpp:156
SVEventType
Definition: scrollview.h:45
ShapeTable * shape_table_
Definition: classify.h:512
ScrollView * IntMatchWindow
Definition: intproto.cpp:181
SVEvent * AwaitEvent(SVEventType type)
Definition: scrollview.cpp:449
char * parameter
Definition: scrollview.h:71
STRING DebugStr(int shape_id) const
Definition: shapetable.cpp:291
SVEventType type
Definition: scrollview.h:64
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
#define NULL
Definition: host.h:144
void GetFirstUnicharAndFont(int shape_id, int *unichar_id, int *font_id) const
Definition: shapetable.cpp:414
const char * string() const
Definition: strngs.cpp:193
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:323
int NumShapes() const
Definition: shapetable.h:278
Dict& tesseract::Classify::getDict ( )
inline

Definition at line 65 of file classify.h.

65  {
66  return dict_;
67  }
int tesseract::Classify::GetFontinfoId ( ADAPT_CLASS  Class,
uinT8  ConfigId 
)

Definition at line 190 of file adaptive.cpp.

190  {
191  return (ConfigIsPermanent(Class, ConfigId) ?
192  PermConfigFor(Class, ConfigId)->FontinfoId :
193  TempConfigFor(Class, ConfigId)->FontinfoId);
194 }
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:105
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:93
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
void tesseract::Classify::InitAdaptedClass ( TBLOB Blob,
CLASS_ID  ClassId,
int  FontinfoId,
ADAPT_CLASS  Class,
ADAPT_TEMPLATES  Templates 
)

This routine creates a new adapted class and uses Blob as the model for the first config in that class.

Parameters
Blob: blob to model new class after
ClassId: id of the class to be initialized
FontinfoId: font information inferred from pre-trained templates
Class: adapted class to be initialized
Templates: adapted templates to add new class to

Globals:

Note
Exceptions: none
History: Thu Mar 14 12:49:39 1991, DSJ, Created.

Definition at line 717 of file adaptmatch.cpp.

721  {
722  FEATURE_SET Features;
723  int Fid, Pid;
724  FEATURE Feature;
725  int NumFeatures;
726  TEMP_PROTO TempProto;
727  PROTO Proto;
728  INT_CLASS IClass;
730 
731  classify_norm_method.set_value(baseline);
732  Features = ExtractOutlineFeatures(Blob);
733  NumFeatures = Features->NumFeatures;
734  if (NumFeatures > UNLIKELY_NUM_FEAT || NumFeatures <= 0) {
735  FreeFeatureSet(Features);
736  return;
737  }
738 
739  Config = NewTempConfig(NumFeatures - 1, FontinfoId);
740  TempConfigFor(Class, 0) = Config;
741 
742  /* this is a kludge to construct cutoffs for adapted templates */
743  if (Templates == AdaptedTemplates)
744  BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId];
745 
746  IClass = ClassForClassId (Templates->Templates, ClassId);
747 
748  for (Fid = 0; Fid < Features->NumFeatures; Fid++) {
749  Pid = AddIntProto (IClass);
750  assert (Pid != NO_PROTO);
751 
752  Feature = Features->Features[Fid];
753  TempProto = NewTempProto ();
754  Proto = &(TempProto->Proto);
755 
756  /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
757  ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
758  instead of the -0.25 to 0.75 used in baseline normalization */
759  Proto->Angle = Feature->Params[OutlineFeatDir];
760  Proto->X = Feature->Params[OutlineFeatX];
761  Proto->Y = Feature->Params[OutlineFeatY] - Y_DIM_OFFSET;
762  Proto->Length = Feature->Params[OutlineFeatLength];
763  FillABC(Proto);
764 
765  TempProto->ProtoId = Pid;
766  SET_BIT (Config->Protos, Pid);
767 
768  ConvertProto(Proto, Pid, IClass);
769  AddProtoToProtoPruner(Proto, Pid, IClass,
771 
772  Class->TempProtos = push (Class->TempProtos, TempProto);
773  }
774  FreeFeatureSet(Features);
775 
776  AddIntConfig(IClass);
777  ConvertConfig (AllProtosOn, 0, IClass);
778 
780  tprintf("Added new class '%s' with class id %d and %d protos.\n",
781  unicharset.id_to_unichar(ClassId), ClassId, NumFeatures);
783  DisplayAdaptedChar(Blob, IClass);
784  }
785 
786  if (IsEmptyAdaptedClass(Class))
787  (Templates->NumNonEmptyClasses)++;
788 } /* InitAdaptedClass */
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
Definition: intproto.cpp:389
#define tprintf(...)
Definition: tprintf.h:31
TEMP_PROTO NewTempProto()
Definition: adaptive.cpp:254
UNICHARSET unicharset
Definition: ccutil.h:72
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
Definition: adaptive.cpp:223
uinT16 ProtoId
Definition: adaptive.h:30
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
Definition: intproto.cpp:522
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:90
FEATURE Features[1]
Definition: ocrfeatures.h:72
BIT_VECTOR AllProtosOn
Definition: classify.h:480
int AddIntConfig(INT_CLASS Class)
Definition: intproto.cpp:272
int classify_learning_debug_level
Definition: classify.h:419
#define UNLIKELY_NUM_FEAT
Definition: adaptmatch.cpp:69
int AddIntProto(INT_CLASS Class)
Definition: intproto.cpp:295
FLOAT32 X
Definition: protos.h:47
PROTO_STRUCT Proto
Definition: adaptive.h:32
FLOAT32 Angle
Definition: protos.h:49
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
Definition: intproto.cpp:493
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
#define NO_PROTO
Definition: matchdefs.h:42
#define SET_BIT(array, bit)
Definition: bitvec.h:57
void DisplayAdaptedChar(TBLOB *blob, INT_CLASS_STRUCT *int_class)
Definition: adaptmatch.cpp:978
#define ClassForClassId(T, c)
Definition: intproto.h:181
INT_TEMPLATES Templates
Definition: adaptive.h:77
CLUSTERCONFIG Config
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
#define Y_DIM_OFFSET
Definition: adaptmatch.cpp:75
void FillABC(PROTO Proto)
Definition: protos.cpp:198
FLOAT32 Length
Definition: protos.h:50
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob)
Definition: outfeat.cpp:47
BIT_VECTOR Protos
Definition: adaptive.h:45
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:78
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
LIST push(LIST list, void *element)
Definition: oldlist.cpp:323
FLOAT32 Y
Definition: protos.h:48
void tesseract::Classify::InitAdaptiveClassifier ( bool  load_pre_trained_templates)

This routine reads in the training information needed by the adaptive classifier and saves it into global variables.

Parameters
load_pre_trained_templates: Indicates whether the pre-trained templates (inttemp, normproto and pffmtable components) should be loaded. Should only be set to true if the necessary classifier components are present in the [lang].traineddata file.

Globals:

  • BuiltInTemplatesFile file to get built-in temps from
  • BuiltInCutoffsFile file to get avg. feat per class from
  • classify_use_pre_adapted_templates enables use of pre-adapted templates

Note
History: Mon Mar 11 12:49:34 1991, DSJ, Created.

Definition at line 527 of file adaptmatch.cpp.

527  {
529  return;
530  if (AllProtosOn != NULL)
531  EndAdaptiveClassifier(); // Don't leak with multiple inits.
532 
533  // If there is no language_data_path_prefix, the classifier will be
534  // adaptive only.
535  if (language_data_path_prefix.length() > 0 &&
536  load_pre_trained_templates) {
540  if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded inttemp\n");
541 
543  shape_table_ = new ShapeTable(unicharset);
546  tprintf("Error loading shape table!\n");
547  delete shape_table_;
548  shape_table_ = NULL;
549  } else if (tessdata_manager.DebugLevel() > 0) {
550  tprintf("Successfully loaded shape table!\n");
551  }
552  }
553 
558  CharNormCutoffs);
559  if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded pffmtable\n");
560 
562  NormProtos =
565  if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded normproto\n");
566  static_classifier_ = new TessClassifier(false, this);
567  }
568 
570  InitIntegerFX();
571 
579 
580  for (int i = 0; i < MAX_NUM_CLASSES; i++) {
581  BaselineCutoffs[i] = 0;
582  }
583 
585  FILE *File;
586  STRING Filename;
587 
588  Filename = imagefile;
589  Filename += ADAPT_TEMPLATE_SUFFIX;
590  File = fopen(Filename.string(), "rb");
591  if (File == NULL) {
593  } else {
594  cprintf("\nReading pre-adapted templates from %s ...\n",
595  Filename.string());
596  fflush(stdout);
598  cprintf("\n");
599  fclose(File);
601 
602  for (int i = 0; i < AdaptedTemplates->Templates->NumClasses; i++) {
603  BaselineCutoffs[i] = CharNormCutoffs[i];
604  }
605  }
606  } else {
607  if (AdaptedTemplates != NULL)
610  }
611 } /* InitAdaptiveClassifier */
FILE * GetDataFilePtr() const
#define zero_all_bits(array, length)
Definition: bitvec.h:33
void ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset, CLASS_CUTOFF_ARRAY Cutoffs)
Definition: cutoffs.cpp:52
#define WordsInVectorOfSize(NumBits)
Definition: bitvec.h:63
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
bool classify_enable_adaptive_matcher
Definition: classify.h:409
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:167
#define tprintf(...)
Definition: tprintf.h:31
BIT_VECTOR NewBitVector(int NumBits)
Definition: bitvec.cpp:90
UNICHARSET unicharset
Definition: ccutil.h:72
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
ADAPT_TEMPLATES ReadAdaptedTemplates(FILE *File)
Definition: adaptive.cpp:369
inT64 GetEndOffset(TessdataType tessdata_type) const
#define set_all_bits(array, length)
Definition: bitvec.h:41
inT32 length() const
Definition: strngs.cpp:188
bool DeSerialize(bool swap, FILE *fp)
Definition: shapetable.cpp:256
STRING imagefile
Definition: ccutil.h:74
TessdataManager tessdata_manager
Definition: ccutil.h:71
BIT_VECTOR AllProtosOn
Definition: classify.h:480
#define ASSERT_HOST(x)
Definition: errcode.h:84
BIT_VECTOR AllConfigsOff
Definition: classify.h:482
ShapeTable * shape_table_
Definition: classify.h:512
bool classify_use_pre_adapted_templates
Definition: classify.h:411
void Init(tesseract::IntParam *classify_debug_level)
Definition: intmatcher.cpp:677
INT_TEMPLATES ReadIntTemplates(FILE *File)
Definition: intproto.cpp:770
#define ADAPT_TEMPLATE_SUFFIX
Definition: adaptmatch.cpp:66
STRING language_data_path_prefix
Definition: ccutil.h:70
INT_TEMPLATES Templates
Definition: adaptive.h:77
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:273
bool SeekToStart(TessdataType tessdata_type)
void InitIntegerFX()
Definition: intfx.cpp:55
IntegerMatcher im_
Definition: classify.h:503
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:456
Definition: strngs.h:44
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
#define NULL
Definition: host.h:144
BIT_VECTOR TempProtoMask
Definition: classify.h:483
#define MAX_NUM_PROTOS
Definition: intproto.h:47
const char * string() const
Definition: strngs.cpp:193
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
NORM_PROTOS * ReadNormProtos(FILE *File, inT64 end_offset)
Definition: normmatch.cpp:245
NORM_PROTOS * NormProtos
Definition: classify.h:486
bool tesseract::Classify::LargeSpeckle ( const TBLOB blob)

Definition at line 235 of file classify.cpp.

235  {
236  double speckle_size = kBlnXHeight * speckle_large_max_size;
237  TBOX bbox = blob.bounding_box();
238  return bbox.width() < speckle_size && bbox.height() < speckle_size;
239 }
const int kBlnXHeight
Definition: normalis.h:28
double speckle_large_max_size
Definition: classify.h:501
inT16 height() const
Definition: rect.h:104
inT16 width() const
Definition: rect.h:111
Definition: rect.h:30
TBOX bounding_box() const
Definition: blobs.cpp:482
void tesseract::Classify::LearnBlob ( const STRING fontname,
TBLOB Blob,
const DENORM cn_denorm,
const INT_FX_RESULT_STRUCT fx_info,
const char *  blob_text 
)

Definition at line 69 of file blobclass.cpp.

72  {
74  CharDesc->FeatureSets[0] = ExtractMicros(blob, cn_denorm);
75  CharDesc->FeatureSets[1] = ExtractCharNormFeatures(fx_info);
76  CharDesc->FeatureSets[2] = ExtractIntCNFeatures(*blob, fx_info);
77  CharDesc->FeatureSets[3] = ExtractIntGeoFeatures(*blob, fx_info);
78 
79  if (ValidCharDescription(feature_defs_, CharDesc)) {
80  // Label the features with a class name and font name.
81  tr_file_data_ += "\n";
82  tr_file_data_ += fontname;
83  tr_file_data_ += " ";
84  tr_file_data_ += blob_text;
85  tr_file_data_ += "\n";
86 
87  // write micro-features to file and clean up
88  WriteCharDescription(feature_defs_, CharDesc, &tr_file_data_);
89  } else {
90  tprintf("Blob learned was invalid!\n");
91  }
92  FreeCharDescription(CharDesc);
93 } // LearnBlob
FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM &bl_denorm, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info)
Definition: mf.cpp:47
FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT &fx_info)
Definition: normfeat.cpp:62
void FreeCharDescription(CHAR_DESC CharDesc)
Definition: featdefs.cpp:141
#define tprintf(...)
Definition: tprintf.h:31
FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]
Definition: featdefs.h:44
void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC CharDesc, STRING *str)
Definition: featdefs.cpp:197
bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC CharDesc)
Definition: featdefs.cpp:219
FEATURE_SET ExtractIntGeoFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:262
FEATURE_DEFS_STRUCT feature_defs_
Definition: classify.h:507
CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs)
Definition: featdefs.cpp:164
FEATURE_SET ExtractIntCNFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:230
void tesseract::Classify::LearnPieces ( const char *  fontname,
int  start,
int  length,
float  threshold,
CharSegmentationType  segmentation,
const char *  correct_text,
WERD_RES word 
)

Definition at line 368 of file adaptmatch.cpp.

370  {
371  // TODO(daria) Remove/modify this if/when we want
372  // to train and/or adapt to n-grams.
373  if (segmentation != CST_WHOLE &&
374  (segmentation != CST_FRAGMENT || disable_character_fragments))
375  return;
376 
377  if (length > 1) {
378  SEAM::JoinPieces(word->seam_array, word->chopped_word->blobs, start,
379  start + length - 1);
380  }
381  TBLOB* blob = word->chopped_word->blobs[start];
382  // Rotate the blob if needed for classification.
383  TBLOB* rotated_blob = blob->ClassifyNormalizeIfNeeded();
384  if (rotated_blob == NULL)
385  rotated_blob = blob;
386 
387  #ifndef GRAPHICS_DISABLED
388  // Draw debug windows showing the blob that is being learned if needed.
389  if (strcmp(classify_learn_debug_str.string(), correct_text) == 0) {
390  RefreshDebugWindow(&learn_debug_win_, "LearnPieces", 600,
391  word->chopped_word->bounding_box());
392  rotated_blob->plot(learn_debug_win_, ScrollView::GREEN, ScrollView::BROWN);
393  learn_debug_win_->Update();
394  window_wait(learn_debug_win_);
395  }
396  if (classify_debug_character_fragments && segmentation == CST_FRAGMENT) {
397  ASSERT_HOST(learn_fragments_debug_win_ != NULL); // set up in LearnWord
398  blob->plot(learn_fragments_debug_win_,
400  learn_fragments_debug_win_->Update();
401  }
402  #endif // GRAPHICS_DISABLED
403 
404  if (fontname != NULL) {
405  classify_norm_method.set_value(character); // force char norm spc 30/11/93
406  tess_bn_matching.set_value(false); // turn it off
407  tess_cn_matching.set_value(false);
408  DENORM bl_denorm, cn_denorm;
409  INT_FX_RESULT_STRUCT fx_info;
411  &bl_denorm, &cn_denorm, &fx_info);
412  LearnBlob(fontname, rotated_blob, cn_denorm, fx_info, correct_text);
413  } else if (unicharset.contains_unichar(correct_text)) {
414  UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text);
415  int font_id = word->fontinfo != NULL
416  ? fontinfo_table_.get_id(*word->fontinfo)
417  : 0;
419  tprintf("Adapting to char = %s, thr= %g font_id= %d\n",
420  unicharset.id_to_unichar(class_id), threshold, font_id);
421  // If filename is not NULL we are doing recognition
422  // (as opposed to training), so we must have already set word fonts.
423  AdaptToChar(rotated_blob, class_id, font_id, threshold, AdaptedTemplates);
424  if (BackupAdaptedTemplates != NULL) {
425  // Adapt the backup templates too. They will be used if the primary gets
426  // too full.
427  AdaptToChar(rotated_blob, class_id, font_id, threshold,
429  }
430  } else if (classify_debug_level >= 1) {
431  tprintf("Can't adapt to %s not in unicharset\n", correct_text);
432  }
433  if (rotated_blob != blob) {
434  delete rotated_blob;
435  }
436 
437  SEAM::BreakPieces(word->seam_array, word->chopped_word->blobs, start,
438  start + length - 1);
439 } // LearnPieces.
Definition: blobs.h:261
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:477
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
TWERD * chopped_word
Definition: pageres.h:201
static void Update()
Definition: scrollview.cpp:715
#define tprintf(...)
Definition: tprintf.h:31
UNICHARSET unicharset
Definition: ccutil.h:72
const FontInfo * fontinfo
Definition: pageres.h:288
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:488
int classify_learning_debug_level
Definition: classify.h:419
#define ASSERT_HOST(x)
Definition: errcode.h:84
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color)
Definition: blobs.cpp:524
bool classify_nonlinear_norm
Definition: classify.h:416
char * classify_learn_debug_str
Definition: classify.h:459
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold, ADAPT_TEMPLATES adaptive_templates)
Definition: adaptmatch.cpp:886
GenericVector< SEAM * > seam_array
Definition: pageres.h:203
int UNICHAR_ID
Definition: unichar.h:33
TBLOB * ClassifyNormalizeIfNeeded() const
Definition: blobs.cpp:363
static void JoinPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:216
static void SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
Definition: intfx.cpp:133
char window_wait(ScrollView *win)
Definition: callcpp.cpp:111
TBOX bounding_box() const
Definition: blobs.cpp:881
bool classify_debug_character_fragments
Definition: classify.h:455
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
void LearnBlob(const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
Definition: blobclass.cpp:69
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
Definition: adaptmatch.cpp:220
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
bool disable_character_fragments
Definition: classify.h:450
#define NULL
Definition: host.h:144
static void BreakPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:194
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
void tesseract::Classify::LearnWord ( const char *  fontname,
WERD_RES word 
)

Definition at line 244 of file adaptmatch.cpp.

244  {
245  int word_len = word->correct_text.size();
246  if (word_len == 0) return;
247 
248  float* thresholds = NULL;
249  if (fontname == NULL) {
250  // Adaption mode.
251  if (!EnableLearning || word->best_choice == NULL)
252  return; // Can't or won't adapt.
253 
255  tprintf("\n\nAdapting to word = %s\n",
256  word->best_choice->debug_string().string());
257  thresholds = new float[word_len];
261  matcher_rating_margin, thresholds);
262  }
263  int start_blob = 0;
264 
265  #ifndef GRAPHICS_DISABLED
267  if (learn_fragmented_word_debug_win_ != NULL) {
268  window_wait(learn_fragmented_word_debug_win_);
269  }
270  RefreshDebugWindow(&learn_fragments_debug_win_, "LearnPieces", 400,
271  word->chopped_word->bounding_box());
272  RefreshDebugWindow(&learn_fragmented_word_debug_win_, "LearnWord", 200,
273  word->chopped_word->bounding_box());
274  word->chopped_word->plot(learn_fragmented_word_debug_win_);
276  }
277  #endif // GRAPHICS_DISABLED
278 
279  for (int ch = 0; ch < word_len; ++ch) {
281  tprintf("\nLearning %s\n", word->correct_text[ch].string());
282  }
283  if (word->correct_text[ch].length() > 0) {
284  float threshold = thresholds != NULL ? thresholds[ch] : 0.0f;
285 
286  LearnPieces(fontname, start_blob, word->best_state[ch], threshold,
287  CST_WHOLE, word->correct_text[ch].string(), word);
288 
289  if (word->best_state[ch] > 1 && !disable_character_fragments) {
290  // Check that the character breaks into meaningful fragments
291  // that each match a whole character with at least
292  // classify_character_fragments_garbage_certainty_threshold
293  bool garbage = false;
294  int frag;
295  for (frag = 0; frag < word->best_state[ch]; ++frag) {
296  TBLOB* frag_blob = word->chopped_word->blobs[start_blob + frag];
298  garbage |= LooksLikeGarbage(frag_blob);
299  }
300  }
301  // Learn the fragments.
302  if (!garbage) {
303  bool pieces_all_natural = word->PiecesAllNatural(start_blob,
304  word->best_state[ch]);
305  if (pieces_all_natural || !prioritize_division) {
306  for (frag = 0; frag < word->best_state[ch]; ++frag) {
307  GenericVector<STRING> tokens;
308  word->correct_text[ch].split(' ', &tokens);
309 
310  tokens[0] = CHAR_FRAGMENT::to_string(
311  tokens[0].string(), frag, word->best_state[ch],
312  pieces_all_natural);
313 
314  STRING full_string;
315  for (int i = 0; i < tokens.size(); i++) {
316  full_string += tokens[i];
317  if (i != tokens.size() - 1)
318  full_string += ' ';
319  }
320  LearnPieces(fontname, start_blob + frag, 1, threshold,
321  CST_FRAGMENT, full_string.string(), word);
322  }
323  }
324  }
325  }
326 
327  // TODO(rays): re-enable this part of the code when we switch to the
328  // new classifier that needs to see examples of garbage.
329  /*
330  if (word->best_state[ch] > 1) {
331  // If the next blob is good, make junk with the rightmost fragment.
332  if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
333  LearnPieces(fontname, start_blob + word->best_state[ch] - 1,
334  word->best_state[ch + 1] + 1,
335  threshold, CST_IMPROPER, INVALID_UNICHAR, word);
336  }
337  // If the previous blob is good, make junk with the leftmost fragment.
338  if (ch > 0 && word->correct_text[ch - 1].length() > 0) {
339  LearnPieces(fontname, start_blob - word->best_state[ch - 1],
340  word->best_state[ch - 1] + 1,
341  threshold, CST_IMPROPER, INVALID_UNICHAR, word);
342  }
343  }
344  // If the next blob is good, make a join with it.
345  if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
346  STRING joined_text = word->correct_text[ch];
347  joined_text += word->correct_text[ch + 1];
348  LearnPieces(fontname, start_blob,
349  word->best_state[ch] + word->best_state[ch + 1],
350  threshold, CST_NGRAM, joined_text.string(), word);
351  }
352  */
353  }
354  start_blob += word->best_state[ch];
355  }
356  delete [] thresholds;
357 } // LearnWord.
Definition: blobs.h:261
int size() const
Definition: genericvector.h:72
int length() const
Definition: genericvector.h:79
void ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
Definition: pageres.cpp:553
WERD_CHOICE * best_choice
Definition: pageres.h:219
TWERD * chopped_word
Definition: pageres.h:201
static void Update()
Definition: scrollview.cpp:715
#define tprintf(...)
Definition: tprintf.h:31
double matcher_good_threshold
Definition: classify.h:420
bool prioritize_division
Definition: classify.h:387
GenericVector< STRING > correct_text
Definition: pageres.h:259
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:453
int classify_learning_debug_level
Definition: classify.h:419
double matcher_perfect_threshold
Definition: classify.h:422
double matcher_rating_margin
Definition: classify.h:424
void plot(ScrollView *window)
Definition: blobs.cpp:918
void LearnPieces(const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
Definition: adaptmatch.cpp:368
double certainty_scale
Definition: classify.h:437
STRING to_string() const
Definition: unicharset.h:73
const STRING debug_string() const
Definition: ratngs.h:502
bool PiecesAllNatural(int start, int count) const
Definition: pageres.cpp:1072
char window_wait(ScrollView *win)
Definition: callcpp.cpp:111
TBOX bounding_box() const
Definition: blobs.cpp:881
bool classify_debug_character_fragments
Definition: classify.h:455
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
bool LooksLikeGarbage(TBLOB *blob)
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
Definition: adaptmatch.cpp:220
bool disable_character_fragments
Definition: classify.h:450
Definition: strngs.h:44
GenericVector< int > best_state
Definition: pageres.h:255
#define NULL
Definition: host.h:144
const char * string() const
Definition: strngs.cpp:193
bool tesseract::Classify::LooksLikeGarbage ( TBLOB *  blob)

Definition at line 1680 of file adaptmatch.cpp.

1680  {
1681  BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST();
1682  AdaptiveClassifier(blob, ratings);
1683  BLOB_CHOICE_IT ratings_it(ratings);
1686  print_ratings_list("======================\nLooksLikeGarbage() got ",
1687  ratings, unicharset);
1688  }
1689  for (ratings_it.mark_cycle_pt(); !ratings_it.cycled_list();
1690  ratings_it.forward()) {
1691  if (unicharset.get_fragment(ratings_it.data()->unichar_id()) != NULL) {
1692  continue;
1693  }
1694  float certainty = ratings_it.data()->certainty();
1695  delete ratings;
1696  return certainty <
1698  }
1699  delete ratings;
1700  return true; // no whole characters in ratings
1701 }
UNICHARSET unicharset
Definition: ccutil.h:72
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:453
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:819
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:682
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:185
Dict & getDict()
Definition: classify.h:65
bool classify_debug_character_fragments
Definition: classify.h:455
const UNICHARSET & getUnicharset() const
Definition: dict.h:96
#define NULL
Definition: host.h:144
int tesseract::Classify::MakeNewTemporaryConfig ( ADAPT_TEMPLATES  Templates,
CLASS_ID  ClassId,
int  FontinfoId,
int  NumFeatures,
INT_FEATURE_ARRAY  Features,
FEATURE_SET  FloatFeatures 
)
Parameters
Templates: adapted templates to add new config to
ClassId: class id to associate with new config
FontinfoId: font information inferred from pre-trained templates
NumFeatures: number of features in IntFeatures
Features: features describing model for new config
FloatFeatures: floating-pt representation of features
Returns
The id of the new config created, a negative integer in case of error.
Note
Exceptions: none
History: Fri Mar 15 08:49:46 1991, DSJ, Created.

Definition at line 1791 of file adaptmatch.cpp.

1796  {
1797  INT_CLASS IClass;
1798  ADAPT_CLASS Class;
1799  PROTO_ID OldProtos[MAX_NUM_PROTOS];
1800  FEATURE_ID BadFeatures[MAX_NUM_INT_FEATURES];
1801  int NumOldProtos;
1802  int NumBadFeatures;
1803  int MaxProtoId, OldMaxProtoId;
1804  int BlobLength = 0;
1805  int MaskSize;
1806  int ConfigId;
1808  int i;
1809  int debug_level = NO_DEBUG;
1810 
1812  debug_level =
1814 
1815  IClass = ClassForClassId(Templates->Templates, ClassId);
1816  Class = Templates->Class[ClassId];
1817 
1818  if (IClass->NumConfigs >= MAX_NUM_CONFIGS) {
1819  ++NumAdaptationsFailed;
1821  cprintf("Cannot make new temporary config: maximum number exceeded.\n");
1822  return -1;
1823  }
1824 
1825  OldMaxProtoId = IClass->NumProtos - 1;
1826 
1827  NumOldProtos = im_.FindGoodProtos(IClass, AllProtosOn, AllConfigsOff,
1828  BlobLength, NumFeatures, Features,
1829  OldProtos, classify_adapt_proto_threshold,
1830  debug_level);
1831 
1832  MaskSize = WordsInVectorOfSize(MAX_NUM_PROTOS);
1833  zero_all_bits(TempProtoMask, MaskSize);
1834  for (i = 0; i < NumOldProtos; i++)
1835  SET_BIT(TempProtoMask, OldProtos[i]);
1836 
1837  NumBadFeatures = im_.FindBadFeatures(IClass, TempProtoMask, AllConfigsOn,
1838  BlobLength, NumFeatures, Features,
1839  BadFeatures,
1841  debug_level);
1842 
1843  MaxProtoId = MakeNewTempProtos(FloatFeatures, NumBadFeatures, BadFeatures,
1844  IClass, Class, TempProtoMask);
1845  if (MaxProtoId == NO_PROTO) {
1846  ++NumAdaptationsFailed;
1848  cprintf("Cannot make new temp protos: maximum number exceeded.\n");
1849  return -1;
1850  }
1851 
1852  ConfigId = AddIntConfig(IClass);
1853  ConvertConfig(TempProtoMask, ConfigId, IClass);
1854  Config = NewTempConfig(MaxProtoId, FontinfoId);
1855  TempConfigFor(Class, ConfigId) = Config;
1857 
1859  cprintf("Making new temp config %d fontinfo id %d"
1860  " using %d old and %d new protos.\n",
1861  ConfigId, Config->FontinfoId,
1862  NumOldProtos, MaxProtoId - OldMaxProtoId);
1863 
1864  return ConfigId;
1865 } /* MakeNewTemporaryConfig */
#define zero_all_bits(array, length)
Definition: bitvec.h:33
#define PRINT_PROTO_MATCHES
Definition: intproto.h:194
#define WordsInVectorOfSize(NumBits)
Definition: bitvec.h:63
inT16 PROTO_ID
Definition: matchdefs.h:41
#define PRINT_MATCH_SUMMARY
Definition: intproto.h:190
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:625
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
Definition: adaptive.cpp:223
uinT8 ProtoVectorSize
Definition: adaptive.h:42
BIT_VECTOR AllProtosOn
Definition: classify.h:480
int AddIntConfig(INT_CLASS Class)
Definition: intproto.cpp:272
int classify_learning_debug_level
Definition: classify.h:419
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
BIT_VECTOR AllConfigsOff
Definition: classify.h:482
int classify_adapt_proto_threshold
Definition: classify.h:445
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
Definition: intproto.cpp:493
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
#define NO_DEBUG
Definition: adaptmatch.cpp:70
#define NO_PROTO
Definition: matchdefs.h:42
#define MAX_NUM_INT_FEATURES
Definition: intproto.h:132
#define SET_BIT(array, bit)
Definition: bitvec.h:57
uinT8 FEATURE_ID
Definition: matchdefs.h:47
#define ClassForClassId(T, c)
Definition: intproto.h:181
INT_TEMPLATES Templates
Definition: adaptive.h:77
CLUSTERCONFIG Config
#define PRINT_FEATURE_MATCHES
Definition: intproto.h:193
IntegerMatcher im_
Definition: classify.h:503
uinT8 NumConfigs
Definition: intproto.h:110
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:554
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
BIT_VECTOR TempProtoMask
Definition: classify.h:483
#define MAX_NUM_PROTOS
Definition: intproto.h:47
BIT_VECTOR Protos
Definition: adaptive.h:45
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
#define copy_all_bits(source, dest, length)
Definition: bitvec.h:49
int classify_adapt_feature_threshold
Definition: classify.h:447
uinT16 NumProtos
Definition: intproto.h:108
PROTO_ID tesseract::Classify::MakeNewTempProtos ( FEATURE_SET  Features,
int  NumBadFeat,
FEATURE_ID  BadFeat[],
INT_CLASS  IClass,
ADAPT_CLASS  Class,
BIT_VECTOR  TempProtoMask 
)

This routine finds sets of sequential bad features that all have the same angle and converts each set into a new temporary proto. The temp proto is added to the proto pruner for IClass, pushed onto the list of temp protos in Class, and added to TempProtoMask.

Parameters
Features: floating-pt features describing new character
NumBadFeat: number of bad features to turn into protos
BadFeat: feature id's of bad features
IClass: integer class templates to add new protos to
Class: adapted class templates to add new protos to
TempProtoMask: proto mask to add new protos to

Globals: none

Returns
Max proto id in class after all protos have been added.
Exceptions: none
History: Fri Mar 15 11:39:38 1991, DSJ, Created.

Definition at line 1888 of file adaptmatch.cpp.

1893  {
1894  FEATURE_ID *ProtoStart;
1895  FEATURE_ID *ProtoEnd;
1896  FEATURE_ID *LastBad;
1897  TEMP_PROTO TempProto;
1898  PROTO Proto;
1899  FEATURE F1, F2;
1900  FLOAT32 X1, X2, Y1, Y2;
1901  FLOAT32 A1, A2, AngleDelta;
1902  FLOAT32 SegmentLength;
1903  PROTO_ID Pid;
1904 
1905  for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat;
1906  ProtoStart < LastBad; ProtoStart = ProtoEnd) {
1907  F1 = Features->Features[*ProtoStart];
1908  X1 = F1->Params[PicoFeatX];
1909  Y1 = F1->Params[PicoFeatY];
1910  A1 = F1->Params[PicoFeatDir];
1911 
1912  for (ProtoEnd = ProtoStart + 1,
1913  SegmentLength = GetPicoFeatureLength();
1914  ProtoEnd < LastBad;
1915  ProtoEnd++, SegmentLength += GetPicoFeatureLength()) {
1916  F2 = Features->Features[*ProtoEnd];
1917  X2 = F2->Params[PicoFeatX];
1918  Y2 = F2->Params[PicoFeatY];
1919  A2 = F2->Params[PicoFeatDir];
1920 
1921  AngleDelta = fabs(A1 - A2);
1922  if (AngleDelta > 0.5)
1923  AngleDelta = 1.0 - AngleDelta;
1924 
1925  if (AngleDelta > matcher_clustering_max_angle_delta ||
1926  fabs(X1 - X2) > SegmentLength ||
1927  fabs(Y1 - Y2) > SegmentLength)
1928  break;
1929  }
1930 
1931  F2 = Features->Features[*(ProtoEnd - 1)];
1932  X2 = F2->Params[PicoFeatX];
1933  Y2 = F2->Params[PicoFeatY];
1934  A2 = F2->Params[PicoFeatDir];
1935 
1936  Pid = AddIntProto(IClass);
1937  if (Pid == NO_PROTO)
1938  return (NO_PROTO);
1939 
1940  TempProto = NewTempProto();
1941  Proto = &(TempProto->Proto);
1942 
1943  /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
1944  ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
1945  instead of the -0.25 to 0.75 used in baseline normalization */
1946  Proto->Length = SegmentLength;
1947  Proto->Angle = A1;
1948  Proto->X = (X1 + X2) / 2.0;
1949  Proto->Y = (Y1 + Y2) / 2.0 - Y_DIM_OFFSET;
1950  FillABC(Proto);
1951 
1952  TempProto->ProtoId = Pid;
1953  SET_BIT(TempProtoMask, Pid);
1954 
1955  ConvertProto(Proto, Pid, IClass);
1956  AddProtoToProtoPruner(Proto, Pid, IClass,
1958 
1959  Class->TempProtos = push(Class->TempProtos, TempProto);
1960  }
1961  return IClass->NumProtos - 1;
1962 } /* MakeNewTempProtos */
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
Definition: intproto.cpp:389
float FLOAT32
Definition: host.h:111
inT16 PROTO_ID
Definition: matchdefs.h:41
TEMP_PROTO NewTempProto()
Definition: adaptive.cpp:254
uinT16 ProtoId
Definition: adaptive.h:30
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
Definition: intproto.cpp:522
FEATURE Features[1]
Definition: ocrfeatures.h:72
int classify_learning_debug_level
Definition: classify.h:419
int AddIntProto(INT_CLASS Class)
Definition: intproto.cpp:295
FLOAT32 X
Definition: protos.h:47
PROTO_STRUCT Proto
Definition: adaptive.h:32
FLOAT32 Angle
Definition: protos.h:49
#define NO_PROTO
Definition: matchdefs.h:42
#define GetPicoFeatureLength()
Definition: picofeat.h:59
#define SET_BIT(array, bit)
Definition: bitvec.h:57
uinT8 FEATURE_ID
Definition: matchdefs.h:47
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
#define Y_DIM_OFFSET
Definition: adaptmatch.cpp:75
void FillABC(PROTO Proto)
Definition: protos.cpp:198
FLOAT32 Length
Definition: protos.h:50
BIT_VECTOR TempProtoMask
Definition: classify.h:483
double matcher_clustering_max_angle_delta
Definition: classify.h:432
uinT16 NumProtos
Definition: intproto.h:108
LIST push(LIST list, void *element)
Definition: oldlist.cpp:323
FLOAT32 Y
Definition: protos.h:48
void tesseract::Classify::MakePermanent ( ADAPT_TEMPLATES  Templates,
CLASS_ID  ClassId,
int  ConfigId,
TBLOB *  Blob 
)
Parameters
Templates: current set of adaptive templates
ClassId: class containing config to be made permanent
ConfigId: config to be made permanent
Blob: current blob being adapted to

Globals: none

Note
Exceptions: none
History: Thu Mar 14 15:54:08 1991, DSJ, Created.

Definition at line 1977 of file adaptmatch.cpp.

1980  {
1981  UNICHAR_ID *Ambigs;
1983  ADAPT_CLASS Class;
1984  PROTO_KEY ProtoKey;
1985 
1986  Class = Templates->Class[ClassId];
1987  Config = TempConfigFor(Class, ConfigId);
1988 
1989  MakeConfigPermanent(Class, ConfigId);
1990  if (Class->NumPermConfigs == 0)
1991  Templates->NumPermClasses++;
1992  Class->NumPermConfigs++;
1993 
1994  // Initialize permanent config.
1995  Ambigs = GetAmbiguities(Blob, ClassId);
1997  "PERM_CONFIG_STRUCT");
1998  Perm->Ambigs = Ambigs;
1999  Perm->FontinfoId = Config->FontinfoId;
2000 
2001  // Free memory associated with temporary config (since ADAPTED_CONFIG
2002  // is a union we need to clean up before we record permanent config).
2003  ProtoKey.Templates = Templates;
2004  ProtoKey.ClassId = ClassId;
2005  ProtoKey.ConfigId = ConfigId;
2006  Class->TempProtos = delete_d(Class->TempProtos, &ProtoKey, MakeTempProtoPerm);
2007  FreeTempConfig(Config);
2008 
2009  // Record permanent config.
2010  PermConfigFor(Class, ConfigId) = Perm;
2011 
2012  if (classify_learning_debug_level >= 1) {
2013  tprintf("Making config %d for %s (ClassId %d) permanent:"
2014  " fontinfo id %d, ambiguities '",
2015  ConfigId, getDict().getUnicharset().debug_str(ClassId).string(),
2016  ClassId, PermConfigFor(Class, ConfigId)->FontinfoId);
2017  for (UNICHAR_ID *AmbigsPointer = Ambigs;
2018  *AmbigsPointer >= 0; ++AmbigsPointer)
2019  tprintf("%s", unicharset.id_to_unichar(*AmbigsPointer));
2020  tprintf("'.\n");
2021  }
2022 } /* MakePermanent */
int MakeTempProtoPerm(void *item1, void *item2)
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:105
ADAPT_TEMPLATES Templates
Definition: adaptmatch.cpp:114
CLASS_ID ClassId
Definition: adaptmatch.cpp:115
#define tprintf(...)
Definition: tprintf.h:31
UNICHARSET unicharset
Definition: ccutil.h:72
PERM_CONFIG_STRUCT * PERM_CONFIG
Definition: adaptive.h:55
UNICHAR_ID * GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass)
int classify_learning_debug_level
Definition: classify.h:419
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
uinT8 NumPermConfigs
Definition: adaptive.h:65
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
Dict & getDict()
Definition: classify.h:65
int UNICHAR_ID
Definition: unichar.h:33
CLUSTERCONFIG Config
void * alloc_struct(inT32 count, const char *)
Definition: memry.cpp:39
#define MakeConfigPermanent(Class, ConfigId)
Definition: adaptive.h:96
LIST delete_d(LIST list, void *key, int_compare is_equal)
Definition: oldlist.cpp:125
UNICHAR_ID * Ambigs
Definition: adaptive.h:52
void FreeTempConfig(TEMP_CONFIG Config)
Definition: adaptive.cpp:80
void tesseract::Classify::MasterMatcher ( INT_TEMPLATES  templates,
inT16  num_features,
const INT_FEATURE_STRUCT *  features,
const uinT8 *  norm_factors,
ADAPT_CLASS *  classes,
int  debug,
int  matcher_multiplier,
const TBOX &  blob_box,
const GenericVector< CP_RESULT_STRUCT > &  results,
ADAPT_RESULTS *  final_results 
)

Factored-out calls to IntegerMatcher based on class pruner results. Returns integer matcher results inside CLASS_PRUNER_RESULTS structure.

Definition at line 1126 of file adaptmatch.cpp.

1135  {
1136  int top = blob_box.top();
1137  int bottom = blob_box.bottom();
1138  UnicharRating int_result;
1139  for (int c = 0; c < results.size(); c++) {
1140  CLASS_ID class_id = results[c].Class;
1141  BIT_VECTOR protos = classes != NULL ? classes[class_id]->PermProtos
1142  : AllProtosOn;
1143  BIT_VECTOR configs = classes != NULL ? classes[class_id]->PermConfigs
1144  : AllConfigsOn;
1145 
1146  int_result.unichar_id = class_id;
1147  im_.Match(ClassForClassId(templates, class_id),
1148  protos, configs,
1149  num_features, features,
1150  &int_result, classify_adapt_feature_threshold, debug,
1152  bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1153  ExpandShapesAndApplyCorrections(classes, debug, class_id, bottom, top,
1154  results[c].Rating,
1155  final_results->BlobLength,
1156  matcher_multiplier, norm_factors,
1157  &int_result, final_results);
1158  }
1159 }
bool matcher_debug_separate_windows
Definition: classify.h:458
int size() const
Definition: genericvector.h:72
BIT_VECTOR PermConfigs
Definition: adaptive.h:69
inT32 BlobLength
Definition: adaptmatch.cpp:83
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
BIT_VECTOR AllProtosOn
Definition: classify.h:480
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:472
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uinT8 *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
inT16 bottom() const
Definition: rect.h:61
#define ClassForClassId(T, c)
Definition: intproto.h:181
IntegerMatcher im_
Definition: classify.h:503
#define NULL
Definition: host.h:144
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
inT16 top() const
Definition: rect.h:54
int classify_adapt_feature_threshold
Definition: classify.h:447
BIT_VECTOR PermProtos
Definition: adaptive.h:68
ADAPT_TEMPLATES tesseract::Classify::NewAdaptedTemplates ( bool  InitFromUnicharset)

Allocates memory for adapted templates.

Parameters
InitFromUnicharset: if true, add an empty class for each char in unicharset to the newly created templates
Returns
Ptr to new adapted templates.
Note
Globals: none
Exceptions: none
History: Fri Mar 8 10:15:28 1991, DSJ, Created.

Definition at line 167 of file adaptive.cpp.

167  {
168  ADAPT_TEMPLATES Templates;
169  int i;
170 
171  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
172 
173  Templates->Templates = NewIntTemplates ();
174  Templates->NumPermClasses = 0;
175  Templates->NumNonEmptyClasses = 0;
176 
177  /* Insert an empty class for each unichar id in unicharset */
178  for (i = 0; i < MAX_NUM_CLASSES; i++) {
179  Templates->Class[i] = NULL;
180  if (InitFromUnicharset && i < unicharset.size()) {
181  AddAdaptedClass(Templates, NewAdaptedClass(), i);
182  }
183  }
184 
185  return (Templates);
186 
187 } /* NewAdaptedTemplates */
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
UNICHARSET unicharset
Definition: ccutil.h:72
ADAPT_CLASS NewAdaptedClass()
Definition: adaptive.cpp:113
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
void AddAdaptedClass(ADAPT_TEMPLATES Templates, ADAPT_CLASS Class, CLASS_ID ClassId)
Definition: adaptive.cpp:49
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:732
void * Emalloc(int Size)
Definition: emalloc.cpp:47
ADAPT_TEMPLATES_STRUCT * ADAPT_TEMPLATES
Definition: adaptive.h:83
INT_TEMPLATES Templates
Definition: adaptive.h:77
#define NULL
Definition: host.h:144
int size() const
Definition: unicharset.h:297
void tesseract::Classify::NormalizeOutlines ( LIST  Outlines,
FLOAT32 *  XScale,
FLOAT32 *  YScale 
)

This routine normalizes every outline in Outlines according to the currently selected normalization method. It also returns the scale factors that it used to do this scaling. The scale factors returned represent the x and y sizes in the normalized coordinate system that correspond to 1 pixel in the original coordinate system.

Globals:

  • classify_norm_method: method being used for normalization
  • classify_char_norm_range: map radius of gyration to this value

Parameters
Outlines: list of outlines to be normalized
XScale: x-direction scale factor used by routine
YScale: y-direction scale factor used by routine
Returns
none (Outlines are changed and XScale and YScale are updated)
Note
Exceptions: none
History: Fri Dec 14 08:14:55 1990, DSJ, Created.

Definition at line 300 of file mfoutline.cpp.

302  {
303  MFOUTLINE Outline;
304 
305  switch (classify_norm_method) {
306  case character:
307  ASSERT_HOST(!"How did NormalizeOutlines get called in character mode?");
308  break;
309 
310  case baseline:
311  iterate(Outlines) {
312  Outline = (MFOUTLINE) first_node(Outlines);
313  NormalizeOutline(Outline, 0.0);
314  }
315  *XScale = *YScale = MF_SCALE_FACTOR;
316  break;
317  }
318 } /* NormalizeOutlines */
#define ASSERT_HOST(x)
Definition: errcode.h:84
void NormalizeOutline(MFOUTLINE Outline, FLOAT32 XOrigin)
Definition: mfoutline.cpp:264
#define first_node(l)
Definition: oldlist.h:139
#define iterate(l)
Definition: oldlist.h:159
LIST MFOUTLINE
Definition: mfoutline.h:33
#define MF_SCALE_FACTOR
Definition: mfoutline.h:63
void tesseract::Classify::PrintAdaptedTemplates ( FILE *  File,
ADAPT_TEMPLATES  Templates 
)

This routine prints a summary of the adapted templates in Templates to File.

Parameters
File: open text file to print Templates to
Templates: adapted templates to print to File
Note
Globals: none
Exceptions: none
History: Wed Mar 20 13:35:29 1991, DSJ, Created.

Definition at line 273 of file adaptive.cpp.

273  {
274  int i;
275  INT_CLASS IClass;
276  ADAPT_CLASS AClass;
277 
278  fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
279  fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n",
280  Templates->NumNonEmptyClasses, Templates->NumPermClasses);
281  fprintf (File, " Id NC NPC NP NPP\n");
282  fprintf (File, "------------------------\n");
283 
284  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
285  IClass = Templates->Templates->Class[i];
286  AClass = Templates->Class[i];
287  if (!IsEmptyAdaptedClass (AClass)) {
288  fprintf (File, "%5d %s %3d %3d %3d %3d\n",
290  IClass->NumConfigs, AClass->NumPermConfigs,
291  IClass->NumProtos,
292  IClass->NumProtos - count (AClass->TempProtos));
293  }
294  }
295  fprintf (File, "\n");
296 
297 } /* PrintAdaptedTemplates */
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
UNICHARSET unicharset
Definition: ccutil.h:72
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:90
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
uinT8 NumPermConfigs
Definition: adaptive.h:65
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
INT_TEMPLATES Templates
Definition: adaptive.h:77
int count(LIST var_list)
Definition: oldlist.cpp:108
uinT8 NumConfigs
Definition: intproto.h:110
uinT16 NumProtos
Definition: intproto.h:108
void tesseract::Classify::PrintAdaptiveMatchResults ( const ADAPT_RESULTS &  results)

This routine prints the matches in results using tprintf.

Parameters
results: match results to print

Globals: none

Note
Exceptions: none
History: Mon Mar 18 09:24:53 1991, DSJ, Created.

Definition at line 2076 of file adaptmatch.cpp.

2076  {
2077  for (int i = 0; i < results.match.size(); ++i) {
2078  tprintf("%s ", unicharset.debug_str(results.match[i].unichar_id).string());
2079  results.match[i].Print();
2080  }
2081 } /* PrintAdaptiveMatchResults */
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:318
int size() const
Definition: genericvector.h:72
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
#define tprintf(...)
Definition: tprintf.h:31
UNICHARSET unicharset
Definition: ccutil.h:72
const char * string() const
Definition: strngs.cpp:193
int tesseract::Classify::PruneClasses ( const INT_TEMPLATES_STRUCT *  int_templates,
int  num_features,
int  keep_this,
const INT_FEATURE_STRUCT *  features,
const uinT8 *  normalization_factors,
const uinT16 *  expected_num_features,
GenericVector< CP_RESULT_STRUCT > *  results 
)

Runs the class pruner from int_templates on the given features, returning the number of classes output in results.

Parameters
int_templates: Class pruner tables
num_features: Number of features in blob
features: Array of features
normalization_factors: Array of fudge factors from blob normalization process (by CLASS_INDEX)
expected_num_features: Array of expected number of features for each class (by CLASS_INDEX)
results: Sorted Array of pruned classes. Must be an array of size at least int_templates->NumClasses.
keep_this: (no description given; passed through to the pruner's PruneAndSort step)

Definition at line 409 of file intmatcher.cpp.

414  {
415  ClassPruner pruner(int_templates->NumClasses);
416  // Compute initial match scores for all classes.
417  pruner.ComputeScores(int_templates, num_features, features);
418  // Adjust match scores for number of expected features.
419  pruner.AdjustForExpectedNumFeatures(expected_num_features,
421  // Apply disabled classes in unicharset - only works without a shape_table.
422  if (shape_table_ == NULL)
423  pruner.DisableDisabledClasses(unicharset);
424  // If fragments are disabled, remove them, also only without a shape table.
426  pruner.DisableFragments(unicharset);
427 
428  // If we have good x-heights, apply the given normalization factors.
429  if (normalization_factors != NULL) {
430  pruner.NormalizeForXheight(classify_class_pruner_multiplier,
431  normalization_factors);
432  } else {
433  pruner.NoNormalization();
434  }
435  // Do the actual pruning and sort the short-list.
436  pruner.PruneAndSort(classify_class_pruner_threshold, keep_this,
438 
439  if (classify_debug_level > 2) {
440  pruner.DebugMatch(*this, int_templates, features);
441  }
442  if (classify_debug_level > 1) {
443  pruner.SummarizeResult(*this, int_templates, expected_num_features,
445  normalization_factors);
446  }
447  // Convert to the expected output format.
448  return pruner.SetupResults(results);
449 }
UNICHARSET unicharset
Definition: ccutil.h:72
ShapeTable * shape_table_
Definition: classify.h:512
int classify_class_pruner_multiplier
Definition: classify.h:465
int classify_class_pruner_threshold
Definition: classify.h:463
bool disable_character_fragments
Definition: classify.h:450
#define NULL
Definition: host.h:144
int classify_cp_cutoff_strength
Definition: classify.h:467
ADAPT_TEMPLATES tesseract::Classify::ReadAdaptedTemplates ( FILE *  File)

Read a set of adapted templates from File and return a ptr to the templates.

Parameters
File: open text file to read adapted templates from
Returns
Ptr to adapted templates read from File.
Note
Globals: none
Exceptions: none
History: Mon Mar 18 15:18:10 1991, DSJ, Created.

Definition at line 369 of file adaptive.cpp.

369  {
370  int i;
371  ADAPT_TEMPLATES Templates;
372 
373  /* first read the high level adaptive template struct */
374  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
375  fread ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
376 
377  /* then read in the basic integer templates */
378  Templates->Templates = ReadIntTemplates (File);
379 
380  /* then read in the adaptive info for each class */
381  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
382  Templates->Class[i] = ReadAdaptedClass (File);
383  }
384  return (Templates);
385 
386 } /* ReadAdaptedTemplates */
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
void * Emalloc(int Size)
Definition: emalloc.cpp:47
INT_TEMPLATES ReadIntTemplates(FILE *File)
Definition: intproto.cpp:770
ADAPT_TEMPLATES_STRUCT * ADAPT_TEMPLATES
Definition: adaptive.h:83
ADAPT_CLASS ReadAdaptedClass(FILE *File)
Definition: adaptive.cpp:313
INT_TEMPLATES Templates
Definition: adaptive.h:77
INT_TEMPLATES tesseract::Classify::ReadIntTemplates ( FILE *  File)

This routine reads a set of integer templates from File. File must already be open and must be in the correct binary format.

Parameters
File: open file to read templates from
Returns
Pointer to integer templates read from File.
Note
Globals: none
Exceptions: none
History: Wed Feb 27 11:48:46 1991, DSJ, Created.

Definition at line 770 of file intproto.cpp.

770  {
771  int i, j, w, x, y, z;
772  BOOL8 swap;
773  int nread;
774  int unicharset_size;
775  int version_id = 0;
776  INT_TEMPLATES Templates;
777  CLASS_PRUNER_STRUCT* Pruner;
778  INT_CLASS Class;
779  uinT8 *Lengths;
780  PROTO_SET ProtoSet;
781 
782  /* variables for conversion from older inttemp formats */
783  int b, bit_number, last_cp_bit_number, new_b, new_i, new_w;
784  CLASS_ID class_id, max_class_id;
785  inT16 *IndexFor = new inT16[MAX_NUM_CLASSES];
786  CLASS_ID *ClassIdFor = new CLASS_ID[MAX_NUM_CLASSES];
787  CLASS_PRUNER_STRUCT **TempClassPruner =
789  uinT32 SetBitsForMask = // word with NUM_BITS_PER_CLASS
790  (1 << NUM_BITS_PER_CLASS) - 1; // set starting at bit 0
791  uinT32 Mask, NewMask, ClassBits;
792  int MaxNumConfigs = MAX_NUM_CONFIGS;
793  int WerdsPerConfigVec = WERDS_PER_CONFIG_VEC;
794 
795  /* first read the high level template struct */
796  Templates = NewIntTemplates();
797  // Read Templates in parts for 64 bit compatibility.
798  if (fread(&unicharset_size, sizeof(int), 1, File) != 1)
799  cprintf("Bad read of inttemp!\n");
800  if (fread(&Templates->NumClasses,
801  sizeof(Templates->NumClasses), 1, File) != 1 ||
802  fread(&Templates->NumClassPruners,
803  sizeof(Templates->NumClassPruners), 1, File) != 1)
804  cprintf("Bad read of inttemp!\n");
805  // Swap status is determined automatically.
806  swap = Templates->NumClassPruners < 0 ||
808  if (swap) {
809  Reverse32(&Templates->NumClassPruners);
810  Reverse32(&Templates->NumClasses);
811  Reverse32(&unicharset_size);
812  }
813  if (Templates->NumClasses < 0) {
814  // This file has a version id!
815  version_id = -Templates->NumClasses;
816  if (fread(&Templates->NumClasses, sizeof(Templates->NumClasses),
817  1, File) != 1)
818  cprintf("Bad read of inttemp!\n");
819  if (swap)
820  Reverse32(&Templates->NumClasses);
821  }
822 
823  if (version_id < 3) {
824  MaxNumConfigs = OLD_MAX_NUM_CONFIGS;
825  WerdsPerConfigVec = OLD_WERDS_PER_CONFIG_VEC;
826  }
827 
828  if (version_id < 2) {
829  for (i = 0; i < unicharset_size; ++i) {
830  if (fread(&IndexFor[i], sizeof(inT16), 1, File) != 1)
831  cprintf("Bad read of inttemp!\n");
832  }
833  for (i = 0; i < Templates->NumClasses; ++i) {
834  if (fread(&ClassIdFor[i], sizeof(CLASS_ID), 1, File) != 1)
835  cprintf("Bad read of inttemp!\n");
836  }
837  if (swap) {
838  for (i = 0; i < Templates->NumClasses; i++)
839  Reverse16(&IndexFor[i]);
840  for (i = 0; i < Templates->NumClasses; i++)
841  Reverse32(&ClassIdFor[i]);
842  }
843  }
844 
845  /* then read in the class pruners */
846  for (i = 0; i < Templates->NumClassPruners; i++) {
847  Pruner = new CLASS_PRUNER_STRUCT;
848  if ((nread =
849  fread(Pruner, 1, sizeof(CLASS_PRUNER_STRUCT),
850  File)) != sizeof(CLASS_PRUNER_STRUCT))
851  cprintf("Bad read of inttemp!\n");
852  if (swap) {
853  for (x = 0; x < NUM_CP_BUCKETS; x++) {
854  for (y = 0; y < NUM_CP_BUCKETS; y++) {
855  for (z = 0; z < NUM_CP_BUCKETS; z++) {
856  for (w = 0; w < WERDS_PER_CP_VECTOR; w++) {
857  Reverse32(&Pruner->p[x][y][z][w]);
858  }
859  }
860  }
861  }
862  }
863  if (version_id < 2) {
864  TempClassPruner[i] = Pruner;
865  } else {
866  Templates->ClassPruners[i] = Pruner;
867  }
868  }
869 
870  /* fix class pruners if they came from an old version of inttemp */
871  if (version_id < 2) {
872  // Allocate enough class pruners to cover all the class ids.
873  max_class_id = 0;
874  for (i = 0; i < Templates->NumClasses; i++)
875  if (ClassIdFor[i] > max_class_id)
876  max_class_id = ClassIdFor[i];
877  for (i = 0; i <= CPrunerIdFor(max_class_id); i++) {
878  Templates->ClassPruners[i] = new CLASS_PRUNER_STRUCT;
879  memset(Templates->ClassPruners[i], 0, sizeof(CLASS_PRUNER_STRUCT));
880  }
881  // Convert class pruners from the old format (indexed by class index)
882  // to the new format (indexed by class id).
883  last_cp_bit_number = NUM_BITS_PER_CLASS * Templates->NumClasses - 1;
884  for (i = 0; i < Templates->NumClassPruners; i++) {
885  for (x = 0; x < NUM_CP_BUCKETS; x++)
886  for (y = 0; y < NUM_CP_BUCKETS; y++)
887  for (z = 0; z < NUM_CP_BUCKETS; z++)
888  for (w = 0; w < WERDS_PER_CP_VECTOR; w++) {
889  if (TempClassPruner[i]->p[x][y][z][w] == 0)
890  continue;
891  for (b = 0; b < BITS_PER_WERD; b += NUM_BITS_PER_CLASS) {
892  bit_number = i * BITS_PER_CP_VECTOR + w * BITS_PER_WERD + b;
893  if (bit_number > last_cp_bit_number)
894  break; // the rest of the bits in this word are not used
895  class_id = ClassIdFor[bit_number / NUM_BITS_PER_CLASS];
896  // Single out NUM_BITS_PER_CLASS bits relating to class_id.
897  Mask = SetBitsForMask << b;
898  ClassBits = TempClassPruner[i]->p[x][y][z][w] & Mask;
899  // Move these bits to the new position in which they should
900  // appear (indexed corresponding to the class_id).
901  new_i = CPrunerIdFor(class_id);
902  new_w = CPrunerWordIndexFor(class_id);
903  new_b = CPrunerBitIndexFor(class_id) * NUM_BITS_PER_CLASS;
904  if (new_b > b) {
905  ClassBits <<= (new_b - b);
906  } else {
907  ClassBits >>= (b - new_b);
908  }
909  // Copy bits relating to class_id to the correct position
910  // in Templates->ClassPruner.
911  NewMask = SetBitsForMask << new_b;
912  Templates->ClassPruners[new_i]->p[x][y][z][new_w] &= ~NewMask;
913  Templates->ClassPruners[new_i]->p[x][y][z][new_w] |= ClassBits;
914  }
915  }
916  }
917  for (i = 0; i < Templates->NumClassPruners; i++) {
918  delete TempClassPruner[i];
919  }
920  }
921 
922  /* then read in each class */
923  for (i = 0; i < Templates->NumClasses; i++) {
924  /* first read in the high level struct for the class */
925  Class = (INT_CLASS) Emalloc (sizeof (INT_CLASS_STRUCT));
926  if (fread(&Class->NumProtos, sizeof(Class->NumProtos), 1, File) != 1 ||
927  fread(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1, File) != 1 ||
928  fread(&Class->NumConfigs, sizeof(Class->NumConfigs), 1, File) != 1)
929  cprintf ("Bad read of inttemp!\n");
930  if (version_id == 0) {
931  // Only version 0 writes 5 pointless pointers to the file.
932  for (j = 0; j < 5; ++j) {
933  int junk;
934  if (fread(&junk, sizeof(junk), 1, File) != 1)
935  cprintf ("Bad read of inttemp!\n");
936  }
937  }
938  if (version_id < 4) {
939  for (j = 0; j < MaxNumConfigs; ++j) {
940  if (fread(&Class->ConfigLengths[j], sizeof(uinT16), 1, File) != 1)
941  cprintf ("Bad read of inttemp!\n");
942  }
943  if (swap) {
944  Reverse16(&Class->NumProtos);
945  for (j = 0; j < MaxNumConfigs; j++)
946  Reverse16(&Class->ConfigLengths[j]);
947  }
948  } else {
949  ASSERT_HOST(Class->NumConfigs < MaxNumConfigs);
950  for (j = 0; j < Class->NumConfigs; ++j) {
951  if (fread(&Class->ConfigLengths[j], sizeof(uinT16), 1, File) != 1)
952  cprintf ("Bad read of inttemp!\n");
953  }
954  if (swap) {
955  Reverse16(&Class->NumProtos);
956  for (j = 0; j < MaxNumConfigs; j++)
957  Reverse16(&Class->ConfigLengths[j]);
958  }
959  }
960  if (version_id < 2) {
961  ClassForClassId (Templates, ClassIdFor[i]) = Class;
962  } else {
963  ClassForClassId (Templates, i) = Class;
964  }
965 
966  /* then read in the proto lengths */
967  Lengths = NULL;
968  if (MaxNumIntProtosIn (Class) > 0) {
969  Lengths = (uinT8 *)Emalloc(sizeof(uinT8) * MaxNumIntProtosIn(Class));
970  if ((nread =
971  fread((char *)Lengths, sizeof(uinT8),
972  MaxNumIntProtosIn(Class), File)) != MaxNumIntProtosIn (Class))
973  cprintf ("Bad read of inttemp!\n");
974  }
975  Class->ProtoLengths = Lengths;
976 
977  /* then read in the proto sets */
978  for (j = 0; j < Class->NumProtoSets; j++) {
979  ProtoSet = (PROTO_SET)Emalloc(sizeof(PROTO_SET_STRUCT));
980  if (version_id < 3) {
981  if ((nread =
982  fread((char *) &ProtoSet->ProtoPruner, 1,
983  sizeof(PROTO_PRUNER), File)) != sizeof(PROTO_PRUNER))
984  cprintf("Bad read of inttemp!\n");
985  for (x = 0; x < PROTOS_PER_PROTO_SET; x++) {
986  if ((nread = fread((char *) &ProtoSet->Protos[x].A, 1,
987  sizeof(inT8), File)) != sizeof(inT8) ||
988  (nread = fread((char *) &ProtoSet->Protos[x].B, 1,
989  sizeof(uinT8), File)) != sizeof(uinT8) ||
990  (nread = fread((char *) &ProtoSet->Protos[x].C, 1,
991  sizeof(inT8), File)) != sizeof(inT8) ||
992  (nread = fread((char *) &ProtoSet->Protos[x].Angle, 1,
993  sizeof(uinT8), File)) != sizeof(uinT8))
994  cprintf("Bad read of inttemp!\n");
995  for (y = 0; y < WerdsPerConfigVec; y++)
996  if ((nread = fread((char *) &ProtoSet->Protos[x].Configs[y], 1,
997  sizeof(uinT32), File)) != sizeof(uinT32))
998  cprintf("Bad read of inttemp!\n");
999  }
1000  } else {
1001  if ((nread =
1002  fread((char *) ProtoSet, 1, sizeof(PROTO_SET_STRUCT),
1003  File)) != sizeof(PROTO_SET_STRUCT))
1004  cprintf("Bad read of inttemp!\n");
1005  }
1006  if (swap) {
1007  for (x = 0; x < NUM_PP_PARAMS; x++)
1008  for (y = 0; y < NUM_PP_BUCKETS; y++)
1009  for (z = 0; z < WERDS_PER_PP_VECTOR; z++)
1010  Reverse32(&ProtoSet->ProtoPruner[x][y][z]);
1011  for (x = 0; x < PROTOS_PER_PROTO_SET; x++)
1012  for (y = 0; y < WerdsPerConfigVec; y++)
1013  Reverse32(&ProtoSet->Protos[x].Configs[y]);
1014  }
1015  Class->ProtoSets[j] = ProtoSet;
1016  }
1017  if (version_id < 4)
1018  Class->font_set_id = -1;
1019  else {
1020  fread(&Class->font_set_id, sizeof(int), 1, File);
1021  if (swap)
1022  Reverse32(&Class->font_set_id);
1023  }
1024  }
1025 
1026  if (version_id < 2) {
1027  /* add an empty NULL class with class id 0 */
1028  assert(UnusedClassIdIn (Templates, 0));
1029  ClassForClassId (Templates, 0) = NewIntClass (1, 1);
1030  ClassForClassId (Templates, 0)->font_set_id = -1;
1031  Templates->NumClasses++;
1032  /* make sure the classes are contiguous */
1033  for (i = 0; i < MAX_NUM_CLASSES; i++) {
1034  if (i < Templates->NumClasses) {
1035  if (ClassForClassId (Templates, i) == NULL) {
1036  fprintf(stderr, "Non-contiguous class ids in inttemp\n");
1037  exit(1);
1038  }
1039  } else {
1040  if (ClassForClassId (Templates, i) != NULL) {
1041  fprintf(stderr, "Class id %d exceeds NumClassesIn (Templates) %d\n",
1042  i, Templates->NumClasses);
1043  exit(1);
1044  }
1045  }
1046  }
1047  }
1048  if (version_id >= 4) {
1049  this->fontinfo_table_.read(File, NewPermanentTessCallback(read_info), swap);
1050  if (version_id >= 5) {
1051  this->fontinfo_table_.read(File,
1053  swap);
1054  }
1055  this->fontset_table_.read(File, NewPermanentTessCallback(read_set), swap);
1056  }
1057 
1058  // Clean up.
1059  delete[] IndexFor;
1060  delete[] ClassIdFor;
1061  delete[] TempClassPruner;
1062 
1063  return (Templates);
1064 } /* ReadIntTemplates */
#define NUM_CP_BUCKETS
Definition: intproto.h:52
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
Definition: intproto.h:125
bool read_info(FILE *f, FontInfo *fi, bool swap)
Definition: fontinfo.cpp:152
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
#define MaxNumIntProtosIn(C)
Definition: intproto.h:168
#define WERDS_PER_PP_VECTOR
Definition: intproto.h:62
INT_PROTO_STRUCT Protos[PROTOS_PER_PROTO_SET]
Definition: intproto.h:97
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
struct INT_CLASS_STRUCT * INT_CLASS
void Reverse32(void *ptr)
Definition: helpers.h:193
unsigned char BOOL8
Definition: host.h:113
uinT32 PROTO_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR]
Definition: intproto.h:92
#define CPrunerIdFor(c)
Definition: intproto.h:183
bool read_set(FILE *f, FontSet *fs, bool swap)
Definition: fontinfo.cpp:240
#define CPrunerWordIndexFor(c)
Definition: intproto.h:185
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs)
Definition: intproto.cpp:672
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:488
uinT16 ConfigLengths[MAX_NUM_CONFIGS]
Definition: intproto.h:113
#define ASSERT_HOST(x)
Definition: errcode.h:84
#define WERDS_PER_CP_VECTOR
Definition: intproto.h:61
PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]
Definition: intproto.h:111
#define MAX_NUM_CLASS_PRUNERS
Definition: intproto.h:59
#define BITS_PER_WERD
Definition: intproto.h:44
unsigned int uinT32
Definition: host.h:103
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:732
void * Emalloc(int Size)
Definition: emalloc.cpp:47
#define OLD_WERDS_PER_CONFIG_VEC
Definition: intproto.cpp:115
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
#define NUM_PP_PARAMS
Definition: intproto.h:50
void Reverse16(void *ptr)
Definition: helpers.h:188
#define NUM_PP_BUCKETS
Definition: intproto.h:51
#define UnusedClassIdIn(T, c)
Definition: intproto.h:180
#define ClassForClassId(T, c)
Definition: intproto.h:181
#define CPrunerBitIndexFor(c)
Definition: intproto.h:186
uinT8 NumProtoSets
Definition: intproto.h:109
PROTO_PRUNER ProtoPruner
Definition: intproto.h:96
#define PROTOS_PER_PROTO_SET
Definition: intproto.h:48
#define BITS_PER_CP_VECTOR
Definition: intproto.h:58
#define NUM_BITS_PER_CLASS
Definition: intproto.h:54
uinT8 NumConfigs
Definition: intproto.h:110
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
uinT32 p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]
Definition: intproto.h:77
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
#define NULL
Definition: host.h:144
SIGNED char inT8
Definition: host.h:98
uinT8 * ProtoLengths
Definition: intproto.h:112
bool read_spacing_info(FILE *f, FontInfo *fi, bool swap)
Definition: fontinfo.cpp:177
#define WERDS_PER_CONFIG_VEC
Definition: intproto.h:68
uinT16 NumProtos
Definition: intproto.h:108
struct PROTO_SET_STRUCT * PROTO_SET
uinT32 Configs[WERDS_PER_CONFIG_VEC]
Definition: intproto.h:86
unsigned short uinT16
Definition: host.h:101
short inT16
Definition: host.h:100
unsigned char uinT8
Definition: host.h:99
#define OLD_MAX_NUM_CONFIGS
Definition: intproto.cpp:114
void tesseract::Classify::ReadNewCutoffs ( FILE *  CutoffFile,
bool  swap,
inT64  end_offset,
CLASS_CUTOFF_ARRAY  Cutoffs 
)

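Read all of the class-id/cutoff pairs from the already-open CutoffFile and insert them into the Cutoffs array. If a shape table is present, the shape-table cutoffs are deserialized from the file first. Cutoffs are indexed in the array by class id. Unused entries in the array are set to an arbitrarily high cutoff value (MAX_CUTOFF).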

Parameters
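CutoffFile: open file containing cutoff definitions
Cutoffs: array to put cutoffs into
swap: passed through to the shape-table cutoff deserializer when a shape table is present
end_offset: file position at which to stop reading; a negative value means no limit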
Returns
none
Note
Globals: none
Exceptions: none
History: Wed Feb 20 09:38:26 1991, DSJ, Created.

Definition at line 52 of file cutoffs.cpp.

53  {
54  char Class[UNICHAR_LEN + 1];
55  CLASS_ID ClassId;
56  int Cutoff;
57  int i;
58 
59  if (shape_table_ != NULL) {
60  if (!shapetable_cutoffs_.DeSerialize(swap, CutoffFile)) {
61  tprintf("Error during read of shapetable pffmtable!\n");
62  }
63  }
64  for (i = 0; i < MAX_NUM_CLASSES; i++)
65  Cutoffs[i] = MAX_CUTOFF;
66 
67  while ((end_offset < 0 || ftell(CutoffFile) < end_offset) &&
68  tfscanf(CutoffFile, "%" REALLY_QUOTE_IT(UNICHAR_LEN) "s %d",
69  Class, &Cutoff) == 2) {
70  if (strcmp(Class, "NULL") == 0) {
71  ClassId = unicharset.unichar_to_id(" ");
72  } else {
73  ClassId = unicharset.unichar_to_id(Class);
74  }
75  Cutoffs[ClassId] = Cutoff;
76  SkipNewline(CutoffFile);
77  }
78 }
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
#define tprintf(...)
Definition: tprintf.h:31
UNICHARSET unicharset
Definition: ccutil.h:72
int tfscanf(FILE *stream, const char *format,...)
Definition: scanutils.cpp:229
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
#define REALLY_QUOTE_IT(x)
Definition: cutoffs.cpp:33
ShapeTable * shape_table_
Definition: classify.h:512
bool DeSerialize(bool swap, FILE *fp)
void SkipNewline(FILE *file)
Definition: helpers.h:84
#define MAX_CUTOFF
Definition: cutoffs.cpp:35
#define NULL
Definition: host.h:144
#define UNICHAR_LEN
Definition: unichar.h:30
NORM_PROTOS * tesseract::Classify::ReadNormProtos ( FILE *  File,
inT64  end_offset 
)

This routine allocates a new data structure to hold a set of character normalization protos. It then fills in the data structure by reading from the specified File.

Parameters
File: open text file to read normalization protos from
end_offset: file position at which to stop reading; a negative value means no limit
Globals: none
Returns
Character normalization protos.
Note
Exceptions: none
History: Wed Dec 19 16:38:49 1990, DSJ, Created.

Definition at line 245 of file normmatch.cpp.

245  {
247  int i;
248  char unichar[2 * UNICHAR_LEN + 1];
249  UNICHAR_ID unichar_id;
250  LIST Protos;
251  int NumProtos;
252 
253  /* allocate and initialization data structure */
254  NormProtos = (NORM_PROTOS *) Emalloc (sizeof (NORM_PROTOS));
255  NormProtos->NumProtos = unicharset.size();
256  NormProtos->Protos = (LIST *) Emalloc (NormProtos->NumProtos * sizeof(LIST));
257  for (i = 0; i < NormProtos->NumProtos; i++)
258  NormProtos->Protos[i] = NIL_LIST;
259 
260  /* read file header and save in data structure */
261  NormProtos->NumParams = ReadSampleSize (File);
262  NormProtos->ParamDesc = ReadParamDesc (File, NormProtos->NumParams);
263 
264  /* read protos for each class into a separate list */
265  while ((end_offset < 0 || ftell(File) < end_offset) &&
266  tfscanf(File, "%s %d", unichar, &NumProtos) == 2) {
267  if (unicharset.contains_unichar(unichar)) {
268  unichar_id = unicharset.unichar_to_id(unichar);
269  Protos = NormProtos->Protos[unichar_id];
270  for (i = 0; i < NumProtos; i++)
271  Protos =
272  push_last (Protos, ReadPrototype (File, NormProtos->NumParams));
273  NormProtos->Protos[unichar_id] = Protos;
274  } else {
275  cprintf("Error: unichar %s in normproto file is not in unichar set.\n",
276  unichar);
277  for (i = 0; i < NumProtos; i++)
278  FreePrototype(ReadPrototype (File, NormProtos->NumParams));
279  }
280  SkipNewline(File);
281  }
282  return (NormProtos);
283 } /* ReadNormProtos */
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
PARAM_DESC * ParamDesc
Definition: normmatch.cpp:41
#define NIL_LIST
Definition: oldlist.h:126
UNICHARSET unicharset
Definition: ccutil.h:72
int tfscanf(FILE *stream, const char *format,...)
Definition: scanutils.cpp:229
uinT16 ReadSampleSize(FILE *File)
Definition: clusttool.cpp:43
LIST * Protos
Definition: normmatch.cpp:42
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:338
void * Emalloc(int Size)
Definition: emalloc.cpp:47
void SkipNewline(FILE *file)
Definition: helpers.h:84
int UNICHAR_ID
Definition: unichar.h:33
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
void FreePrototype(void *arg)
Definition: cluster.cpp:586
#define UNICHAR_LEN
Definition: unichar.h:30
int size() const
Definition: unicharset.h:297
PARAM_DESC * ReadParamDesc(FILE *File, uinT16 N)
Definition: clusttool.cpp:66
PROTOTYPE * ReadPrototype(FILE *File, uinT16 N)
Definition: clusttool.cpp:113
NORM_PROTOS * NormProtos
Definition: classify.h:486
void tesseract::Classify::RefreshDebugWindow ( ScrollView **  win,
const char *  msg,
int  y_offset,
const TBOX wbox 
)

Definition at line 220 of file adaptmatch.cpp.

221  {
222  #ifndef GRAPHICS_DISABLED
223  const int kSampleSpaceWidth = 500;
224  if (*win == NULL) {
225  *win = new ScrollView(msg, 100, y_offset, kSampleSpaceWidth * 2, 200,
226  kSampleSpaceWidth * 2, 200, true);
227  }
228  (*win)->Clear();
229  (*win)->Pen(64, 64, 64);
230  (*win)->Line(-kSampleSpaceWidth, kBlnBaselineOffset,
231  kSampleSpaceWidth, kBlnBaselineOffset);
232  (*win)->Line(-kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset,
233  kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset);
234  (*win)->ZoomToRectangle(wbox.left(), wbox.top(),
235  wbox.right(), wbox.bottom());
236  #endif // GRAPHICS_DISABLED
237 }
const int kBlnXHeight
Definition: normalis.h:28
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
const int kBlnBaselineOffset
Definition: normalis.h:29
inT16 bottom() const
Definition: rect.h:61
#define NULL
Definition: host.h:144
inT16 top() const
Definition: rect.h:54
void tesseract::Classify::RemoveBadMatches ( ADAPT_RESULTS Results)

This routine steps through each matching class in Results and removes it from the match list if its rating falls below the best rating by more than the pad (matcher_bad_match_pad). In other words, all good matches get moved to the front of the classes array.

Parameters
Results: contains matches to be filtered

Globals:

  • matcher_bad_match_pad defines a "bad match"
Note
Exceptions: none
History: Tue Mar 12 13:51:03 1991, DSJ, Created.

Definition at line 2099 of file adaptmatch.cpp.

2099  {
2100  int Next, NextGood;
2101  FLOAT32 BadMatchThreshold;
2102  static const char* romans = "i v x I V X";
2103  BadMatchThreshold = Results->best_rating - matcher_bad_match_pad;
2104 
2106  UNICHAR_ID unichar_id_one = unicharset.contains_unichar("1") ?
2107  unicharset.unichar_to_id("1") : -1;
2108  UNICHAR_ID unichar_id_zero = unicharset.contains_unichar("0") ?
2109  unicharset.unichar_to_id("0") : -1;
2110  float scored_one = ScoredUnichar(unichar_id_one, *Results);
2111  float scored_zero = ScoredUnichar(unichar_id_zero, *Results);
2112 
2113  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2114  const UnicharRating& match = Results->match[Next];
2115  if (match.rating >= BadMatchThreshold) {
2116  if (!unicharset.get_isalpha(match.unichar_id) ||
2117  strstr(romans,
2118  unicharset.id_to_unichar(match.unichar_id)) != NULL) {
2119  } else if (unicharset.eq(match.unichar_id, "l") &&
2120  scored_one < BadMatchThreshold) {
2121  Results->match[Next].unichar_id = unichar_id_one;
2122  } else if (unicharset.eq(match.unichar_id, "O") &&
2123  scored_zero < BadMatchThreshold) {
2124  Results->match[Next].unichar_id = unichar_id_zero;
2125  } else {
2126  Results->match[Next].unichar_id = INVALID_UNICHAR_ID; // Don't copy.
2127  }
2128  if (Results->match[Next].unichar_id != INVALID_UNICHAR_ID) {
2129  if (NextGood == Next) {
2130  ++NextGood;
2131  } else {
2132  Results->match[NextGood++] = Results->match[Next];
2133  }
2134  }
2135  }
2136  }
2137  } else {
2138  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2139  if (Results->match[Next].rating >= BadMatchThreshold) {
2140  if (NextGood == Next) {
2141  ++NextGood;
2142  } else {
2143  Results->match[NextGood++] = Results->match[Next];
2144  }
2145  }
2146  }
2147  }
2148  Results->match.truncate(NextGood);
2149 } /* RemoveBadMatches */
int size() const
Definition: genericvector.h:72
void truncate(int size)
bool classify_bln_numeric_mode
Definition: classify.h:500
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
float FLOAT32
Definition: host.h:111
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
Definition: unicharset.cpp:656
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
UNICHARSET unicharset
Definition: ccutil.h:72
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
FLOAT32 best_rating
Definition: adaptmatch.cpp:87
int UNICHAR_ID
Definition: unichar.h:33
double matcher_bad_match_pad
Definition: classify.h:423
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:449
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
#define NULL
Definition: host.h:144
void tesseract::Classify::RemoveExtraPuncs ( ADAPT_RESULTS Results)

This routine discards extra digits or punctuation from the results. We keep only the top 2 punctuation answers and the top 1 digit answer if present.

Parameters
Results: contains matches to be filtered
Note
History: Tue Mar 12 13:51:03 1991, DSJ, Created.

Definition at line 2161 of file adaptmatch.cpp.

2161  {
2162  int Next, NextGood;
2163  int punc_count; /*no of garbage characters */
2164  int digit_count;
2165  /*garbage characters */
2166  static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! _ ^";
2167  static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9";
2168 
2169  punc_count = 0;
2170  digit_count = 0;
2171  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2172  const UnicharRating& match = Results->match[Next];
2173  bool keep = true;
2174  if (strstr(punc_chars,
2175  unicharset.id_to_unichar(match.unichar_id)) != NULL) {
2176  if (punc_count >= 2)
2177  keep = false;
2178  punc_count++;
2179  } else {
2180  if (strstr(digit_chars,
2181  unicharset.id_to_unichar(match.unichar_id)) != NULL) {
2182  if (digit_count >= 1)
2183  keep = false;
2184  digit_count++;
2185  }
2186  }
2187  if (keep) {
2188  if (NextGood == Next) {
2189  ++NextGood;
2190  } else {
2191  Results->match[NextGood++] = match;
2192  }
2193  }
2194  }
2195  Results->match.truncate(NextGood);
2196 } /* RemoveExtraPuncs */
int size() const
Definition: genericvector.h:72
void truncate(int size)
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
UNICHARSET unicharset
Definition: ccutil.h:72
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
#define NULL
Definition: host.h:144
void tesseract::Classify::ResetAdaptiveClassifierInternal ( )

Definition at line 613 of file adaptmatch.cpp.

613  {
615  tprintf("Resetting adaptive classifier (NumAdaptationsFailed=%d)\n",
616  NumAdaptationsFailed);
617  }
623  NumAdaptationsFailed = 0;
624 }
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:477
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:167
#define tprintf(...)
Definition: tprintf.h:31
int classify_learning_debug_level
Definition: classify.h:419
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
#define NULL
Definition: host.h:144
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
void tesseract::Classify::SetAdaptiveThreshold ( FLOAT32  Threshold)

This routine resets the internal thresholds inside the integer matcher to correspond to the specified threshold.

Parameters
Threshold: threshold for creating new templates

Globals:

  • matcher_good_threshold default good match rating
Note
Exceptions: none
History: Tue Apr 9 08:33:13 1991, DSJ, Created.

Definition at line 2212 of file adaptmatch.cpp.

2212  {
2213  Threshold = (Threshold == matcher_good_threshold) ? 0.9: (1.0 - Threshold);
2215  ClipToRange<int>(255 * Threshold, 0, 255));
2217  ClipToRange<int>(255 * Threshold, 0, 255));
2218 } /* SetAdaptiveThreshold */
double matcher_good_threshold
Definition: classify.h:420
int classify_adapt_proto_threshold
Definition: classify.h:445
int classify_adapt_feature_threshold
Definition: classify.h:447
void tesseract::Classify::SetStaticClassifier ( ShapeClassifier static_classifier)

Definition at line 204 of file classify.cpp.

204  {
205  delete static_classifier_;
206  static_classifier_ = static_classifier;
207 }
void tesseract::Classify::SettupPass1 ( )

This routine prepares the adaptive matcher for the start of the first pass. Learning is enabled (unless it is disabled for the whole program).

Note
This is somewhat redundant: it simply says that if learning is enabled then it will remain enabled on the first pass, and if it is disabled then it will remain disabled. It is only put here to make it very clear that learning is controlled directly by the global setting of EnableLearning.

Globals:

Note
Exceptions: none
History: Mon Apr 15 16:39:29 1991, DSJ, Created.

Definition at line 670 of file adaptmatch.cpp.

670  {
672 
674 
675 } /* SettupPass1 */
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
Definition: stopper.cpp:370
Dict & getDict()
Definition: classify.h:65
bool classify_enable_learning
Definition: classify.h:389
void tesseract::Classify::SettupPass2 ( )

This routine prepares the adaptive matcher for the start of the second pass. Further learning is disabled.

Globals:

Note
Exceptions: none
History: Mon Apr 15 16:39:29 1991, DSJ, Created.

Definition at line 690 of file adaptmatch.cpp.

690  {
693 
694 } /* SettupPass2 */
Dict & getDict()
Definition: classify.h:65
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
Definition: stopper.cpp:374
#define FALSE
Definition: capi.h:29
void tesseract::Classify::SetupBLCNDenorms ( const TBLOB blob,
bool  nonlinear_norm,
DENORM bl_denorm,
DENORM cn_denorm,
INT_FX_RESULT_STRUCT fx_info 
)
static

Definition at line 133 of file intfx.cpp.

135  {
136  // Compute 1st and 2nd moments of the original outline.
137  FCOORD center, second_moments;
138  int length = blob.ComputeMoments(&center, &second_moments);
139  if (fx_info != NULL) {
140  fx_info->Length = length;
141  fx_info->Rx = IntCastRounded(second_moments.y());
142  fx_info->Ry = IntCastRounded(second_moments.x());
143 
144  fx_info->Xmean = IntCastRounded(center.x());
145  fx_info->Ymean = IntCastRounded(center.y());
146  }
147  // Setup the denorm for Baseline normalization.
148  bl_denorm->SetupNormalization(NULL, NULL, &blob.denorm(), center.x(), 128.0f,
149  1.0f, 1.0f, 128.0f, 128.0f);
150  // Setup the denorm for character normalization.
151  if (nonlinear_norm) {
154  TBOX box;
155  blob.GetPreciseBoundingBox(&box);
156  box.pad(1, 1);
157  blob.GetEdgeCoords(box, &x_coords, &y_coords);
158  cn_denorm->SetupNonLinear(&blob.denorm(), box, MAX_UINT8, MAX_UINT8,
159  0.0f, 0.0f, x_coords, y_coords);
160  } else {
161  cn_denorm->SetupNormalization(NULL, NULL, &blob.denorm(),
162  center.x(), center.y(),
163  51.2f / second_moments.x(),
164  51.2f / second_moments.y(),
165  128.0f, 128.0f);
166  }
167 }
float x() const
Definition: points.h:209
void SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width, float target_height, float final_xshift, float final_yshift, const GenericVector< GenericVector< int > > &x_coords, const GenericVector< GenericVector< int > > &y_coords)
Definition: normalis.cpp:267
int ComputeMoments(FCOORD *center, FCOORD *second_moments) const
Definition: blobs.cpp:535
void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift)
Definition: normalis.cpp:95
void pad(int xpad, int ypad)
Definition: rect.h:127
void GetPreciseBoundingBox(TBOX *precise_box) const
Definition: blobs.cpp:554
#define MAX_UINT8
Definition: host.h:121
const DENORM & denorm() const
Definition: blobs.h:340
void GetEdgeCoords(const TBOX &box, GenericVector< GenericVector< int > > *x_coords, GenericVector< GenericVector< int > > *y_coords) const
Definition: blobs.cpp:570
int IntCastRounded(double x)
Definition: helpers.h:172
Definition: rect.h:30
float y() const
Definition: points.h:212
#define NULL
Definition: host.h:144
Definition: points.h:189
const ShapeTable* tesseract::Classify::shape_table ( ) const
inline

Definition at line 69 of file classify.h.

69  {
70  return shape_table_;
71  }
ShapeTable * shape_table_
Definition: classify.h:512
int tesseract::Classify::ShapeIDToClassID ( int  shape_id) const

Definition at line 2296 of file adaptmatch.cpp.

2296  {
2297  for (int id = 0; id < PreTrainedTemplates->NumClasses; ++id) {
2298  int font_set_id = PreTrainedTemplates->Class[id]->font_set_id;
2299  ASSERT_HOST(font_set_id >= 0);
2300  const FontSet &fs = fontset_table_.get(font_set_id);
2301  for (int config = 0; config < fs.size; ++config) {
2302  if (fs.configs[config] == shape_id)
2303  return id;
2304  }
2305  }
2306  tprintf("Shape %d not found\n", shape_id);
2307  return -1;
2308 }
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
#define tprintf(...)
Definition: tprintf.h:31
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
#define ASSERT_HOST(x)
Definition: errcode.h:84
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
void tesseract::Classify::ShowBestMatchFor ( int  shape_id,
const INT_FEATURE_STRUCT features,
int  num_features 
)

This routine displays debug information for the best config of the given shape_id for the given set of features.

Parameters
shape_id: classifier id to work with
features: features of the unknown character
num_features: number of features in the features array
Note
Exceptions: none
History: Fri Mar 22 08:43:52 1991, DSJ, Created.

Definition at line 2233 of file adaptmatch.cpp.

2235  {
2236 #ifndef GRAPHICS_DISABLED
2237  uinT32 config_mask;
2238  if (UnusedClassIdIn(PreTrainedTemplates, shape_id)) {
2239  tprintf("No built-in templates for class/shape %d\n", shape_id);
2240  return;
2241  }
2242  if (num_features <= 0) {
2243  tprintf("Illegal blob (char norm features)!\n");
2244  return;
2245  }
2246  UnicharRating cn_result;
2247  classify_norm_method.set_value(character);
2250  num_features, features, &cn_result,
2253  tprintf("\n");
2254  config_mask = 1 << cn_result.config;
2255 
2256  tprintf("Static Shape ID: %d\n", shape_id);
2257  ShowMatchDisplay();
2259  AllProtosOn, reinterpret_cast<BIT_VECTOR>(&config_mask),
2260  num_features, features, &cn_result,
2265 #endif // GRAPHICS_DISABLED
2266 } /* ShowBestMatchFor */
bool matcher_debug_separate_windows
Definition: classify.h:458
#define tprintf(...)
Definition: tprintf.h:31
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
BIT_VECTOR AllProtosOn
Definition: classify.h:480
unsigned int uinT32
Definition: host.h:103
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:472
#define NO_DEBUG
Definition: adaptmatch.cpp:70
void UpdateMatchDisplay()
Definition: intproto.cpp:473
#define UnusedClassIdIn(T, c)
Definition: intproto.h:180
#define ClassForClassId(T, c)
Definition: intproto.h:181
IntegerMatcher im_
Definition: classify.h:503
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
int classify_adapt_feature_threshold
Definition: classify.h:447
void tesseract::Classify::ShowMatchDisplay ( )

This routine sends the shapes in the global display lists to the match debugger window.

Globals:

  • FeatureShapes display list containing feature matches
  • ProtoShapes display list containing proto matches
    Returns
    none
    Note
    Exceptions: none
    History: Thu Mar 21 15:47:33 1991, DSJ, Created.

Definition at line 1079 of file intproto.cpp.

1079  {
1081  if (ProtoDisplayWindow) {
1083  }
1084  if (FeatureDisplayWindow) {
1086  }
1088  static_cast<NORM_METHOD>(static_cast<int>(classify_norm_method)),
1089  IntMatchWindow);
1091  INT_MAX_X, INT_MAX_Y);
1092  if (ProtoDisplayWindow) {
1094  INT_MAX_X, INT_MAX_Y);
1095  }
1096  if (FeatureDisplayWindow) {
1098  INT_MAX_X, INT_MAX_Y);
1099  }
1100 } /* ShowMatchDisplay */
void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView *window)
Definition: intproto.cpp:1104
#define INT_MIN_Y
Definition: intproto.cpp:66
void InitIntMatchWindowIfReqd()
Definition: intproto.cpp:1895
ScrollView * ProtoDisplayWindow
Definition: intproto.cpp:183
#define INT_MAX_X
Definition: intproto.cpp:67
void Clear()
Definition: scrollview.cpp:595
ScrollView * IntMatchWindow
Definition: intproto.cpp:181
void ZoomToRectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:765
#define INT_MAX_Y
Definition: intproto.cpp:68
#define INT_MIN_X
Definition: intproto.cpp:65
ScrollView * FeatureDisplayWindow
Definition: intproto.cpp:182
void tesseract::Classify::StartBackupAdaptiveClassifier ( )

Definition at line 644 of file adaptmatch.cpp.

644  {
648 }
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:477
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:167
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
#define NULL
Definition: host.h:144
void tesseract::Classify::SwitchAdaptiveClassifier ( )

Definition at line 628 of file adaptmatch.cpp.

628  {
629  if (BackupAdaptedTemplates == NULL) {
631  return;
632  }
634  tprintf("Switch to backup adaptive classifier (NumAdaptationsFailed=%d)\n",
635  NumAdaptationsFailed);
636  }
640  NumAdaptationsFailed = 0;
641 }
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:477
#define tprintf(...)
Definition: tprintf.h:31
int classify_learning_debug_level
Definition: classify.h:419
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
void ResetAdaptiveClassifierInternal()
Definition: adaptmatch.cpp:613
#define NULL
Definition: host.h:144
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
bool tesseract::Classify::TempConfigReliable ( CLASS_ID  class_id,
const TEMP_CONFIG config 
)

Definition at line 2312 of file adaptmatch.cpp.

2313  {
2314  if (classify_learning_debug_level >= 1) {
2315  tprintf("NumTimesSeen for config of %s is %d\n",
2316  getDict().getUnicharset().debug_str(class_id).string(),
2317  config->NumTimesSeen);
2318  }
2320  return true;
2321  } else if (config->NumTimesSeen < matcher_min_examples_for_prototyping) {
2322  return false;
2323  } else if (use_ambigs_for_adaption) {
2324  // Go through the ambigs vector and see whether we have already seen
2325  // enough times all the characters represented by the ambigs vector.
2326  const UnicharIdVector *ambigs =
2328  int ambigs_size = (ambigs == NULL) ? 0 : ambigs->size();
2329  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
2330  ADAPT_CLASS ambig_class = AdaptedTemplates->Class[(*ambigs)[ambig]];
2331  assert(ambig_class != NULL);
2332  if (ambig_class->NumPermConfigs == 0 &&
2333  ambig_class->MaxNumTimesSeen <
2335  if (classify_learning_debug_level >= 1) {
2336  tprintf("Ambig %s has not been seen enough times,"
2337  " not making config for %s permanent\n",
2338  getDict().getUnicharset().debug_str(
2339  (*ambigs)[ambig]).string(),
2340  getDict().getUnicharset().debug_str(class_id).string());
2341  }
2342  return false;
2343  }
2344  }
2345  }
2346  return true;
2347 }
int size() const
Definition: genericvector.h:72
#define tprintf(...)
Definition: tprintf.h:31
int matcher_min_examples_for_prototyping
Definition: classify.h:428
int classify_learning_debug_level
Definition: classify.h:419
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
uinT8 NumPermConfigs
Definition: adaptive.h:65
uinT8 MaxNumTimesSeen
Definition: adaptive.h:66
const UnicharAmbigs & getUnicharAmbigs() const
Definition: dict.h:102
Dict & getDict()
Definition: classify.h:65
uinT8 NumTimesSeen
Definition: adaptive.h:41
int matcher_sufficient_examples_for_prototyping
Definition: classify.h:430
const UnicharIdVector * AmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:191
GenericVector< UNICHAR_ID > UnicharIdVector
Definition: ambigs.h:34
#define NULL
Definition: host.h:144
bool use_ambigs_for_adaption
Definition: ccutil.h:93
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
void tesseract::Classify::UpdateAmbigsGroup ( CLASS_ID  class_id,
TBLOB Blob 
)

Definition at line 2349 of file adaptmatch.cpp.

2349  {
2350  const UnicharIdVector *ambigs =
2352  int ambigs_size = (ambigs == NULL) ? 0 : ambigs->size();
2353  if (classify_learning_debug_level >= 1) {
2354  tprintf("Running UpdateAmbigsGroup for %s class_id=%d\n",
2355  getDict().getUnicharset().debug_str(class_id).string(), class_id);
2356  }
2357  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
2358  CLASS_ID ambig_class_id = (*ambigs)[ambig];
2359  const ADAPT_CLASS ambigs_class = AdaptedTemplates->Class[ambig_class_id];
2360  for (int cfg = 0; cfg < MAX_NUM_CONFIGS; ++cfg) {
2361  if (ConfigIsPermanent(ambigs_class, cfg)) continue;
2362  const TEMP_CONFIG config =
2363  TempConfigFor(AdaptedTemplates->Class[ambig_class_id], cfg);
2364  if (config != NULL && TempConfigReliable(ambig_class_id, config)) {
2365  if (classify_learning_debug_level >= 1) {
2366  tprintf("Making config %d of %s permanent\n", cfg,
2367  getDict().getUnicharset().debug_str(
2368  ambig_class_id).string());
2369  }
2370  MakePermanent(AdaptedTemplates, ambig_class_id, cfg, Blob);
2371  }
2372  }
2373  }
2374 }
int size() const
Definition: genericvector.h:72
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
#define tprintf(...)
Definition: tprintf.h:31
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:93
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
int classify_learning_debug_level
Definition: classify.h:419
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
const UnicharAmbigs & getUnicharAmbigs() const
Definition: dict.h:102
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
Dict & getDict()
Definition: classify.h:65
const UnicharIdVector * ReverseAmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:200
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
GenericVector< UNICHAR_ID > UnicharIdVector
Definition: ambigs.h:34
#define NULL
Definition: host.h:144
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
void tesseract::Classify::WriteAdaptedTemplates ( FILE *  File,
ADAPT_TEMPLATES  Templates 
)

This routine saves Templates to File in a binary format.

Parameters
File: open file to write Templates to (the data is written in binary form)
Templates: set of adapted templates to write to File
Note
Globals: none
Exceptions: none
History: Mon Mar 18 15:07:32 1991, DSJ, Created.

Definition at line 505 of file adaptive.cpp.

505  {
506  int i;
507 
508  /* first write the high level adaptive template struct */
509  fwrite ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
510 
511  /* then write out the basic integer templates */
512  WriteIntTemplates (File, Templates->Templates, unicharset);
513 
514  /* then write out the adaptive info for each class */
515  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
516  WriteAdaptedClass (File, Templates->Class[i],
517  Templates->Templates->Class[i]->NumConfigs);
518  }
519 } /* WriteAdaptedTemplates */
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
UNICHARSET unicharset
Definition: ccutil.h:72
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs)
Definition: adaptive.cpp:459
INT_TEMPLATES Templates
Definition: adaptive.h:77
uinT8 NumConfigs
Definition: intproto.h:110
void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
Definition: intproto.cpp:1138
void tesseract::Classify::WriteIntTemplates ( FILE *  File,
INT_TEMPLATES  Templates,
const UNICHARSET target_unicharset 
)

This routine writes Templates to File. The format is an efficient binary format. File must already be open for writing.

Parameters
File: open file to write templates to
Templates: templates to save into File
target_unicharset: the UNICHARSET to use
Returns
none
Note
Globals: none
Exceptions: none
History: Wed Feb 27 11:48:46 1991, DSJ, Created.

Definition at line 1138 of file intproto.cpp.

1139  {
1140  int i, j;
1141  INT_CLASS Class;
1142  int unicharset_size = target_unicharset.size();
1143  int version_id = -5; // When negated by the reader -1 becomes +1 etc.
1144 
1145  if (Templates->NumClasses != unicharset_size) {
1146  cprintf("Warning: executing WriteIntTemplates() with %d classes in"
1147  " Templates, while target_unicharset size is %d\n",
1148  Templates->NumClasses, unicharset_size);
1149  }
1150 
1151  /* first write the high level template struct */
1152  fwrite(&unicharset_size, sizeof(unicharset_size), 1, File);
1153  fwrite(&version_id, sizeof(version_id), 1, File);
1154  fwrite(&Templates->NumClassPruners, sizeof(Templates->NumClassPruners),
1155  1, File);
1156  fwrite(&Templates->NumClasses, sizeof(Templates->NumClasses), 1, File);
1157 
1158  /* then write out the class pruners */
1159  for (i = 0; i < Templates->NumClassPruners; i++)
1160  fwrite(Templates->ClassPruners[i],
1161  sizeof(CLASS_PRUNER_STRUCT), 1, File);
1162 
1163  /* then write out each class */
1164  for (i = 0; i < Templates->NumClasses; i++) {
1165  Class = Templates->Class[i];
1166 
1167  /* first write out the high level struct for the class */
1168  fwrite(&Class->NumProtos, sizeof(Class->NumProtos), 1, File);
1169  fwrite(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1, File);
1170  ASSERT_HOST(Class->NumConfigs == this->fontset_table_.get(Class->font_set_id).size);
1171  fwrite(&Class->NumConfigs, sizeof(Class->NumConfigs), 1, File);
1172  for (j = 0; j < Class->NumConfigs; ++j) {
1173  fwrite(&Class->ConfigLengths[j], sizeof(uinT16), 1, File);
1174  }
1175 
1176  /* then write out the proto lengths */
1177  if (MaxNumIntProtosIn (Class) > 0) {
1178  fwrite ((char *) (Class->ProtoLengths), sizeof (uinT8),
1179  MaxNumIntProtosIn (Class), File);
1180  }
1181 
1182  /* then write out the proto sets */
1183  for (j = 0; j < Class->NumProtoSets; j++)
1184  fwrite ((char *) Class->ProtoSets[j],
1185  sizeof (PROTO_SET_STRUCT), 1, File);
1186 
1187  /* then write the fonts info */
1188  fwrite(&Class->font_set_id, sizeof(int), 1, File);
1189  }
1190 
1191  /* Write the fonts info tables */
1193  this->fontinfo_table_.write(File,
1196 } /* WriteIntTemplates */
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
Definition: intproto.h:125
bool write_info(FILE *f, const FontInfo &fi)
Definition: fontinfo.cpp:168
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
#define MaxNumIntProtosIn(C)
Definition: intproto.h:168
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:488
uinT16 ConfigLengths[MAX_NUM_CONFIGS]
Definition: intproto.h:113
#define ASSERT_HOST(x)
Definition: errcode.h:84
PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]
Definition: intproto.h:111
bool write_spacing_info(FILE *f, const FontInfo &fi)
Definition: fontinfo.cpp:211
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
uinT8 NumProtoSets
Definition: intproto.h:109
uinT8 NumConfigs
Definition: intproto.h:110
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
uinT8 * ProtoLengths
Definition: intproto.h:112
int size() const
Definition: unicharset.h:297
uinT16 NumProtos
Definition: intproto.h:108
bool write_set(FILE *f, const FontSet &fs)
Definition: fontinfo.cpp:253
unsigned short uinT16
Definition: host.h:101
unsigned char uinT8
Definition: host.h:99
bool tesseract::Classify::WriteTRFile ( const STRING filename)

Definition at line 97 of file blobclass.cpp.

97  {
98  STRING tr_filename = filename + ".tr";
99  FILE* fp = Efopen(tr_filename.string(), "wb");
100  int len = tr_file_data_.length();
101  bool result =
102  fwrite(&tr_file_data_[0], sizeof(tr_file_data_[0]), len, fp) == len;
103  fclose(fp);
104  tr_file_data_.truncate_at(0);
105  return result;
106 }
FILE * Efopen(const char *Name, const char *Mode)
Definition: efio.cpp:43
inT32 length() const
Definition: strngs.cpp:188
void truncate_at(inT32 index)
Definition: strngs.cpp:264
Definition: strngs.h:44
const char * string() const
Definition: strngs.cpp:193

Member Data Documentation

ADAPT_TEMPLATES tesseract::Classify::AdaptedTemplates

Definition at line 473 of file classify.h.

BIT_VECTOR tesseract::Classify::AllConfigsOff

Definition at line 482 of file classify.h.

BIT_VECTOR tesseract::Classify::AllConfigsOn

Definition at line 481 of file classify.h.

bool tesseract::Classify::allow_blob_division = true

"Use divisible blobs chopping"

Definition at line 382 of file classify.h.

BIT_VECTOR tesseract::Classify::AllProtosOn

Definition at line 480 of file classify.h.

ADAPT_TEMPLATES tesseract::Classify::BackupAdaptedTemplates

Definition at line 477 of file classify.h.

double tesseract::Classify::certainty_scale = 20.0

"Certainty scaling factor"

Definition at line 437 of file classify.h.

int tesseract::Classify::classify_adapt_feature_threshold = 230

"Threshold for good features during adaptive 0-255"

Definition at line 447 of file classify.h.

int tesseract::Classify::classify_adapt_proto_threshold = 230

"Threshold for good protos during adaptive 0-255"

Definition at line 445 of file classify.h.

double tesseract::Classify::classify_adapted_pruning_factor = 2.5

"Prune poor adapted results this much worse than best result"

Definition at line 441 of file classify.h.

double tesseract::Classify::classify_adapted_pruning_threshold = -1.0

"Threshold at which classify_adapted_pruning_factor starts"

Definition at line 443 of file classify.h.

bool tesseract::Classify::classify_bln_numeric_mode = 0

"Assume the input is numbers [0-9]."

Definition at line 500 of file classify.h.

double tesseract::Classify::classify_char_norm_range = 0.2

"Character Normalization Range ..."

Definition at line 396 of file classify.h.

double tesseract::Classify::classify_character_fragments_garbage_certainty_threshold = -3.0

"Exclude fragments that do not match any whole character" " with at least this certainty"

Definition at line 453 of file classify.h.

int tesseract::Classify::classify_class_pruner_multiplier = 15

"Class Pruner Multiplier 0-255: "

Definition at line 465 of file classify.h.

int tesseract::Classify::classify_class_pruner_threshold = 229

"Class Pruner Threshold 0-255"

Definition at line 463 of file classify.h.

int tesseract::Classify::classify_cp_cutoff_strength = 7

"Class Pruner CutoffStrength: "

Definition at line 467 of file classify.h.

bool tesseract::Classify::classify_debug_character_fragments = FALSE

"Bring up graphical debugging windows for fragments training"

Definition at line 455 of file classify.h.

int tesseract::Classify::classify_debug_level = 0

"Classify debug level"

Definition at line 390 of file classify.h.

bool tesseract::Classify::classify_enable_adaptive_debugger = 0

"Enable match debugger"

Definition at line 414 of file classify.h.

bool tesseract::Classify::classify_enable_adaptive_matcher = 1

"Enable adaptive classifier"

Definition at line 409 of file classify.h.

bool tesseract::Classify::classify_enable_learning = true

"Enable adaptive classifier"

Definition at line 389 of file classify.h.

int tesseract::Classify::classify_integer_matcher_multiplier = 10

"Integer Matcher Multiplier 0-255: "

Definition at line 469 of file classify.h.

char* tesseract::Classify::classify_learn_debug_str = ""

"Class str to debug learning"

Definition at line 459 of file classify.h.

int tesseract::Classify::classify_learning_debug_level = 0

"Learning Debug Level: "

Definition at line 419 of file classify.h.

double tesseract::Classify::classify_max_certainty_margin = 5.5

"Veto difference between classifier certainties"

Definition at line 404 of file classify.h.

double tesseract::Classify::classify_max_norm_scale_x = 0.325

"Max char x-norm scale ..."

Definition at line 398 of file classify.h.

double tesseract::Classify::classify_max_norm_scale_y = 0.325

"Max char y-norm scale ..."

Definition at line 400 of file classify.h.

double tesseract::Classify::classify_max_rating_ratio = 1.5

"Veto ratio between classifier ratings"

Definition at line 402 of file classify.h.

double tesseract::Classify::classify_min_norm_scale_x = 0.0

"Min char x-norm scale ..."

Definition at line 397 of file classify.h.

double tesseract::Classify::classify_min_norm_scale_y = 0.0

"Min char y-norm scale ..."

Definition at line 399 of file classify.h.

double tesseract::Classify::classify_misfit_junk_penalty = 0.0

"Penalty to apply when a non-alnum is vertically out of " "its expected textline position"

Definition at line 435 of file classify.h.

bool tesseract::Classify::classify_nonlinear_norm = 0

"Non-linear stroke-density normalization"

Definition at line 416 of file classify.h.

int tesseract::Classify::classify_norm_method = character

"Normalization Method ..."

Definition at line 394 of file classify.h.

bool tesseract::Classify::classify_save_adapted_templates = 0

"Save adapted templates to a file"

Definition at line 413 of file classify.h.

bool tesseract::Classify::classify_use_pre_adapted_templates = 0

"Use pre-adapted classifier templates"

Definition at line 411 of file classify.h.

bool tesseract::Classify::disable_character_fragments = TRUE

"Do not include character fragments in the" " results of the classifier"

Definition at line 450 of file classify.h.

bool tesseract::Classify::EnableLearning

Definition at line 484 of file classify.h.

FEATURE_DEFS_STRUCT tesseract::Classify::feature_defs_
protected

Definition at line 507 of file classify.h.

UnicityTable<FontInfo> tesseract::Classify::fontinfo_table_

Definition at line 488 of file classify.h.

UnicityTable<FontSet> tesseract::Classify::fontset_table_

Definition at line 496 of file classify.h.

int tesseract::Classify::il1_adaption_test = 0

"Dont adapt to i/I at beginning of word"

Definition at line 498 of file classify.h.

IntegerMatcher tesseract::Classify::im_
protected

Definition at line 503 of file classify.h.

double tesseract::Classify::matcher_avg_noise_size = 12.0

"Avg. noise blob length: "

Definition at line 425 of file classify.h.

double tesseract::Classify::matcher_bad_match_pad = 0.15

"Bad Match Pad (0-1)"

Definition at line 423 of file classify.h.

double tesseract::Classify::matcher_clustering_max_angle_delta = 0.015

"Maximum angle delta for prototype clustering"

Definition at line 432 of file classify.h.

int tesseract::Classify::matcher_debug_flags = 0

"Matcher Debug Flags"

Definition at line 418 of file classify.h.

int tesseract::Classify::matcher_debug_level = 0

"Matcher Debug Level"

Definition at line 417 of file classify.h.

bool tesseract::Classify::matcher_debug_separate_windows = FALSE

"Use two different windows for debugging the matching: " "One for the protos and one for the features."

Definition at line 458 of file classify.h.

double tesseract::Classify::matcher_good_threshold = 0.125

"Good Match (0-1)"

Definition at line 420 of file classify.h.

int tesseract::Classify::matcher_min_examples_for_prototyping = 3

"Reliable Config Threshold"

Definition at line 428 of file classify.h.

double tesseract::Classify::matcher_perfect_threshold = 0.02

"Perfect Match (0-1)"

Definition at line 422 of file classify.h.

int tesseract::Classify::matcher_permanent_classes_min = 1

"Min # of permanent classes"

Definition at line 426 of file classify.h.

double tesseract::Classify::matcher_rating_margin = 0.1

"New template margin (0-1)"

Definition at line 424 of file classify.h.

double tesseract::Classify::matcher_reliable_adaptive_result = 0.0

"Great Match (0-1)"

Definition at line 421 of file classify.h.

int tesseract::Classify::matcher_sufficient_examples_for_prototyping = 5

"Enable adaption even if the ambiguities have not been seen"

Definition at line 430 of file classify.h.

NORM_PROTOS* tesseract::Classify::NormProtos

Definition at line 486 of file classify.h.

INT_TEMPLATES tesseract::Classify::PreTrainedTemplates

Definition at line 469 of file classify.h.

bool tesseract::Classify::prioritize_division = FALSE

"Prioritize blob division over chopping"

Definition at line 387 of file classify.h.

double tesseract::Classify::rating_scale = 1.5

"Rating scaling factor"

Definition at line 436 of file classify.h.

ShapeTable* tesseract::Classify::shape_table_
protected

Definition at line 512 of file classify.h.

double tesseract::Classify::speckle_large_max_size = 0.30

"Max large speckle size"

Definition at line 501 of file classify.h.

double tesseract::Classify::speckle_rating_penalty = 10.0

"Penalty to add to worst rating for noise"

Definition at line 503 of file classify.h.

BIT_VECTOR tesseract::Classify::TempProtoMask

Definition at line 483 of file classify.h.

bool tesseract::Classify::tess_bn_matching = 0

"Baseline Normalized Matching"

Definition at line 408 of file classify.h.

bool tesseract::Classify::tess_cn_matching = 0

"Character Normalized Matching"

Definition at line 407 of file classify.h.

double tesseract::Classify::tessedit_class_miss_scale = 0.00390625

"Scale factor for features not used"

Definition at line 439 of file classify.h.

int tesseract::Classify::tessedit_single_match = FALSE

"Top choice only from CP"

Definition at line 388 of file classify.h.


The documentation for this class was generated from the following files: