19#ifndef TESSERACT_CLASSIFY_CLASSIFY_H_
20#define TESSERACT_CLASSIFY_CLASSIFY_H_
24# include "config_auto.h"
27#ifdef DISABLED_LEGACY_ENGINE
34class Classify :
public CCStruct {
// Reserved font-info ids with fixed negative values (-1 and -2).
// NOTE(review): presumably negative so they cannot collide with a valid
// index into the font-info table — confirm against the users of these ids.
77static const int kUnknownFontinfoId = -1;
78static const int kBlankFontinfoId = -2;
// Declaration only; the definition is not visible in this view.
//   blob_length - integer length associated with the blob (units not shown).
//   choices     - choice list passed by non-const pointer, so the callee may
//                 modify it; presumably a speckle choice is appended —
//                 TODO confirm against the definition.
112 void AddLargeSpeckleTo(
int blob_length, BLOB_CHOICE_LIST *choices);
// Declaration only. Takes the blob by const reference and returns a bool.
// NOTE(review): by its name this looks like a predicate for "blob is a large
// speckle"; the actual criterion lives in the definition — confirm there.
115 bool LargeSpeckle(
const TBLOB &blob);
135 const uint16_t *expected_num_features, std::vector<CP_RESULT_STRUCT> *results);
// Declaration only.
//   fp      - TFile handle passed by pointer; presumably the input to read
//             from — TODO confirm.
//   Cutoffs - non-const uint16_t pointer, so this is an output buffer from
//             the caller's point of view. The required length is not shown
//             here; verify against the definition before sizing it.
136 void ReadNewCutoffs(
TFile *fp, uint16_t *Cutoffs);
// Declaration only; takes no arguments and returns nothing.
// NOTE(review): presumably releases the normalization prototypes owned by
// this object — confirm against the definition.
142 void FreeNormProtos();
// Declaration only.
//   fontname - C string naming a font (read-only to the callee).
//   word     - word result passed by non-const pointer; may be modified.
// NOTE(review): whether fontname may be nullptr is not visible here —
// check the definition before relying on it.
154 void LearnWord(
const char *fontname,
WERD_RES *word);
165 void LearnPieces(
const char *fontname,
int start,
int length,
float threshold,
170 void AmbigClassifier(
const std::vector<INT_FEATURE_STRUCT> &int_features,
177 const std::vector<CP_RESULT_STRUCT> &results,
ADAPT_RESULTS *final_results);
183 void ExpandShapesAndApplyCorrections(
ADAPT_CLASS_STRUCT **classes,
bool debug,
int class_id,
int bottom,
184 int top,
float cp_rating,
int blob_length,
185 int matcher_multiplier,
const uint8_t *cn_factors,
// Declaration only; returns a double computed from the inputs below.
//   debug              - debug flag (effect not visible here).
//   unichar_id         - integer id of the character being rated.
//   cp_rating          - rating value; presumably from the class pruner
//                        (the "cp" prefix) — TODO confirm.
//   im_rating          - rating value; presumably from the integer matcher
//                        — TODO confirm.
//   feature_misses     - integer miss count.
//   bottom, top        - integer vertical bounds (coordinate space not shown).
//   blob_length        - integer length associated with the blob.
//   matcher_multiplier - integer multiplier.
//   cn_factors         - read-only uint8_t array; indexing scheme and length
//                        are not visible here. NOTE(review): confirm whether
//                        nullptr is accepted.
190 double ComputeCorrectedRating(
bool debug,
int unichar_id,
double cp_rating,
double im_rating,
191 int feature_misses,
int bottom,
int top,
int blob_length,
192 int matcher_multiplier,
const uint8_t *cn_factors);
194 BLOB_CHOICE_LIST *Choices);
198# ifndef GRAPHICS_DISABLED
// Declaration only. Takes the results by const reference, so they are not
// modified. NOTE(review): output destination is not visible here.
207 void PrintAdaptiveMatchResults(
const ADAPT_RESULTS &results);
// Declaration only.
//   shape_id     - integer shape id.
//   features     - read-only array of INT_FEATURE_STRUCT.
//   num_features - presumably the number of entries in `features` —
//                  TODO confirm.
211 void ShowBestMatchFor(
int shape_id,
const INT_FEATURE_STRUCT *features,
int num_features);
215 int config_id)
const;
// Declaration only; const-qualified, so it does not modify this object.
// Maps a (class_id, int_result_config) pair to an int.
// NOTE(review): by its name the result is either a font id or a shape id;
// which one, and the value returned for an unknown pair, are not visible
// here — confirm against the definition.
226 int ClassAndConfigIDToFontOrShapeID(
int class_id,
int int_result_config)
const;
// Declaration only; const-qualified, so it does not modify this object.
// Maps an integer shape id to an integer class id.
// NOTE(review): the value returned when no class matches is not visible
// here — confirm before testing the result against a sentinel.
229 int ShapeIDToClassID(
int shape_id)
const;
230 UNICHAR_ID *BaselineClassifier(
TBLOB *Blob,
const std::vector<INT_FEATURE_STRUCT> &int_features,
// Declaration only; returns an int whose meaning is not visible here.
//   pruner_only - bool flag; presumably restricts work to the class pruner
//                 stage — TODO confirm.
//   keep_this   - integer; NOTE(review): looks like an id that must be kept
//                 in the results — confirm.
//   sample      - training sample, read-only.
//   results     - output vector passed by pointer; the callee may modify it.
237 int CharNormTrainingSample(
bool pruner_only,
int keep_this,
const TrainingSample &sample,
238 std::vector<UnicharRating> *results);
241 void AdaptToChar(
TBLOB *Blob,
CLASS_ID ClassId,
int FontinfoId,
float Threshold,
// Declaration only; takes no arguments and returns nothing.
// NOTE(review): presumably tears down adaptive-classifier state — confirm
// what is released and whether it is safe to call more than once.
245 void EndAdaptiveClassifier();
// Declaration only.
//   Blob    - blob passed by non-const pointer.
//   Choices - choice list passed by non-const pointer; presumably filled
//             with the classification results — TODO confirm.
248 void AdaptiveClassifier(
TBLOB *Blob, BLOB_CHOICE_LIST *Choices);
// Declaration only; no arguments, no return value.
// NOTE(review): exactly which state is reset is not visible here.
250 void ResetAdaptiveClassifierInternal();
// Declaration only; no arguments, no return value.
// NOTE(review): presumably swaps in a backup adaptive classifier — confirm.
251 void SwitchAdaptiveClassifier();
// Declaration only; no arguments, no return value.
// NOTE(review): presumably begins building a backup adaptive classifier —
// confirm against the definition.
252 void StartBackupAdaptiveClassifier();
255 uint8_t *pruner_norm_array, uint8_t *char_norm_array);
260 uint8_t *char_norm_array, uint8_t *pruner_array);
266 return NumAdaptationsFailed > 0;
269 return AdaptedTemplates->NumPermClasses == 0;
// Declaration only; returns a bool for the given blob.
// The blob is taken by non-const pointer. NOTE(review): whether it is
// actually modified, and the criterion used, are not visible here.
271 bool LooksLikeGarbage(
TBLOB *blob);
272#ifndef GRAPHICS_DISABLED
// Declaration only; it follows an `#ifndef GRAPHICS_DISABLED` line in this view.
//   win      - pointer to a ScrollView pointer, so the callee can replace
//              the caller's window pointer; presumably creates the window
//              when *win is null — TODO confirm.
//   msg      - read-only C string.
//   y_offset - integer vertical offset (units not shown).
//   wbox     - bounding box, read-only.
273 void RefreshDebugWindow(
ScrollView **win,
const char *msg,
int y_offset,
const TBOX &wbox);
298 static void SetupBLCNDenorms(
const TBLOB &blob,
bool nonlinear_norm,
DENORM *bl_denorm,
309 static void ExtractFeatures(
const TBLOB &blob,
bool nonlinear_norm,
310 std::vector<INT_FEATURE_STRUCT> *bl_features,
311 std::vector<INT_FEATURE_STRUCT> *cn_features,
// Declaration only. Takes a writable uint8_t array.
// NOTE(review): the number of entries cleared is not visible here; the
// caller must supply a buffer of whatever size the definition assumes.
314 void ClearCharNormArray(uint8_t *char_norm_array);
// Declaration only.
//   norm_feature    - input feature, read-only.
//   char_norm_array - writable uint8_t output array; required length is not
//                     visible here — TODO confirm against the definition.
315 void ComputeIntCharNormArray(
const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array);
320 CLASS_ID GetClassToDebug(
const char *Prompt,
bool *adaptive_on,
bool *pretrained_on,
325 return fontinfo_table_;
328 return fontinfo_table_;
331 return fontset_table_;
// Declaration only.
//   Outlines - LIST passed by value (LIST is declared elsewhere).
//   XScale   - float passed by pointer; presumably receives the horizontal
//              scale factor applied — TODO confirm.
//   YScale   - float passed by pointer; presumably receives the vertical
//              scale factor applied — TODO confirm.
334 void NormalizeOutlines(
LIST Outlines,
float *XScale,
float *YScale);
348 void LearnBlob(
const std::string &fontname,
TBLOB *Blob,
const DENORM &cn_denorm,
// Declaration only; returns a bool.
//   filename - read-only C string naming the file.
// NOTE(review): presumably returns true on a successful write — confirm the
// polarity against the definition before branching on it.
352 bool WriteTRFile(
const char *filename);
// Invokes the double_VAR_H macro (defined elsewhere) with this name.
// NOTE(review): presumably declares a double-valued tunable parameter member;
// the default value and help text are not visible in this view.
403 double_VAR_H(classify_character_fragments_garbage_certainty_threshold);
456#ifndef GRAPHICS_DISABLED
// Two ScrollView pointers, both default-initialized to nullptr.
// They follow an `#ifndef GRAPHICS_DISABLED` line in this view.
// NOTE(review): ownership of the pointed-to windows is not visible here —
// confirm who deletes them.
458 ScrollView *learn_fragmented_word_debug_win_ =
nullptr;
459 ScrollView *learn_fragments_debug_win_ =
nullptr;
// String data member, default-constructed (empty).
// NOTE(review): presumably accumulates training (.tr) file content — confirm.
463 std::string tr_file_data_;
// Vector of uint16_t values, default-constructed (empty).
// NOTE(review): presumably one cutoff per shape-table entry — confirm the
// indexing against the code that fills it.
467 std::vector<uint16_t> shapetable_cutoffs_;
// Integer counter, initialized to 0. Elsewhere in this file it is tested
// with `NumAdaptationsFailed > 0`.
// NOTE(review): where it is incremented or reset is not visible in this view.
470 int NumAdaptationsFailed = 0;
// Boolean flag, initialized to true.
// NOTE(review): presumably gates adaptive learning; the code that reads it
// is not visible in this view.
484 bool EnableLearning =
true;
#define classify_enable_adaptive_matcher
void SetAdaptiveThreshold(float Threshold)
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
INT_VAR_H(classify_learning_debug_level)
BOOL_VAR_H(classify_use_pre_adapted_templates)
bool AdaptiveClassifierIsEmpty() const
INT_VAR_H(classify_class_pruner_multiplier)
BOOL_VAR_H(disable_character_fragments)
BOOL_VAR_H(prioritize_division)
double_VAR_H(classify_max_certainty_margin)
double_VAR_H(classify_adapted_pruning_factor)
const ShapeTable * shape_table() const
INT_VAR_H(classify_norm_method)
double_VAR_H(classify_character_fragments_garbage_certainty_threshold)
INT_VAR_H(classify_class_pruner_threshold)
double_VAR_H(matcher_good_threshold)
double_VAR_H(speckle_rating_penalty)
UnicityTable< FontSet > & get_fontset_table()
double_VAR_H(speckle_large_max_size)
double_VAR_H(matcher_avg_noise_size)
STRING_VAR_H(classify_learn_debug_str)
BOOL_VAR_H(classify_enable_adaptive_matcher)
BOOL_VAR_H(tess_bn_matching)
double_VAR_H(matcher_perfect_threshold)
INT_VAR_H(classify_debug_level)
INT_VAR_H(classify_integer_matcher_multiplier)
FEATURE_DEFS_STRUCT feature_defs_
double_VAR_H(classify_char_norm_range)
const UnicityTable< FontInfo > & get_fontinfo_table() const
double_VAR_H(classify_adapted_pruning_threshold)
UnicityTable< FontSet > fontset_table_
double_VAR_H(classify_max_rating_ratio)
BOOL_VAR_H(classify_nonlinear_norm)
INT_VAR_H(matcher_min_examples_for_prototyping)
BOOL_VAR_H(classify_bln_numeric_mode)
INT_VAR_H(classify_adapt_feature_threshold)
double_VAR_H(rating_scale)
double_VAR_H(matcher_clustering_max_angle_delta)
BOOL_VAR_H(classify_enable_adaptive_debugger)
INT_VAR_H(matcher_debug_level)
BOOL_VAR_H(matcher_debug_separate_windows)
bool AdaptiveClassifierIsFull() const
INT_VAR_H(matcher_permanent_classes_min)
INT_VAR_H(classify_adapt_proto_threshold)
BOOL_VAR_H(tess_cn_matching)
INT_VAR_H(classify_cp_cutoff_strength)
double_VAR_H(matcher_reliable_adaptive_result)
double_VAR_H(matcher_rating_margin)
INT_VAR_H(matcher_sufficient_examples_for_prototyping)
INT_VAR_H(matcher_debug_flags)
BOOL_VAR_H(classify_debug_character_fragments)
BOOL_VAR_H(classify_enable_learning)
double_VAR_H(tessedit_class_miss_scale)
double_VAR_H(classify_misfit_junk_penalty)
UnicityTable< FontInfo > & get_fontinfo_table()
UnicityTable< FontInfo > fontinfo_table_
BOOL_VAR_H(classify_save_adapted_templates)
BOOL_VAR_H(allow_blob_division)
double_VAR_H(matcher_bad_match_pad)