22# include "config_auto.h"
78#define classify_enable_adaptive_matcher true
80#define ADAPT_TEMPLATE_SUFFIX ".a"
83#define UNLIKELY_NUM_FEAT 200
85#define MAX_ADAPTABLE_WERD_SIZE 40
87#define ADAPTABLE_WERD_ADJUSTMENT (0.05)
89#define Y_DIM_OFFSET (Y_SHIFT - BASELINE_Y_SHIFT)
91#define WORST_POSSIBLE_RATING (0.0f)
99 std::vector<UnicharRating>
match;
114 for (
unsigned i = 0;
i <
match.size(); ++
i) {
143 return (1.0f - confidence) > matcher_great_threshold;
151static unsigned FindScoredUnichar(
UNICHAR_ID id,
const ADAPT_RESULTS &results) {
152 for (
unsigned i = 0;
i < results.match.size();
i++) {
153 if (results.match[
i].unichar_id ==
id) {
157 return results.match.size();
162static float ScoredUnichar(
UNICHAR_ID id,
const ADAPT_RESULTS &results) {
163 unsigned index = FindScoredUnichar(
id, results);
164 if (index >= results.match.size()) {
167 return results.match[index].rating;
203 assert(Choices !=
nullptr);
212 std::sort(Results->match.begin(), Results->match.end(), SortDescendingRating);
214 Results->ComputeBest();
222 if (matcher_debug_level >= 1) {
227#ifndef GRAPHICS_DISABLED
228 if (classify_enable_adaptive_debugger) {
236#ifndef GRAPHICS_DISABLED
242 const int kSampleSpaceWidth = 500;
243 if (*win ==
nullptr) {
244 *win =
new ScrollView(msg, 100, y_offset, kSampleSpaceWidth * 2, 200, kSampleSpaceWidth * 2,
248 (*win)->Pen(64, 64, 64);
268 float *thresholds =
nullptr;
269 if (fontname ==
nullptr) {
275 if (classify_learning_debug_level >= 1) {
278 thresholds =
new float[word_len];
280 matcher_good_threshold, matcher_rating_margin, thresholds);
284#ifndef GRAPHICS_DISABLED
285 if (classify_debug_character_fragments) {
286 if (learn_fragmented_word_debug_win_ !=
nullptr) {
287 learn_fragmented_word_debug_win_->
Wait();
298 for (
int ch = 0;
ch < word_len; ++
ch) {
299 if (classify_debug_character_fragments) {
303 float threshold = thresholds !=
nullptr ? thresholds[
ch] : 0.0f;
308 if (word->
best_state[
ch] > 1 && !disable_character_fragments) {
312 bool garbage =
false;
316 if (classify_character_fragments_garbage_certainty_threshold < 0) {
323 if (pieces_all_natural || !prioritize_division) {
330 std::string full_string;
331 for (
unsigned i = 0;
i < tokens.size();
i++) {
332 full_string += tokens[
i];
333 if (
i != tokens.size() - 1) {
338 full_string.c_str(), word);
400 if (rotated_blob ==
nullptr) {
404#ifndef GRAPHICS_DISABLED
406 if (strcmp(classify_learn_debug_str.c_str(), correct_text) == 0) {
409 learn_debug_win_->
Update();
410 learn_debug_win_->
Wait();
412 if (classify_debug_character_fragments && segmentation ==
CST_FRAGMENT) {
413 ASSERT_HOST(learn_fragments_debug_win_ !=
nullptr);
415 learn_fragments_debug_win_->
Update();
419 if (fontname !=
nullptr) {
420 classify_norm_method.set_value(
character);
421 tess_bn_matching.set_value(
false);
422 tess_cn_matching.set_value(
false);
423 DENORM bl_denorm, cn_denorm;
425 SetupBLCNDenorms(*rotated_blob, classify_nonlinear_norm, &bl_denorm, &cn_denorm, &fx_info);
426 LearnBlob(fontname, rotated_blob, cn_denorm, fx_info, correct_text);
430 if (classify_learning_debug_level >= 1) {
442 }
else if (classify_debug_level >= 1) {
443 tprintf(
"Can't adapt to %s not in unicharset\n", correct_text);
445 if (rotated_blob != blob) {
465 std::string Filename;
469 classify_save_adapted_templates) {
471 File = fopen(Filename.c_str(),
"wb");
472 if (
File ==
nullptr) {
473 tprintf(
"Unable to save adapted templates to %s!\n", Filename.c_str());
475 tprintf(
"\nSaving adapted templates to %s ...", Filename.c_str());
506 delete static_classifier_;
507 static_classifier_ =
nullptr;
545 tprintf(
"Error loading shape table!\n");
569 for (uint16_t &BaselineCutoff : BaselineCutoffs) {
573 if (classify_use_pre_adapted_templates) {
577 if (!fp.
Open(Filename.c_str(),
nullptr)) {
580 tprintf(
"\nReading pre-adapted templates from %s ...\n", Filename.c_str());
587 BaselineCutoffs[
i] = CharNormCutoffs[
i];
597 if (classify_learning_debug_level > 0) {
598 tprintf(
"Resetting adaptive classifier (NumAdaptationsFailed=%d)\n", NumAdaptationsFailed);
604 NumAdaptationsFailed = 0;
614 if (classify_learning_debug_level > 0) {
615 tprintf(
"Switch to backup adaptive classifier (NumAdaptationsFailed=%d)\n",
616 NumAdaptationsFailed);
621 NumAdaptationsFailed = 0;
696 classify_norm_method.set_value(
baseline);
709 BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId];
714 for (Fid = 0; Fid < Features->
NumFeatures; Fid++) {
720 Proto = &(TempProto->Proto);
731 TempProto->ProtoId = Pid;
744 if (classify_learning_debug_level >= 1) {
745 tprintf(
"Added new class '%s' with class id %d and %d protos.\n",
747#ifndef GRAPHICS_DISABLED
748 if (classify_learning_debug_level > 1) {
783 classify_norm_method.set_value(
baseline);
793 *FloatFeatures = Features;
859 Class = adaptive_templates->
Class[ClassId];
860 assert(Class !=
nullptr);
867 if (NumFeatures <= 0) {
873 for (
int cfg = 0; cfg < IClass->
NumConfigs; ++cfg) {
875 SET_BIT(MatchingFontConfigs, cfg);
881 classify_adapt_feature_threshold,
NO_DEBUG, matcher_debug_separate_windows);
882 FreeBitVector(MatchingFontConfigs);
886 if (1.0f - int_result.
rating <= Threshold) {
888 if (classify_learning_debug_level >= 1) {
889 tprintf(
"Found good match to perm config %d = %4.1f%%.\n", int_result.
config,
890 int_result.
rating * 100.0);
892 delete FloatFeatures;
901 if (classify_learning_debug_level >= 1) {
902 tprintf(
"Increasing reliability of temp config %d to %d.\n", int_result.
config,
911 if (classify_learning_debug_level >= 1) {
912 tprintf(
"Found poor match to temp config %d = %4.1f%%.\n", int_result.
config,
913 int_result.
rating * 100.0);
914#ifndef GRAPHICS_DISABLED
915 if (classify_learning_debug_level > 2) {
921 IntFeatures, FloatFeatures);
922 if (NewTempConfigId >= 0 &&
924 MakePermanent(adaptive_templates, ClassId, NewTempConfigId, Blob);
928#ifndef GRAPHICS_DISABLED
929 if (classify_learning_debug_level > 1) {
934 delete FloatFeatures;
938#ifndef GRAPHICS_DISABLED
942 std::vector<INT_FEATURE_STRUCT> bl_features;
945 if (sample ==
nullptr) {
951 classify_adapt_feature_threshold,
NO_DEBUG, matcher_debug_separate_windows);
952 tprintf(
"Best match to temp config %d = %4.1f%%.\n", int_result.
config,
953 int_result.
rating * 100.0);
954 if (classify_learning_debug_level >= 2) {
956 ConfigMask = 1 << int_result.
config;
959 &bl_features[0], &int_result, classify_adapt_feature_threshold, 6 | 0x19,
960 matcher_debug_separate_windows);
987 auto old_match = FindScoredUnichar(new_result.
unichar_id, *results);
989 if (new_result.
rating + matcher_bad_match_pad < results->best_rating ||
990 (old_match < results->match.size() &&
991 new_result.
rating <= results->
match[old_match].rating)) {
999 if (old_match < results->match.size()) {
1000 results->
match[old_match].rating = new_result.
rating;
1002 results->
match.push_back(new_result);
1041 if (int_features.empty()) {
1048 bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1055 while (*ambiguities >= 0) {
1060 &int_features[0], &int_result, classify_adapt_feature_threshold,
NO_DEBUG,
1061 matcher_debug_separate_windows);
1064 classify_integer_matcher_multiplier, CharNormArray, &int_result,
1068 delete[] CharNormArray;
1077 const TBOX &blob_box,
const std::vector<CP_RESULT_STRUCT> &results,
1079 int top = blob_box.
top();
1080 int bottom = blob_box.
bottom();
1082 for (
auto &&result : results) {
1089 &int_result, classify_adapt_feature_threshold, debug, matcher_debug_separate_windows);
1090 bool is_debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1092 final_results->
BlobLength, matcher_multiplier, norm_factors,
1093 &int_result, final_results);
1103 int bottom,
int top,
float cp_rating,
1104 int blob_length,
int matcher_multiplier,
1107 if (classes !=
nullptr) {
1110 for (
auto &font : int_result->
fonts) {
1111 font.fontinfo_id =
GetFontinfoId(classes[class_id], font.fontinfo_id);
1116 for (
auto &font : int_result->
fonts) {
1127 std::vector<UnicharRating> mapped_results;
1128 for (
auto &f : int_result->
fonts) {
1129 int shape_id = f.fontinfo_id;
1131 for (
int c = 0; c < shape.
size(); ++c) {
1132 int unichar_id = shape[c].unichar_id;
1138 for (r = 0; r < mapped_results.size() && mapped_results[r].unichar_id != unichar_id;
1141 if (r == mapped_results.size()) {
1142 mapped_results.push_back(*int_result);
1143 mapped_results[r].unichar_id = unichar_id;
1144 mapped_results[r].fonts.clear();
1146 for (
int font_id : shape[c].font_ids) {
1147 mapped_results[r].fonts.emplace_back(font_id, f.score);
1151 for (
auto &m : mapped_results) {
1154 matcher_multiplier, cn_factors);
1163 blob_length, matcher_multiplier, cn_factors);
1172 double im_rating,
int feature_misses,
int bottom,
int top,
1173 int blob_length,
int matcher_multiplier,
1174 const uint8_t *cn_factors) {
1176 double cn_corrected =
im_.
ApplyCNCorrection(1.0 - im_rating, blob_length, cn_factors[unichar_id],
1177 matcher_multiplier);
1178 double miss_penalty = tessedit_class_miss_scale * feature_misses;
1179 double vertical_penalty = 0.0;
1182 cn_factors[unichar_id] != 0 && classify_misfit_junk_penalty > 0.0) {
1183 int min_bottom, max_bottom, min_top, max_top;
1186 tprintf(
"top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n", top, min_top, max_top, bottom,
1187 min_bottom, max_bottom);
1189 if (top < min_top || top > max_top || bottom < min_bottom || bottom > max_bottom) {
1190 vertical_penalty = classify_misfit_junk_penalty;
1193 double result = 1.0 - (cn_corrected + miss_penalty + vertical_penalty);
1198 tprintf(
"%s: %2.1f%%(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n",
1200 (1.0 - im_rating) * 100.0, (cn_corrected - (1.0 - im_rating)) * 100.0,
1201 cn_factors[unichar_id], miss_penalty * 100.0, vertical_penalty * 100.0);
1225 const std::vector<INT_FEATURE_STRUCT> &int_features,
1228 if (int_features.empty()) {
1238 if (matcher_debug_level >= 2 || classify_debug_level > 1) {
1246 delete[] CharNormArray;
1252 return Templates->
Class[ClassId]
1277 std::vector<UnicharRating> unichar_results;
1280 for (
auto &r : unichar_results) {
1289 std::vector<UnicharRating> *results) {
1291 std::unique_ptr<ADAPT_RESULTS> adapt_results(
new ADAPT_RESULTS());
1292 adapt_results->Initialize();
1303 std::vector<uint8_t> pruner_norm_array(num_pruner_classes);
1304 adapt_results->BlobLength =
static_cast<int>(
ActualOutlineLength(norm_feature) * 20 + 0.5f);
1308 shape_table_ !=
nullptr ? &shapetable_cutoffs_[0] : CharNormCutoffs,
1309 &adapt_results->CPResults);
1310 if (keep_this >= 0) {
1311 adapt_results->CPResults[0].Class = keep_this;
1312 adapt_results->CPResults.resize(1);
1316 for (
auto &it : adapt_results->CPResults) {
1317 int class_id = it.Class;
1318 results->push_back(
UnicharRating(class_id, 1.0f - it.Rating));
1322 matcher_debug_flags, classify_integer_matcher_multiplier, blob_box,
1323 adapt_results->CPResults, adapt_results.get());
1325 for (
auto &
i : adapt_results->match) {
1326 results->push_back(
i);
1328 if (results->size() > 1) {
1329 std::sort(results->begin(), results->end(), SortDescendingRating);
1332 return num_features;
1348 float rating = results->
BlobLength / matcher_avg_noise_size;
1350 rating /= 1 + rating;
1363 assert(Choices !=
nullptr);
1366 BLOB_CHOICE_IT temp_it;
1367 bool contains_nonfrag =
false;
1368 temp_it.set_to_list(Choices);
1369 int choices_length = 0;
1383 float best_certainty = -FLT_MAX;
1384 for (
auto &it : Results->
match) {
1386 bool adapted = result.
adapted;
1388 if (temp_it.length() + 1 == max_matches && !contains_nonfrag && current_is_frag) {
1400 Rating = Certainty = (1.0f - result.
rating);
1401 Rating *= rating_scale * Results->
BlobLength;
1402 Certainty *= -(
getDict().certainty_scale);
1409 if (Certainty > best_certainty) {
1410 best_certainty = std::min(Certainty,
static_cast<float>(classify_adapted_pruning_threshold));
1411 }
else if (adapted && Certainty / classify_adapted_pruning_factor < best_certainty) {
1415 float min_xheight, max_xheight, yshift;
1420 choice->set_fonts(result.
fonts);
1421 temp_it.add_to_end(choice);
1422 contains_nonfrag |= !current_is_frag;
1424 if (choices_length >= max_matches) {
1428 Results->
match.resize(choices_length);
1432#ifndef GRAPHICS_DISABLED
1441 if (static_classifier_ ==
nullptr) {
1445 std::vector<INT_FEATURE_STRUCT> bl_features;
1447 if (sample ==
nullptr) {
1478 std::vector<INT_FEATURE_STRUCT> bl_features;
1481 if (sample ==
nullptr) {
1487 if (static_classifier_ ==
nullptr) {
1496 if ((!Results->
match.empty() &&
1498 !tess_bn_matching) ||
1499 Results->
match.empty()) {
1501 }
else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
1503 Ambiguities, Results);
1536 Results->Initialize();
1538 std::vector<INT_FEATURE_STRUCT> bl_features;
1541 if (sample ==
nullptr) {
1549 std::sort(Results->match.begin(), Results->match.end(), SortDescendingRating);
1553 Ambiguities =
new UNICHAR_ID[Results->match.size() + 1];
1554 if (Results->match.size() > 1 ||
1555 (Results->match.size() == 1 && Results->match[0].unichar_id != CorrectClass)) {
1557 for (
i = 0;
i < Results->match.size();
i++) {
1558 Ambiguities[
i] = Results->match[
i].unichar_id;
1560 Ambiguities[
i] = -1;
1562 Ambiguities[0] = -1;
1572 auto *ratings =
new BLOB_CHOICE_LIST();
1574 BLOB_CHOICE_IT ratings_it(ratings);
1576 if (classify_debug_character_fragments) {
1579 for (ratings_it.mark_cycle_pt(); !ratings_it.cycled_list(); ratings_it.forward()) {
1583 float certainty = ratings_it.data()->certainty();
1585 return certainty < classify_character_fragments_garbage_certainty_threshold;
1614 uint8_t *pruner_norm_array, uint8_t *char_norm_array) {
1630 uint8_t *char_norm_array, uint8_t *pruner_array) {
1636 memset(&pruner_array[0], UINT8_MAX, templates->
NumClasses *
sizeof(pruner_array[0]));
1639 for (
unsigned id = 0;
id < templates->
NumClasses; ++id) {
1644 for (
int c = 0; c < shape.
size(); ++c) {
1645 if (char_norm_array[shape[c].unichar_id] < pruner_array[
id]) {
1646 pruner_array[id] = char_norm_array[shape[c].unichar_id];
1653 delete norm_feature;
1678 int MaxProtoId, OldMaxProtoId;
1684 if (classify_learning_debug_level >= 3) {
1689 Class = Templates->
Class[ClassId];
1692 ++NumAdaptationsFailed;
1693 if (classify_learning_debug_level >= 1) {
1694 tprintf(
"Cannot make new temporary config: maximum number exceeded.\n");
1702 OldProtos, classify_adapt_proto_threshold, debug_level);
1706 for (
i = 0;
i < NumOldProtos;
i++) {
1711 BadFeatures, classify_adapt_feature_threshold, debug_level);
1716 ++NumAdaptationsFailed;
1717 if (classify_learning_debug_level >= 1) {
1718 tprintf(
"Cannot make new temp protos: maximum number exceeded.\n");
1729 if (classify_learning_debug_level >= 1) {
1731 "Making new temp config %d fontinfo id %d"
1732 " using %d old and %d new protos.\n",
1733 ConfigId,
Config->FontinfoId, NumOldProtos, MaxProtoId - OldMaxProtoId);
1766 float X1, X2, Y1, Y2;
1767 float A1, A2, AngleDelta;
1768 float SegmentLength;
1771 for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat; ProtoStart < LastBad;
1772 ProtoStart = ProtoEnd) {
1773 F1 = Features->
Features[*ProtoStart];
1780 F2 = Features->
Features[*ProtoEnd];
1785 AngleDelta = std::fabs(A1 - A2);
1786 if (AngleDelta > 0.5f) {
1787 AngleDelta = 1 - AngleDelta;
1790 if (AngleDelta > matcher_clustering_max_angle_delta || std::fabs(X1 - X2) > SegmentLength ||
1791 std::fabs(Y1 - Y2) > SegmentLength) {
1796 F2 = Features->
Features[*(ProtoEnd - 1)];
1807 Proto = &(TempProto->Proto);
1812 Proto->
Length = SegmentLength;
1814 Proto->
X = (X1 + X2) / 2;
1818 TempProto->ProtoId = Pid;
1844 auto Class = Templates->
Class[ClassId];
1848 if (Class->NumPermConfigs == 0) {
1851 Class->NumPermConfigs++;
1857 Perm->FontinfoId =
Config->FontinfoId;
1870 if (classify_learning_debug_level >= 1) {
1872 "Making config %d for %s (ClassId %d) permanent:"
1873 " fontinfo id %d, ambiguities '",
1874 ConfigId,
getDict().getUnicharset().debug_str(ClassId).c_str(), ClassId,
1876 for (
UNICHAR_ID *AmbigsPointer = Ambigs; *AmbigsPointer >= 0; ++AmbigsPointer) {
1898 auto ProtoKey =
static_cast<PROTO_KEY *
>(item2);
1903 if (TempProto->ProtoId >
Config->MaxProtoId || !
test_bit(
Config->Protos, TempProto->ProtoId)) {
1923 for (
auto &it : results.
match) {
1943 unsigned Next, NextGood;
1944 float BadMatchThreshold;
1945 static const char *romans =
"i v x I V X";
1946 BadMatchThreshold = Results->
best_rating - matcher_bad_match_pad;
1948 if (classify_bln_numeric_mode) {
1953 float scored_one = ScoredUnichar(unichar_id_one, *Results);
1954 float scored_zero = ScoredUnichar(unichar_id_zero, *Results);
1956 for (Next = NextGood = 0; Next < Results->
match.size(); Next++) {
1958 if (match.
rating >= BadMatchThreshold) {
1962 Results->
match[Next].unichar_id = unichar_id_one;
1964 Results->
match[Next].unichar_id = unichar_id_zero;
1966 Results->
match[Next].unichar_id = INVALID_UNICHAR_ID;
1968 if (Results->
match[Next].unichar_id != INVALID_UNICHAR_ID) {
1969 if (NextGood == Next) {
1972 Results->
match[NextGood++] = Results->
match[Next];
1978 for (Next = NextGood = 0; Next < Results->
match.size(); Next++) {
1979 if (Results->
match[Next].rating >= BadMatchThreshold) {
1980 if (NextGood == Next) {
1983 Results->
match[NextGood++] = Results->
match[Next];
1988 Results->
match.resize(NextGood);
2000 unsigned Next, NextGood;
2004 static char punc_chars[] =
". , ; : / ` ~ ' - = \\ | \" ! _ ^";
2005 static char digit_chars[] =
"0 1 2 3 4 5 6 7 8 9";
2009 for (Next = NextGood = 0; Next < Results->
match.size(); Next++) {
2013 if (punc_count >= 2) {
2019 if (digit_count >= 1) {
2026 if (NextGood == Next) {
2029 Results->
match[NextGood++] = match;
2033 Results->
match.resize(NextGood);
2048 Threshold = (Threshold == matcher_good_threshold) ? 0.9f : (1 - Threshold);
2049 classify_adapt_proto_threshold.set_value(ClipToRange<int>(255 * Threshold, 0, 255));
2050 classify_adapt_feature_threshold.set_value(ClipToRange<int>(255 * Threshold, 0, 255));
2053#ifndef GRAPHICS_DISABLED
2067 uint32_t config_mask;
2069 tprintf(
"No built-in templates for class/shape %d\n", shape_id);
2072 if (num_features <= 0) {
2073 tprintf(
"Illegal blob (char norm features)!\n");
2077 classify_norm_method.set_value(
character);
2079 features, &cn_result, classify_adapt_feature_threshold,
NO_DEBUG,
2080 matcher_debug_separate_windows);
2082 config_mask = 1 << cn_result.
config;
2084 tprintf(
"Static Shape ID: %d\n", shape_id);
2087 features, &cn_result, classify_adapt_feature_threshold, matcher_debug_flags,
2088 matcher_debug_separate_windows);
2097 int config_id)
const {
2098 std::string class_string;
2105 return class_string;
2112 if (font_set_id < 0) {
2113 return kBlankFontinfoId;
2116 return fs.at(int_result_config);
2127 if (f == shape_id) {
2132 tprintf(
"Shape %d not found\n", shape_id);
2139 if (classify_learning_debug_level >= 1) {
2140 tprintf(
"NumTimesSeen for config of %s is %d\n",
2143 if (config->
NumTimesSeen >= matcher_sufficient_examples_for_prototyping) {
2145 }
else if (config->
NumTimesSeen < matcher_min_examples_for_prototyping) {
2147 }
else if (use_ambigs_for_adaption) {
2151 int ambigs_size = (ambigs ==
nullptr) ? 0 : ambigs->size();
2152 for (
int ambig = 0; ambig < ambigs_size; ++ambig) {
2154 assert(ambig_class !=
nullptr);
2156 ambig_class->
MaxNumTimesSeen < matcher_min_examples_for_prototyping) {
2157 if (classify_learning_debug_level >= 1) {
2159 "Ambig %s has not been seen enough times,"
2160 " not making config for %s permanent\n",
2161 getDict().getUnicharset().debug_str((*ambigs)[ambig]).c_str(),
2162 getDict().getUnicharset().debug_str(class_id).c_str());
2173 int ambigs_size = (ambigs ==
nullptr) ? 0 : ambigs->size();
2174 if (classify_learning_debug_level >= 1) {
2175 tprintf(
"Running UpdateAmbigsGroup for %s class_id=%d\n",
2176 getDict().getUnicharset().debug_str(class_id).c_str(), class_id);
2178 for (
int ambig = 0; ambig < ambigs_size; ++ambig) {
2179 CLASS_ID ambig_class_id = (*ambigs)[ambig];
2187 if (classify_learning_debug_level >= 1) {
2188 tprintf(
"Making config %d of %s permanent\n", cfg,
2189 getDict().getUnicharset().debug_str(ambig_class_id).c_str());
#define reset_bit(array, bit)
#define test_bit(array, bit)
#define SET_BIT(array, bit)
#define UnusedClassIdIn(T, c)
#define MAX_NUM_INT_FEATURES
#define ClassForClassId(T, c)
#define PRINT_MATCH_SUMMARY
#define PRINT_PROTO_MATCHES
#define PRINT_FEATURE_MATCHES
#define LENGTH_COMPRESSION
#define MAX_ADAPTABLE_WERD_SIZE
#define UNLIKELY_NUM_FEAT
#define ADAPTABLE_WERD_ADJUSTMENT
#define ADAPT_TEMPLATE_SUFFIX
#define WORST_POSSIBLE_RATING
#define classify_enable_adaptive_matcher
#define IsEmptyAdaptedClass(Class)
#define MakeProtoPermanent(Class, ProtoId)
#define MakeConfigPermanent(Class, ConfigId)
#define ConfigIsPermanent(Class, ConfigId)
#define IncreaseConfidence(TempConfig)
#define PermConfigFor(Class, ConfigId)
#define TempConfigFor(Class, ConfigId)
#define GetPicoFeatureLength()
bool MarginalMatch(float confidence, float matcher_great_threshold)
const double kStandardFeatureLength
void AddProtoToProtoPruner(PROTO_STRUCT *Proto, int ProtoId, INT_CLASS_STRUCT *Class, bool debug)
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS_STRUCT *Class)
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET ¤t_unicharset)
void tprintf(const char *format,...)
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, std::vector< INT_FEATURE_STRUCT > *bl_features)
int IntCastRounded(double x)
LIST delete_d(LIST list, void *key, int_compare is_equal)
std::vector< int > FontSet
const float MF_SCALE_FACTOR
void SetAdaptiveThreshold(float Threshold)
int MakeTempProtoPerm(void *item1, void *item2)
void AddProtoToClassPruner(PROTO_STRUCT *Proto, CLASS_ID ClassId, INT_TEMPLATES_STRUCT *Templates)
const FEATURE_DESC_STRUCT CharNormDesc
void UpdateMatchDisplay()
float ActualOutlineLength(FEATURE Feature)
std::vector< UNICHAR_ID > UnicharIdVector
LIST push(LIST list, void *element)
int AddIntConfig(INT_CLASS_STRUCT *Class)
const std::vector< std::string > split(const std::string &s, char c)
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
const int kBlnBaselineOffset
void FillABC(PROTO_STRUCT *Proto)
int AddIntProto(INT_CLASS_STRUCT *Class)
void InitMatcherRatings(float *Rating)
TBOX bounding_box() const
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color)
const DENORM & denorm() const
TBLOB * ClassifyNormalizeIfNeeded() const
TBOX bounding_box() const
std::vector< TBLOB * > blobs
unsigned NumBlobs() const
void plot(ScrollView *window)
const T & at(int id) const
Return the object from an id.
void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht, float *max_xht, float *yshift) const
WERD_CHOICE * best_choice
std::vector< std::string > correct_text
void ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
const FontInfo * fontinfo
bool AlternativeChoiceAdjustmentsWorseThan(float threshold) const
std::vector< int > best_state
bool PiecesAllNatural(int start, int count) const
std::vector< SEAM * > seam_array
std::string debug_string() const
float adjust_factor() const
TDimension bottom() const
static void JoinPieces(const std::vector< SEAM * > &seams, const std::vector< TBLOB * > &blobs, int first, int last)
static void BreakPieces(const std::vector< SEAM * > &seams, const std::vector< TBLOB * > &blobs, int first, int last)
const UnicharIdVector * AmbigsForAdaption(UNICHAR_ID unichar_id) const
const UnicharIdVector * ReverseAmbigsForAdaption(UNICHAR_ID unichar_id) const
std::string language_data_path_prefix
bool Open(const char *filename, FileReader reader)
bool GetComponent(TessdataType type, TFile *fp)
std::string to_string() const
int get_script(UNICHAR_ID unichar_id) const
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
bool get_isalpha(UNICHAR_ID unichar_id) const
const char * id_to_unichar(UNICHAR_ID id) const
bool contains_unichar(const char *const unichar_repr) const
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
bool get_isdigit(UNICHAR_ID unichar_id) const
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
bool get_enabled(UNICHAR_ID unichar_id) const
std::string debug_str(UNICHAR_ID id) const
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
PERM_CONFIG_STRUCT * Perm
ADAPTED_CONFIG Config[MAX_NUM_CONFIGS]
ADAPT_CLASS_STRUCT * Class[MAX_NUM_CLASSES]
INT_TEMPLATES_STRUCT * Templates
UNICHAR_ID best_unichar_id
std::vector< UnicharRating > match
std::vector< CP_RESULT_STRUCT > CPResults
ADAPT_TEMPLATES_STRUCT * Templates
int GetCharNormFeature(const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES_STRUCT *templates, uint8_t *pruner_norm_array, uint8_t *char_norm_array)
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, std::vector< CP_RESULT_STRUCT > *results)
bool AdaptableWord(WERD_RES *word)
void DisplayAdaptedChar(TBLOB *blob, INT_CLASS_STRUCT *int_class)
void RemoveBadMatches(ADAPT_RESULTS *Results)
bool LooksLikeGarbage(TBLOB *blob)
int GetAdaptiveFeatures(TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob)
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
void LearnBlob(const std::string &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
void LearnWord(const char *fontname, WERD_RES *word)
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates)
ADAPT_TEMPLATES_STRUCT * AdaptedTemplates
void ResetAdaptiveClassifierInternal()
void StartBackupAdaptiveClassifier()
UNICHAR_ID * BaselineClassifier(TBLOB *Blob, const std::vector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES_STRUCT *Templates, ADAPT_RESULTS *Results)
ShapeTable * shape_table_
void ShowBestMatchFor(int shape_id, const INT_FEATURE_STRUCT *features, int num_features)
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
static void SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
void ClearCharNormArray(uint8_t *char_norm_array)
bool LargeSpeckle(const TBLOB &blob)
void ConvertProto(PROTO_STRUCT *Proto, int ProtoId, INT_CLASS_STRUCT *Class)
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS_STRUCT *IClass, ADAPT_CLASS_STRUCT *Class, BIT_VECTOR TempProtoMask)
UnicityTable< FontSet > fontset_table_
int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
INT_TEMPLATES_STRUCT * ReadIntTemplates(TFile *fp)
ADAPT_TEMPLATES_STRUCT * ReadAdaptedTemplates(TFile *File)
double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t *cn_factors)
void ClassifyAsNoise(ADAPT_RESULTS *Results)
void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results)
void ExpandShapesAndApplyCorrections(ADAPT_CLASS_STRUCT **classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uint8_t *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
NORM_PROTOS * ReadNormProtos(TFile *fp)
UNICHAR_ID * GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass)
ADAPT_TEMPLATES_STRUCT * BackupAdaptedTemplates
void InitAdaptedClass(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS_STRUCT *Class, ADAPT_TEMPLATES_STRUCT *Templates)
void SwitchAdaptiveClassifier()
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG_STRUCT *config)
void AmbigClassifier(const std::vector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES_STRUCT *templates, ADAPT_CLASS_STRUCT **classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
INT_TEMPLATES_STRUCT * PreTrainedTemplates
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
int MakeNewTemporaryConfig(ADAPT_TEMPLATES_STRUCT *Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
void ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array)
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uint8_t *char_norm_array, uint8_t *pruner_array)
void InitAdaptiveClassifier(TessdataManager *mgr)
void LearnPieces(const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
void MasterMatcher(INT_TEMPLATES_STRUCT *templates, int16_t num_features, const INT_FEATURE_STRUCT *features, const uint8_t *norm_factors, ADAPT_CLASS_STRUCT **classes, int debug, int matcher_multiplier, const TBOX &blob_box, const std::vector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
void SetAdaptiveThreshold(float Threshold)
int GetFontinfoId(ADAPT_CLASS_STRUCT *Class, uint8_t ConfigId)
void ReadNewCutoffs(TFile *fp, uint16_t *Cutoffs)
std::string ClassIDToDebugStr(const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
int CharNormTrainingSample(bool pruner_only, int keep_this, const TrainingSample &sample, std::vector< UnicharRating > *results)
void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
int ShapeIDToClassID(int shape_id) const
void EndAdaptiveClassifier()
UnicityTable< FontInfo > fontinfo_table_
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES_STRUCT *adaptive_templates)
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates)
void RemoveExtraPuncs(ADAPT_RESULTS *Results)
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob)
void ConvertMatchesToChoices(const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
void MakePermanent(ADAPT_TEMPLATES_STRUCT *Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
void DebugAdaptiveClassifier(TBLOB *Blob, ADAPT_RESULTS *Results)
void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices)
void PrintAdaptiveMatchResults(const ADAPT_RESULTS &results)
void Match(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
int FindBadFeatures(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
int FindGoodProtos(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
INT_CLASS_STRUCT * Class[MAX_NUM_CLASSES]
std::vector< float > Params
std::vector< FEATURE_STRUCT * > Features
virtual int UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug, UNICHAR_ID keep_this, std::vector< UnicharRating > *results)
void DebugDisplay(const TrainingSample &sample, Image page_pix, UNICHAR_ID unichar_id)
std::vector< ScoredFont > fonts
bool DeSerialize(TFile *fp)
std::string DebugStr(unsigned shape_id) const
const Shape & GetShape(unsigned shape_id) const
int MaxNumUnichars() const
const INT_FEATURE_STRUCT * features() const
uint32_t num_features() const
FEATURE_STRUCT * GetCNFeature() const
int outline_length() const
int geo_feature(int index) const
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
const UnicharAmbigs & getUnicharAmbigs() const
void EndDangerousAmbigs()
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
const UNICHARSET & getUnicharset() const