50static const double kStopperAmbiguityThresholdGain = 8.0;
53static const double kStopperAmbiguityThresholdOffset = 1.5;
67static double StopperAmbigThreshold(
double f1,
double f2) {
68 return (f2 - f1) * kStopperAmbiguityThresholdGain -
69 kStopperAmbiguityThresholdOffset;
80 BLOCK_IT block_it(the_block_list);
82 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
83 block_res_it.add_to_end(
84 new BLOCK_RES(merge_similar_words, block_it.data()));
96 ROW_IT row_it(the_block->
row_list());
108 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
109 row_res_it.add_to_end(
new ROW_RES(merge_similar_words, row_it.data()));
130 bool add_next_word =
false;
134 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
135 auto *word_res =
new WERD_RES(word_it.data());
136 word_res->x_height = the_row->
x_height();
140 word_res->part_of_combo =
true;
142 }
else if (merge_similar_words) {
143 union_box = word_res->word->bounding_box();
144 add_next_word = !word_res->word->flag(
W_REP_CHAR) &&
146 word_res->odd_size = !add_next_word;
148 WERD *next_word = word_it.data_relative(1);
149 if (merge_similar_words) {
157 int prev_right = union_box.
right();
158 union_box += next_box;
162 add_next_word =
false;
170 if (combo ==
nullptr) {
171 copy_word =
new WERD;
172 *copy_word = *(word_it.data());
176 word_res_it.add_to_end(combo);
178 word_res->part_of_combo =
true;
182 word_res_it.add_to_end(word_res);
218 WERD_CHOICE_IT wc_it(
const_cast<WERD_CHOICE_LIST *
>(&source.
best_choices));
220 for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
222 wc_dest_it.add_after_then_move(
new WERD_CHOICE(*choice));
224 if (!wc_dest_it.empty()) {
225 wc_dest_it.move_to_first();
306 int norm_mode,
const TBOX *norm_box,
307 bool numeric_mode,
bool use_body_size,
308 bool allow_detailed_fx,
ROW *row,
309 const BLOCK *block) {
315 (pb !=
nullptr && !pb->
IsText())) {
326 use_body_size && row !=
nullptr && row->
body_size() > 0.0f
331 norm_mode_hint, norm_box, &
denorm);
361 if (blob_count > 0) {
362 auto **fake_choices =
new BLOB_CHOICE *[blob_count];
367 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
368 TBOX box = b_it.data()->bounding_box();
373 delete[] fake_choices;
405 for (
int b = 0; b < num_blobs; ++b) {
409 if (b + 1 < num_blobs) {
431 for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
443 WERD_CHOICE_IT wc_it(
const_cast<WERD_CHOICE_LIST *
>(&
best_choices));
444 for (wc_it.forward(); !wc_it.at_first(); wc_it.forward()) {
464 tprintf(
"raw_choice has total of states = %u vs ratings dim of %u\n",
470 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
473 tprintf(
"Cooked #%u has total of states = %u vs ratings dim of %u\n",
484 if (debug || (word_to_debug !=
nullptr && *word_to_debug !=
'\0' &&
493 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
496 label +=
"\nCooked Choice #" + std::to_string(index);
497 choice->
print(label.c_str());
523 if (debug_level >= 2) {
528 for (it.forward(); !it.at_first(); it.forward(), ++index) {
537 unsigned i = 0, j = 0, chunk = 0;
543 while (i < choice->length() && j < best_choice->length()) {
546 if (debug_level >= 2) {
547 choice->
print(
"WorstCertaintyDiffWorseThan");
549 "i %u j %u Choice->Blob[i].Certainty %.4g"
550 " WorstOtherChoiceCertainty %g Threshold %g\n",
552 tprintf(
"Discarding bad choice #%d\n", index);
559 while (choice_chunk < chunk && ++i < choice->length()) {
560 choice_chunk += choice->
state(
i);
563 while (best_chunk < chunk && ++j < best_choice->length()) {
571 float min_rating,
float max_rating,
579 float avg_rating = 0.0f;
580 int num_error_chunks = 0;
583 while (chunk < end_chunk) {
584 if (chunk >= end_raw_chunk) {
595 if (num_error_chunks > 0) {
596 avg_rating /= num_error_chunks;
597 *thresholds = (avg_rating / -certainty_scale) * (1.0 - rating_margin);
599 *thresholds = max_rating;
602 if (*thresholds > max_rating) {
603 *thresholds = max_rating;
605 if (*thresholds < min_rating) {
606 *thresholds = min_rating;
637 float max_certainty_delta = StopperAmbigThreshold(
639 if (max_certainty_delta > -kStopperAmbiguityThresholdOffset) {
640 max_certainty_delta = -kStopperAmbiguityThresholdOffset;
643 max_certainty_delta) {
645 std::string bad_string;
648 "Discarding choice \"%s\" with an overly low certainty"
649 " %.3f vs best choice certainty %.3f (Threshold: %.3f)\n",
650 bad_string.c_str(), word_choice->
certainty(),
663 bool inserted =
false;
668 if (choice->
rating() > word_choice->
rating() && !inserted) {
670 it.add_before_stay_put(word_choice);
672 if (num_choices == 0) {
684 tprintf(
"Discarding duplicate choice \"%s\", rating %g vs %g\n",
685 new_str.c_str(), word_choice->
rating(), choice->
rating());
692 if (num_choices > max_num_choices) {
697 }
while (!it.at_first());
699 if (!inserted && num_choices < max_num_choices) {
700 it.add_to_end(word_choice);
702 if (num_choices == 0) {
712 word_choice->
print(
" Word Choice");
724static void MovePointerData(T **
dest, T **src) {
732 std::string alternates_str;
733 WERD_CHOICE_IT it(
const_cast<WERD_CHOICE_LIST *
>(&
best_choices));
734 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
735 if (!it.at_first()) {
736 alternates_str +=
"\", \"";
738 alternates_str += it.data()->unichar_string();
740 tprintf(
"Alternates for \"%s\": {\"%s\"}\n",
748 for (
int b = start_blob; b <= last_blob; ++b) {
795 word->seam_array.clear();
798 word->best_state.clear();
800 word->correct_text.clear();
802 word->blob_widths.clear();
804 word->blob_gaps.clear();
813 wc_it.add_list_after(&
word->best_choices);
815 if (
word->blamer_bundle !=
nullptr) {
872 for (
unsigned i = 0;
i < word_len; ++
i) {
898 for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
899 wc_it.data()->SetAllScriptPositions(position);
915 for (
unsigned c = 0; c < blob_count; ++c) {
916 auto *choice_list =
new BLOB_CHOICE_LIST;
917 BLOB_CHOICE_IT choice_it(choice_list);
918 choice_it.add_after_then_move(choices[c]);
933 word_choice->set_permuter(permuter);
934 for (
int b = 0; b < num_blobs; ++b) {
938 float certainty = -FLT_MAX;
939 BLOB_CHOICE_LIST *choices =
ratings->
get(b, b);
940 if (choices !=
nullptr && !choices->empty()) {
941 BLOB_CHOICE_IT bc_it(choices);
944 rating = choice->
rating();
947 word_choice->append_unichar_id_space_allocated(unichar_id, 1, rating,
973 const std::function<
bool(
const TBOX &,
const TBOX &)> &box_cb) {
975 bool modified =
false;
979 if (new_id != INVALID_UNICHAR_ID &&
980 (box_cb ==
nullptr ||
995 BLOB_CHOICE_IT bc_it(blob_choices);
996 bc_it.add_before_then_move(blob_choice);
1024static int is_simple_quote(
const char *signed_str,
int length) {
1025 const auto *str =
reinterpret_cast<const unsigned char *
>(signed_str);
1027 return (length == 1 && (*str ==
'\'' || *str ==
'`')) ||
1030 ((*str == 0xe2 && *(str + 1) == 0x80 && *(str + 2) == 0x98) ||
1031 (*str == 0xe2 && *(str + 1) == 0x80 && *(str + 2) == 0x99)));
1039 if (is_simple_quote(
ch, strlen(
ch)) &&
1040 is_simple_quote(next_ch, strlen(next_ch))) {
1043 return INVALID_UNICHAR_ID;
1053 using namespace std::placeholders;
1062 if (strlen(
ch) == 1 && strlen(next_ch) == 1 && (*
ch ==
'-' || *
ch ==
'~') &&
1063 (*next_ch ==
'-' || *next_ch ==
'~')) {
1066 return INVALID_UNICHAR_ID;
1083 using namespace std::placeholders;
1094 return INVALID_UNICHAR_ID;
1100 using namespace std::placeholders;
1113 for (
int index = start; index < start +
count - 1; ++index) {
1114 if (index >= 0 &&
static_cast<size_t>(index) <
seam_array.size()) {
1185 if (other.block_res ==
nullptr) {
1187 if (block_res ==
nullptr) {
1192 if (block_res ==
nullptr) {
1195 if (block_res == other.block_res) {
1196 if (other.row_res ==
nullptr || row_res ==
nullptr) {
1200 if (row_res == other.row_res) {
1202 ASSERT_HOST(other.word_res !=
nullptr && word_res !=
nullptr);
1203 if (word_res == other.word_res) {
1209 for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
1210 word_res_it.forward()) {
1211 if (word_res_it.data() == word_res) {
1213 }
else if (word_res_it.data() == other.word_res) {
1217 ASSERT_HOST(
"Error: Incomparable PAGE_RES_ITs" ==
nullptr);
1222 for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list();
1223 row_res_it.forward()) {
1224 if (row_res_it.data() == row_res) {
1226 }
else if (row_res_it.data() == other.row_res) {
1230 ASSERT_HOST(
"Error: Incomparable PAGE_RES_ITs" ==
nullptr);
1235 for (block_res_it.mark_cycle_pt(); !block_res_it.cycled_list();
1236 block_res_it.forward()) {
1237 if (block_res_it.data() == block_res) {
1239 }
else if (block_res_it.data() == other.block_res) {
1244 ASSERT_HOST(
"Error: Incomparable PAGE_RES_ITs" ==
nullptr);
1255 auto *new_res =
new WERD_RES(new_word);
1256 new_res->CopySimpleFields(clone_res);
1257 new_res->combination =
true;
1259 WERD_RES_IT wr_it(&
row()->word_res_list);
1260 for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1262 if (
word == word_res) {
1267 wr_it.add_before_then_move(new_res);
1268 if (wr_it.at_first()) {
1279static void ComputeBlobEnds(
const WERD_RES &word,
const TBOX &clip_box,
1280 C_BLOB_LIST *next_word_blobs,
1281 std::vector<int> *blob_ends) {
1285 TBOX blob_box = blob_it.data()->bounding_box();
1287 for (
int b = 1; b < length; ++b) {
1288 blob_box += blob_it.data()->bounding_box();
1293 int blob_end = INT32_MAX;
1294 if (!blob_it.at_first() || next_word_blobs !=
nullptr) {
1295 if (blob_it.at_first()) {
1296 blob_it.set_to_list(next_word_blobs);
1298 blob_end = (blob_box.
right() + blob_it.data()->bounding_box().left()) / 2;
1300 blob_end = ClipToRange<int>(blob_end, clip_box.
left(), clip_box.
right());
1301 blob_ends->push_back(blob_end);
1303 blob_ends->back() = clip_box.
right();
1309 int w_index,
TBOX prev_box, WERD_RES_IT w_it) {
1310 constexpr int kSignificantOverlapFraction = 4;
1312 TBOX current_box = words[w_index]->word->bounding_box();
1314 if (
static_cast<size_t>(w_index + 1) < words.
size() &&
1315 words[w_index + 1] !=
nullptr && words[w_index + 1]->word !=
nullptr) {
1316 next_box = words[w_index + 1]->word->bounding_box();
1318 for (w_it.forward(); !w_it.at_first() && w_it.data()->part_of_combo;
1320 if (w_it.data() ==
nullptr || w_it.data()->word ==
nullptr) {
1323 TBOX w_box = w_it.data()->word->bounding_box();
1324 int height_limit = std::min<int>(w_box.height(), w_box.width() / 2);
1325 int width_limit = w_box.width() / kSignificantOverlapFraction;
1326 int min_significant_overlap = std::max(height_limit, width_limit);
1327 int overlap = w_box.intersection(current_box).width();
1328 int prev_overlap = w_box.intersection(prev_box).width();
1329 int next_overlap = w_box.intersection(next_box).width();
1330 if (overlap > min_significant_overlap) {
1331 if (prev_overlap > min_significant_overlap) {
1333 clipped_box.set_left(current_box.left());
1334 }
else if (next_overlap > min_significant_overlap) {
1336 clipped_box.set_right(current_box.right());
1338 clipped_box += w_box;
1342 if (clipped_box.height() <= 0) {
1343 clipped_box.set_top(current_box.top());
1344 clipped_box.set_bottom(current_box.bottom());
1346 if (clipped_box.width() <= 0) {
1347 clipped_box = current_box;
1354static TBOX MoveAndClipBlob(C_BLOB_IT *src_it, C_BLOB_IT *dest_it,
1355 const TBOX &clip_box) {
1356 C_BLOB *src_blob = src_it->extract();
1357 TBOX box = src_blob->bounding_box();
1358 if (!clip_box.contains(box)) {
1360 ClipToRange<int>(box.left(), clip_box.left(), clip_box.right() - 1);
1362 ClipToRange<int>(box.right(), clip_box.left() + 1, clip_box.right());
1364 ClipToRange<int>(box.top(), clip_box.bottom() + 1, clip_box.top());
1366 ClipToRange<int>(box.bottom(), clip_box.bottom(), clip_box.top() - 1);
1367 box =
TBOX(left, bottom, right, top);
1371 dest_it->add_after_then_move(src_blob);
1380 if (words->
empty()) {
1387 (*words)[0]->word->set_flag(
W_BOL,
true);
1389 (*words)[0]->word->set_blanks(input_word->
word->
space());
1399 WERD_IT w_it(
row()->
row->word_list());
1401 for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
1411 WERD_RES_IT wr_it(&
row()->word_res_list);
1412 for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1414 if (
word == input_word) {
1426 for (
size_t w = 0; w < words->
size(); ++w) {
1428 clip_box = ComputeWordBounds(*words, w, clip_box, wr_it_of_current_word);
1430 std::vector<int> blob_ends;
1431 C_BLOB_LIST *next_word_blobs =
1432 w + 1 < words->
size() ? (*words)[w + 1]->word->cblob_list() :
nullptr;
1433 ComputeBlobEnds(*word_w, clip_box, next_word_blobs, &blob_ends);
1436 C_BLOB_LIST fake_blobs;
1437 C_BLOB_IT fake_b_it(&fake_blobs);
1439 fake_b_it.move_to_first();
1444 for (
size_t i = 0;
i < blob_ends.size(); ++
i, fake_b_it.forward()) {
1445 int end_x = blob_ends[
i];
1448 while (!src_b_it.empty() &&
1449 src_b_it.data()->bounding_box().x_middle() < end_x) {
1450 blob_box += MoveAndClipBlob(&src_b_it, &dest_it, clip_box);
1453 while (!rej_b_it.empty() &&
1454 rej_b_it.data()->bounding_box().x_middle() < end_x) {
1455 blob_box += MoveAndClipBlob(&rej_b_it, &dest_it, clip_box);
1460 blob_box = MoveAndClipBlob(&fake_b_it, &dest_it, clip_box);
1462 box_word->InsertBox(
i, blob_box);
1469 w_it.add_before_stay_put(word_w->
word);
1472 (*words)[w] =
nullptr;
1473 wr_it.add_before_stay_put(word_w);
1481 delete w_it.extract();
1483 delete wr_it.extract();
1496 WERD_IT w_it(
row()->
row->word_list());
1497 for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
1498 if (w_it.data() == word_res->
word) {
1503 delete w_it.extract();
1507 WERD_RES_IT wr_it(&
row()->word_res_list);
1508 for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1509 if (wr_it.data() == word_res) {
1515 delete wr_it.extract();
1528 WERD_RES_IT wr_it(&
row()->word_res_list);
1529 for (wr_it.mark_cycle_pt();
1530 !wr_it.cycled_list() && wr_it.data() != word_res; wr_it.forward()) {
1534 real_word = wr_it.data()->word;
1550 block_res_it.mark_cycle_pt();
1551 prev_block_res =
nullptr;
1552 prev_row_res =
nullptr;
1553 prev_word_res =
nullptr;
1554 block_res =
nullptr;
1557 next_block_res =
nullptr;
1558 next_row_res =
nullptr;
1559 next_word_res =
nullptr;
1560 internal_forward(
true, empty_ok);
1561 return internal_forward(
false, empty_ok);
1572 if (row_res == next_row_res) {
1575 word_res_it.move_to_first();
1576 for (word_res_it.mark_cycle_pt();
1577 !word_res_it.cycled_list() && word_res_it.data() != next_word_res;
1578 word_res_it.forward()) {
1579 if (!word_res_it.data()->part_of_combo) {
1580 if (prev_row_res == row_res) {
1581 prev_word_res = word_res;
1583 word_res = word_res_it.data();
1587 wr_it_of_next_word = word_res_it;
1588 word_res_it.forward();
1592 for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1593 if (!wr_it.data()->part_of_combo) {
1594 if (prev_row_res == row_res) {
1595 prev_word_res = word_res;
1597 word_res = wr_it.data();
1617WERD_RES *PAGE_RES_IT::internal_forward(
bool new_block,
bool empty_ok) {
1618 bool new_row =
false;
1620 prev_block_res = block_res;
1621 prev_row_res = row_res;
1622 prev_word_res = word_res;
1623 block_res = next_block_res;
1624 row_res = next_row_res;
1625 word_res = next_word_res;
1626 wr_it_of_current_word = wr_it_of_next_word;
1627 next_block_res =
nullptr;
1628 next_row_res =
nullptr;
1629 next_word_res =
nullptr;
1631 while (!block_res_it.cycled_list()) {
1634 row_res_it.set_to_list(&block_res_it.data()->row_res_list);
1635 row_res_it.mark_cycle_pt();
1636 if (row_res_it.empty() && empty_ok) {
1637 next_block_res = block_res_it.data();
1642 while (!row_res_it.cycled_list()) {
1645 word_res_it.set_to_list(&row_res_it.data()->word_res_list);
1646 word_res_it.mark_cycle_pt();
1649 while (!word_res_it.cycled_list() && word_res_it.data()->part_of_combo) {
1650 word_res_it.forward();
1652 if (!word_res_it.cycled_list()) {
1653 next_block_res = block_res_it.data();
1654 next_row_res = row_res_it.data();
1655 next_word_res = word_res_it.data();
1656 wr_it_of_next_word = word_res_it;
1657 word_res_it.forward();
1661 row_res_it.forward();
1665 block_res_it.forward();
1701 while (block_res == next_block_res &&
1702 (next_row_res !=
nullptr && next_row_res->
row !=
nullptr &&
1704 internal_forward(
false,
true);
1706 return internal_forward(
false,
true);
1716 while (block_res == next_block_res) {
1717 internal_forward(
false,
true);
1719 return internal_forward(
false,
true);
1723 int16_t chars_in_word;
1724 int16_t rejects_in_word = 0;
1734 block_res->
rej_count += rejects_in_word;
1736 if (chars_in_word == rejects_in_word) {
@ W_INVERSE
white on black
@ W_SCRIPT_HAS_XHEIGHT
x-height concept makes sense.
@ W_SCRIPT_IS_LATIN
Special case latin for y. splitting.
@ W_REP_CHAR
repeated character
@ W_FUZZY_NON
fuzzy nonspace
void tprintf(const char *format,...)
const double kMaxWordSizeRatio
const double kMaxLineSizeRatio
const int kWordrecMaxNumJoinChunks
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
void start_seam_list(TWERD *word, std::vector< SEAM * > *seam_array)
const double kMaxWordGapRatio
void put(ICOORD pos, const T &thing)
void delete_matrix_pointers()
void CopyTruth(const BlamerBundle &other)
void CopyResults(const BlamerBundle &other)
void SetupNormTruthWord(const DENORM &denorm)
TBOX bounding_box() const
void ComputeBoundingBoxes()
static TWERD * PolygonalCopy(bool allow_detailed_fx, WERD *src)
std::vector< TBLOB * > blobs
void BLNormalize(const BLOCK *block, const ROW *row, Image pix, bool inverse, float x_height, float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint, const TBOX *norm_box, DENORM *word_denorm)
unsigned NumBlobs() const
void MergeBlobs(unsigned start, unsigned end)
void MergeBoxes(unsigned start, unsigned end)
static BoxWord * CopyFromNormalized(TWERD *tessword)
const TBOX & BlobBox(unsigned index) const
void InsertBox(unsigned index, const TBOX &box)
void ClipToOriginalWord(const BLOCK *block, WERD *original_word)
MATRIX * ConsumeAndMakeBigger(int ind)
void IncreaseBandSize(int bandwidth)
bool Valid(const MATRIX &m) const
const BLOCK * block() const
PDBLK pdblk
Page Description Block.
ROW_LIST * row_list()
get rows
BLOCK_RES_LIST block_res_list
WERD_CHOICE ** prev_word_best_choice
ROW_RES_LIST row_res_list
WERD_RES_LIST word_res_list
int32_t whole_word_rej_count
void copy_on(WERD_RES *word_res)
const FontInfo * fontinfo2
void CloneChoppedToRebuild()
void FakeWordFromRatings(PermuterType permuter)
tesseract::Tesseract * tesseract
UNICHAR_ID BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2)
WERD_CHOICE * best_choice
int8_t fontinfo_id2_count
std::vector< std::string > correct_text
int GetBlobsGap(unsigned blob_index) const
void ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
void FilterWordChoices(int debug_level)
void FakeClassifyWord(unsigned blob_count, BLOB_CHOICE **choices)
CRUNCH_MODE unlv_crunch_mode
void SetupWordScript(const UNICHARSET &unicharset_in)
void InsertSeam(int blob_number, SEAM *seam)
void InitForRetryRecognition(const WERD_RES &source)
bool SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, Image pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, bool use_body_size, bool allow_detailed_fx, ROW *row, const BLOCK *block)
bool ConditionalBlobMerge(const std::function< UNICHAR_ID(UNICHAR_ID, UNICHAR_ID)> &class_cb, const std::function< bool(const TBOX &, const TBOX &)> &box_cb)
tesseract::BoxWord * bln_boxes
bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice)
void ConsumeWordResults(WERD_RES *word)
void DebugTopChoice(const char *msg) const
void SetScriptPositions()
UNICHAR_ID BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2)
BlamerBundle * blamer_bundle
const UNICHARSET * uch_set
const FontInfo * fontinfo
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
bool AlternativeChoiceAdjustmentsWorseThan(float threshold) const
void BestChoiceToCorrectText()
WERD_CHOICE_LIST best_choices
std::vector< int > best_state
void SetupBlobWidthsAndGaps()
void SetAllScriptPositions(tesseract::ScriptPos position)
tesseract::BoxWord * box_word
void ReplaceBestChoice(WERD_CHOICE *choice)
void SetupFake(const UNICHARSET &uch)
std::vector< int > blob_widths
void DebugWordChoices(bool debug, const char *word_to_debug)
void PrintBestChoices() const
BLOB_CHOICE * GetBlobChoice(unsigned index) const
UNICHAR_ID BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2)
int GetBlobsWidth(int start_blob, int last_blob) const
void MergeAdjacentBlobs(unsigned index)
bool LogNewRawChoice(WERD_CHOICE *word_choice)
std::vector< int > blob_gaps
bool HyphenBoxesOverlap(const TBOX &box1, const TBOX &box2)
bool PiecesAllNatural(int start, int count) const
void CopySimpleFields(const WERD_RES &source)
WERD_RES & operator=(const WERD_RES &source)
void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in)
std::vector< SEAM * > seam_array
void MakeCurrentWordFuzzy()
WERD_RES * start_page(bool empty_ok)
WERD_RES * restart_page()
WERD_RES * forward_paragraph()
WERD_RES * forward_block()
int cmp(const PAGE_RES_IT &other) const
void ReplaceCurrentWord(PointerVector< WERD_RES > *words)
WERD_RES * InsertSimpleCloneWord(const WERD_RES &clone_res, WERD *new_word)
POLY_BLOCK * poly_block() const
void set_unichar_id(UNICHAR_ID newunichar_id)
UNICHAR_ID unichar_id() const
unsigned TotalOfStates() const
void remove_unichar_id(unsigned index)
MATRIX_COORD MatrixCoord(unsigned index) const
void set_unichar_id(UNICHAR_ID unichar_id, unsigned index)
void string_and_lengths(std::string *word_str, std::string *word_lengths_str) const
UNICHAR_ID unichar_id(unsigned index) const
static const float kBadRating
bool dangerous_ambig_found() const
unsigned state(unsigned index) const
void set_permuter(uint8_t perm)
BLOB_CHOICE_LIST * blob_choices(unsigned index, MATRIX *ratings) const
std::string & unichar_string()
void SetAllScriptPositions(ScriptPos position)
void UpdateStateForSplit(int blob_position)
void SetScriptPositions(bool small_caps, TWERD *word, int debug=0)
float adjust_factor() const
TDimension height() const
int16_t reject_count() const
void remove_pos(uint16_t pos)
void initialise(uint16_t length)
static void JoinPieces(const std::vector< SEAM * > &seams, const std::vector< TBLOB * > &blobs, int first, int last)
bool HasAnySplits() const
bool PrepareToInsertSeam(const std::vector< SEAM * > &seams, const std::vector< TBLOB * > &blobs, int insert_index, bool modify)
static void BreakPieces(const std::vector< SEAM * > &seams, const std::vector< TBLOB * > &blobs, int first, int last)
static C_BLOB * FakeBlob(const TBOX &box)
static int SortByXMiddle(const void *v1, const void *v2)
bool flag(WERD_FLAGS mask) const
void set_flag(WERD_FLAGS mask, bool value)
void set_script_id(int id)
TBOX bounding_box() const
C_BLOB_LIST * rej_cblob_list()
C_BLOB_LIST * cblob_list()
void operator=(const ELIST_LINK &)
bool script_has_xheight() const
const char * id_to_unichar(UNICHAR_ID id) const
bool contains_unichar(const char *const unichar_repr) const
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
bool get_enabled(UNICHAR_ID unichar_id) const