21# include "config_auto.h"
46static const int kMaxNumChunks = 64;
57static int check_blob(TBLOB *blob) {
61 for (outline = blob->outlines; outline !=
nullptr; outline = outline->next) {
62 edgept = outline->loop;
64 if (edgept ==
nullptr) {
67 edgept = edgept->next;
68 }
while (edgept != outline->loop);
69 if (edgept ==
nullptr) {
81static int any_shared_split_points(
const std::vector<SEAM *> &seams, SEAM *seam) {
85 length = seams.size();
86 for (index = 0; index < length; index++) {
87 if (seam->SharesPosition(*seams[index])) {
99static void preserve_outline(EDGEPT *start) {
102 if (start ==
nullptr) {
107 srcpt->runlength = 1;
109 }
while (srcpt != start);
110 srcpt->runlength = 2;
113static void preserve_outline_tree(TESSLINE *srcline) {
116 for (outline = srcline; outline !=
nullptr; outline = outline->next) {
117 preserve_outline(outline->loop);
126static EDGEPT *restore_outline(EDGEPT *start) {
130 if (start ==
nullptr) {
135 if (srcpt->runlength == 2) {
139 }
while (srcpt != start);
143 if (srcpt->prev->runlength == 0) {
146 }
while (srcpt != real_start);
150static void restore_outline_tree(TESSLINE *srcline) {
153 for (outline = srcline; outline !=
nullptr; outline = outline->next) {
154 outline->loop = restore_outline(outline->loop);
155 outline->start = outline->loop->pos;
165static int16_t total_containment(TBLOB *blob1, TBLOB *blob2) {
166 TBOX box1 = blob1->bounding_box();
167 TBOX box2 = blob2->bounding_box();
168 return box1.contains(box2) || box2.contains(box1);
173static SEAM *CheckSeam(
int debug_level, int32_t blob_number, TWERD *word, TBLOB *blob,
174 TBLOB *other_blob,
const std::vector<SEAM *> &seams, SEAM *seam) {
175 if (seam ==
nullptr || blob->outlines ==
nullptr || other_blob->outlines ==
nullptr ||
176 total_containment(blob, other_blob) || check_blob(other_blob) ||
177 !seam->ContainedByBlob(*blob) || !seam->ContainedByBlob(*other_blob) ||
178 any_shared_split_points(seams, seam) ||
179 !seam->PrepareToInsertSeam(seams, word->blobs, blob_number,
false)) {
180 word->blobs.erase(word->blobs.begin() + blob_number + 1);
185#ifndef GRAPHICS_DISABLED
187 if (debug_level > 2) {
190 tprintf(
"\n** seam being removed ** \n");
208 const std::vector<SEAM *> &seams) {
209 if (repair_unchopped_blobs) {
210 preserve_outline_tree(blob->
outlines);
214 word->
blobs.insert(word->
blobs.begin() + blob_number + 1, other_blob);
216 SEAM *seam =
nullptr;
217 if (prioritize_division) {
220 seam =
new SEAM(0.0f, location);
223 if (seam ==
nullptr) {
227 if (seam !=
nullptr) {
228 seam->
Print(
"Good seam picked=");
230 tprintf(
"\n** no seam picked *** \n");
234 seam->
ApplySeam(italic_blob, blob, other_blob);
237 seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob, seams, seam);
238 if (seam ==
nullptr) {
239 if (repair_unchopped_blobs) {
240 restore_outline_tree(blob->
outlines);
242 if (allow_blob_division && !prioritize_division) {
247 word->
blobs.insert(word->
blobs.begin() + blob_number + 1, other_blob);
248 seam =
new SEAM(0.0f, location);
249 seam->
ApplySeam(italic_blob, blob, other_blob);
250 seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob, seams, seam);
254 if (seam !=
nullptr) {
262 const std::vector<SEAM *> &seams) {
267 WERD_RES *word_res,
unsigned *blob_number) {
269 for (*blob_number = 0; *blob_number < word->
NumBlobs(); ++*blob_number) {
277 TPOINT original_topleft, original_botright;
282 TBOX(original_topleft.
x, original_botright.
y, original_botright.
x, original_topleft.
y);
284 bool almost_equal_box =
false;
286 for (
auto &&boxe : boxes) {
291 almost_equal_box =
true;
296 if (
divisible_blob(blob, italic_blob, &location) || (!almost_equal_box && num_overlap > 1)) {
298 if (seam !=
nullptr) {
304 *blob_number = UINT_MAX;
321 bool split_next_to_fragment,
bool italic_blob,
WERD_RES *word,
322 unsigned *blob_number) {
323 float rating_ceiling = FLT_MAX;
324 SEAM *seam =
nullptr;
328 tprintf(
"blob_number from fixpt = %d\n", blob);
330 bool split_point_from_dict = (blob != -1);
331 if (split_point_from_dict) {
337 tprintf(
"blob_number = %d\n", blob);
346 if (seam !=
nullptr) {
349 if (blob_choices[*blob_number] ==
nullptr) {
352 if (!split_point_from_dict) {
354 rating_ceiling = blob_choices[*blob_number]->rating();
368 const std::vector<BLOB_CHOICE *> &blob_choices,
WERD_RES *word_res,
369 unsigned *blob_number) {
370 if (prioritize_division) {
373 return improve_one_blob(blob_choices,
nullptr,
false,
true, word_res, blob_number);
387 if (word->
ratings ==
nullptr) {
388 word->
ratings =
new MATRIX(num_blobs, wordrec_max_join_chunks);
392 for (
int b = 0; b < num_blobs; ++b) {
401 row < word->
ratings->
dimension() && row < col + word->ratings->bandwidth(); ++row) {
402 BLOB_CHOICE_LIST *choices = word->
ratings->
get(col, row);
403 if (choices !=
nullptr) {
404 BLOB_CHOICE_IT bc_it(choices);
405 for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
406 bc_it.data()->set_matrix_cell(col, row);
428 if (word->
blamer_bundle !=
nullptr && this->fill_lattice_ !=
nullptr) {
431 if (wordrec_debug_level > 0) {
432 tprintf(
"Final Ratings Matrix:\n");
448 std::vector<SegSearchPending> *pending) {
449 unsigned blob_number;
453 std::vector<BLOB_CHOICE *> blob_choices;
455 for (
int i = 0;
i < num_blobs; ++
i) {
457 if (choices ==
nullptr || choices->empty()) {
458 blob_choices.push_back(
nullptr);
460 BLOB_CHOICE_IT bc_it(choices);
461 blob_choices.push_back(bc_it.data());
466 if (seam ==
nullptr) {
478 best_choice_bundle->
fixpt.clear();
488 pain_point.
col = blob_number + 1;
489 pain_point.
row = blob_number + 1;
499 best_choice_bundle, blamer_bundle);
510 bool valid_permuter = word->
best_choice !=
nullptr &&
513 wordrec_debug_blamer);
524 float rating_ceiling,
bool split_next_to_fragment) {
526 float worst = -FLT_MAX;
527 int worst_index = -1;
528 float worst_near_fragment = -FLT_MAX;
529 int worst_index_near_fragment = -1;
530 std::vector<const CHAR_FRAGMENT *> fragments;
533 if (rating_ceiling < FLT_MAX) {
534 tprintf(
"rating_ceiling = %8.4f\n", rating_ceiling);
536 tprintf(
"rating_ceiling = No Limit\n");
540 if (split_next_to_fragment && blob_choices.size() > 0) {
541 fragments.resize(blob_choices.size());
542 if (blob_choices[0] !=
nullptr) {
545 fragments[0] =
nullptr;
549 for (
unsigned x = 0;
x < blob_choices.size(); ++
x) {
550 if (blob_choices[
x] ==
nullptr) {
553 blob_choice = blob_choices[
x];
555 if (split_next_to_fragment &&
x + 1 < blob_choices.size()) {
556 if (blob_choices[
x + 1] !=
nullptr) {
560 fragments[
x + 1] =
nullptr;
563 if (blob_choice->
rating() < rating_ceiling &&
564 blob_choice->
certainty() < tessedit_certainty_threshold) {
566 if (blob_choice->
rating() > worst) {
568 worst = blob_choice->
rating();
570 if (split_next_to_fragment) {
572 bool expand_following_fragment =
573 (
x + 1 < blob_choices.size() && fragments[
x + 1] !=
nullptr &&
574 !fragments[
x + 1]->is_beginning());
575 bool expand_preceding_fragment =
576 (
x > 0 && fragments[
x - 1] !=
nullptr && !fragments[
x - 1]->is_ending());
577 if ((expand_following_fragment || expand_preceding_fragment) &&
578 blob_choice->
rating() > worst_near_fragment) {
579 worst_index_near_fragment =
x;
580 worst_near_fragment = blob_choice->
rating();
583 "worst_index_near_fragment=%d"
584 " expand_following_fragment=%d"
585 " expand_preceding_fragment=%d\n",
586 worst_index_near_fragment, expand_following_fragment, expand_preceding_fragment);
595 return worst_index_near_fragment != -1 ? worst_index_near_fragment : worst_index;
609 for (
auto &
i : *fixpt) {
610 if (
i.begin + 1 ==
i.end &&
i.dangerous &&
i.correct_is_ngram) {
void remove_edgept(EDGEPT *point)
void tprintf(const char *format,...)
std::vector< DANGERR_INFO > DANGERR
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT *location)
void display_blob(TBLOB *blob, ScrollView::Color color)
void put(ICOORD pos, const T &thing)
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
void BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset, bool valid_permuter, bool debug)
IncorrectResultReason incorrect_result_reason() const
TBOX bounding_box() const
static TBLOB * ShallowCopy(const TBLOB &src)
std::vector< TBLOB * > blobs
unsigned NumBlobs() const
void print(const UNICHARSET &unicharset) const
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const
void FakeWordFromRatings(PermuterType permuter)
WERD_CHOICE * best_choice
void FilterWordChoices(int debug_level)
void InsertSeam(int blob_number, SEAM *seam)
BlamerBundle * blamer_bundle
const UNICHARSET * uch_set
WERD_CHOICE_LIST best_choices
std::vector< SEAM * > seam_array
bool almost_equal(const TBOX &box, int tolerance) const
TDimension bottom() const
double overlap_fraction(const TBOX &box) const
void ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const
void UndoSeam(TBLOB *blob, TBLOB *other_blob) const
void Print(const char *label) const
bool flag(WERD_FLAGS mask) const
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
static bool valid_word_permuter(uint8_t perm, bool numbers_ok)
Check all the DAWGs to see if this word is in any of them.
void reset_hyphen_vars(bool last_word_on_line)
const UNICHARSET & getUnicharset() const
void RemapForSplit(int index)
Struct to store information maintained by various language model components.
Bundle together all the things pertaining to the best choice/state.
std::vector< LanguageModelState * > beam
DANGERR fixpt
Places to try to fix the word suggested by ambiguity checking.
void improve_by_chopping(float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, std::vector< SegSearchPending > *pending)
SEAM * improve_one_blob(const std::vector< BLOB_CHOICE * > &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, unsigned *blob_number)
SEAM * chop_one_blob(const std::vector< TBOX > &boxes, const std::vector< BLOB_CHOICE * > &blob_choices, WERD_RES *word_res, unsigned *blob_number)
void CallFillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
void UpdateSegSearchNodes(float rating_cert_scale, int starting_col, std::vector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
int select_blob_to_split(const std::vector< BLOB_CHOICE * > &blob_choices, float rating_ceiling, bool split_next_to_fragment)
SEAM * chop_numbered_blob(TWERD *word, int32_t blob_number, bool italic_blob, const std::vector< SEAM * > &seams)
SEAM * chop_overlapping_blob(const std::vector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, unsigned *blob_number)
void chop_word_main(WERD_RES *word)
void ProcessSegSearchPainPoint(float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, std::vector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
int select_blob_to_split_from_fixpt(DANGERR *fixpt)
SEAM * attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, const std::vector< SEAM * > &seams)
virtual BLOB_CHOICE_LIST * classify_piece(const std::vector< SEAM * > &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
void ResetNGramSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, std::vector< SegSearchPending > &pending)
SEAM * pick_good_seam(TBLOB *blob)
void SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
std::unique_ptr< LanguageModel > language_model_