tesseract v5.3.3.20231005
chopper.cpp
Go to the documentation of this file.
1/******************************************************************************
2 *
3 * File: chopper.cpp (Formerly chopper.c)
4 * Author: Mark Seaman, OCR Technology
5 *
6 * (c) Copyright 1987, Hewlett-Packard Company.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 *****************************************************************************/
18
19// Include automatically generated configuration file if running autoconf.
20#ifdef HAVE_CONFIG_H
21# include "config_auto.h"
22#endif
23
24#include "blamer.h" // for BlamerBundle, IRR_CORRECT
25#include "blobs.h" // for TPOINT, TBLOB, EDGEPT, TESSLINE, divisible_blob
26#include "dict.h" // for Dict
27#include "lm_pain_points.h" // for LMPainPoints
28#include "lm_state.h" // for BestChoiceBundle
29#include "matrix.h" // for MATRIX
30#include "normalis.h" // for DENORM
31#include "pageres.h" // for WERD_RES
32#include "params.h" // for IntParam, BoolParam
33#include "ratngs.h" // for BLOB_CHOICE (ptr only), BLOB_CHOICE_LIST (ptr ...
34#include "rect.h" // for TBOX
35#include "render.h" // for display_blob
36#include "seam.h" // for SEAM
37#include "split.h" // for remove_edgept
38#include "stopper.h" // for DANGERR
39#include "tprintf.h" // for tprintf
40#include "wordrec.h" // for Wordrec, SegSearchPending (ptr only)
41
42namespace tesseract {
43
// Upper bound on the number of chunks a word may be chopped into. The limit
// could now be lifted, but it is retained so that behavior stays repeatable,
// and it may also be a speed advantage.
static constexpr int kMaxNumChunks = 64;
47
48/*----------------------------------------------------------------------
49 F u n c t i o n s
50----------------------------------------------------------------------*/
51
57static int check_blob(TBLOB *blob) {
58 TESSLINE *outline;
59 EDGEPT *edgept;
60
61 for (outline = blob->outlines; outline != nullptr; outline = outline->next) {
62 edgept = outline->loop;
63 do {
64 if (edgept == nullptr) {
65 break;
66 }
67 edgept = edgept->next;
68 } while (edgept != outline->loop);
69 if (edgept == nullptr) {
70 return 1;
71 }
72 }
73 return 0;
74}
75
81static int any_shared_split_points(const std::vector<SEAM *> &seams, SEAM *seam) {
82 int length;
83 int index;
84
85 length = seams.size();
86 for (index = 0; index < length; index++) {
87 if (seam->SharesPosition(*seams[index])) {
88 return true;
89 }
90 }
91 return false;
92}
93
99static void preserve_outline(EDGEPT *start) {
100 EDGEPT *srcpt;
101
102 if (start == nullptr) {
103 return;
104 }
105 srcpt = start;
106 do {
107 srcpt->runlength = 1;
108 srcpt = srcpt->next;
109 } while (srcpt != start);
110 srcpt->runlength = 2;
111}
112
113static void preserve_outline_tree(TESSLINE *srcline) {
114 TESSLINE *outline;
115
116 for (outline = srcline; outline != nullptr; outline = outline->next) {
117 preserve_outline(outline->loop);
118 }
119}
120
126static EDGEPT *restore_outline(EDGEPT *start) {
127 EDGEPT *srcpt;
128 EDGEPT *real_start;
129
130 if (start == nullptr) {
131 return nullptr;
132 }
133 srcpt = start;
134 do {
135 if (srcpt->runlength == 2) {
136 break;
137 }
138 srcpt = srcpt->next;
139 } while (srcpt != start);
140 real_start = srcpt;
141 do {
142 srcpt = srcpt->next;
143 if (srcpt->prev->runlength == 0) {
144 remove_edgept(srcpt->prev);
145 }
146 } while (srcpt != real_start);
147 return real_start;
148}
149
150static void restore_outline_tree(TESSLINE *srcline) {
151 TESSLINE *outline;
152
153 for (outline = srcline; outline != nullptr; outline = outline->next) {
154 outline->loop = restore_outline(outline->loop);
155 outline->start = outline->loop->pos;
156 }
157}
158
159/**********************************************************************
160 * total_containment
161 *
162 * Check to see if one of these outlines is totally contained within
163 * the bounding box of the other.
164 **********************************************************************/
165static int16_t total_containment(TBLOB *blob1, TBLOB *blob2) {
166 TBOX box1 = blob1->bounding_box();
167 TBOX box2 = blob2->bounding_box();
168 return box1.contains(box2) || box2.contains(box1);
169}
170
171// Helper runs all the checks on a seam to make sure it is valid.
172// Returns the seam if OK, otherwise deletes the seam and returns nullptr.
173static SEAM *CheckSeam(int debug_level, int32_t blob_number, TWERD *word, TBLOB *blob,
174 TBLOB *other_blob, const std::vector<SEAM *> &seams, SEAM *seam) {
175 if (seam == nullptr || blob->outlines == nullptr || other_blob->outlines == nullptr ||
176 total_containment(blob, other_blob) || check_blob(other_blob) ||
177 !seam->ContainedByBlob(*blob) || !seam->ContainedByBlob(*other_blob) ||
178 any_shared_split_points(seams, seam) ||
179 !seam->PrepareToInsertSeam(seams, word->blobs, blob_number, false)) {
180 word->blobs.erase(word->blobs.begin() + blob_number + 1);
181 if (seam) {
182 seam->UndoSeam(blob, other_blob);
183 delete seam;
184 seam = nullptr;
185#ifndef GRAPHICS_DISABLED
186 if (debug_level) {
187 if (debug_level > 2) {
189 }
190 tprintf("\n** seam being removed ** \n");
191 }
192#endif
193 } else {
194 delete other_blob;
195 }
196 return nullptr;
197 }
198 return seam;
199}
200
207SEAM *Wordrec::attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob,
208 const std::vector<SEAM *> &seams) {
209 if (repair_unchopped_blobs) {
210 preserve_outline_tree(blob->outlines);
211 }
212 TBLOB *other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
213 // Insert it into the word.
214 word->blobs.insert(word->blobs.begin() + blob_number + 1, other_blob);
215
216 SEAM *seam = nullptr;
217 if (prioritize_division) {
218 TPOINT location;
219 if (divisible_blob(blob, italic_blob, &location)) {
220 seam = new SEAM(0.0f, location);
221 }
222 }
223 if (seam == nullptr) {
224 seam = pick_good_seam(blob);
225 }
226 if (chop_debug) {
227 if (seam != nullptr) {
228 seam->Print("Good seam picked=");
229 } else {
230 tprintf("\n** no seam picked *** \n");
231 }
232 }
233 if (seam) {
234 seam->ApplySeam(italic_blob, blob, other_blob);
235 }
236
237 seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob, seams, seam);
238 if (seam == nullptr) {
239 if (repair_unchopped_blobs) {
240 restore_outline_tree(blob->outlines);
241 }
242 if (allow_blob_division && !prioritize_division) {
243 // If the blob can simply be divided into outlines, then do that.
244 TPOINT location;
245 if (divisible_blob(blob, italic_blob, &location)) {
246 other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
247 word->blobs.insert(word->blobs.begin() + blob_number + 1, other_blob);
248 seam = new SEAM(0.0f, location);
249 seam->ApplySeam(italic_blob, blob, other_blob);
250 seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob, seams, seam);
251 }
252 }
253 }
254 if (seam != nullptr) {
255 // Make sure this seam doesn't get chopped again.
256 seam->Finalize();
257 }
258 return seam;
259}
260
261SEAM *Wordrec::chop_numbered_blob(TWERD *word, int32_t blob_number, bool italic_blob,
262 const std::vector<SEAM *> &seams) {
263 return attempt_blob_chop(word, word->blobs[blob_number], blob_number, italic_blob, seams);
264}
265
266SEAM *Wordrec::chop_overlapping_blob(const std::vector<TBOX> &boxes, bool italic_blob,
267 WERD_RES *word_res, unsigned *blob_number) {
268 TWERD *word = word_res->chopped_word;
269 for (*blob_number = 0; *blob_number < word->NumBlobs(); ++*blob_number) {
270 TBLOB *blob = word->blobs[*blob_number];
271 TPOINT topleft, botright;
272 topleft.x = blob->bounding_box().left();
273 topleft.y = blob->bounding_box().top();
274 botright.x = blob->bounding_box().right();
275 botright.y = blob->bounding_box().bottom();
276
277 TPOINT original_topleft, original_botright;
278 word_res->denorm.DenormTransform(nullptr, topleft, &original_topleft);
279 word_res->denorm.DenormTransform(nullptr, botright, &original_botright);
280
281 TBOX original_box =
282 TBOX(original_topleft.x, original_botright.y, original_botright.x, original_topleft.y);
283
284 bool almost_equal_box = false;
285 int num_overlap = 0;
286 for (auto &&boxe : boxes) {
287 if (original_box.overlap_fraction(boxe) > 0.125) {
288 num_overlap++;
289 }
290 if (original_box.almost_equal(boxe, 3)) {
291 almost_equal_box = true;
292 }
293 }
294
295 TPOINT location;
296 if (divisible_blob(blob, italic_blob, &location) || (!almost_equal_box && num_overlap > 1)) {
297 SEAM *seam = attempt_blob_chop(word, blob, *blob_number, italic_blob, word_res->seam_array);
298 if (seam != nullptr) {
299 return seam;
300 }
301 }
302 }
303
304 *blob_number = UINT_MAX;
305 return nullptr;
306}
307
320SEAM *Wordrec::improve_one_blob(const std::vector<BLOB_CHOICE *> &blob_choices, DANGERR *fixpt,
321 bool split_next_to_fragment, bool italic_blob, WERD_RES *word,
322 unsigned *blob_number) {
323 float rating_ceiling = FLT_MAX;
324 SEAM *seam = nullptr;
325 do {
326 auto blob = select_blob_to_split_from_fixpt(fixpt);
327 if (chop_debug) {
328 tprintf("blob_number from fixpt = %d\n", blob);
329 }
330 bool split_point_from_dict = (blob != -1);
331 if (split_point_from_dict) {
332 fixpt->clear();
333 } else {
334 blob = select_blob_to_split(blob_choices, rating_ceiling, split_next_to_fragment);
335 }
336 if (chop_debug) {
337 tprintf("blob_number = %d\n", blob);
338 }
339 *blob_number = blob;
340 if (blob == -1) {
341 return nullptr;
342 }
343
344 // TODO(rays) it may eventually help to allow italic_blob to be true,
345 seam = chop_numbered_blob(word->chopped_word, *blob_number, italic_blob, word->seam_array);
346 if (seam != nullptr) {
347 return seam; // Success!
348 }
349 if (blob_choices[*blob_number] == nullptr) {
350 return nullptr;
351 }
352 if (!split_point_from_dict) {
353 // We chopped the worst rated blob, try something else next time.
354 rating_ceiling = blob_choices[*blob_number]->rating();
355 }
356 } while (true);
357 return seam;
358}
359
367SEAM *Wordrec::chop_one_blob(const std::vector<TBOX> &boxes,
368 const std::vector<BLOB_CHOICE *> &blob_choices, WERD_RES *word_res,
369 unsigned *blob_number) {
370 if (prioritize_division) {
371 return chop_overlapping_blob(boxes, true, word_res, blob_number);
372 } else {
373 return improve_one_blob(blob_choices, nullptr, false, true, word_res, blob_number);
374 }
375}
376
386 int num_blobs = word->chopped_word->NumBlobs();
387 if (word->ratings == nullptr) {
388 word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
389 }
390 if (word->ratings->get(0, 0) == nullptr) {
391 // Run initial classification.
392 for (int b = 0; b < num_blobs; ++b) {
393 BLOB_CHOICE_LIST *choices = classify_piece(
394 word->seam_array, b, b, "Initial:", word->chopped_word, word->blamer_bundle);
395 word->ratings->put(b, b, choices);
396 }
397 } else {
398 // Blobs have been pre-classified. Set matrix cell for all blob choices
399 for (int col = 0; col < word->ratings->dimension(); ++col) {
400 for (int row = col;
401 row < word->ratings->dimension() && row < col + word->ratings->bandwidth(); ++row) {
402 BLOB_CHOICE_LIST *choices = word->ratings->get(col, row);
403 if (choices != nullptr) {
404 BLOB_CHOICE_IT bc_it(choices);
405 for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
406 bc_it.data()->set_matrix_cell(col, row);
407 }
408 }
409 }
410 }
411 }
412
413 // Run Segmentation Search.
414 BestChoiceBundle best_choice_bundle(word->ratings->dimension());
415 SegSearch(word, &best_choice_bundle, word->blamer_bundle);
416
417 if (word->best_choice == nullptr) {
418 // SegSearch found no valid paths, so just use the leading diagonal.
420 }
421 word->RebuildBestState();
422 // If we finished without a hyphen at the end of the word, let the next word
423 // be found in the dictionary.
424 if (word->word->flag(W_EOL) && !getDict().has_hyphen_end(*word->best_choice)) {
426 }
427
428 if (word->blamer_bundle != nullptr && this->fill_lattice_ != nullptr) {
429 CallFillLattice(*word->ratings, word->best_choices, *word->uch_set, word->blamer_bundle);
430 }
431 if (wordrec_debug_level > 0) {
432 tprintf("Final Ratings Matrix:\n");
433 word->ratings->print(getDict().getUnicharset());
434 }
435 word->FilterWordChoices(getDict().stopper_debug_level);
436}
437
445void Wordrec::improve_by_chopping(float rating_cert_scale, WERD_RES *word,
446 BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle,
447 LMPainPoints *pain_points,
448 std::vector<SegSearchPending> *pending) {
449 unsigned blob_number;
450 do { // improvement loop.
451 // Make a simple vector of BLOB_CHOICEs to make it easy to pick which
452 // one to chop.
453 std::vector<BLOB_CHOICE *> blob_choices;
454 int num_blobs = word->ratings->dimension();
455 for (int i = 0; i < num_blobs; ++i) {
456 BLOB_CHOICE_LIST *choices = word->ratings->get(i, i);
457 if (choices == nullptr || choices->empty()) {
458 blob_choices.push_back(nullptr);
459 } else {
460 BLOB_CHOICE_IT bc_it(choices);
461 blob_choices.push_back(bc_it.data());
462 }
463 }
464 SEAM *seam = improve_one_blob(blob_choices, &best_choice_bundle->fixpt, false, false, word,
465 &blob_number);
466 if (seam == nullptr) {
467 break;
468 }
469 // A chop has been made. We have to correct all the data structures to
470 // take into account the extra bottom-level blob.
471 // Put the seam into the seam_array and correct everything else on the
472 // word: ratings matrix (including matrix location in the BLOB_CHOICES),
473 // states in WERD_CHOICEs, and blob widths.
474 word->InsertSeam(blob_number, seam);
475 // Insert a new entry in the beam array.
476 best_choice_bundle->beam.insert(best_choice_bundle->beam.begin() + blob_number, new LanguageModelState);
477 // Fixpts are outdated, but will get recalculated.
478 best_choice_bundle->fixpt.clear();
479 // Remap existing pain points.
480 pain_points->RemapForSplit(blob_number);
481 // Insert a new pending at the chop point.
482 pending->insert(pending->begin() + blob_number, SegSearchPending());
483
484 // Classify the two newly created blobs using ProcessSegSearchPainPoint,
485 // as that updates the pending correctly and adds new pain points.
486 MATRIX_COORD pain_point(blob_number, blob_number);
487 ProcessSegSearchPainPoint(0.0f, pain_point, "Chop1", pending, word, pain_points, blamer_bundle);
488 pain_point.col = blob_number + 1;
489 pain_point.row = blob_number + 1;
490 ProcessSegSearchPainPoint(0.0f, pain_point, "Chop2", pending, word, pain_points, blamer_bundle);
491 if (language_model_->language_model_ngram_on) {
492 // N-gram evaluation depends on the number of blobs in a chunk, so we
493 // have to re-evaluate everything in the word.
494 ResetNGramSearch(word, best_choice_bundle, *pending);
495 blob_number = 0;
496 }
497 // Run language model incrementally. (Except with the n-gram model on.)
498 UpdateSegSearchNodes(rating_cert_scale, blob_number, pending, word, pain_points,
499 best_choice_bundle, blamer_bundle);
500 } while (!language_model_->AcceptableChoiceFound() && word->ratings->dimension() < kMaxNumChunks);
501
502 // If after running only the chopper best_choice is incorrect and no blame
503 // has been yet set, blame the classifier if best_choice is classifier's
504 // top choice and is a dictionary word (i.e. language model could not have
505 // helped). Otherwise blame the tradeoff between the classifier and
506 // the old language model (permuters).
507 if (word->blamer_bundle != nullptr &&
510 bool valid_permuter = word->best_choice != nullptr &&
512 word->blamer_bundle->BlameClassifierOrLangModel(word, getDict().getUnicharset(), valid_permuter,
513 wordrec_debug_blamer);
514 }
515}
516
517/**********************************************************************
518 * select_blob_to_split
519 *
520 * These are the results of the last classification. Find a likely
521 * place to apply splits. If none, return -1.
522 **********************************************************************/
523int Wordrec::select_blob_to_split(const std::vector<BLOB_CHOICE *> &blob_choices,
524 float rating_ceiling, bool split_next_to_fragment) {
525 BLOB_CHOICE *blob_choice;
526 float worst = -FLT_MAX;
527 int worst_index = -1;
528 float worst_near_fragment = -FLT_MAX;
529 int worst_index_near_fragment = -1;
530 std::vector<const CHAR_FRAGMENT *> fragments;
531
532 if (chop_debug) {
533 if (rating_ceiling < FLT_MAX) {
534 tprintf("rating_ceiling = %8.4f\n", rating_ceiling);
535 } else {
536 tprintf("rating_ceiling = No Limit\n");
537 }
538 }
539
540 if (split_next_to_fragment && blob_choices.size() > 0) {
541 fragments.resize(blob_choices.size());
542 if (blob_choices[0] != nullptr) {
543 fragments[0] = getDict().getUnicharset().get_fragment(blob_choices[0]->unichar_id());
544 } else {
545 fragments[0] = nullptr;
546 }
547 }
548
549 for (unsigned x = 0; x < blob_choices.size(); ++x) {
550 if (blob_choices[x] == nullptr) {
551 return x;
552 } else {
553 blob_choice = blob_choices[x];
554 // Populate fragments for the following position.
555 if (split_next_to_fragment && x + 1 < blob_choices.size()) {
556 if (blob_choices[x + 1] != nullptr) {
557 fragments[x + 1] =
558 getDict().getUnicharset().get_fragment(blob_choices[x + 1]->unichar_id());
559 } else {
560 fragments[x + 1] = nullptr;
561 }
562 }
563 if (blob_choice->rating() < rating_ceiling &&
564 blob_choice->certainty() < tessedit_certainty_threshold) {
565 // Update worst and worst_index.
566 if (blob_choice->rating() > worst) {
567 worst_index = x;
568 worst = blob_choice->rating();
569 }
570 if (split_next_to_fragment) {
571 // Update worst_near_fragment and worst_index_near_fragment.
572 bool expand_following_fragment =
573 (x + 1 < blob_choices.size() && fragments[x + 1] != nullptr &&
574 !fragments[x + 1]->is_beginning());
575 bool expand_preceding_fragment =
576 (x > 0 && fragments[x - 1] != nullptr && !fragments[x - 1]->is_ending());
577 if ((expand_following_fragment || expand_preceding_fragment) &&
578 blob_choice->rating() > worst_near_fragment) {
579 worst_index_near_fragment = x;
580 worst_near_fragment = blob_choice->rating();
581 if (chop_debug) {
582 tprintf(
583 "worst_index_near_fragment=%d"
584 " expand_following_fragment=%d"
585 " expand_preceding_fragment=%d\n",
586 worst_index_near_fragment, expand_following_fragment, expand_preceding_fragment);
587 }
588 }
589 }
590 }
591 }
592 }
593 // TODO(daria): maybe a threshold of badness for
594 // worst_near_fragment would be useful.
595 return worst_index_near_fragment != -1 ? worst_index_near_fragment : worst_index;
596}
597
598/**********************************************************************
599 * select_blob_to_split_from_fixpt
600 *
601 * Given the fix point from a dictionary search, if there is a single
602 * dangerous blob that maps to multiple characters, return that blob
603 * index as a place we need to split. If none, return -1.
604 **********************************************************************/
606 if (!fixpt) {
607 return -1;
608 }
609 for (auto &i : *fixpt) {
610 if (i.begin + 1 == i.end && i.dangerous && i.correct_is_ngram) {
611 return i.begin;
612 }
613 }
614 return -1;
615}
616
617} // namespace tesseract
@ TBOX
@ W_EOL
end of line
Definition: werd.h:35
void remove_edgept(EDGEPT *point)
Definition: split.cpp:199
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
@ IRR_CORRECT
Definition: blamer.h:58
std::vector< DANGERR_INFO > DANGERR
Definition: stopper.h:47
@ TOP_CHOICE_PERM
Definition: ratngs.h:238
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT *location)
Definition: blobs.cpp:923
void display_blob(TBLOB *blob, ScrollView::Color color)
Definition: render.cpp:54
T get(ICOORD pos) const
Definition: matrix.h:268
void put(ICOORD pos, const T &thing)
Definition: matrix.h:260
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:116
void BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset, bool valid_permuter, bool debug)
Definition: blamer.cpp:363
IncorrectResultReason incorrect_result_reason() const
Definition: blamer.h:131
TDimension x
Definition: blobs.h:89
TDimension y
Definition: blobs.h:90
EDGEPT * next
Definition: blobs.h:200
TBOX bounding_box() const
Definition: blobs.cpp:466
static TBLOB * ShallowCopy(const TBLOB &src)
Definition: blobs.cpp:342
TESSLINE * outlines
Definition: blobs.h:404
std::vector< TBLOB * > blobs
Definition: blobs.h:462
unsigned NumBlobs() const
Definition: blobs.h:449
int dimension() const
Definition: matrix.h:612
void print(const UNICHARSET &unicharset) const
Definition: matrix.cpp:115
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const
Definition: normalis.cpp:401
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:930
WERD_CHOICE * best_choice
Definition: pageres.h:239
void FilterWordChoices(int debug_level)
Definition: pageres.cpp:518
TWERD * chopped_word
Definition: pageres.h:210
void InsertSeam(int blob_number, SEAM *seam)
Definition: pageres.cpp:419
BlamerBundle * blamer_bundle
Definition: pageres.h:250
void RebuildBestState()
Definition: pageres.cpp:837
const UNICHARSET * uch_set
Definition: pageres.h:201
WERD_CHOICE_LIST best_choices
Definition: pageres.h:247
MATRIX * ratings
Definition: pageres.h:235
std::vector< SEAM * > seam_array
Definition: pageres.h:212
float certainty() const
Definition: ratngs.h:87
float rating() const
Definition: ratngs.h:84
uint8_t permuter() const
Definition: ratngs.h:331
TDimension left() const
Definition: rect.h:82
bool almost_equal(const TBOX &box, int tolerance) const
Definition: rect.cpp:272
TDimension top() const
Definition: rect.h:68
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
double overlap_fraction(const TBOX &box) const
Definition: rect.h:396
void ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:106
void UndoSeam(TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:122
void Finalize()
Definition: seam.h:126
void Print(const char *label) const
Definition: seam.cpp:144
bool flag(WERD_FLAGS mask) const
Definition: werd.h:128
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:768
virtual Dict & getDict()
Definition: classify.h:98
static bool valid_word_permuter(uint8_t perm, bool numbers_ok)
Check all the DAWGs to see if this word is in any of them.
Definition: dict.h:437
void reset_hyphen_vars(bool last_word_on_line)
Definition: hyphen.cpp:27
const UNICHARSET & getUnicharset() const
Definition: dict.h:104
void RemapForSplit(int index)
Struct to store information maintained by various language model components.
Definition: lm_state.h:204
Bundle together all the things pertaining to the best choice/state.
Definition: lm_state.h:226
std::vector< LanguageModelState * > beam
Definition: lm_state.h:246
DANGERR fixpt
Places to try to fix the word suggested by ambiguity checking.
Definition: lm_state.h:242
void improve_by_chopping(float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, std::vector< SegSearchPending > *pending)
Definition: chopper.cpp:445
SEAM * improve_one_blob(const std::vector< BLOB_CHOICE * > &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, unsigned *blob_number)
Definition: chopper.cpp:320
SEAM * chop_one_blob(const std::vector< TBOX > &boxes, const std::vector< BLOB_CHOICE * > &blob_choices, WERD_RES *word_res, unsigned *blob_number)
Definition: chopper.cpp:367
void CallFillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:240
void UpdateSegSearchNodes(float rating_cert_scale, int starting_col, std::vector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:162
int select_blob_to_split(const std::vector< BLOB_CHOICE * > &blob_choices, float rating_ceiling, bool split_next_to_fragment)
Definition: chopper.cpp:523
SEAM * chop_numbered_blob(TWERD *word, int32_t blob_number, bool italic_blob, const std::vector< SEAM * > &seams)
Definition: chopper.cpp:261
SEAM * chop_overlapping_blob(const std::vector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, unsigned *blob_number)
Definition: chopper.cpp:266
void chop_word_main(WERD_RES *word)
Definition: chopper.cpp:385
void ProcessSegSearchPainPoint(float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, std::vector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:222
int select_blob_to_split_from_fixpt(DANGERR *fixpt)
Definition: chopper.cpp:605
SEAM * attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, const std::vector< SEAM * > &seams)
Definition: chopper.cpp:207
virtual BLOB_CHOICE_LIST * classify_piece(const std::vector< SEAM * > &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
Definition: pieces.cpp:49
void ResetNGramSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, std::vector< SegSearchPending > &pending)
Definition: segsearch.cpp:279
SEAM * pick_good_seam(TBLOB *blob)
Definition: findseam.cpp:214
void SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:33
std::unique_ptr< LanguageModel > language_model_
Definition: wordrec.h:382