tesseract v5.3.3.20231005
blamer.cpp
Go to the documentation of this file.
1
2// File: blamer.cpp
3// Description: Module allowing precise error causes to be allocated.
4// Author: Rike Antonova
5// Refactored: Ray Smith
6//
7// (C) Copyright 2013, Google Inc.
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11// http://www.apache.org/licenses/LICENSE-2.0
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17//
19
20#include "blamer.h"
21
22#include "blobs.h" // for TPOINT, TWERD, TBLOB
23#include "errcode.h" // for ASSERT_HOST
24#if !defined(DISABLED_LEGACY_ENGINE)
25# include "lm_pain_points.h" // for LMPainPoints
26#endif
27#include "matrix.h" // for MATRIX
28#include "normalis.h" // for DENORM
29#include "pageres.h" // for WERD_RES
30#include "unicharset.h" // for UNICHARSET
31
32#include <cmath> // for abs
33#include <cstdlib> // for abs
34
35namespace tesseract {
36
37// Names for each value of IncorrectResultReason enum. Keep in sync.
// BlamerBundle::IncorrectReason() returns an entry of
// kIncorrectResultReasonNames indexed directly by the enum value, so the
// number and order of names must follow the enum.
// NOTE(review): presumably these constants are the entries of that table,
// listed here in enum order -- the table's initializer is not visible in
// this listing, so confirm before reordering or adding a name.
38const char kBlameCorrect[] = "corr";
39const char kBlameClassifier[] = "cl";
40const char kBlameChopper[] = "chop";
41const char kBlameClassLMTradeoff[] = "cl/LM";
42const char kBlamePageLayout[] = "pglt";
43const char kBlameSegsearchHeur[] = "ss_heur";
44const char kBlameSegsearchPP[] = "ss_pp";
45const char kBlameClassOldLMTradeoff[] = "cl/old_LM";
46const char kBlameAdaption[] = "adapt";
47const char kBlameNoTruthSplit[] = "no_tr_spl";
48const char kBlameNoTruth[] = "no_tr";
49const char kBlameUnknown[] = "unkn";
50
51const char *const kIncorrectResultReasonNames[] = {
55
58}
59
60const char *BlamerBundle::IncorrectReason() const {
61 return kIncorrectResultReasonNames[incorrect_result_reason_];
62}
63
64// Functions to setup the blamer.
65// Whole word string, whole word bounding box.
66void BlamerBundle::SetWordTruth(const UNICHARSET &unicharset, const char *truth_str,
67 const TBOX &word_box) {
68 truth_word_.InsertBox(0, word_box);
69 truth_has_char_boxes_ = false;
70 // Encode the string as UNICHAR_IDs.
71 std::vector<UNICHAR_ID> encoding;
72 std::vector<char> lengths;
73 unicharset.encode_string(truth_str, false, &encoding, &lengths, nullptr);
74 int total_length = 0;
75 for (size_t i = 0; i < encoding.size(); total_length += lengths[i++]) {
76 std::string uch(truth_str + total_length);
77 uch.resize(lengths[i] - total_length);
78 UNICHAR_ID id = encoding[i];
79 if (id != INVALID_UNICHAR_ID) {
80 uch = unicharset.get_normed_unichar(id);
81 }
82 truth_text_.push_back(uch);
83 }
84}
85
86// Single "character" string, "character" bounding box.
87// May be called multiple times to indicate the characters in a word.
88void BlamerBundle::SetSymbolTruth(const UNICHARSET &unicharset, const char *char_str,
89 const TBOX &char_box) {
90 std::string symbol_str(char_str);
91 UNICHAR_ID id = unicharset.unichar_to_id(char_str);
92 if (id != INVALID_UNICHAR_ID) {
93 std::string normed_uch(unicharset.get_normed_unichar(id));
94 if (normed_uch.length() > 0) {
95 symbol_str = normed_uch;
96 }
97 }
98 int length = truth_word_.length();
99 truth_text_.push_back(symbol_str);
100 truth_word_.InsertBox(length, char_box);
101 if (length == 0) {
102 truth_has_char_boxes_ = true;
103 } else if (truth_word_.BlobBox(length - 1) == char_box) {
104 truth_has_char_boxes_ = false;
105 }
106}
107
108// Marks that there is something wrong with the truth text, like it contains
109// reject characters.
111 incorrect_result_reason_ = IRR_NO_TRUTH;
112 truth_has_char_boxes_ = false;
113}
114
115// Returns true if the provided word_choice is correct.
116bool BlamerBundle::ChoiceIsCorrect(const WERD_CHOICE *word_choice) const {
117 if (word_choice == nullptr) {
118 return false;
119 }
120 const UNICHARSET *uni_set = word_choice->unicharset();
121 std::string normed_choice_str;
122 for (unsigned i = 0; i < word_choice->length(); ++i) {
123 normed_choice_str += uni_set->get_normed_unichar(word_choice->unichar_id(i));
124 }
125 std::string truth_str = TruthString();
126 return truth_str == normed_choice_str;
127}
128
129void BlamerBundle::FillDebugString(const std::string &msg, const WERD_CHOICE *choice, std::string &debug) {
130 debug += "Truth ";
131 for (auto &text : this->truth_text_) {
132 debug += text;
133 }
134 if (!this->truth_has_char_boxes_) {
135 debug += " (no char boxes)";
136 }
137 if (choice != nullptr) {
138 debug += " Choice ";
139 std::string choice_str;
140 choice->string_and_lengths(&choice_str, nullptr);
141 debug += choice_str;
142 }
143 if (msg.length() > 0) {
144 debug += "\n";
145 debug += msg;
146 }
147 debug += "\n";
148}
149
150// Sets up the norm_truth_word from truth_word using the given DENORM.
152 // TODO(rays) Is this the last use of denorm in WERD_RES and can it go?
153 norm_box_tolerance_ = kBlamerBoxTolerance * denorm.x_scale();
154 TPOINT topleft;
155 TPOINT botright;
156 TPOINT norm_topleft;
157 TPOINT norm_botright;
158 for (unsigned b = 0; b < truth_word_.length(); ++b) {
159 const TBOX &box = truth_word_.BlobBox(b);
160 topleft.x = box.left();
161 topleft.y = box.top();
162 botright.x = box.right();
163 botright.y = box.bottom();
164 denorm.NormTransform(nullptr, topleft, &norm_topleft);
165 denorm.NormTransform(nullptr, botright, &norm_botright);
166 TBOX norm_box(norm_topleft.x, norm_botright.y, norm_botright.x, norm_topleft.y);
167 norm_truth_word_.InsertBox(b, norm_box);
168 }
169}
170
171// Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty
172// bundles) where the right edge/ of the left-hand word is word1_right,
173// and the left edge of the right-hand word is word2_left.
174void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug, BlamerBundle *bundle1,
175 BlamerBundle *bundle2) const {
176 std::string debug_str;
177 // Find truth boxes that correspond to the split in the blobs.
178 unsigned begin2_truth_index = 0;
179 if (incorrect_result_reason_ != IRR_NO_TRUTH && truth_has_char_boxes_) {
180 debug_str = "Looking for truth split at";
181 debug_str += " end1_x " + std::to_string(word1_right);
182 debug_str += " begin2_x " + std::to_string(word2_left);
183 debug_str += "\nnorm_truth_word boxes:\n";
184 if (norm_truth_word_.length() > 1) {
185 norm_truth_word_.BlobBox(0).print_to_str(debug_str);
186 for (unsigned b = 1; b < norm_truth_word_.length(); ++b) {
187 norm_truth_word_.BlobBox(b).print_to_str(debug_str);
188 if ((abs(word1_right - norm_truth_word_.BlobBox(b - 1).right()) < norm_box_tolerance_) &&
189 (abs(word2_left - norm_truth_word_.BlobBox(b).left()) < norm_box_tolerance_)) {
190 begin2_truth_index = b;
191 debug_str += "Split found";
192 break;
193 }
194 }
195 debug_str += '\n';
196 }
197 }
198 // Populate truth information in word and word2 with the first and second
199 // part of the original truth.
200 if (begin2_truth_index > 0) {
201 bundle1->truth_has_char_boxes_ = true;
202 bundle1->norm_box_tolerance_ = norm_box_tolerance_;
203 bundle2->truth_has_char_boxes_ = true;
204 bundle2->norm_box_tolerance_ = norm_box_tolerance_;
205 BlamerBundle *curr_bb = bundle1;
206 for (unsigned b = 0; b < norm_truth_word_.length(); ++b) {
207 if (b == begin2_truth_index) {
208 curr_bb = bundle2;
209 }
210 curr_bb->norm_truth_word_.InsertBox(b, norm_truth_word_.BlobBox(b));
211 curr_bb->truth_word_.InsertBox(b, truth_word_.BlobBox(b));
212 curr_bb->truth_text_.push_back(truth_text_[b]);
213 }
214 } else if (incorrect_result_reason_ == IRR_NO_TRUTH) {
215 bundle1->incorrect_result_reason_ = IRR_NO_TRUTH;
216 bundle2->incorrect_result_reason_ = IRR_NO_TRUTH;
217 } else {
218 debug_str += "Truth split not found";
219 debug_str += truth_has_char_boxes_ ? "\n" : " (no truth char boxes)\n";
220 bundle1->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug);
221 bundle2->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug);
222 }
223}
224
225// "Joins" the blames from bundle1 and bundle2 into *this.
226void BlamerBundle::JoinBlames(const BlamerBundle &bundle1, const BlamerBundle &bundle2,
227 bool debug) {
228 std::string debug_str;
229 IncorrectResultReason irr = incorrect_result_reason_;
230 if (irr != IRR_NO_TRUTH_SPLIT) {
231 debug_str = "";
232 }
233 if (bundle1.incorrect_result_reason_ != IRR_CORRECT &&
234 bundle1.incorrect_result_reason_ != IRR_NO_TRUTH &&
235 bundle1.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
236 debug_str += "Blame from part 1: ";
237 debug_str += bundle1.debug_;
238 irr = bundle1.incorrect_result_reason_;
239 }
240 if (bundle2.incorrect_result_reason_ != IRR_CORRECT &&
241 bundle2.incorrect_result_reason_ != IRR_NO_TRUTH &&
242 bundle2.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
243 debug_str += "Blame from part 2: ";
244 debug_str += bundle2.debug_;
245 if (irr == IRR_CORRECT) {
246 irr = bundle2.incorrect_result_reason_;
247 } else if (irr != bundle2.incorrect_result_reason_) {
248 irr = IRR_UNKNOWN;
249 }
250 }
251 incorrect_result_reason_ = irr;
252 if (irr != IRR_CORRECT && irr != IRR_NO_TRUTH) {
253 SetBlame(irr, debug_str, nullptr, debug);
254 }
255}
256
257// If a blob with the same bounding box as one of the truth character
258// bounding boxes is not classified as the corresponding truth character
259// blames character classifier for incorrect answer.
260void BlamerBundle::BlameClassifier(const UNICHARSET &unicharset, const TBOX &blob_box,
261 const BLOB_CHOICE_LIST &choices, bool debug) {
262 if (!truth_has_char_boxes_ || incorrect_result_reason_ != IRR_CORRECT) {
263 return; // Nothing to do here.
264 }
265
266 for (unsigned b = 0; b < norm_truth_word_.length(); ++b) {
267 const TBOX &truth_box = norm_truth_word_.BlobBox(b);
268 // Note that we are more strict on the bounding box boundaries here
269 // than in other places (chopper, segmentation search), since we do
270 // not have the ability to check the previous and next bounding box.
271 if (blob_box.x_almost_equal(truth_box, norm_box_tolerance_ / 2)) {
272 bool found = false;
273 bool incorrect_adapted = false;
274 UNICHAR_ID incorrect_adapted_id = INVALID_UNICHAR_ID;
275 const char *truth_str = truth_text_[b].c_str();
276 // We promise not to modify the list or its contents, using a
277 // const BLOB_CHOICE* below.
278 BLOB_CHOICE_IT choices_it(const_cast<BLOB_CHOICE_LIST *>(&choices));
279 for (choices_it.mark_cycle_pt(); !choices_it.cycled_list(); choices_it.forward()) {
280 const BLOB_CHOICE *choice = choices_it.data();
281 if (strcmp(truth_str, unicharset.get_normed_unichar(choice->unichar_id())) == 0) {
282 found = true;
283 break;
284 } else if (choice->IsAdapted()) {
285 incorrect_adapted = true;
286 incorrect_adapted_id = choice->unichar_id();
287 }
288 } // end choices_it for loop
289 if (!found) {
290 std::string debug_str = "unichar ";
291 debug_str += truth_str;
292 debug_str += " not found in classification list";
293 SetBlame(IRR_CLASSIFIER, debug_str, nullptr, debug);
294 } else if (incorrect_adapted) {
295 std::string debug_str = "better rating for adapted ";
296 debug_str += unicharset.id_to_unichar(incorrect_adapted_id);
297 debug_str += " than for correct ";
298 debug_str += truth_str;
299 SetBlame(IRR_ADAPTION, debug_str, nullptr, debug);
300 }
301 break;
302 }
303 } // end iterating over blamer_bundle->norm_truth_word
304}
305
306// Checks whether chops were made at all the character bounding box
307// boundaries in word->truth_word. If not - blames the chopper for an
308// incorrect answer.
309void BlamerBundle::SetChopperBlame(const WERD_RES *word, bool debug) {
310 if (NoTruth() || !truth_has_char_boxes_ || word->chopped_word->blobs.empty()) {
311 return;
312 }
313 bool missing_chop = false;
314 int num_blobs = word->chopped_word->blobs.size();
315 unsigned box_index = 0;
316 int blob_index = 0;
317 int16_t truth_x = -1;
318 while (box_index < truth_word_.length() && blob_index < num_blobs) {
319 truth_x = norm_truth_word_.BlobBox(box_index).right();
320 TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
321 if (curr_blob->bounding_box().right() < truth_x - norm_box_tolerance_) {
322 ++blob_index;
323 continue; // encountered an extra chop, keep looking
324 } else if (curr_blob->bounding_box().right() > truth_x + norm_box_tolerance_) {
325 missing_chop = true;
326 break;
327 } else {
328 ++blob_index;
329 }
330 }
331 if (missing_chop || box_index < norm_truth_word_.length()) {
332 std::string debug_str;
333 if (missing_chop) {
334 debug_str += "Detected missing chop (tolerance=" + std::to_string(norm_box_tolerance_);
335 debug_str += ") at Bounding Box=";
336 TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
337 curr_blob->bounding_box().print_to_str(debug_str);
338 debug_str += "\nNo chop for truth at x=" + std::to_string(truth_x);
339 } else {
340 debug_str += "Missing chops for last " + std::to_string(norm_truth_word_.length() - box_index);
341 debug_str += " truth box(es)";
342 }
343 debug_str += "\nMaximally chopped word boxes:\n";
344 for (blob_index = 0; blob_index < num_blobs; ++blob_index) {
345 TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
346 curr_blob->bounding_box().print_to_str(debug_str);
347 debug_str += '\n';
348 }
349 debug_str += "Truth bounding boxes:\n";
350 for (box_index = 0; box_index < norm_truth_word_.length(); ++box_index) {
351 norm_truth_word_.BlobBox(box_index).print_to_str(debug_str);
352 debug_str += '\n';
353 }
354 SetBlame(IRR_CHOPPER, debug_str, word->best_choice, debug);
355 }
356}
357
358// Blames the classifier or the language model if, after running only the
359// chopper, best_choice is incorrect and no blame has been yet set.
360// Blames the classifier if best_choice is classifier's top choice and is a
361// dictionary word (i.e. language model could not have helped).
362// Otherwise, blames the language model (formerly permuter word adjustment).
364 bool valid_permuter, bool debug) {
365 if (valid_permuter) {
366 // Find out whether best choice is a top choice.
367 best_choice_is_dict_and_top_choice_ = true;
368 for (unsigned i = 0; i < word->best_choice->length(); ++i) {
369 BLOB_CHOICE_IT blob_choice_it(word->GetBlobChoices(i));
370 ASSERT_HOST(!blob_choice_it.empty());
371 BLOB_CHOICE *first_choice = nullptr;
372 for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list();
373 blob_choice_it.forward()) { // find first non-fragment choice
374 if (!(unicharset.get_fragment(blob_choice_it.data()->unichar_id()))) {
375 first_choice = blob_choice_it.data();
376 break;
377 }
378 }
379 ASSERT_HOST(first_choice != nullptr);
380 if (first_choice->unichar_id() != word->best_choice->unichar_id(i)) {
381 best_choice_is_dict_and_top_choice_ = false;
382 break;
383 }
384 }
385 }
386 std::string debug_str;
387 if (best_choice_is_dict_and_top_choice_) {
388 debug_str = "Best choice is: incorrect, top choice, dictionary word";
389 debug_str += " with permuter ";
390 debug_str += word->best_choice->permuter_name();
391 } else {
392 debug_str = "Classifier/Old LM tradeoff is to blame";
393 }
394 SetBlame(best_choice_is_dict_and_top_choice_ ? IRR_CLASSIFIER : IRR_CLASS_OLD_LM_TRADEOFF,
395 debug_str, word->best_choice, debug);
396}
397
398// Sets up the correct_segmentation_* to mark the correct bounding boxes.
399void BlamerBundle::SetupCorrectSegmentation(const TWERD *word, bool debug) {
400#ifndef DISABLED_LEGACY_ENGINE
401 params_training_bundle_.StartHypothesisList();
402#endif // ndef DISABLED_LEGACY_ENGINE
403 if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_) {
404 return; // Nothing to do here.
405 }
406
407 std::string debug_str = "Blamer computing correct_segmentation_cols\n";
408 int curr_box_col = 0;
409 int next_box_col = 0;
410 int num_blobs = word->NumBlobs();
411 if (num_blobs == 0) {
412 return; // No blobs to play with.
413 }
414 int blob_index = 0;
415 int16_t next_box_x = word->blobs[blob_index]->bounding_box().right();
416 for (unsigned truth_idx = 0; blob_index < num_blobs && truth_idx < norm_truth_word_.length();
417 ++blob_index) {
418 ++next_box_col;
419 int16_t curr_box_x = next_box_x;
420 if (blob_index + 1 < num_blobs) {
421 next_box_x = word->blobs[blob_index + 1]->bounding_box().right();
422 }
423 int16_t truth_x = norm_truth_word_.BlobBox(truth_idx).right();
424 debug_str += "Box x coord vs. truth: " + std::to_string(curr_box_x);
425 debug_str += " " + std::to_string(truth_x);
426 debug_str += "\n";
427 if (curr_box_x > (truth_x + norm_box_tolerance_)) {
428 break; // failed to find a matching box
429 } else if (curr_box_x >= truth_x - norm_box_tolerance_ && // matched
430 (blob_index + 1 >= num_blobs || // next box can't be included
431 next_box_x > truth_x + norm_box_tolerance_)) {
432 correct_segmentation_cols_.push_back(curr_box_col);
433 correct_segmentation_rows_.push_back(next_box_col - 1);
434 ++truth_idx;
435 debug_str += "col=" + std::to_string(curr_box_col);
436 debug_str += " row=" + std::to_string(next_box_col - 1);
437 debug_str += "\n";
438 curr_box_col = next_box_col;
439 }
440 }
441 if (blob_index < num_blobs || // trailing blobs
442 correct_segmentation_cols_.size() != norm_truth_word_.length()) {
443 debug_str +=
444 "Blamer failed to find correct segmentation"
445 " (tolerance=" +
446 std::to_string(norm_box_tolerance_);
447 if (blob_index >= num_blobs) {
448 debug_str += " blob == nullptr";
449 }
450 debug_str += ")\n";
451 debug_str += " path length " + std::to_string(correct_segmentation_cols_.size());
452 debug_str += " vs. truth " + std::to_string(norm_truth_word_.length());
453 debug_str += "\n";
454 SetBlame(IRR_UNKNOWN, debug_str, nullptr, debug);
455 correct_segmentation_cols_.clear();
456 correct_segmentation_rows_.clear();
457 }
458}
459
460// Returns true if a guided segmentation search is needed.
461bool BlamerBundle::GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const {
462 return incorrect_result_reason_ == IRR_CORRECT && !segsearch_is_looking_for_blame_ &&
463 truth_has_char_boxes_ && !ChoiceIsCorrect(best_choice);
464}
465
466#if !defined(DISABLED_LEGACY_ENGINE)
467// Setup ready to guide the segmentation search to the correct segmentation.
468void BlamerBundle::InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings,
469 UNICHAR_ID wildcard_id, bool debug, std::string &debug_str,
470 tesseract::LMPainPoints *pain_points, double max_char_wh_ratio,
471 WERD_RES *word_res) {
472 segsearch_is_looking_for_blame_ = true;
473 if (debug) {
474 tprintf("segsearch starting to look for blame\n");
475 }
476 // Fill pain points for any unclassifed blob corresponding to the
477 // correct segmentation state.
478 debug_str += "Correct segmentation:\n";
479 for (unsigned idx = 0; idx < correct_segmentation_cols_.size(); ++idx) {
480 debug_str += "col=" + std::to_string(correct_segmentation_cols_[idx]);
481 debug_str += " row=" + std::to_string(correct_segmentation_rows_[idx]);
482 debug_str += "\n";
483 if (!ratings->Classified(correct_segmentation_cols_[idx], correct_segmentation_rows_[idx],
484 wildcard_id) &&
485 !pain_points->GeneratePainPoint(
486 correct_segmentation_cols_[idx], correct_segmentation_rows_[idx],
487 tesseract::LM_PPTYPE_BLAMER, 0.0, false, max_char_wh_ratio, word_res)) {
488 segsearch_is_looking_for_blame_ = false;
489 debug_str += "\nFailed to insert pain point\n";
490 SetBlame(IRR_SEGSEARCH_HEUR, debug_str, best_choice, debug);
491 break;
492 }
493 } // end for blamer_bundle->correct_segmentation_cols/rows
494}
495#endif // !defined(DISABLED_LEGACY_ENGINE)
496
497// Returns true if the guided segsearch is in progress.
499 return segsearch_is_looking_for_blame_;
500}
501
502// The segmentation search has ended. Sets the blame appropriately.
503void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, std::string &debug_str) {
504 // If we are still looking for blame (i.e. best_choice is incorrect, but a
505 // path representing the correct segmentation could be constructed), we can
506 // blame segmentation search pain point prioritization if the rating of the
507 // path corresponding to the correct segmentation is better than that of
508 // best_choice (i.e. language model would have done the correct thing, but
509 // because of poor pain point prioritization the correct segmentation was
510 // never explored). Otherwise we blame the tradeoff between the language model
511 // and the classifier, since even after exploring the path corresponding to
512 // the correct segmentation incorrect best_choice would have been chosen.
513 // One special case when we blame the classifier instead is when best choice
514 // is incorrect, but it is a dictionary word and it classifier's top choice.
515 if (segsearch_is_looking_for_blame_) {
516 segsearch_is_looking_for_blame_ = false;
517 if (best_choice_is_dict_and_top_choice_) {
518 debug_str = "Best choice is: incorrect, top choice, dictionary word";
519 debug_str += " with permuter ";
520 debug_str += best_choice->permuter_name();
521 SetBlame(IRR_CLASSIFIER, debug_str, best_choice, debug);
522 } else if (best_correctly_segmented_rating_ < best_choice->rating()) {
523 debug_str += "Correct segmentation state was not explored";
524 SetBlame(IRR_SEGSEARCH_PP, debug_str, best_choice, debug);
525 } else {
526 if (best_correctly_segmented_rating_ >= WERD_CHOICE::kBadRating) {
527 debug_str += "Correct segmentation paths were pruned by LM\n";
528 } else {
529 debug_str += "Best correct segmentation rating " +
530 std::to_string(best_correctly_segmented_rating_);
531 debug_str += " vs. best choice rating " + std::to_string(best_choice->rating());
532 }
533 SetBlame(IRR_CLASS_LM_TRADEOFF, debug_str, best_choice, debug);
534 }
535 }
536}
537
538// If the bundle is null or still does not indicate the correct result,
539// fix it and use some backup reason for the blame.
541 if (word->blamer_bundle == nullptr) {
542 word->blamer_bundle = new BlamerBundle();
543 word->blamer_bundle->SetBlame(IRR_PAGE_LAYOUT, "LastChanceBlame", word->best_choice, debug);
544 } else if (word->blamer_bundle->incorrect_result_reason_ == IRR_NO_TRUTH) {
545 word->blamer_bundle->SetBlame(IRR_NO_TRUTH, "Rejected truth", word->best_choice, debug);
546 } else {
547 bool correct = word->blamer_bundle->ChoiceIsCorrect(word->best_choice);
548 IncorrectResultReason irr = word->blamer_bundle->incorrect_result_reason_;
549 if (irr == IRR_CORRECT && !correct) {
550 std::string debug_str = "Choice is incorrect after recognition";
551 word->blamer_bundle->SetBlame(IRR_UNKNOWN, debug_str, word->best_choice, debug);
552 } else if (irr != IRR_CORRECT && correct) {
553 if (debug) {
554 tprintf("Corrected %s\n", word->blamer_bundle->debug_.c_str());
555 }
556 word->blamer_bundle->incorrect_result_reason_ = IRR_CORRECT;
557 word->blamer_bundle->debug_ = "";
558 }
559 }
560}
561
562// Sets the misadaption debug if this word is incorrect, as this word is
563// being adapted to.
564void BlamerBundle::SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug) {
565 if (incorrect_result_reason_ != IRR_NO_TRUTH && !ChoiceIsCorrect(best_choice)) {
566 misadaption_debug_ = "misadapt to word (";
567 misadaption_debug_ += best_choice->permuter_name();
568 misadaption_debug_ += "): ";
569 FillDebugString("", best_choice, misadaption_debug_);
570 if (debug) {
571 tprintf("%s\n", misadaption_debug_.c_str());
572 }
573 }
574}
575
576} // namespace tesseract
#define ASSERT_HOST(x)
Definition: errcode.h:54
const char kBlameNoTruthSplit[]
Definition: blamer.cpp:47
const char kBlameSegsearchHeur[]
Definition: blamer.cpp:43
const char kBlameChopper[]
Definition: blamer.cpp:40
const char kBlameUnknown[]
Definition: blamer.cpp:49
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
const char kBlamePageLayout[]
Definition: blamer.cpp:42
const char kBlameSegsearchPP[]
Definition: blamer.cpp:44
const char kBlameClassOldLMTradeoff[]
Definition: blamer.cpp:45
const char kBlameNoTruth[]
Definition: blamer.cpp:48
const char kBlameClassifier[]
Definition: blamer.cpp:39
IncorrectResultReason
Definition: blamer.h:56
@ IRR_CLASS_OLD_LM_TRADEOFF
Definition: blamer.h:90
@ IRR_SEGSEARCH_HEUR
Definition: blamer.h:80
@ IRR_CORRECT
Definition: blamer.h:58
@ IRR_SEGSEARCH_PP
Definition: blamer.h:86
@ IRR_CHOPPER
Definition: blamer.h:66
@ IRR_PAGE_LAYOUT
Definition: blamer.h:77
@ IRR_UNKNOWN
Definition: blamer.h:101
@ IRR_CLASS_LM_TRADEOFF
Definition: blamer.h:73
@ IRR_CLASSIFIER
Definition: blamer.h:63
@ IRR_NO_TRUTH
Definition: blamer.h:98
@ IRR_NO_TRUTH_SPLIT
Definition: blamer.h:95
@ IRR_ADAPTION
Definition: blamer.h:93
int UNICHAR_ID
Definition: unichar.h:34
const char *const kIncorrectResultReasonNames[]
Definition: blamer.cpp:51
const char kBlameAdaption[]
Definition: blamer.cpp:46
const char kBlameClassLMTradeoff[]
Definition: blamer.cpp:41
const char kBlameCorrect[]
Definition: blamer.cpp:38
bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const
Definition: blamer.cpp:461
bool GuidedSegsearchStillGoing() const
Definition: blamer.cpp:498
static const char * IncorrectReasonName(IncorrectResultReason irr)
Definition: blamer.cpp:56
std::string TruthString() const
Definition: blamer.h:124
void FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, std::string &debug_str)
Definition: blamer.cpp:503
static void LastChanceBlame(bool debug, WERD_RES *word)
Definition: blamer.cpp:540
void SplitBundle(int word1_right, int word2_left, bool debug, BlamerBundle *bundle1, BlamerBundle *bundle2) const
Definition: blamer.cpp:174
bool NoTruth() const
Definition: blamer.h:134
const std::string & debug() const
Definition: blamer.h:140
void JoinBlames(const BlamerBundle &bundle1, const BlamerBundle &bundle2, bool debug)
Definition: blamer.cpp:226
void SetWordTruth(const UNICHARSET &unicharset, const char *truth_str, const TBOX &word_box)
Definition: blamer.cpp:66
void SetChopperBlame(const WERD_RES *word, bool debug)
Definition: blamer.cpp:309
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:116
void InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id, bool debug, std::string &debug_str, tesseract::LMPainPoints *pain_points, double max_char_wh_ratio, WERD_RES *word_res)
Definition: blamer.cpp:468
void SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug)
Definition: blamer.cpp:564
void BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset, bool valid_permuter, bool debug)
Definition: blamer.cpp:363
const char * IncorrectReason() const
Definition: blamer.cpp:60
void SetSymbolTruth(const UNICHARSET &unicharset, const char *char_str, const TBOX &char_box)
Definition: blamer.cpp:88
void FillDebugString(const std::string &msg, const WERD_CHOICE *choice, std::string &debug)
Definition: blamer.cpp:129
void BlameClassifier(const UNICHARSET &unicharset, const TBOX &blob_box, const BLOB_CHOICE_LIST &choices, bool debug)
Definition: blamer.cpp:260
void SetupNormTruthWord(const DENORM &denorm)
Definition: blamer.cpp:151
void SetupCorrectSegmentation(const TWERD *word, bool debug)
Definition: blamer.cpp:399
TDimension x
Definition: blobs.h:89
TDimension y
Definition: blobs.h:90
TBOX bounding_box() const
Definition: blobs.cpp:466
std::vector< TBLOB * > blobs
Definition: blobs.h:462
unsigned NumBlobs() const
Definition: blobs.h:449
unsigned length() const
Definition: boxword.h:81
const TBOX & BlobBox(unsigned index) const
Definition: boxword.h:84
void InsertBox(unsigned index, const TBOX &box)
Definition: boxword.cpp:157
bool Classified(int col, int row, int wildcard_id) const
Definition: matrix.cpp:36
void NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const
Definition: normalis.cpp:340
float x_scale() const
Definition: normalis.h:259
WERD_CHOICE * best_choice
Definition: pageres.h:239
TWERD * chopped_word
Definition: pageres.h:210
BlamerBundle * blamer_bundle
Definition: pageres.h:250
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
Definition: pageres.cpp:779
UNICHAR_ID unichar_id() const
Definition: ratngs.h:81
bool IsAdapted() const
Definition: ratngs.h:136
static const char * permuter_name(uint8_t permuter)
Definition: ratngs.cpp:189
void string_and_lengths(std::string *word_str, std::string *word_lengths_str) const
Definition: ratngs.cpp:427
UNICHAR_ID unichar_id(unsigned index) const
Definition: ratngs.h:299
static const float kBadRating
Definition: ratngs.h:260
const UNICHARSET * unicharset() const
Definition: ratngs.h:281
unsigned length() const
Definition: ratngs.h:287
float rating() const
Definition: ratngs.h:312
TDimension left() const
Definition: rect.h:82
void print_to_str(std::string &str) const
Definition: rect.cpp:177
bool x_almost_equal(const TBOX &box, int tolerance) const
Definition: rect.cpp:268
TDimension top() const
Definition: rect.h:68
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
bool encode_string(const char *str, bool give_up_on_failure, std::vector< UNICHAR_ID > *encoding, std::vector< char > *lengths, unsigned *encoded_length) const
Definition: unicharset.cpp:239
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:768
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:279
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:859
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:186
bool GeneratePainPoint(int col, int row, LMPainPointsType pp_type, float special_priority, bool ok_to_extend, float max_char_wh_ratio, WERD_RES *word_res)