tesseract v5.3.3.20231005
stopper.cpp
Go to the documentation of this file.
1/******************************************************************************
2 ** Filename: stopper.c
3 ** Purpose: Stopping criteria for word classifier.
4 ** Author: Dan Johnson
5 **
6 ** (c) Copyright Hewlett-Packard Company, 1988.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 ******************************************************************************/
17
18#include <cctype>
19#include <cmath>
20#include <cstdio>
21#include <cstring>
22
23#include "stopper.h"
24#ifndef DISABLED_LEGACY_ENGINE
25# include "ambigs.h"
26#endif
27#include <tesseract/unichar.h>
28#include "ccutil.h"
29#include "dict.h"
30#include "helpers.h"
31#include "matchdefs.h"
32#include "pageres.h"
33#include "params.h"
34#include "ratngs.h"
35
36/*----------------------------------------------------------------------------
37 Private Code
38----------------------------------------------------------------------------*/
39
40namespace tesseract {
41
42bool Dict::AcceptableChoice(const WERD_CHOICE &best_choice,
43 XHeightConsistencyEnum xheight_consistency) {
44 float CertaintyThreshold = stopper_nondict_certainty_base;
45 int WordSize;
46
47 if (stopper_no_acceptable_choices) {
48 return false;
49 }
50
51 if (best_choice.empty()) {
52 return false;
53 }
54
55 bool no_dang_ambigs = !best_choice.dangerous_ambig_found();
56 bool is_valid_word = valid_word_permuter(best_choice.permuter(), false);
57 bool is_case_ok = case_ok(best_choice);
58
59 if (stopper_debug_level >= 1) {
60 const char *xht = "UNKNOWN";
61 switch (xheight_consistency) {
62 case XH_GOOD:
63 xht = "NORMAL";
64 break;
65 case XH_SUBNORMAL:
66 xht = "SUBNORMAL";
67 break;
68 case XH_INCONSISTENT:
69 xht = "INCONSISTENT";
70 break;
71 default:
72 xht = "UNKNOWN";
73 }
74 tprintf("\nStopper: %s (word=%c, case=%c, xht_ok=%s=[%g,%g])\n",
75 best_choice.unichar_string().c_str(), (is_valid_word ? 'y' : 'n'),
76 (is_case_ok ? 'y' : 'n'), xht, best_choice.min_x_height(), best_choice.max_x_height());
77 }
78 // Do not accept invalid words in PASS1.
79 if (reject_offset_ <= 0.0f && !is_valid_word) {
80 return false;
81 }
82 if (is_valid_word && is_case_ok) {
83 WordSize = LengthOfShortestAlphaRun(best_choice);
84 WordSize -= stopper_smallword_size;
85 if (WordSize < 0) {
86 WordSize = 0;
87 }
88 CertaintyThreshold += WordSize * stopper_certainty_per_char;
89 }
90
91 if (stopper_debug_level >= 1) {
92 tprintf("Stopper: Rating = %4.1f, Certainty = %4.1f, Threshold = %4.1f\n",
93 best_choice.rating(), best_choice.certainty(), CertaintyThreshold);
94 }
95
96 if (no_dang_ambigs && best_choice.certainty() > CertaintyThreshold &&
97 xheight_consistency < XH_INCONSISTENT && UniformCertainties(best_choice)) {
98 return true;
99 } else {
100 if (stopper_debug_level >= 1) {
101 tprintf(
102 "AcceptableChoice() returned false"
103 " (no_dang_ambig:%d cert:%.4g thresh:%g uniform:%d)\n",
104 no_dang_ambigs, best_choice.certainty(), CertaintyThreshold,
105 UniformCertainties(best_choice));
106 }
107 return false;
108 }
109}
110
112 if (word->best_choice == nullptr) {
113 return false;
114 }
115 float CertaintyThreshold = stopper_nondict_certainty_base - reject_offset_;
116 int WordSize;
117
118 if (stopper_debug_level >= 1) {
119 tprintf("\nRejecter: %s (word=%c, case=%c, unambig=%c, multiple=%c)\n",
120 word->best_choice->debug_string().c_str(), (valid_word(*word->best_choice) ? 'y' : 'n'),
121 (case_ok(*word->best_choice) ? 'y' : 'n'),
122 word->best_choice->dangerous_ambig_found() ? 'n' : 'y',
123 word->best_choices.singleton() ? 'n' : 'y');
124 }
125
126 if (word->best_choice->empty() || !word->best_choices.singleton()) {
127 return false;
128 }
129 if (valid_word(*word->best_choice) && case_ok(*word->best_choice)) {
130 WordSize = LengthOfShortestAlphaRun(*word->best_choice);
131 WordSize -= stopper_smallword_size;
132 if (WordSize < 0) {
133 WordSize = 0;
134 }
135 CertaintyThreshold += WordSize * stopper_certainty_per_char;
136 }
137
138 if (stopper_debug_level >= 1) {
139 tprintf("Rejecter: Certainty = %4.1f, Threshold = %4.1f ", word->best_choice->certainty(),
140 CertaintyThreshold);
141 }
142
143 if (word->best_choice->certainty() > CertaintyThreshold && !stopper_no_acceptable_choices) {
144 if (stopper_debug_level >= 1) {
145 tprintf("ACCEPTED\n");
146 }
147 return true;
148 } else {
149 if (stopper_debug_level >= 1) {
150 tprintf("REJECTED\n");
151 }
152 return false;
153 }
154}
155
156#if !defined(DISABLED_LEGACY_ENGINE)
157
158bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_replaceable,
159 MATRIX *ratings) {
160 if (stopper_debug_level > 2) {
161 tprintf("\nRunning NoDangerousAmbig() for %s\n", best_choice->debug_string().c_str());
162 }
163
164 // Construct BLOB_CHOICE_LIST_VECTOR with ambiguities
165 // for each unichar id in BestChoice.
166 BLOB_CHOICE_LIST_VECTOR ambig_blob_choices;
167 bool ambigs_found = false;
168 // For each position in best_choice:
169 // -- choose AMBIG_SPEC_LIST that corresponds to unichar_id at best_choice[i]
170 // -- initialize wrong_ngram with a single unichar_id at best_choice[i]
171 // -- look for ambiguities corresponding to wrong_ngram in the list while
172 // adding the following unichar_ids from best_choice to wrong_ngram
173 //
174 // Repeat the above procedure twice: first time look through
175 // ambigs to be replaced and replace all the ambiguities found;
176 // second time look through dangerous ambiguities and construct
177 // ambig_blob_choices with fake a blob choice for each ambiguity
178 // and pass them to dawg_permute_and_select() to search for
179 // ambiguous words in the dictionaries.
180 //
181 // Note that during the execution of the for loop (on the first pass)
182 // if replacements are made the length of best_choice might change.
183 for (int pass = 0; pass < (fix_replaceable ? 2 : 1); ++pass) {
184 bool replace = (fix_replaceable && pass == 0);
185 const UnicharAmbigsVector &table =
187 if (!replace) {
188 // Initialize ambig_blob_choices with lists containing a single
189 // unichar id for the corresponding position in best_choice.
190 // best_choice consisting from only the original letters will
191 // have a rating of 0.0.
192 for (unsigned i = 0; i < best_choice->length(); ++i) {
193 auto *lst = new BLOB_CHOICE_LIST();
194 BLOB_CHOICE_IT lst_it(lst);
195 // TODO(rays/antonova) Put real xheights and y shifts here.
196 lst_it.add_to_end(
197 new BLOB_CHOICE(best_choice->unichar_id(i), 0.0, 0.0, -1, 0, 1, 0, BCC_AMBIG));
198 ambig_blob_choices.push_back(lst);
199 }
200 }
201 UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE + 1];
202 int wrong_ngram_index;
203 int blob_index = 0;
204 for (unsigned i = 0; i < best_choice->length(); blob_index += best_choice->state(i), ++i) {
205 auto curr_unichar_id = best_choice->unichar_id(i);
206 if (stopper_debug_level > 2) {
207 tprintf("Looking for %s ngrams starting with %s:\n", replace ? "replaceable" : "ambiguous",
208 getUnicharset().debug_str(curr_unichar_id).c_str());
209 }
210 int num_wrong_blobs = best_choice->state(i);
211 wrong_ngram_index = 0;
212 wrong_ngram[wrong_ngram_index] = curr_unichar_id;
213 if (curr_unichar_id == INVALID_UNICHAR_ID || static_cast<size_t>(curr_unichar_id) >= table.size() ||
214 table[curr_unichar_id] == nullptr) {
215 continue; // there is no ambig spec for this unichar id
216 }
217 AmbigSpec_IT spec_it(table[curr_unichar_id]);
218 for (spec_it.mark_cycle_pt(); !spec_it.cycled_list();) {
219 const AmbigSpec *ambig_spec = spec_it.data();
220 wrong_ngram[wrong_ngram_index + 1] = INVALID_UNICHAR_ID;
221 int compare = UnicharIdArrayUtils::compare(wrong_ngram, ambig_spec->wrong_ngram);
222 if (stopper_debug_level > 2) {
223 tprintf("candidate ngram: ");
225 tprintf("current ngram from spec: ");
227 tprintf("comparison result: %d\n", compare);
228 }
229 if (compare == 0) {
230 // Record the place where we found an ambiguity.
231 if (fixpt != nullptr) {
232 UNICHAR_ID leftmost_id = ambig_spec->correct_fragments[0];
233 fixpt->push_back(DANGERR_INFO(blob_index, blob_index + num_wrong_blobs, replace,
234 getUnicharset().get_isngram(ambig_spec->correct_ngram_id),
235 leftmost_id));
236 if (stopper_debug_level > 1) {
237 tprintf("fixpt+=(%d %d %d %d %s)\n", blob_index, blob_index + num_wrong_blobs, false,
238 getUnicharset().get_isngram(ambig_spec->correct_ngram_id),
239 getUnicharset().id_to_unichar(leftmost_id));
240 }
241 }
242
243 if (replace) {
244 if (stopper_debug_level > 2) {
245 tprintf("replace ambiguity with %s : ",
246 getUnicharset().id_to_unichar(ambig_spec->correct_ngram_id));
248 }
249 ReplaceAmbig(i, ambig_spec->wrong_ngram_size, ambig_spec->correct_ngram_id, best_choice,
250 ratings);
251 } else if (i > 0 || ambig_spec->type != CASE_AMBIG) {
252 // We found dang ambig - update ambig_blob_choices.
253 if (stopper_debug_level > 2) {
254 tprintf("found ambiguity: ");
256 }
257 ambigs_found = true;
258 for (int tmp_index = 0; tmp_index <= wrong_ngram_index; ++tmp_index) {
259 // Add a blob choice for the corresponding fragment of the
260 // ambiguity. These fake blob choices are initialized with
261 // negative ratings (which are not possible for real blob
262 // choices), so that dawg_permute_and_select() considers any
263 // word not consisting of only the original letters a better
264 // choice and stops searching for alternatives once such a
265 // choice is found.
266 BLOB_CHOICE_IT bc_it(ambig_blob_choices[i + tmp_index]);
267 bc_it.add_to_end(new BLOB_CHOICE(ambig_spec->correct_fragments[tmp_index], -1.0, 0.0,
268 -1, 0, 1, 0, BCC_AMBIG));
269 }
270 }
271 spec_it.forward();
272 } else if (compare == -1) {
273 unsigned next_index;
274 if (wrong_ngram_index + 1 < ambig_spec->wrong_ngram_size &&
275 ((next_index = wrong_ngram_index + 1 + i) < best_choice->length())) {
276 // Add the next unichar id to wrong_ngram and keep looking for
277 // more ambigs starting with curr_unichar_id in AMBIG_SPEC_LIST.
278 wrong_ngram[++wrong_ngram_index] = best_choice->unichar_id(next_index);
279 num_wrong_blobs += best_choice->state(next_index);
280 } else {
281 break; // no more matching ambigs in this AMBIG_SPEC_LIST
282 }
283 } else {
284 spec_it.forward();
285 }
286 } // end searching AmbigSpec_LIST
287 } // end searching best_choice
288 } // end searching replace and dangerous ambigs
289
290 // If any ambiguities were found permute the constructed ambig_blob_choices
291 // to see if an alternative dictionary word can be found.
292 if (ambigs_found) {
293 if (stopper_debug_level > 2) {
294 tprintf("\nResulting ambig_blob_choices:\n");
295 for (unsigned i = 0; i < ambig_blob_choices.size(); ++i) {
296 print_ratings_list("", ambig_blob_choices.at(i), getUnicharset());
297 tprintf("\n");
298 }
299 }
300 WERD_CHOICE *alt_word = dawg_permute_and_select(ambig_blob_choices, 0.0);
301 ambigs_found = (alt_word->rating() < 0.0);
302 if (ambigs_found) {
303 if (stopper_debug_level >= 1) {
304 tprintf("Stopper: Possible ambiguous word = %s\n", alt_word->debug_string().c_str());
305 }
306 if (fixpt != nullptr) {
307 // Note: Currently character choices combined from fragments can only
308 // be generated by NoDangrousAmbigs(). This code should be updated if
309 // the capability to produce classifications combined from character
310 // fragments is added to other functions.
311 int orig_i = 0;
312 for (unsigned i = 0; i < alt_word->length(); ++i) {
313 const UNICHARSET &uchset = getUnicharset();
314 bool replacement_is_ngram = uchset.get_isngram(alt_word->unichar_id(i));
315 UNICHAR_ID leftmost_id = alt_word->unichar_id(i);
316 if (replacement_is_ngram) {
317 // we have to extract the leftmost unichar from the ngram.
318 const char *str = uchset.id_to_unichar(leftmost_id);
319 int step = uchset.step(str);
320 if (step) {
321 leftmost_id = uchset.unichar_to_id(str, step);
322 }
323 }
324 int end_i = orig_i + alt_word->state(i);
325 if (alt_word->state(i) > 1 || (orig_i + 1 == end_i && replacement_is_ngram)) {
326 // Compute proper blob indices.
327 int blob_start = 0;
328 for (int j = 0; j < orig_i; ++j) {
329 blob_start += best_choice->state(j);
330 }
331 int blob_end = blob_start;
332 for (int j = orig_i; j < end_i; ++j) {
333 blob_end += best_choice->state(j);
334 }
335 fixpt->push_back(
336 DANGERR_INFO(blob_start, blob_end, true, replacement_is_ngram, leftmost_id));
337 if (stopper_debug_level > 1) {
338 tprintf("fixpt->dangerous+=(%d %d %d %d %s)\n", orig_i, end_i, true,
339 replacement_is_ngram, uchset.id_to_unichar(leftmost_id));
340 }
341 }
342 orig_i += alt_word->state(i);
343 }
344 }
345 }
346 delete alt_word;
347 }
348 if (output_ambig_words_file_ != nullptr) {
349 fprintf(output_ambig_words_file_, "\n");
350 }
351
352 for (auto data : ambig_blob_choices) {
353 delete data;
354 }
355 return !ambigs_found;
356}
357
359
360#endif // !defined(DISABLED_LEGACY_ENGINE)
361
363 reject_offset_ = 0.0;
364}
365
367 reject_offset_ = stopper_phase2_certainty_rejection_offset;
368}
369
370void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size,
371 UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice, MATRIX *ratings) {
372 int num_blobs_to_replace = 0;
373 int begin_blob_index = 0;
374 int i;
375 // Rating and certainty for the new BLOB_CHOICE are derived from the
376 // replaced choices.
377 float new_rating = 0.0f;
378 float new_certainty = 0.0f;
379 BLOB_CHOICE *old_choice = nullptr;
380 for (i = 0; i < wrong_ngram_begin_index + wrong_ngram_size; ++i) {
381 if (i >= wrong_ngram_begin_index) {
382 int num_blobs = werd_choice->state(i);
383 int col = begin_blob_index + num_blobs_to_replace;
384 int row = col + num_blobs - 1;
385 BLOB_CHOICE_LIST *choices = ratings->get(col, row);
386 ASSERT_HOST(choices != nullptr);
387 old_choice = FindMatchingChoice(werd_choice->unichar_id(i), choices);
388 ASSERT_HOST(old_choice != nullptr);
389 new_rating += old_choice->rating();
390 new_certainty += old_choice->certainty();
391 num_blobs_to_replace += num_blobs;
392 } else {
393 begin_blob_index += werd_choice->state(i);
394 }
395 }
396 new_certainty /= wrong_ngram_size;
397 // If there is no entry in the ratings matrix, add it.
398 MATRIX_COORD coord(begin_blob_index, begin_blob_index + num_blobs_to_replace - 1);
399 if (!coord.Valid(*ratings)) {
400 ratings->IncreaseBandSize(coord.row - coord.col + 1);
401 }
402 if (ratings->get(coord.col, coord.row) == nullptr) {
403 ratings->put(coord.col, coord.row, new BLOB_CHOICE_LIST);
404 }
405 BLOB_CHOICE_LIST *new_choices = ratings->get(coord.col, coord.row);
406 BLOB_CHOICE *choice = FindMatchingChoice(correct_ngram_id, new_choices);
407 if (choice != nullptr) {
408 // Already there. Upgrade if new rating better.
409 if (new_rating < choice->rating()) {
410 choice->set_rating(new_rating);
411 }
412 if (new_certainty < choice->certainty()) {
413 choice->set_certainty(new_certainty);
414 }
415 // DO NOT SORT!! It will mess up the iterator in LanguageModel::UpdateState.
416 } else {
417 // Need a new choice with the correct_ngram_id.
418 choice = new BLOB_CHOICE(*old_choice);
419 choice->set_unichar_id(correct_ngram_id);
420 choice->set_rating(new_rating);
421 choice->set_certainty(new_certainty);
422 choice->set_classifier(BCC_AMBIG);
423 choice->set_matrix_cell(coord.col, coord.row);
424 BLOB_CHOICE_IT it(new_choices);
425 it.add_to_end(choice);
426 }
427 // Remove current unichar from werd_choice. On the last iteration
428 // set the correct replacement unichar instead of removing a unichar.
429 for (int replaced_count = 0; replaced_count < wrong_ngram_size; ++replaced_count) {
430 if (replaced_count + 1 == wrong_ngram_size) {
431 werd_choice->set_blob_choice(wrong_ngram_begin_index, num_blobs_to_replace, choice);
432 } else {
433 werd_choice->remove_unichar_id(wrong_ngram_begin_index + 1);
434 }
435 }
436 if (stopper_debug_level >= 1) {
437 werd_choice->print("ReplaceAmbig() ");
438 tprintf("Modified blob_choices: ");
439 print_ratings_list("\n", new_choices, getUnicharset());
440 }
441}
442
443int Dict::LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const {
444 int shortest = INT32_MAX;
445 int curr_len = 0;
446 for (unsigned w = 0; w < WordChoice.length(); ++w) {
447 if (WordChoice.unicharset()->get_isalpha(WordChoice.unichar_id(w))) {
448 curr_len++;
449 } else if (curr_len > 0) {
450 if (curr_len < shortest) {
451 shortest = curr_len;
452 }
453 curr_len = 0;
454 }
455 }
456 if (curr_len > 0 && curr_len < shortest) {
457 shortest = curr_len;
458 } else if (shortest == INT32_MAX) {
459 shortest = 0;
460 }
461 return shortest;
462}
463
465 float Certainty;
466 float WorstCertainty = FLT_MAX;
467 float CertaintyThreshold;
468 double TotalCertainty;
469 double TotalCertaintySquared;
470 double Variance;
471 float Mean, StdDev;
472 int word_length = word.length();
473
474 if (word_length < 3) {
475 return true;
476 }
477
478 TotalCertainty = TotalCertaintySquared = 0.0;
479 for (int i = 0; i < word_length; ++i) {
480 Certainty = word.certainty(i);
481 TotalCertainty += Certainty;
482 TotalCertaintySquared += static_cast<double>(Certainty) * Certainty;
483 if (Certainty < WorstCertainty) {
484 WorstCertainty = Certainty;
485 }
486 }
487
488 // Subtract off worst certainty from statistics.
489 word_length--;
490 TotalCertainty -= WorstCertainty;
491 TotalCertaintySquared -= static_cast<double>(WorstCertainty) * WorstCertainty;
492
493 Mean = TotalCertainty / word_length;
494 Variance = ((word_length * TotalCertaintySquared - TotalCertainty * TotalCertainty) /
495 (word_length * (word_length - 1)));
496 if (Variance < 0.0) {
497 Variance = 0.0;
498 }
499 StdDev = sqrt(Variance);
500
501 CertaintyThreshold = Mean - stopper_allowable_character_badness * StdDev;
502 if (CertaintyThreshold > stopper_nondict_certainty_base) {
503 CertaintyThreshold = stopper_nondict_certainty_base;
504 }
505
506 if (word.certainty() < CertaintyThreshold) {
507 if (stopper_debug_level >= 1) {
508 tprintf(
509 "Stopper: Non-uniform certainty = %4.1f"
510 " (m=%4.1f, s=%4.1f, t=%4.1f)\n",
511 word.certainty(), Mean, StdDev, CertaintyThreshold);
512 }
513 return false;
514 } else {
515 return true;
516 }
517}
518
519} // namespace tesseract
#define MAX_AMBIG_SIZE
Definition: ambigs.h:34
#define ASSERT_HOST(x)
Definition: errcode.h:54
std::vector< AmbigSpec_LIST * > UnicharAmbigsVector
Definition: ambigs.h:140
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:804
XHeightConsistencyEnum
Definition: dict.h:81
@ XH_GOOD
Definition: dict.h:81
@ XH_SUBNORMAL
Definition: dict.h:81
@ XH_INCONSISTENT
Definition: dict.h:81
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
float Mean(PROTOTYPE *Proto, uint16_t Dimension)
Definition: cluster.cpp:1662
int UNICHAR_ID
Definition: unichar.h:34
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:177
@ CASE_AMBIG
Definition: ambigs.h:45
@ BCC_AMBIG
Definition: ratngs.h:52
std::vector< DANGERR_INFO > DANGERR
Definition: stopper.h:47
std::vector< BLOB_CHOICE_LIST * > BLOB_CHOICE_LIST_VECTOR
Definition: ratngs.h:627
T get(ICOORD pos) const
Definition: matrix.h:268
void put(ICOORD pos, const T &thing)
Definition: matrix.h:260
void IncreaseBandSize(int bandwidth)
Definition: matrix.cpp:52
bool Valid(const MATRIX &m) const
Definition: matrix.h:697
WERD_CHOICE * best_choice
Definition: pageres.h:239
WERD_CHOICE_LIST best_choices
Definition: pageres.h:247
void set_certainty(float newrat)
Definition: ratngs.h:150
void set_unichar_id(UNICHAR_ID newunichar_id)
Definition: ratngs.h:144
float certainty() const
Definition: ratngs.h:87
void set_classifier(BlobChoiceClassifier classifier)
Definition: ratngs.h:160
void set_matrix_cell(int col, int row)
Definition: ratngs.h:156
void set_rating(float newrat)
Definition: ratngs.h:147
float rating() const
Definition: ratngs.h:84
float max_x_height() const
Definition: ratngs.h:324
std::string debug_string() const
Definition: ratngs.h:479
float certainty() const
Definition: ratngs.h:315
void remove_unichar_id(unsigned index)
Definition: ratngs.h:458
void set_blob_choice(unsigned index, int blob_count, const BLOB_CHOICE *blob_choice)
Definition: ratngs.cpp:297
UNICHAR_ID unichar_id(unsigned index) const
Definition: ratngs.h:299
bool empty() const
Definition: ratngs.h:284
uint8_t permuter() const
Definition: ratngs.h:331
bool dangerous_ambig_found() const
Definition: ratngs.h:348
unsigned state(unsigned index) const
Definition: ratngs.h:303
const UNICHARSET * unicharset() const
Definition: ratngs.h:281
float min_x_height() const
Definition: ratngs.h:321
unsigned length() const
Definition: ratngs.h:287
void print() const
Definition: ratngs.h:561
std::string & unichar_string()
Definition: ratngs.h:519
float rating() const
Definition: ratngs.h:312
static void print(const UNICHAR_ID array[], const UNICHARSET &unicharset)
Definition: ambigs.h:93
static int compare(const UNICHAR_ID *ptr1, const UNICHAR_ID *ptr2)
Definition: ambigs.h:58
UNICHAR_ID correct_ngram_id
Definition: ambigs.h:132
UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE+1]
Definition: ambigs.h:130
UNICHAR_ID correct_fragments[MAX_AMBIG_SIZE+1]
Definition: ambigs.h:131
AmbigType type
Definition: ambigs.h:133
const UnicharAmbigsVector & replace_ambigs() const
Definition: ambigs.h:160
const UnicharAmbigsVector & dang_ambigs() const
Definition: ambigs.h:157
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:497
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:279
bool get_isngram(UNICHAR_ID unichar_id) const
Definition: unicharset.h:542
int step(const char *str) const
Definition: unicharset.cpp:211
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:186
bool AcceptableResult(WERD_RES *word) const
Definition: stopper.cpp:111
int UniformCertainties(const WERD_CHOICE &word)
Definition: stopper.cpp:464
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
Definition: stopper.cpp:366
int LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const
Returns the length of the shortest alpha run in WordChoice.
Definition: stopper.cpp:443
void ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size, UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice, MATRIX *ratings)
Definition: stopper.cpp:370
static bool valid_word_permuter(uint8_t perm, bool numbers_ok)
Check all the DAWGs to see if this word is in any of them.
Definition: dict.h:437
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:801
bool AcceptableChoice(const WERD_CHOICE &best_choice, XHeightConsistencyEnum xheight_consistency)
Returns true if the given best_choice is good enough to stop.
Definition: stopper.cpp:42
WERD_CHOICE * dawg_permute_and_select(const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit)
Definition: permdawg.cpp:159
const UnicharAmbigs & getUnicharAmbigs() const
Definition: dict.h:111
bool NoDangerousAmbig(WERD_CHOICE *BestChoice, DANGERR *fixpt, bool fix_replaceable, MATRIX *ratings)
Definition: stopper.cpp:158
void EndDangerousAmbigs()
Definition: stopper.cpp:358
int case_ok(const WERD_CHOICE &word) const
Check a string to see if it matches a set of lexical rules.
Definition: context.cpp:45
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
Definition: stopper.cpp:362
const UNICHARSET & getUnicharset() const
Definition: dict.h:104