All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
stopper.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: stopper.cpp
3  ** Purpose: Stopping criteria for word classifier.
4  ** Author: Dan Johnson
5  ** History: Mon Apr 29 14:56:49 1991, DSJ, Created.
6  **
7  ** (c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
18 
19 #include <stdio.h>
20 #include <string.h>
21 #include <ctype.h>
22 #include <math.h>
23 
24 #include "stopper.h"
25 #include "ambigs.h"
26 #include "ccutil.h"
27 #include "const.h"
28 #include "danerror.h"
29 #include "dict.h"
30 #include "efio.h"
31 #include "helpers.h"
32 #include "matchdefs.h"
33 #include "pageres.h"
34 #include "params.h"
35 #include "ratngs.h"
36 #include "scanutils.h"
37 #include "unichar.h"
38 
39 #ifdef _MSC_VER
40 #pragma warning(disable:4244) // Conversion warnings
41 #pragma warning(disable:4800) // int/bool warnings
42 #endif
43 
45 /*----------------------------------------------------------------------------
46  Private Code
47 ----------------------------------------------------------------------------*/
48 
49 namespace tesseract {
50 
51 bool Dict::AcceptableChoice(const WERD_CHOICE& best_choice,
52  XHeightConsistencyEnum xheight_consistency) {
53  float CertaintyThreshold = stopper_nondict_certainty_base;
54  int WordSize;
55 
56  if (stopper_no_acceptable_choices) return false;
57 
58  if (best_choice.length() == 0) return false;
59 
60  bool no_dang_ambigs = !best_choice.dangerous_ambig_found();
61  bool is_valid_word = valid_word_permuter(best_choice.permuter(), false);
62  bool is_case_ok = case_ok(best_choice, getUnicharset());
63 
64  if (stopper_debug_level >= 1) {
65  const char *xht = "UNKNOWN";
66  switch (xheight_consistency) {
67  case XH_GOOD: xht = "NORMAL"; break;
68  case XH_SUBNORMAL: xht = "SUBNORMAL"; break;
69  case XH_INCONSISTENT: xht = "INCONSISTENT"; break;
70  default: xht = "UNKNOWN";
71  }
72  tprintf("\nStopper: %s (word=%c, case=%c, xht_ok=%s=[%g,%g])\n",
73  best_choice.unichar_string().string(),
74  (is_valid_word ? 'y' : 'n'),
75  (is_case_ok ? 'y' : 'n'),
76  xht,
77  best_choice.min_x_height(),
78  best_choice.max_x_height());
79  }
80  // Do not accept invalid words in PASS1.
81  if (reject_offset_ <= 0.0f && !is_valid_word) return false;
82  if (is_valid_word && is_case_ok) {
83  WordSize = LengthOfShortestAlphaRun(best_choice);
84  WordSize -= stopper_smallword_size;
85  if (WordSize < 0)
86  WordSize = 0;
87  CertaintyThreshold += WordSize * stopper_certainty_per_char;
88  }
89 
90  if (stopper_debug_level >= 1)
91  tprintf("Stopper: Rating = %4.1f, Certainty = %4.1f, Threshold = %4.1f\n",
92  best_choice.rating(), best_choice.certainty(), CertaintyThreshold);
93 
94  if (no_dang_ambigs &&
95  best_choice.certainty() > CertaintyThreshold &&
96  xheight_consistency < XH_INCONSISTENT &&
97  UniformCertainties(best_choice)) {
98  return true;
99  } else {
100  if (stopper_debug_level >= 1) {
101  tprintf("AcceptableChoice() returned false"
102  " (no_dang_ambig:%d cert:%.4g thresh:%g uniform:%d)\n",
103  no_dang_ambigs, best_choice.certainty(),
104  CertaintyThreshold,
105  UniformCertainties(best_choice));
106  }
107  return false;
108  }
109 }
110 
112  if (word->best_choice == NULL) return false;
113  float CertaintyThreshold = stopper_nondict_certainty_base - reject_offset_;
114  int WordSize;
115 
116  if (stopper_debug_level >= 1) {
117  tprintf("\nRejecter: %s (word=%c, case=%c, unambig=%c, multiple=%c)\n",
118  word->best_choice->debug_string().string(),
119  (valid_word(*word->best_choice) ? 'y' : 'n'),
120  (case_ok(*word->best_choice, getUnicharset()) ? 'y' : 'n'),
121  word->best_choice->dangerous_ambig_found() ? 'n' : 'y',
122  word->best_choices.singleton() ? 'n' : 'y');
123  }
124 
125  if (word->best_choice->length() == 0 || !word->best_choices.singleton())
126  return false;
127  if (valid_word(*word->best_choice) &&
128  case_ok(*word->best_choice, getUnicharset())) {
129  WordSize = LengthOfShortestAlphaRun(*word->best_choice);
130  WordSize -= stopper_smallword_size;
131  if (WordSize < 0)
132  WordSize = 0;
133  CertaintyThreshold += WordSize * stopper_certainty_per_char;
134  }
135 
136  if (stopper_debug_level >= 1)
137  tprintf("Rejecter: Certainty = %4.1f, Threshold = %4.1f ",
138  word->best_choice->certainty(), CertaintyThreshold);
139 
140  if (word->best_choice->certainty() > CertaintyThreshold &&
142  if (stopper_debug_level >= 1)
143  tprintf("ACCEPTED\n");
144  return true;
145  } else {
146  if (stopper_debug_level >= 1)
147  tprintf("REJECTED\n");
148  return false;
149  }
150 }
151 
153  DANGERR *fixpt,
154  bool fix_replaceable,
155  MATRIX *ratings) {
156  if (stopper_debug_level > 2) {
157  tprintf("\nRunning NoDangerousAmbig() for %s\n",
158  best_choice->debug_string().string());
159  }
160 
161  // Construct BLOB_CHOICE_LIST_VECTOR with ambiguities
162  // for each unichar id in BestChoice.
163  BLOB_CHOICE_LIST_VECTOR ambig_blob_choices;
164  int i;
165  bool ambigs_found = false;
166  // For each position in best_choice:
167  // -- choose AMBIG_SPEC_LIST that corresponds to unichar_id at best_choice[i]
168  // -- initialize wrong_ngram with a single unichar_id at best_choice[i]
169  // -- look for ambiguities corresponding to wrong_ngram in the list while
170  // adding the following unichar_ids from best_choice to wrong_ngram
171  //
172  // Repeat the above procedure twice: first time look through
173  // ambigs to be replaced and replace all the ambiguities found;
174  // second time look through dangerous ambiguities and construct
175  // ambig_blob_choices with a fake blob choice for each ambiguity
176  // and pass them to dawg_permute_and_select() to search for
177  // ambiguous words in the dictionaries.
178  //
179  // Note that during the execution of the for loop (on the first pass)
180  // if replacements are made the length of best_choice might change.
181  for (int pass = 0; pass < (fix_replaceable ? 2 : 1); ++pass) {
182  bool replace = (fix_replaceable && pass == 0);
183  const UnicharAmbigsVector &table = replace ?
185  if (!replace) {
186  // Initialize ambig_blob_choices with lists containing a single
187  // unichar id for the corresponding position in best_choice.
188  // best_choice consisting of only the original letters will
189  // have a rating of 0.0.
190  for (i = 0; i < best_choice->length(); ++i) {
191  BLOB_CHOICE_LIST *lst = new BLOB_CHOICE_LIST();
192  BLOB_CHOICE_IT lst_it(lst);
193  // TODO(rays/antonova) Put real xheights and y shifts here.
194  lst_it.add_to_end(new BLOB_CHOICE(best_choice->unichar_id(i),
195  0.0, 0.0, -1, 0, 1, 0, BCC_AMBIG));
196  ambig_blob_choices.push_back(lst);
197  }
198  }
199  UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE + 1];
200  int wrong_ngram_index;
201  int next_index;
202  int blob_index = 0;
203  for (i = 0; i < best_choice->length(); blob_index += best_choice->state(i),
204  ++i) {
205  UNICHAR_ID curr_unichar_id = best_choice->unichar_id(i);
206  if (stopper_debug_level > 2) {
207  tprintf("Looking for %s ngrams starting with %s:\n",
208  replace ? "replaceable" : "ambiguous",
209  getUnicharset().debug_str(curr_unichar_id).string());
210  }
211  int num_wrong_blobs = best_choice->state(i);
212  wrong_ngram_index = 0;
213  wrong_ngram[wrong_ngram_index] = curr_unichar_id;
214  if (curr_unichar_id == INVALID_UNICHAR_ID ||
215  curr_unichar_id >= table.size() ||
216  table[curr_unichar_id] == NULL) {
217  continue; // there is no ambig spec for this unichar id
218  }
219  AmbigSpec_IT spec_it(table[curr_unichar_id]);
220  for (spec_it.mark_cycle_pt(); !spec_it.cycled_list();) {
221  const AmbigSpec *ambig_spec = spec_it.data();
222  wrong_ngram[wrong_ngram_index+1] = INVALID_UNICHAR_ID;
223  int compare = UnicharIdArrayUtils::compare(wrong_ngram,
224  ambig_spec->wrong_ngram);
225  if (stopper_debug_level > 2) {
226  tprintf("candidate ngram: ");
228  tprintf("current ngram from spec: ");
230  tprintf("comparison result: %d\n", compare);
231  }
232  if (compare == 0) {
233  // Record the place where we found an ambiguity.
234  if (fixpt != NULL) {
235  UNICHAR_ID leftmost_id = ambig_spec->correct_fragments[0];
236  fixpt->push_back(DANGERR_INFO(
237  blob_index, blob_index + num_wrong_blobs, replace,
238  getUnicharset().get_isngram(ambig_spec->correct_ngram_id),
239  leftmost_id));
240  if (stopper_debug_level > 1) {
241  tprintf("fixpt+=(%d %d %d %d %s)\n", blob_index,
242  blob_index + num_wrong_blobs, false,
243  getUnicharset().get_isngram(
244  ambig_spec->correct_ngram_id),
245  getUnicharset().id_to_unichar(leftmost_id));
246  }
247  }
248 
249  if (replace) {
250  if (stopper_debug_level > 2) {
251  tprintf("replace ambiguity with %s : ",
252  getUnicharset().id_to_unichar(
253  ambig_spec->correct_ngram_id));
255  ambig_spec->correct_fragments, getUnicharset());
256  }
257  ReplaceAmbig(i, ambig_spec->wrong_ngram_size,
258  ambig_spec->correct_ngram_id,
259  best_choice, ratings);
260  } else if (i > 0 || ambig_spec->type != CASE_AMBIG) {
261  // We found dang ambig - update ambig_blob_choices.
262  if (stopper_debug_level > 2) {
263  tprintf("found ambiguity: ");
265  ambig_spec->correct_fragments, getUnicharset());
266  }
267  ambigs_found = true;
268  for (int tmp_index = 0; tmp_index <= wrong_ngram_index;
269  ++tmp_index) {
270  // Add a blob choice for the corresponding fragment of the
271  // ambiguity. These fake blob choices are initialized with
272  // negative ratings (which are not possible for real blob
273  // choices), so that dawg_permute_and_select() considers any
274  // word not consisting of only the original letters a better
275  // choice and stops searching for alternatives once such a
276  // choice is found.
277  BLOB_CHOICE_IT bc_it(ambig_blob_choices[i+tmp_index]);
278  bc_it.add_to_end(new BLOB_CHOICE(
279  ambig_spec->correct_fragments[tmp_index], -1.0, 0.0,
280  -1, 0, 1, 0, BCC_AMBIG));
281  }
282  }
283  spec_it.forward();
284  } else if (compare == -1) {
285  if (wrong_ngram_index+1 < ambig_spec->wrong_ngram_size &&
286  ((next_index = wrong_ngram_index+1+i) < best_choice->length())) {
287  // Add the next unichar id to wrong_ngram and keep looking for
288  // more ambigs starting with curr_unichar_id in AMBIG_SPEC_LIST.
289  wrong_ngram[++wrong_ngram_index] =
290  best_choice->unichar_id(next_index);
291  num_wrong_blobs += best_choice->state(next_index);
292  } else {
293  break; // no more matching ambigs in this AMBIG_SPEC_LIST
294  }
295  } else {
296  spec_it.forward();
297  }
298  } // end searching AmbigSpec_LIST
299  } // end searching best_choice
300  } // end searching replace and dangerous ambigs
301 
302  // If any ambiguities were found permute the constructed ambig_blob_choices
303  // to see if an alternative dictionary word can be found.
304  if (ambigs_found) {
305  if (stopper_debug_level > 2) {
306  tprintf("\nResulting ambig_blob_choices:\n");
307  for (i = 0; i < ambig_blob_choices.length(); ++i) {
308  print_ratings_list("", ambig_blob_choices.get(i), getUnicharset());
309  tprintf("\n");
310  }
311  }
312  WERD_CHOICE *alt_word = dawg_permute_and_select(ambig_blob_choices, 0.0);
313  ambigs_found = (alt_word->rating() < 0.0);
314  if (ambigs_found) {
315  if (stopper_debug_level >= 1) {
316  tprintf ("Stopper: Possible ambiguous word = %s\n",
317  alt_word->debug_string().string());
318  }
319  if (fixpt != NULL) {
320  // Note: Currently character choices combined from fragments can only
321  // be generated by NoDangerousAmbig(). This code should be updated if
322  // the capability to produce classifications combined from character
323  // fragments is added to other functions.
324  int orig_i = 0;
325  for (i = 0; i < alt_word->length(); ++i) {
326  const UNICHARSET &uchset = getUnicharset();
327  bool replacement_is_ngram =
328  uchset.get_isngram(alt_word->unichar_id(i));
329  UNICHAR_ID leftmost_id = alt_word->unichar_id(i);
330  if (replacement_is_ngram) {
331  // we have to extract the leftmost unichar from the ngram.
332  const char *str = uchset.id_to_unichar(leftmost_id);
333  int step = uchset.step(str);
334  if (step) leftmost_id = uchset.unichar_to_id(str, step);
335  }
336  int end_i = orig_i + alt_word->state(i);
337  if (alt_word->state(i) > 1 ||
338  (orig_i + 1 == end_i && replacement_is_ngram)) {
339  // Compute proper blob indices.
340  int blob_start = 0;
341  for (int j = 0; j < orig_i; ++j)
342  blob_start += best_choice->state(j);
343  int blob_end = blob_start;
344  for (int j = orig_i; j < end_i; ++j)
345  blob_end += best_choice->state(j);
346  fixpt->push_back(DANGERR_INFO(blob_start, blob_end, true,
347  replacement_is_ngram, leftmost_id));
348  if (stopper_debug_level > 1) {
349  tprintf("fixpt->dangerous+=(%d %d %d %d %s)\n", orig_i, end_i,
350  true, replacement_is_ngram,
351  uchset.id_to_unichar(leftmost_id));
352  }
353  }
354  orig_i += alt_word->state(i);
355  }
356  }
357  }
358  delete alt_word;
359  }
360  if (output_ambig_words_file_ != NULL) {
361  fprintf(output_ambig_words_file_, "\n");
362  }
363 
364  ambig_blob_choices.delete_data_pointers();
365  return !ambigs_found;
366 }
367 
369 
371  reject_offset_ = 0.0;
372 }
373 
376 }
377 
378 void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size,
379  UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice,
380  MATRIX *ratings) {
381  int num_blobs_to_replace = 0;
382  int begin_blob_index = 0;
383  int i;
384  // Rating and certainty for the new BLOB_CHOICE are derived from the
385  // replaced choices.
386  float new_rating = 0.0f;
387  float new_certainty = 0.0f;
388  BLOB_CHOICE* old_choice = NULL;
389  for (i = 0; i < wrong_ngram_begin_index + wrong_ngram_size; ++i) {
390  if (i >= wrong_ngram_begin_index) {
391  int num_blobs = werd_choice->state(i);
392  int col = begin_blob_index + num_blobs_to_replace;
393  int row = col + num_blobs - 1;
394  BLOB_CHOICE_LIST* choices = ratings->get(col, row);
395  ASSERT_HOST(choices != NULL);
396  old_choice = FindMatchingChoice(werd_choice->unichar_id(i), choices);
397  ASSERT_HOST(old_choice != NULL);
398  new_rating += old_choice->rating();
399  new_certainty += old_choice->certainty();
400  num_blobs_to_replace += num_blobs;
401  } else {
402  begin_blob_index += werd_choice->state(i);
403  }
404  }
405  new_certainty /= wrong_ngram_size;
406  // If there is no entry in the ratings matrix, add it.
407  MATRIX_COORD coord(begin_blob_index,
408  begin_blob_index + num_blobs_to_replace - 1);
409  if (!coord.Valid(*ratings)) {
410  ratings->IncreaseBandSize(coord.row - coord.col + 1);
411  }
412  if (ratings->get(coord.col, coord.row) == NULL)
413  ratings->put(coord.col, coord.row, new BLOB_CHOICE_LIST);
414  BLOB_CHOICE_LIST* new_choices = ratings->get(coord.col, coord.row);
415  BLOB_CHOICE* choice = FindMatchingChoice(correct_ngram_id, new_choices);
416  if (choice != NULL) {
417  // Already there. Upgrade if new rating better.
418  if (new_rating < choice->rating())
419  choice->set_rating(new_rating);
420  if (new_certainty < choice->certainty())
421  choice->set_certainty(new_certainty);
422  // DO NOT SORT!! It will mess up the iterator in LanguageModel::UpdateState.
423  } else {
424  // Need a new choice with the correct_ngram_id.
425  choice = new BLOB_CHOICE(*old_choice);
426  choice->set_unichar_id(correct_ngram_id);
427  choice->set_rating(new_rating);
428  choice->set_certainty(new_certainty);
429  choice->set_classifier(BCC_AMBIG);
430  choice->set_matrix_cell(coord.col, coord.row);
431  BLOB_CHOICE_IT it (new_choices);
432  it.add_to_end(choice);
433  }
434  // Remove current unichar from werd_choice. On the last iteration
435  // set the correct replacement unichar instead of removing a unichar.
436  for (int replaced_count = 0; replaced_count < wrong_ngram_size;
437  ++replaced_count) {
438  if (replaced_count + 1 == wrong_ngram_size) {
439  werd_choice->set_blob_choice(wrong_ngram_begin_index,
440  num_blobs_to_replace, choice);
441  } else {
442  werd_choice->remove_unichar_id(wrong_ngram_begin_index + 1);
443  }
444  }
445  if (stopper_debug_level >= 1) {
446  werd_choice->print("ReplaceAmbig() ");
447  tprintf("Modified blob_choices: ");
448  print_ratings_list("\n", new_choices, getUnicharset());
449  }
450 }
451 
453  int shortest = MAX_INT32;
454  int curr_len = 0;
455  for (int w = 0; w < WordChoice.length(); ++w) {
456  if (getUnicharset().get_isalpha(WordChoice.unichar_id(w))) {
457  curr_len++;
458  } else if (curr_len > 0) {
459  if (curr_len < shortest) shortest = curr_len;
460  curr_len = 0;
461  }
462  }
463  if (curr_len > 0 && curr_len < shortest) {
464  shortest = curr_len;
465  } else if (shortest == MAX_INT32) {
466  shortest = 0;
467  }
468  return shortest;
469 }
470 
472  float Certainty;
473  float WorstCertainty = MAX_FLOAT32;
474  float CertaintyThreshold;
475  FLOAT64 TotalCertainty;
476  FLOAT64 TotalCertaintySquared;
477  FLOAT64 Variance;
478  FLOAT32 Mean, StdDev;
479  int word_length = word.length();
480 
481  if (word_length < 3)
482  return true;
483 
484  TotalCertainty = TotalCertaintySquared = 0.0;
485  for (int i = 0; i < word_length; ++i) {
486  Certainty = word.certainty(i);
487  TotalCertainty += Certainty;
488  TotalCertaintySquared += Certainty * Certainty;
489  if (Certainty < WorstCertainty)
490  WorstCertainty = Certainty;
491  }
492 
493  // Subtract off worst certainty from statistics.
494  word_length--;
495  TotalCertainty -= WorstCertainty;
496  TotalCertaintySquared -= WorstCertainty * WorstCertainty;
497 
498  Mean = TotalCertainty / word_length;
499  Variance = ((word_length * TotalCertaintySquared -
500  TotalCertainty * TotalCertainty) /
501  (word_length * (word_length - 1)));
502  if (Variance < 0.0)
503  Variance = 0.0;
504  StdDev = sqrt(Variance);
505 
506  CertaintyThreshold = Mean - stopper_allowable_character_badness * StdDev;
507  if (CertaintyThreshold > stopper_nondict_certainty_base)
508  CertaintyThreshold = stopper_nondict_certainty_base;
509 
510  if (word.certainty() < CertaintyThreshold) {
511  if (stopper_debug_level >= 1)
512  tprintf("Stopper: Non-uniform certainty = %4.1f"
513  " (m=%4.1f, s=%4.1f, t=%4.1f)\n",
514  word.certainty(), Mean, StdDev, CertaintyThreshold);
515  return false;
516  } else {
517  return true;
518  }
519 }
520 
521 } // namespace tesseract
void ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size, UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice, MATRIX *ratings)
Definition: stopper.cpp:378
double stopper_phase2_certainty_rejection_offset
Definition: dict.h:605
int size() const
Definition: genericvector.h:72
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
void remove_unichar_id(int index)
Definition: ratngs.h:481
float rating() const
Definition: ratngs.h:324
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
int length() const
Definition: genericvector.h:79
float FLOAT32
Definition: host.h:111
static int compare(const UNICHAR_ID array1[], const UNICHAR_ID array2[])
Definition: ambigs.h:62
void EndDangerousAmbigs()
Definition: stopper.cpp:368
#define MAX_AMBIG_SIZE
Definition: ambigs.h:30
int length() const
Definition: ratngs.h:300
WERD_CHOICE * best_choice
Definition: pageres.h:219
int push_back(T object)
T get(int column, int row) const
Definition: matrix.h:171
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:705
#define tprintf(...)
Definition: tprintf.h:31
double stopper_allowable_character_badness
Definition: dict.h:611
XHeightConsistencyEnum
Definition: dict.h:75
const UnicharAmbigsVector & replace_ambigs() const
Definition: ambigs.h:154
UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE+1]
Definition: ambigs.h:132
int stopper_smallword_size
Definition: dict.h:607
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:160
void put(int column, int row, const T &thing)
Definition: matrix.h:166
float min_x_height() const
Definition: ratngs.h:333
bool dangerous_ambig_found() const
Definition: ratngs.h:360
#define ASSERT_HOST(x)
Definition: errcode.h:84
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:819
const STRING & unichar_string() const
Definition: ratngs.h:524
double stopper_certainty_per_char
Definition: dict.h:609
bool AcceptableChoice(const WERD_CHOICE &best_choice, XHeightConsistencyEnum xheight_consistency)
Returns true if the given best_choice is good enough to stop.
Definition: stopper.cpp:51
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
Definition: stopper.cpp:370
float rating() const
Definition: ratngs.h:79
int LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice)
Returns the length of the shortest alpha run in WordChoice.
Definition: stopper.cpp:452
int state(int index) const
Definition: ratngs.h:316
void set_classifier(BlobChoiceClassifier classifier)
Definition: ratngs.h:166
static bool valid_word_permuter(uinT8 perm, bool numbers_ok)
Check all the DAWGs to see if this word is in any of them.
Definition: dict.h:447
int case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset)
Check a string to see if it matches a set of lexical rules.
Definition: context.cpp:58
const UnicharAmbigs & getUnicharAmbigs() const
Definition: dict.h:102
float certainty() const
Definition: ratngs.h:327
void set_certainty(float newrat)
Definition: ratngs.h:150
bool get_isngram(UNICHAR_ID unichar_id) const
Definition: unicharset.h:484
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:312
void delete_data_pointers()
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
bool NoDangerousAmbig(WERD_CHOICE *BestChoice, DANGERR *fixpt, bool fix_replaceable, MATRIX *ratings)
Definition: stopper.cpp:152
double stopper_nondict_certainty_base
Definition: dict.h:603
uinT8 permuter() const
Definition: ratngs.h:343
const STRING debug_string() const
Definition: ratngs.h:502
int stopper_debug_level
Definition: dict.h:612
bool Valid(const MATRIX &m) const
Definition: matrix.h:327
WERD_CHOICE * dawg_permute_and_select(const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit)
Definition: permdawg.cpp:175
void set_matrix_cell(int col, int row)
Definition: ratngs.h:156
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
Definition: stopper.cpp:374
const UnicharAmbigsVector & dang_ambigs() const
Definition: ambigs.h:153
int UNICHAR_ID
Definition: unichar.h:33
bool stopper_no_acceptable_choices
Definition: dict.h:615
#define MAX_INT32
Definition: host.h:120
UNICHAR_ID correct_fragments[MAX_AMBIG_SIZE+1]
Definition: ambigs.h:133
float max_x_height() const
Definition: ratngs.h:336
void set_blob_choice(int index, int blob_count, const BLOB_CHOICE *blob_choice)
Definition: ratngs.cpp:290
UNICHAR_ID correct_ngram_id
Definition: ambigs.h:134
FLOAT32 Mean(PROTOTYPE *Proto, uinT16 Dimension)
Definition: cluster.cpp:650
const UNICHARSET & getUnicharset() const
Definition: dict.h:96
static void print(const UNICHAR_ID array[], const UNICHARSET &unicharset)
Definition: ambigs.h:97
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:449
bool AcceptableResult(WERD_RES *word)
Definition: stopper.cpp:111
AmbigType type
Definition: ambigs.h:135
int step(const char *str) const
Definition: unicharset.cpp:211
#define MAX_FLOAT32
Definition: host.h:124
Definition: matrix.h:289
void print() const
Definition: ratngs.h:563
#define NULL
Definition: host.h:144
void set_rating(float newrat)
Definition: ratngs.h:147
const char * string() const
Definition: strngs.cpp:193
float certainty() const
Definition: ratngs.h:82
int UniformCertainties(const WERD_CHOICE &word)
Definition: stopper.cpp:471
T & get(int index) const
void set_unichar_id(UNICHAR_ID newunichar_id)
Definition: ratngs.h:144
double FLOAT64
Definition: host.h:112
void IncreaseBandSize(int bandwidth)
Definition: matrix.cpp:49