All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
ratngs.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: ratngs.h (Formerly ratings.h)
3  * Description: Definition of the WERD_CHOICE and BLOB_CHOICE classes.
4  * Author: Ray Smith
5  * Created: Thu Apr 23 11:40:38 BST 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifndef RATNGS_H
21 #define RATNGS_H
22 
23 #include <assert.h>
24 
25 #include "clst.h"
26 #include "elst.h"
27 #include "fontinfo.h"
28 #include "genericvector.h"
29 #include "matrix.h"
30 #include "unichar.h"
31 #include "unicharset.h"
32 #include "werd.h"
33 
34 class MATRIX;
35 struct TBLOB;
36 struct TWERD;
37 
38 // Enum to describe the source of a BLOB_CHOICE to make it possible to determine
39 // whether a blob has been classified by inspecting the BLOB_CHOICEs.
41  BCC_STATIC_CLASSIFIER, // From the char_norm classifier.
42  BCC_ADAPTED_CLASSIFIER, // From the adaptive classifier.
43  BCC_SPECKLE_CLASSIFIER, // Backup for failed classification.
44  BCC_AMBIG, // Generated by ambiguity detection.
45  BCC_FAKE, // From some other process.
46 };
47 
48 class BLOB_CHOICE: public ELIST_LINK
49 {
50  public:
52  unichar_id_ = UNICHAR_SPACE;
53  fontinfo_id_ = -1;
54  fontinfo_id2_ = -1;
55  rating_ = 10.0;
56  certainty_ = -1.0;
57  script_id_ = -1;
58  xgap_before_ = 0;
59  xgap_after_ = 0;
60  min_xheight_ = 0.0f;
61  max_xheight_ = 0.0f;
62  yshift_ = 0.0f;
63  classifier_ = BCC_FAKE;
64  }
65  BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
66  float src_rating, // rating
67  float src_cert, // certainty
68  int script_id, // script
69  float min_xheight, // min xheight in image pixel units
70  float max_xheight, // max xheight allowed by this char
71  float yshift, // the larger of y shift (top or bottom)
72  BlobChoiceClassifier c); // adapted match or other
73  BLOB_CHOICE(const BLOB_CHOICE &other);
75 
77  return unichar_id_;
78  }
79  float rating() const {
80  return rating_;
81  }
82  float certainty() const {
83  return certainty_;
84  }
85  inT16 fontinfo_id() const {
86  return fontinfo_id_;
87  }
88  inT16 fontinfo_id2() const {
89  return fontinfo_id2_;
90  }
92  return fonts_;
93  }
95  fonts_ = fonts;
96  int score1 = 0, score2 = 0;
97  fontinfo_id_ = -1;
98  fontinfo_id2_ = -1;
99  for (int f = 0; f < fonts_.size(); ++f) {
100  if (fonts_[f].score > score1) {
101  score2 = score1;
102  fontinfo_id2_ = fontinfo_id_;
103  score1 = fonts_[f].score;
104  fontinfo_id_ = fonts_[f].fontinfo_id;
105  } else if (fonts_[f].score > score2) {
106  score2 = fonts_[f].score;
107  fontinfo_id2_ = fonts_[f].fontinfo_id;
108  }
109  }
110  }
111  int script_id() const {
112  return script_id_;
113  }
115  return matrix_cell_;
116  }
117  inT16 xgap_before() const {
118  return xgap_before_;
119  }
120  inT16 xgap_after() const {
121  return xgap_after_;
122  }
123  float min_xheight() const {
124  return min_xheight_;
125  }
126  float max_xheight() const {
127  return max_xheight_;
128  }
129  float yshift() const {
130  return yshift_;
131  }
133  return classifier_;
134  }
135  bool IsAdapted() const {
136  return classifier_ == BCC_ADAPTED_CLASSIFIER;
137  }
138  bool IsClassified() const {
     // True when classifier_ is one of the three classifier sources (static,
     // adapted or speckle); false for the remaining values BCC_AMBIG and
     // BCC_FAKE.
139  return classifier_ == BCC_STATIC_CLASSIFIER ||
140  classifier_ == BCC_ADAPTED_CLASSIFIER ||
141  classifier_ == BCC_SPECKLE_CLASSIFIER;
142  }
143 
144  void set_unichar_id(UNICHAR_ID newunichar_id) {
145  unichar_id_ = newunichar_id;
146  }
147  void set_rating(float newrat) {
148  rating_ = newrat;
149  }
150  void set_certainty(float newrat) {
151  certainty_ = newrat;
152  }
153  void set_script(int newscript_id) {
154  script_id_ = newscript_id;
155  }
156  void set_matrix_cell(int col, int row) {
157  matrix_cell_.col = col;
158  matrix_cell_.row = row;
159  }
160  void set_xgap_before(inT16 gap) {
161  xgap_before_ = gap;
162  }
163  void set_xgap_after(inT16 gap) {
164  xgap_after_ = gap;
165  }
167  classifier_ = classifier;
168  }
169  static BLOB_CHOICE* deep_copy(const BLOB_CHOICE* src) {
     // Allocates a default-constructed BLOB_CHOICE with new, assigns *src to
     // it and returns the new object; nothing in this function releases it.
     // NOTE(review): src is dereferenced without a check, so it must not be
     // NULL.
170  BLOB_CHOICE* choice = new BLOB_CHOICE;
171  *choice = *src;
172  return choice;
173  }
174  // Returns true if *this and other agree on the baseline and x-height
175  // to within some tolerance based on a given estimate of the x-height.
176  bool PosAndSizeAgree(const BLOB_CHOICE& other, float x_height,
177  bool debug) const;
178 
179  void print(const UNICHARSET *unicharset) const {
     // Prints rating, certainty, the [min, max] x-height range and the unichar
     // id through tprintf, followed by the unicharset's debug string for that
     // id, or by an empty string when unicharset is NULL.
     // The format string has no trailing newline.
180  tprintf("r%.2f c%.2f x[%g,%g]: %d %s",
181  rating_, certainty_,
182  min_xheight_, max_xheight_, unichar_id_,
183  (unicharset == NULL) ? "" :
184  unicharset->debug_str(unichar_id_).string());
185  }
186  void print_full() const {
     // Prints the short form first. It is called with a NULL unicharset, so
     // the character's debug string is always empty here. Then appends the
     // script id, both font ids, yshift and the classifier value, and ends
     // the line.
187  print(NULL);
188  tprintf(" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n",
189  script_id_, fontinfo_id_, fontinfo_id2_, yshift_, classifier_);
190  }
191  // Sort function for sorting BLOB_CHOICEs in increasing order of rating.
     // p1 and p2 each point at a BLOB_CHOICE pointer (the signature matches a
     // C qsort comparison function).
     // Returns -1, 0 or 1 as the first rating is below, equal to or above the
     // second. Equal ratings must yield 0: answering 1 for both (a, b) and
     // (b, a) gives the sort an inconsistent ordering.
192  static int SortByRating(const void *p1, const void *p2) {
193  const BLOB_CHOICE *bc1 =
194  *reinterpret_cast<const BLOB_CHOICE * const *>(p1);
195  const BLOB_CHOICE *bc2 =
196  *reinterpret_cast<const BLOB_CHOICE * const *>(p2);
197  return (bc1->rating_ < bc2->rating_) ? -1 : (bc1->rating_ > bc2->rating_) ? 1 : 0;
198  }
199 
200  private:
201  UNICHAR_ID unichar_id_; // unichar id
202  // Fonts and scores. Allowed to be empty.
204  inT16 fontinfo_id_; // char font information
205  inT16 fontinfo_id2_; // 2nd choice font information
206  // Rating is the classifier distance weighted by the length of the outline
207  // in the blob. In terms of probability, classifier distance is -klog p such
208  // that the resulting distance is in the range [0, 1] and then
209  // rating = w (-k log p) where w is the weight for the length of the outline.
210  // Sums of ratings may be compared meaningfully for words of different
211  // segmentation.
212  float rating_; // size related
213  // Certainty is a number in [-20, 0] indicating the classifier certainty
214  // of the choice. In terms of probability, certainty = 20 (k log p) where
215  // k is defined as above to normalize -klog p to the range [0, 1].
216  float certainty_; // absolute
217  int script_id_;
218  // Holds the position of this choice in the ratings matrix.
219  // Used to locate the position in the matrix during path backtracking.
220  MATRIX_COORD matrix_cell_;
221  inT16 xgap_before_;
222  inT16 xgap_after_;
223  // X-height range (in image pixels) that this classification supports.
224  float min_xheight_;
225  float max_xheight_;
226  // yshift_ - The vertical distance (in image pixels) the character is
227  // shifted (up or down) from an acceptable y position.
228  float yshift_;
229  BlobChoiceClassifier classifier_; // What generated *this.
230 };
231 
232 // Make BLOB_CHOICE listable.
234 
235 // Return the BLOB_CHOICE in bc_list matching a given unichar_id,
236 // or NULL if there is no match.
237 BLOB_CHOICE *FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list);
238 
239 // Permuter codes used in WERD_CHOICEs.
241  NO_PERM, // 0
242  PUNC_PERM, // 1
254 
256 };
257 
258 namespace tesseract {
259 // ScriptPos tells whether a character is subscript, superscript or normal.
260 enum ScriptPos {
265 };
266 
267 const char *ScriptPosToString(tesseract::ScriptPos script_pos);
268 
269 } // namespace tesseract.
270 
271 class WERD_CHOICE : public ELIST_LINK {
272  public:
273  static const float kBadRating;
274  static const char *permuter_name(uinT8 permuter);
275 
277  : unicharset_(unicharset) { this->init(8); }
278  WERD_CHOICE(const UNICHARSET *unicharset, int reserved)
279  : unicharset_(unicharset) { this->init(reserved); }
280  WERD_CHOICE(const char *src_string,
281  const char *src_lengths,
282  float src_rating,
283  float src_certainty,
284  uinT8 src_permuter,
285  const UNICHARSET &unicharset)
286  : unicharset_(&unicharset) {
287  this->init(src_string, src_lengths, src_rating,
288  src_certainty, src_permuter);
289  }
290  WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset);
291  WERD_CHOICE(const WERD_CHOICE &word) : unicharset_(word.unicharset_) {
292  this->init(word.length());
293  this->operator=(word);
294  }
295  ~WERD_CHOICE();
296 
297  const UNICHARSET *unicharset() const {
298  return unicharset_;
299  }
300  inline int length() const {
301  return length_;
302  }
303  float adjust_factor() const {
304  return adjust_factor_;
305  }
306  void set_adjust_factor(float factor) {
307  adjust_factor_ = factor;
308  }
309  inline const UNICHAR_ID *unichar_ids() const {
310  return unichar_ids_;
311  }
312  inline const UNICHAR_ID unichar_id(int index) const {
     // Returns the unichar id stored at position index of the word.
     // A negative index is as far out of range as one at or beyond length_,
     // so both bounds are asserted before the array is read.
313  assert(index >= 0 && index < length_);
314  return unichar_ids_[index];
315  }
316  inline int state(int index) const {
317  return state_[index];
318  }
320  if (index < 0 || index >= length_)
321  return tesseract::SP_NORMAL;
322  return script_pos_[index];
323  }
324  inline float rating() const {
325  return rating_;
326  }
327  inline float certainty() const {
328  return certainty_;
329  }
330  inline float certainty(int index) const {
331  return certainties_[index];
332  }
333  inline float min_x_height() const {
334  return min_x_height_;
335  }
336  inline float max_x_height() const {
337  return max_x_height_;
338  }
339  inline void set_x_heights(float min_height, float max_height) {
340  min_x_height_ = min_height;
341  max_x_height_ = max_height;
342  }
343  inline uinT8 permuter() const {
344  return permuter_;
345  }
346  const char *permuter_name() const;
347  // Returns the BLOB_CHOICE_LIST corresponding to the given index in the word,
348  // taken from the appropriate cell in the ratings MATRIX.
349  // Borrowed pointer, so do not delete.
350  BLOB_CHOICE_LIST* blob_choices(int index, MATRIX* ratings) const;
351 
352  // Returns the MATRIX_COORD corresponding to the location in the ratings
353  // MATRIX for the given index into the word.
354  MATRIX_COORD MatrixCoord(int index) const;
355 
356  inline void set_unichar_id(UNICHAR_ID unichar_id, int index) {
     // Overwrites only the unichar id at position index; state_, certainties_,
     // script_pos_ and the word totals are left untouched.
     // Both bounds are asserted: a negative index would write before the
     // start of the array.
357  assert(index >= 0 && index < length_);
358  unichar_ids_[index] = unichar_id;
359  }
360  bool dangerous_ambig_found() const {
361  return dangerous_ambig_found_;
362  }
363  void set_dangerous_ambig_found_(bool value) {
364  dangerous_ambig_found_ = value;
365  }
366  inline void set_rating(float new_val) {
367  rating_ = new_val;
368  }
369  inline void set_certainty(float new_val) {
370  certainty_ = new_val;
371  }
372  inline void set_permuter(uinT8 perm) {
373  permuter_ = perm;
374  }
375  // Note: this function should only be used if all the fields
376  // are populated manually with set_* functions (rather than
377  // (copy)constructors and append_* functions).
378  inline void set_length(int len) {
379  ASSERT_HOST(reserved_ >= len);
380  length_ = len;
381  }
382 
384  inline void double_the_size() {
385  if (reserved_ > 0) {
387  reserved_, unichar_ids_);
389  reserved_, script_pos_);
391  reserved_, state_);
393  reserved_, certainties_);
394  reserved_ *= 2;
395  } else {
396  unichar_ids_ = new UNICHAR_ID[1];
397  script_pos_ = new tesseract::ScriptPos[1];
398  state_ = new int[1];
399  certainties_ = new float[1];
400  reserved_ = 1;
401  }
402  }
403 
406  inline void init(int reserved) {
     // Gives the four parallel per-unichar arrays room for reserved entries
     // (or sets them to NULL when reserved <= 0) and resets every scalar
     // field to its starting value.
     // NOTE(review): the array pointers are overwritten without freeing any
     // earlier allocation, so this looks intended for fresh objects only -
     // confirm against the callers.
407  reserved_ = reserved;
408  if (reserved > 0) {
409  unichar_ids_ = new UNICHAR_ID[reserved];
410  script_pos_ = new tesseract::ScriptPos[reserved];
411  state_ = new int[reserved];
412  certainties_ = new float[reserved];
413  } else {
414  unichar_ids_ = NULL;
415  script_pos_ = NULL;
416  state_ = NULL;
417  certainties_ = NULL;
418  }
419  length_ = 0;
420  adjust_factor_ = 1.0f;
421  rating_ = 0.0;
     // Starts at the maximum float so that the first certainty passed to the
     // five-argument set_unichar_id() replaces it as the running minimum.
422  certainty_ = MAX_FLOAT32;
423  min_x_height_ = 0.0f;
424  max_x_height_ = MAX_FLOAT32;
425  permuter_ = NO_PERM;
426  unichars_in_script_order_ = false; // Tesseract is strict left-to-right.
427  dangerous_ambig_found_ = false;
428  }
429 
435  void init(const char *src_string, const char *src_lengths,
436  float src_rating, float src_certainty,
437  uinT8 src_permuter);
438 
440  inline void make_bad() {
441  length_ = 0;
442  rating_ = kBadRating;
443  certainty_ = -MAX_FLOAT32;
444  }
445 
450  UNICHAR_ID unichar_id, int blob_count,
451  float rating, float certainty) {
452  assert(reserved_ > length_);
453  length_++;
454  this->set_unichar_id(unichar_id, blob_count,
455  rating, certainty, length_-1);
456  }
457 
458  void append_unichar_id(UNICHAR_ID unichar_id, int blob_count,
459  float rating, float certainty);
460 
461  inline void set_unichar_id(UNICHAR_ID unichar_id, int blob_count,
462  float rating, float certainty, int index) {
     // Fills in every per-unichar entry at position index: the id, the blob
     // count (state_), the certainty, and a script position reset to
     // SP_NORMAL.
     // The word totals are updated cumulatively: rating is ADDED to rating_
     // and certainty_ is lowered to certainty if that is smaller. Calling
     // this again for the same index therefore adds the rating again.
     // Both bounds are asserted: a negative index would write before the
     // start of all four arrays.
463  assert(index >= 0 && index < length_);
464  unichar_ids_[index] = unichar_id;
465  state_[index] = blob_count;
466  certainties_[index] = certainty;
467  script_pos_[index] = tesseract::SP_NORMAL;
468  rating_ += rating;
469  if (certainty < certainty_) {
470  certainty_ = certainty;
471  }
472  }
473  // Sets the entries for the given index from the BLOB_CHOICE, assuming
474  // unit fragment lengths, but setting the state for this index to blob_count.
475  void set_blob_choice(int index, int blob_count,
476  const BLOB_CHOICE* blob_choice);
477 
479  void remove_unichar_ids(int index, int num);
480  inline void remove_last_unichar_id() { --length_; }
481  inline void remove_unichar_id(int index) {
482  this->remove_unichar_ids(index, 1);
483  }
484  bool has_rtl_unichar_id() const;
486 
487  // Returns the half-open interval of unichar_id indices [start, end) which
488  // enclose the core portion of this word -- the part after stripping
489  // punctuation from the left and right.
490  void punct_stripped(int *start_core, int *end_core) const;
491 
492  // Returns the indices [start, end) containing the core of the word, stripped
493  // of any superscript digits on either side. (i.e., the non-footnote part
494  // of the word). There is no guarantee that the output range is non-empty.
495  void GetNonSuperscriptSpan(int *start, int *end) const;
496 
497  // Return a copy of this WERD_CHOICE with the choices [start, end).
498  // The result is useful only for checking against a dictionary.
499  WERD_CHOICE shallow_copy(int start, int end) const;
500 
501  void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const;
502  const STRING debug_string() const {
     // Builds a string from the unicharset's debug_str of each of the first
     // length_ unichar ids, in order, each followed by a single space (so a
     // non-empty result ends in a space).
     // NOTE(review): unicharset_ is dereferenced without a NULL check whenever
     // length_ > 0.
503  STRING word_str;
504  for (int i = 0; i < length_; ++i) {
505  word_str += unicharset_->debug_str(unichar_ids_[i]);
506  word_str += " ";
507  }
508  return word_str;
509  }
510 
511  // Call this to override the default (strict left to right graphemes)
512  // with the fact that some engine produces a "reading order" set of
513  // Graphemes for each word.
514  bool set_unichars_in_script_order(bool in_script_order) {
515  return unichars_in_script_order_ = in_script_order;
516  }
517 
519  return unichars_in_script_order_;
520  }
521 
522  // Returns a UTF-8 string equivalent to the current choice
523  // of UNICHAR IDs.
     // Every call hands both mutable members (unichar_string_ and
     // unichar_lengths_) to string_and_lengths() before returning. The result
     // is a reference to the member, not a copy.
     // NOTE(review): presumably a later call to this or unichar_lengths()
     // rewrites the referenced string - confirm in string_and_lengths().
524  const STRING &unichar_string() const {
525  this->string_and_lengths(&unichar_string_, &unichar_lengths_);
526  return unichar_string_;
527  }
528 
529  // Returns the lengths, one byte each, representing the number of bytes
530  // required in the unichar_string for each UNICHAR_ID.
     // Every call hands both mutable members (unichar_string_ and
     // unichar_lengths_) to string_and_lengths() before returning. The result
     // is a reference to the member, not a copy.
531  const STRING &unichar_lengths() const {
532  this->string_and_lengths(&unichar_string_, &unichar_lengths_);
533  return unichar_lengths_;
534  }
535 
536  // Sets up the script_pos_ member using the blobs_list to get the bln
537  // bounding boxes, *this to get the unichars, and this->unicharset
538  // to get the target positions. If small_caps is true, sub/super are not
539  // considered, but dropcaps are.
540  // NOTE: blobs_list should be the chopped_word blobs. (Fully segmented.)
541  void SetScriptPositions(bool small_caps, TWERD* word);
542  // Sets the script_pos_ member from some source positions with a given length.
543  void SetScriptPositions(const tesseract::ScriptPos* positions, int length);
544  // Sets all the script_pos_ positions to the given position.
546 
547  static tesseract::ScriptPos ScriptPositionOf(bool print_debug,
548  const UNICHARSET& unicharset,
549  const TBOX& blob_box,
551 
552  // Returns the "dominant" script ID for the word. By "dominant", the script
553  // must account for at least half the characters. Otherwise, it returns 0.
554  // Note that for Japanese, Hiragana and Katakana are simply treated as Han.
555  int GetTopScriptID() const;
556 
557  // Fixes the state_ for a chop at the given blob_position.
558  void UpdateStateForSplit(int blob_position);
559 
560  // Returns the sum of all the state elements, being the total number of blobs.
561  int TotalOfStates() const;
562 
563  void print() const { this->print(""); }
564  void print(const char *msg) const;
565  // Prints the segmentation state with an introductory message.
566  void print_state(const char *msg) const;
567 
568  // Displays the segmentation state of *this (if not the same as the last
569  // one displayed) and waits for a click in the window.
570  void DisplaySegmentation(TWERD* word);
571 
572  WERD_CHOICE& operator+= ( // concatenate
573  const WERD_CHOICE & second);// second on first
574 
575  WERD_CHOICE& operator= (const WERD_CHOICE& source);
576 
577  private:
578  const UNICHARSET *unicharset_;
579  // TODO(rays) Perhaps replace the multiple arrays with an array of structs?
580  // unichar_ids_ is an array of classifier "results" that make up a word.
581  // For each unichar_ids_[i], script_pos_[i] has the sub/super/normal position
582  // of each unichar_id.
583  // state_[i] indicates the number of blobs in WERD_RES::chopped_word that
584  // were put together to make the classification results in the ith position
585  // in unichar_ids_, and certainties_[i] is the certainty of the choice that
586  // was used in this word.
587  // == Change from before ==
588  // Previously there was fragment_lengths_ that allowed a word to be
589  // artificially composed of multiple fragment results. Since the new
590  // segmentation search doesn't do fragments, treatment of fragments has
591  // been moved to a lower level, augmenting the ratings matrix with the
592  // combined fragments, and allowing the language-model/segmentation-search
593  // to deal with only the combined unichar_ids.
594  UNICHAR_ID *unichar_ids_; // unichar ids that represent the text of the word
595  tesseract::ScriptPos* script_pos_; // Normal/Sub/Superscript of each unichar.
596  int* state_; // Number of blobs in each unichar.
597  float* certainties_; // Certainty of each unichar.
598  int reserved_; // size of the above arrays
599  int length_; // word length
600  // Factor that was used to adjust the rating.
601  float adjust_factor_;
602  // Rating is the sum of the ratings of the individual blobs in the word.
603  float rating_; // size related
604  // certainty is the min (worst) certainty of the individual blobs in the word.
605  float certainty_; // absolute
606  // xheight computed from the result, or 0 if inconsistent.
607  float min_x_height_;
608  float max_x_height_;
609  uinT8 permuter_; // permuter code
610 
611  // Normally, the ratings_ matrix represents the recognition results in order
612  // from left-to-right. However, some engines (say Cube) may return
613  // recognition results in the order of the script's major reading direction
614  // (for Arabic, that is right-to-left).
615  bool unichars_in_script_order_;
616  // True if NoDangerousAmbig found an ambiguity.
617  bool dangerous_ambig_found_;
618 
619  // The following variables are populated and passed by reference any
620  // time unichar_string() or unichar_lengths() are called.
621  mutable STRING unichar_string_;
622  mutable STRING unichar_lengths_;
623 };
624 
625 // Make WERD_CHOICE listable.
627 typedef GenericVector<BLOB_CHOICE_LIST *> BLOB_CHOICE_LIST_VECTOR;
628 
629 // Utilities for comparing WERD_CHOICEs
630 
632  const WERD_CHOICE &word2);
633 
634 // Utilities for debug printing.
635 void print_ratings_list(
636  const char *msg, // intro message
637  BLOB_CHOICE_LIST *ratings, // list of results
638  const UNICHARSET &current_unicharset // unicharset that can be used
639  // for id-to-unichar conversion
640  );
641 
642 #endif
tesseract::ScriptPos BlobPosition(int index) const
Definition: ratngs.h:319
Definition: blobs.h:261
void append_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
Definition: ratngs.cpp:446
const UNICHAR_ID * unichar_ids() const
Definition: ratngs.h:309
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:318
void set_unichar_id(UNICHAR_ID unichar_id, int index)
Definition: ratngs.h:356
BLOB_CHOICE_LIST * blob_choices(int index, MATRIX *ratings) const
Definition: ratngs.cpp:268
static const float kBadRating
Definition: ratngs.h:273
int size() const
Definition: genericvector.h:72
void remove_unichar_id(int index)
Definition: ratngs.h:481
int script_id() const
Definition: ratngs.h:111
void DisplaySegmentation(TWERD *word)
Definition: ratngs.cpp:747
void set_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty, int index)
Definition: ratngs.h:461
float rating() const
Definition: ratngs.h:324
void SetScriptPositions(bool small_caps, TWERD *word)
Definition: ratngs.cpp:528
static T * double_the_size_memcpy(int current_size, T *data)
void set_certainty(float new_val)
Definition: ratngs.h:369
bool unichars_in_script_order() const
Definition: ratngs.h:518
bool contains_unichar_id(UNICHAR_ID unichar_id) const
Definition: ratngs.cpp:304
WERD_CHOICE & operator=(const WERD_CHOICE &source)
Definition: ratngs.cpp:499
BlobChoiceClassifier classifier() const
Definition: ratngs.h:132
static int SortByRating(const void *p1, const void *p2)
Definition: ratngs.h:192
void punct_stripped(int *start_core, int *end_core) const
Definition: ratngs.cpp:361
int length() const
Definition: ratngs.h:300
BLOB_CHOICE()
Definition: ratngs.h:51
void print(const UNICHARSET *unicharset) const
Definition: ratngs.h:179
void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
Definition: ratngs.h:449
#define tprintf(...)
Definition: tprintf.h:31
const char * permuter_name() const
Definition: ratngs.cpp:261
inT16 xgap_after() const
Definition: ratngs.h:120
void set_x_heights(float min_height, float max_height)
Definition: ratngs.h:339
float min_xheight() const
Definition: ratngs.h:123
PermuterType
Definition: ratngs.h:240
void set_permuter(uinT8 perm)
Definition: ratngs.h:372
WERD_CHOICE & operator+=(const WERD_CHOICE &second)
Definition: ratngs.cpp:463
void set_xgap_before(inT16 gap)
Definition: ratngs.h:160
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:819
const STRING & unichar_lengths() const
Definition: ratngs.h:531
void set_length(int len)
Definition: ratngs.h:378
WERD_CHOICE(const UNICHARSET *unicharset)
Definition: ratngs.h:276
bool has_rtl_unichar_id() const
Definition: ratngs.cpp:409
static BLOB_CHOICE * deep_copy(const BLOB_CHOICE *src)
Definition: ratngs.h:169
void UpdateStateForSplit(int blob_position)
Definition: ratngs.cpp:685
~WERD_CHOICE()
Definition: ratngs.cpp:254
bool IsAdapted() const
Definition: ratngs.h:135
WERD_CHOICE shallow_copy(int start, int end) const
Definition: ratngs.cpp:392
float min_x_height() const
Definition: ratngs.h:333
bool dangerous_ambig_found() const
Definition: ratngs.h:360
void print_state(const char *msg) const
Definition: ratngs.cpp:738
#define ASSERT_HOST(x)
Definition: errcode.h:84
float yshift() const
Definition: ratngs.h:129
const STRING & unichar_string() const
Definition: ratngs.h:524
int GetTopScriptID() const
Definition: ratngs.cpp:653
const UNICHARSET * unicharset() const
Definition: ratngs.h:297
~BLOB_CHOICE()
Definition: ratngs.h:74
void reverse_and_mirror_unichar_ids()
Definition: ratngs.cpp:343
const char * ScriptPosToString(enum ScriptPos script_pos)
Definition: ratngs.cpp:180
MATRIX_COORD MatrixCoord(int index) const
Definition: ratngs.cpp:280
void double_the_size()
Make more space in the unichar_ids_, script_pos_, state_ and certainties_ arrays.
Definition: ratngs.h:384
float rating() const
Definition: ratngs.h:79
void GetNonSuperscriptSpan(int *start, int *end) const
Definition: ratngs.cpp:375
inT16 fontinfo_id() const
Definition: ratngs.h:85
WERD_CHOICE(const UNICHARSET *unicharset, int reserved)
Definition: ratngs.h:278
int state(int index) const
Definition: ratngs.h:316
void set_classifier(BlobChoiceClassifier classifier)
Definition: ratngs.h:166
void remove_last_unichar_id()
Definition: ratngs.h:480
bool set_unichars_in_script_order(bool in_script_order)
Definition: ratngs.h:514
void make_bad()
Set the fields in this choice to be default (bad) values.
Definition: ratngs.h:440
static tesseract::ScriptPos ScriptPositionOf(bool print_debug, const UNICHARSET &unicharset, const TBOX &blob_box, UNICHAR_ID unichar_id)
Definition: ratngs.cpp:615
float certainty() const
Definition: ratngs.h:327
void set_xgap_after(inT16 gap)
Definition: ratngs.h:163
void set_certainty(float newrat)
Definition: ratngs.h:150
WERD_CHOICE(const WERD_CHOICE &word)
Definition: ratngs.h:291
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:312
inT16 xgap_before() const
Definition: ratngs.h:117
uinT8 permuter() const
Definition: ratngs.h:343
void init(int reserved)
Definition: ratngs.h:406
const STRING debug_string() const
Definition: ratngs.h:502
void set_matrix_cell(int col, int row)
Definition: ratngs.h:156
int UNICHAR_ID
Definition: unichar.h:33
#define ELISTIZEH(CLASSNAME)
Definition: elst.h:981
inT16 fontinfo_id2() const
Definition: ratngs.h:88
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
Definition: ratngs.cpp:427
void set_script(int newscript_id)
Definition: ratngs.h:153
float max_x_height() const
Definition: ratngs.h:336
void print_full() const
Definition: ratngs.h:186
bool IsClassified() const
Definition: ratngs.h:138
void set_blob_choice(int index, int blob_count, const BLOB_CHOICE *blob_choice)
Definition: ratngs.cpp:290
WERD_CHOICE(const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uinT8 src_permuter, const UNICHARSET &unicharset)
Definition: ratngs.h:280
BlobChoiceClassifier
Definition: ratngs.h:40
float adjust_factor() const
Definition: ratngs.h:303
bool PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const
Definition: ratngs.cpp:132
Definition: rect.h:30
void set_fonts(const GenericVector< tesseract::ScoredFont > &fonts)
Definition: ratngs.h:94
float certainty(int index) const
Definition: ratngs.h:330
void remove_unichar_ids(int index, int num)
Definition: ratngs.cpp:320
bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, const WERD_CHOICE &word2)
Definition: ratngs.cpp:791
#define MAX_FLOAT32
Definition: host.h:124
Definition: matrix.h:289
void print() const
Definition: ratngs.h:563
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:160
Definition: strngs.h:44
const GenericVector< tesseract::ScoredFont > & fonts() const
Definition: ratngs.h:91
#define NULL
Definition: host.h:144
void set_rating(float newrat)
Definition: ratngs.h:147
Definition: blobs.h:395
void SetAllScriptPositions(tesseract::ScriptPos position)
Definition: ratngs.cpp:609
const char * string() const
Definition: strngs.cpp:193
int TotalOfStates() const
Definition: ratngs.cpp:697
float certainty() const
Definition: ratngs.h:82
const MATRIX_COORD & matrix_cell()
Definition: ratngs.h:114
void set_adjust_factor(float factor)
Definition: ratngs.h:306
UNICHAR_ID unichar_id() const
Definition: ratngs.h:76
float max_xheight() const
Definition: ratngs.h:126
void set_rating(float new_val)
Definition: ratngs.h:366
void set_unichar_id(UNICHAR_ID newunichar_id)
Definition: ratngs.h:144
void set_dangerous_ambig_found_(bool value)
Definition: ratngs.h:363
short inT16
Definition: host.h:100
unsigned char uinT8
Definition: host.h:99