tesseract v5.3.3.20231005
ratngs.h
Go to the documentation of this file.
1/**********************************************************************
2 * File: ratngs.h (Formerly ratings.h)
3 * Description: Definition of the WERD_CHOICE and BLOB_CHOICE classes.
4 * Author: Ray Smith
5 *
6 * (C) Copyright 1992, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19#ifndef RATNGS_H
20#define RATNGS_H
21
22#ifdef HAVE_CONFIG_H
23# include "config_auto.h" // DISABLED_LEGACY_ENGINE
24#endif
25
26#include "clst.h"
27#include "elst.h"
28#ifndef DISABLED_LEGACY_ENGINE
29# include "fontinfo.h"
30#endif // undef DISABLED_LEGACY_ENGINE
31#include "matrix.h"
32#include "unicharset.h"
33#include "werd.h"
34
35#include <tesseract/unichar.h>
36
37#include <cassert>
38#include <cfloat> // for FLT_MAX
39
40namespace tesseract {
41
42class MATRIX;
43struct TBLOB;
44struct TWERD;
45
// Enum to describe the source of a BLOB_CHOICE to make it possible to determine
// whether a blob has been classified by inspecting the BLOB_CHOICEs.
enum BlobChoiceClassifier {
  BCC_STATIC_CLASSIFIER,  // From the char_norm classifier.
  BCC_ADAPTED_CLASSIFIER, // From the adaptive classifier.
  BCC_SPECKLE_CLASSIFIER, // Backup for failed classification.
  BCC_AMBIG,              // Generated by ambiguity detection.
  BCC_FAKE,               // From some other process.
};
55
56class BLOB_CHOICE : public ELIST_LINK {
57public:
59 unichar_id_ = UNICHAR_SPACE;
60 fontinfo_id_ = -1;
61 fontinfo_id2_ = -1;
62 rating_ = 10.0f;
63 certainty_ = -1.0f;
64 script_id_ = -1;
65 min_xheight_ = 0.0f;
66 max_xheight_ = 0.0f;
67 yshift_ = 0.0f;
68 classifier_ = BCC_FAKE;
69 }
70 BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
71 float src_rating, // rating
72 float src_cert, // certainty
73 int script_id, // script
74 float min_xheight, // min xheight in image pixel units
75 float max_xheight, // max xheight allowed by this char
76 float yshift, // the larger of y shift (top or bottom)
77 BlobChoiceClassifier c); // adapted match or other
78 BLOB_CHOICE(const BLOB_CHOICE &other);
79 ~BLOB_CHOICE() = default;
80
82 return unichar_id_;
83 }
84 float rating() const {
85 return rating_;
86 }
87 float certainty() const {
88 return certainty_;
89 }
90 int16_t fontinfo_id() const {
91 return fontinfo_id_;
92 }
93 int16_t fontinfo_id2() const {
94 return fontinfo_id2_;
95 }
96#ifndef DISABLED_LEGACY_ENGINE
97 const std::vector<ScoredFont> &fonts() const {
98 return fonts_;
99 }
100 void set_fonts(const std::vector<ScoredFont> &fonts) {
101 fonts_ = fonts;
102 int score1 = 0, score2 = 0;
103 fontinfo_id_ = -1;
104 fontinfo_id2_ = -1;
105 for (auto &f : fonts_) {
106 if (f.score > score1) {
107 score2 = score1;
108 fontinfo_id2_ = fontinfo_id_;
109 score1 = f.score;
110 fontinfo_id_ = f.fontinfo_id;
111 } else if (f.score > score2) {
112 score2 = f.score;
113 fontinfo_id2_ = f.fontinfo_id;
114 }
115 }
116 }
117#endif // ndef DISABLED_LEGACY_ENGINE
118 int script_id() const {
119 return script_id_;
120 }
122 return matrix_cell_;
123 }
124 float min_xheight() const {
125 return min_xheight_;
126 }
127 float max_xheight() const {
128 return max_xheight_;
129 }
130 float yshift() const {
131 return yshift_;
132 }
134 return classifier_;
135 }
136 bool IsAdapted() const {
137 return classifier_ == BCC_ADAPTED_CLASSIFIER;
138 }
139 bool IsClassified() const {
140 return classifier_ == BCC_STATIC_CLASSIFIER || classifier_ == BCC_ADAPTED_CLASSIFIER ||
141 classifier_ == BCC_SPECKLE_CLASSIFIER;
142 }
143
144 void set_unichar_id(UNICHAR_ID newunichar_id) {
145 unichar_id_ = newunichar_id;
146 }
147 void set_rating(float newrat) {
148 rating_ = newrat;
149 }
150 void set_certainty(float newrat) {
151 certainty_ = newrat;
152 }
153 void set_script(int newscript_id) {
154 script_id_ = newscript_id;
155 }
156 void set_matrix_cell(int col, int row) {
157 matrix_cell_.col = col;
158 matrix_cell_.row = row;
159 }
161 classifier_ = classifier;
162 }
163 static BLOB_CHOICE *deep_copy(const BLOB_CHOICE *src) {
164 auto *choice = new BLOB_CHOICE;
165 *choice = *src;
166 return choice;
167 }
168 // Returns true if *this and other agree on the baseline and x-height
169 // to within some tolerance based on a given estimate of the x-height.
170 bool PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const;
171
172 void print(const UNICHARSET *unicharset) const {
173 tprintf("r%.2f c%.2f x[%g,%g]: %d %s",
174 static_cast<double>(rating_),
175 static_cast<double>(certainty_),
176 static_cast<double>(min_xheight_),
177 static_cast<double>(max_xheight_),
178 unichar_id_, (unicharset == nullptr) ? "" : unicharset->debug_str(unichar_id_).c_str());
179 }
180 void print_full() const {
181 print(nullptr);
182 tprintf(" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n", script_id_, fontinfo_id_,
183 fontinfo_id2_, static_cast<double>(yshift_), classifier_);
184 }
185 // Sort function for sorting BLOB_CHOICEs in increasing order of rating.
186 static int SortByRating(const void *p1, const void *p2) {
187 const BLOB_CHOICE *bc1 = *static_cast<const BLOB_CHOICE *const *>(p1);
188 const BLOB_CHOICE *bc2 = *static_cast<const BLOB_CHOICE *const *>(p2);
189 return (bc1->rating_ < bc2->rating_) ? -1 : 1;
190 }
191
192private:
193 // Copy assignment operator.
194 BLOB_CHOICE &operator=(const BLOB_CHOICE &other);
195
196 UNICHAR_ID unichar_id_; // unichar id
197#ifndef DISABLED_LEGACY_ENGINE
198 // Fonts and scores. Allowed to be empty.
199 std::vector<ScoredFont> fonts_;
200#endif // ndef DISABLED_LEGACY_ENGINE
201 int16_t fontinfo_id_; // char font information
202 int16_t fontinfo_id2_; // 2nd choice font information
203 // Rating is the classifier distance weighted by the length of the outline
204 // in the blob. In terms of probability, classifier distance is -klog p such
205 // that the resulting distance is in the range [0, 1] and then
206 // rating = w (-k log p) where w is the weight for the length of the outline.
207 // Sums of ratings may be compared meaningfully for words of different
208 // segmentation.
209 float rating_; // size related
210 // Certainty is a number in [-20, 0] indicating the classifier certainty
211 // of the choice. In terms of probability, certainty = 20 (k log p) where
212 // k is defined as above to normalize -klog p to the range [0, 1].
213 float certainty_; // absolute
214 int script_id_;
215 // Holds the position of this choice in the ratings matrix.
216 // Used to location position in the matrix during path backtracking.
217 MATRIX_COORD matrix_cell_;
218 // X-height range (in image pixels) that this classification supports.
219 float min_xheight_;
220 float max_xheight_;
221 // yshift_ - The vertical distance (in image pixels) the character is
222 // shifted (up or down) from an acceptable y position.
223 float yshift_;
224 BlobChoiceClassifier classifier_; // What generated *this.
225};
226
227// Make BLOB_CHOICE listable.
228ELISTIZEH(BLOB_CHOICE)
229
230// Return the BLOB_CHOICE in bc_list matching a given unichar_id,
231// or nullptr if there is no match.
232BLOB_CHOICE *FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list);
233
// Permuter codes used in WERD_CHOICEs.
enum PermuterType {
  NO_PERM,           // 0
  PUNC_PERM,         // 1
  TOP_CHOICE_PERM,   // 2
  LOWER_CASE_PERM,   // 3
  UPPER_CASE_PERM,   // 4
  NGRAM_PERM,        // 5
  NUMBER_PERM,       // 6
  USER_PATTERN_PERM, // 7
  SYSTEM_DAWG_PERM,  // 8
  DOC_DAWG_PERM,     // 9
  USER_DAWG_PERM,    // 10
  FREQ_DAWG_PERM,    // 11
  COMPOUND_PERM,     // 12

  NUM_PERMUTER_TYPES // Number of permuter codes; keep last.
};

// ScriptPos tells whether a character is subscript, superscript or normal.
// NOTE(review): the enum line itself was missing from this listing; the four
// enumerator names are known but their relative order is not -- confirm the
// order below against the original header before relying on numeric values.
enum ScriptPos { SP_NORMAL, SP_SUBSCRIPT, SP_SUPERSCRIPT, SP_DROPCAP };

256const char *ScriptPosToString(ScriptPos script_pos);
257
259public:
260 static const float kBadRating;
261 static const char *permuter_name(uint8_t permuter);
262
263 WERD_CHOICE(const UNICHARSET *unicharset) : unicharset_(unicharset) {
264 this->init(8);
265 }
266 WERD_CHOICE(const UNICHARSET *unicharset, int reserved) : unicharset_(unicharset) {
267 this->init(reserved);
268 }
269 WERD_CHOICE(const char *src_string, const char *src_lengths, float src_rating,
270 float src_certainty, uint8_t src_permuter, const UNICHARSET &unicharset)
271 : unicharset_(&unicharset) {
272 this->init(src_string, src_lengths, src_rating, src_certainty, src_permuter);
273 }
274 WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset);
275 WERD_CHOICE(const WERD_CHOICE &word) : ELIST_LINK(word), unicharset_(word.unicharset_) {
276 this->init(word.length());
277 this->operator=(word);
278 }
280
281 const UNICHARSET *unicharset() const {
282 return unicharset_;
283 }
284 bool empty() const {
285 return length_ == 0;
286 }
287 inline unsigned length() const {
288 return length_;
289 }
290 float adjust_factor() const {
291 return adjust_factor_;
292 }
293 void set_adjust_factor(float factor) {
294 adjust_factor_ = factor;
295 }
296 inline const std::vector<UNICHAR_ID> &unichar_ids() const {
297 return unichar_ids_;
298 }
299 inline UNICHAR_ID unichar_id(unsigned index) const {
300 assert(index < length_);
301 return unichar_ids_[index];
302 }
303 inline unsigned state(unsigned index) const {
304 return state_[index];
305 }
306 ScriptPos BlobPosition(unsigned index) const {
307 if (index >= length_) {
308 return SP_NORMAL;
309 }
310 return script_pos_[index];
311 }
312 inline float rating() const {
313 return rating_;
314 }
315 inline float certainty() const {
316 return certainty_;
317 }
318 inline float certainty(unsigned index) const {
319 return certainties_[index];
320 }
321 inline float min_x_height() const {
322 return min_x_height_;
323 }
324 inline float max_x_height() const {
325 return max_x_height_;
326 }
327 inline void set_x_heights(float min_height, float max_height) {
328 min_x_height_ = min_height;
329 max_x_height_ = max_height;
330 }
331 inline uint8_t permuter() const {
332 return permuter_;
333 }
334 const char *permuter_name() const;
335 // Returns the BLOB_CHOICE_LIST corresponding to the given index in the word,
336 // taken from the appropriate cell in the ratings MATRIX.
337 // Borrowed pointer, so do not delete.
338 BLOB_CHOICE_LIST *blob_choices(unsigned index, MATRIX *ratings) const;
339
340 // Returns the MATRIX_COORD corresponding to the location in the ratings
341 // MATRIX for the given index into the word.
342 MATRIX_COORD MatrixCoord(unsigned index) const;
343
344 inline void set_unichar_id(UNICHAR_ID unichar_id, unsigned index) {
345 assert(index < length_);
346 unichar_ids_[index] = unichar_id;
347 }
349 return dangerous_ambig_found_;
350 }
352 dangerous_ambig_found_ = value;
353 }
354 inline void set_rating(float new_val) {
355 rating_ = new_val;
356 }
357 inline void set_certainty(float new_val) {
358 certainty_ = new_val;
359 }
360 inline void set_permuter(uint8_t perm) {
361 permuter_ = perm;
362 }
363 // Note: this function should only be used if all the fields
364 // are populated manually with set_* functions (rather than
365 // (copy)constructors and append_* functions).
366 inline void set_length(unsigned len) {
367 ASSERT_HOST(reserved_ >= len);
368 length_ = len;
369 }
370
372 inline void double_the_size() {
373 if (reserved_ > 0) {
374 reserved_ *= 2;
375 } else {
376 reserved_ = 1;
377 }
378 unichar_ids_.resize(reserved_);
379 script_pos_.resize(reserved_);
380 state_.resize(reserved_);
381 certainties_.resize(reserved_);
382 }
383
386 inline void init(unsigned reserved) {
387 reserved_ = reserved;
388 if (reserved > 0) {
389 unichar_ids_.resize(reserved);
390 script_pos_.resize(reserved);
391 state_.resize(reserved);
392 certainties_.resize(reserved);
393 } else {
394 unichar_ids_.clear();
395 script_pos_.clear();
396 state_.clear();
397 certainties_.clear();
398 }
399 length_ = 0;
400 adjust_factor_ = 1.0f;
401 rating_ = 0.0;
402 certainty_ = FLT_MAX;
403 min_x_height_ = 0.0f;
404 max_x_height_ = FLT_MAX;
405 permuter_ = NO_PERM;
406 unichars_in_script_order_ = false; // Tesseract is strict left-to-right.
407 dangerous_ambig_found_ = false;
408 }
409
415 void init(const char *src_string, const char *src_lengths, float src_rating, float src_certainty,
416 uint8_t src_permuter);
417
419 inline void make_bad() {
420 length_ = 0;
421 rating_ = kBadRating;
422 certainty_ = -FLT_MAX;
423 }
424
428 inline void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating,
429 float certainty) {
430 assert(reserved_ > length_);
431 length_++;
432 this->set_unichar_id(unichar_id, blob_count, rating, certainty, length_ - 1);
433 }
434
435 void append_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty);
436
437 inline void set_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty,
438 unsigned index) {
439 assert(index < length_);
440 unichar_ids_[index] = unichar_id;
441 state_[index] = blob_count;
442 certainties_[index] = certainty;
443 script_pos_[index] = SP_NORMAL;
444 rating_ += rating;
445 if (certainty < certainty_) {
446 certainty_ = certainty;
447 }
448 }
449 // Sets the entries for the given index from the BLOB_CHOICE, assuming
450 // unit fragment lengths, but setting the state for this index to blob_count.
451 void set_blob_choice(unsigned index, int blob_count, const BLOB_CHOICE *blob_choice);
452
453 bool contains_unichar_id(UNICHAR_ID unichar_id) const;
454 void remove_unichar_ids(unsigned index, int num);
456 --length_;
457 }
458 inline void remove_unichar_id(unsigned index) {
459 this->remove_unichar_ids(index, 1);
460 }
461 bool has_rtl_unichar_id() const;
462 void reverse_and_mirror_unichar_ids();
463
464 // Returns the half-open interval of unichar_id indices [start, end) which
465 // enclose the core portion of this word -- the part after stripping
466 // punctuation from the left and right.
467 void punct_stripped(unsigned *start_core, unsigned *end_core) const;
468
469 // Returns the indices [start, end) containing the core of the word, stripped
470 // of any superscript digits on either side. (i.e., the non-footnote part
471 // of the word). There is no guarantee that the output range is non-empty.
472 void GetNonSuperscriptSpan(int *start, int *end) const;
473
474 // Return a copy of this WERD_CHOICE with the choices [start, end).
475 // The result is useful only for checking against a dictionary.
476 WERD_CHOICE shallow_copy(unsigned start, unsigned end) const;
477
478 void string_and_lengths(std::string *word_str, std::string *word_lengths_str) const;
479 std::string debug_string() const {
480 std::string word_str;
481 for (unsigned i = 0; i < length_; ++i) {
482 word_str += unicharset_->debug_str(unichar_ids_[i]);
483 word_str += " ";
484 }
485 return word_str;
486 }
487 // Returns true if any unichar_id in the word is a non-space-delimited char.
489 for (unsigned i = 0; i < length_; ++i) {
490 if (!unicharset_->IsSpaceDelimited(unichar_ids_[i])) {
491 return true;
492 }
493 }
494 return false;
495 }
496 // Returns true if the word is all spaces.
497 bool IsAllSpaces() const {
498 for (unsigned i = 0; i < length_; ++i) {
499 if (unichar_ids_[i] != UNICHAR_SPACE) {
500 return false;
501 }
502 }
503 return true;
504 }
505
506 // Call this to override the default (strict left to right graphemes)
507 // with the fact that some engine produces a "reading order" set of
508 // Graphemes for each word.
509 bool set_unichars_in_script_order(bool in_script_order) {
510 return unichars_in_script_order_ = in_script_order;
511 }
512
514 return unichars_in_script_order_;
515 }
516
517 // Returns a UTF-8 string equivalent to the current choice
518 // of UNICHAR IDs.
519 std::string &unichar_string() {
520 this->string_and_lengths(&unichar_string_, &unichar_lengths_);
521 return unichar_string_;
522 }
523
524 // Returns a UTF-8 string equivalent to the current choice
525 // of UNICHAR IDs.
526 const std::string &unichar_string() const {
527 this->string_and_lengths(&unichar_string_, &unichar_lengths_);
528 return unichar_string_;
529 }
530
531 // Returns the lengths, one byte each, representing the number of bytes
532 // required in the unichar_string for each UNICHAR_ID.
533 const std::string &unichar_lengths() const {
534 this->string_and_lengths(&unichar_string_, &unichar_lengths_);
535 return unichar_lengths_;
536 }
537
538 // Sets up the script_pos_ member using the blobs_list to get the bln
539 // bounding boxes, *this to get the unichars, and this->unicharset
540 // to get the target positions. If small_caps is true, sub/super are not
541 // considered, but dropcaps are.
542 // NOTE: blobs_list should be the chopped_word blobs. (Fully segmented.)
543 void SetScriptPositions(bool small_caps, TWERD *word, int debug = 0);
544 // Sets all the script_pos_ positions to the given position.
545 void SetAllScriptPositions(ScriptPos position);
546
547 static ScriptPos ScriptPositionOf(bool print_debug, const UNICHARSET &unicharset,
548 const TBOX &blob_box, UNICHAR_ID unichar_id);
549
550 // Returns the "dominant" script ID for the word. By "dominant", the script
551 // must account for at least half the characters. Otherwise, it returns 0.
552 // Note that for Japanese, Hiragana and Katakana are simply treated as Han.
553 int GetTopScriptID() const;
554
555 // Fixes the state_ for a chop at the given blob_posiiton.
556 void UpdateStateForSplit(int blob_position);
557
558 // Returns the sum of all the state elements, being the total number of blobs.
559 unsigned TotalOfStates() const;
560
561 void print() const {
562 this->print("");
563 }
564 void print(const char *msg) const;
565 // Prints the segmentation state with an introductory message.
566 void print_state(const char *msg) const;
567
568 // Displays the segmentation state of *this (if not the same as the last
569 // one displayed) and waits for a click in the window.
570 void DisplaySegmentation(TWERD *word);
571
572 WERD_CHOICE &operator+=( // concatanate
573 const WERD_CHOICE &second); // second on first
574
575 WERD_CHOICE &operator=(const WERD_CHOICE &source);
576
577private:
578 const UNICHARSET *unicharset_;
579 // TODO(rays) Perhaps replace the multiple arrays with an array of structs?
580 // unichar_ids_ is an array of classifier "results" that make up a word.
581 // For each unichar_ids_[i], script_pos_[i] has the sub/super/normal position
582 // of each unichar_id.
583 // state_[i] indicates the number of blobs in WERD_RES::chopped_word that
584 // were put together to make the classification results in the ith position
585 // in unichar_ids_, and certainties_[i] is the certainty of the choice that
586 // was used in this word.
587 // == Change from before ==
588 // Previously there was fragment_lengths_ that allowed a word to be
589 // artificially composed of multiple fragment results. Since the new
590 // segmentation search doesn't do fragments, treatment of fragments has
591 // been moved to a lower level, augmenting the ratings matrix with the
592 // combined fragments, and allowing the language-model/segmentation-search
593 // to deal with only the combined unichar_ids.
594 std::vector<UNICHAR_ID> unichar_ids_; // unichar ids that represent the text of the word
595 std::vector<ScriptPos> script_pos_; // Normal/Sub/Superscript of each unichar.
596 std::vector<int> state_; // Number of blobs in each unichar.
597 std::vector<float> certainties_; // Certainty of each unichar.
598 unsigned reserved_; // size of the above arrays
599 unsigned length_; // word length
600 // Factor that was used to adjust the rating.
601 float adjust_factor_;
602 // Rating is the sum of the ratings of the individual blobs in the word.
603 float rating_; // size related
604 // certainty is the min (worst) certainty of the individual blobs in the word.
605 float certainty_; // absolute
606 // xheight computed from the result, or 0 if inconsistent.
607 float min_x_height_;
608 float max_x_height_;
609 uint8_t permuter_; // permuter code
610
611 // Normally, the ratings_ matrix represents the recognition results in order
612 // from left-to-right. However, some engines (say Cube) may return
613 // recognition results in the order of the script's major reading direction
614 // (for Arabic, that is right-to-left).
615 bool unichars_in_script_order_;
616 // True if NoDangerousAmbig found an ambiguity.
617 bool dangerous_ambig_found_;
618
619 // The following variables are populated and passed by reference any
620 // time unichar_string() or unichar_lengths() are called.
621 mutable std::string unichar_string_;
622 mutable std::string unichar_lengths_;
623};
624
625// Make WERD_CHOICE listable.
626ELISTIZEH(WERD_CHOICE)
627using BLOB_CHOICE_LIST_VECTOR = std::vector<BLOB_CHOICE_LIST *>;
628
629// Utilities for comparing WERD_CHOICEs
630
632
633// Utilities for debug printing.
634void print_ratings_list(const char *msg, // intro message
635 BLOB_CHOICE_LIST *ratings, // list of results
636 const UNICHARSET &current_unicharset // unicharset that can be used
637 // for id-to-unichar conversion
638);
639
640} // namespace tesseract
641
642#endif
#define ELISTIZEH(CLASSNAME)
Definition: elst.h:803
#define ASSERT_HOST(x)
Definition: errcode.h:54
int value
STL namespace.
ICOORD & operator+=(ICOORD &op1, const ICOORD &op2)
Definition: points.h:372
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:804
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
@ SP_SUBSCRIPT
Definition: ratngs.h:254
@ SP_DROPCAP
Definition: ratngs.h:254
@ SP_NORMAL
Definition: ratngs.h:254
@ SP_SUPERSCRIPT
Definition: ratngs.h:254
int UNICHAR_ID
Definition: unichar.h:34
@ UNICHAR_SPACE
Definition: unicharset.h:36
const char * ScriptPosToString(enum ScriptPos script_pos)
Definition: ratngs.cpp:193
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:177
BlobChoiceClassifier
Definition: ratngs.h:48
@ BCC_AMBIG
Definition: ratngs.h:52
@ BCC_FAKE
Definition: ratngs.h:53
@ BCC_SPECKLE_CLASSIFIER
Definition: ratngs.h:51
@ BCC_STATIC_CLASSIFIER
Definition: ratngs.h:49
@ BCC_ADAPTED_CLASSIFIER
Definition: ratngs.h:50
PermuterType
Definition: ratngs.h:235
@ UPPER_CASE_PERM
Definition: ratngs.h:240
@ NGRAM_PERM
Definition: ratngs.h:241
@ LOWER_CASE_PERM
Definition: ratngs.h:239
@ SYSTEM_DAWG_PERM
Definition: ratngs.h:244
@ TOP_CHOICE_PERM
Definition: ratngs.h:238
@ NUMBER_PERM
Definition: ratngs.h:242
@ COMPOUND_PERM
Definition: ratngs.h:248
@ NO_PERM
Definition: ratngs.h:236
@ NUM_PERMUTER_TYPES
Definition: ratngs.h:250
@ PUNC_PERM
Definition: ratngs.h:237
@ USER_DAWG_PERM
Definition: ratngs.h:246
@ USER_PATTERN_PERM
Definition: ratngs.h:243
@ DOC_DAWG_PERM
Definition: ratngs.h:245
@ FREQ_DAWG_PERM
Definition: ratngs.h:247
bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, const WERD_CHOICE &word2)
Definition: ratngs.cpp:773
std::vector< BLOB_CHOICE_LIST * > BLOB_CHOICE_LIST_VECTOR
Definition: ratngs.h:627
const
Definition: upload.py:413
int16_t fontinfo_id2() const
Definition: ratngs.h:93
const std::vector< ScoredFont > & fonts() const
Definition: ratngs.h:97
void set_certainty(float newrat)
Definition: ratngs.h:150
void set_unichar_id(UNICHAR_ID newunichar_id)
Definition: ratngs.h:144
float certainty() const
Definition: ratngs.h:87
UNICHAR_ID unichar_id() const
Definition: ratngs.h:81
int script_id() const
Definition: ratngs.h:118
void set_classifier(BlobChoiceClassifier classifier)
Definition: ratngs.h:160
void print_full() const
Definition: ratngs.h:180
void print(const UNICHARSET *unicharset) const
Definition: ratngs.h:172
void set_matrix_cell(int col, int row)
Definition: ratngs.h:156
void set_fonts(const std::vector< ScoredFont > &fonts)
Definition: ratngs.h:100
static int SortByRating(const void *p1, const void *p2)
Definition: ratngs.h:186
void set_script(int newscript_id)
Definition: ratngs.h:153
float min_xheight() const
Definition: ratngs.h:124
float yshift() const
Definition: ratngs.h:130
float max_xheight() const
Definition: ratngs.h:127
BlobChoiceClassifier classifier() const
Definition: ratngs.h:133
static BLOB_CHOICE * deep_copy(const BLOB_CHOICE *src)
Definition: ratngs.h:163
bool IsClassified() const
Definition: ratngs.h:139
bool IsAdapted() const
Definition: ratngs.h:136
int16_t fontinfo_id() const
Definition: ratngs.h:90
const MATRIX_COORD & matrix_cell()
Definition: ratngs.h:121
bool PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const
Definition: ratngs.cpp:152
void set_rating(float newrat)
Definition: ratngs.h:147
float rating() const
Definition: ratngs.h:84
float max_x_height() const
Definition: ratngs.h:324
std::string debug_string() const
Definition: ratngs.h:479
float certainty() const
Definition: ratngs.h:315
bool set_unichars_in_script_order(bool in_script_order)
Definition: ratngs.h:509
float certainty(unsigned index) const
Definition: ratngs.h:318
void set_dangerous_ambig_found_(bool value)
Definition: ratngs.h:351
void remove_unichar_id(unsigned index)
Definition: ratngs.h:458
void set_x_heights(float min_height, float max_height)
Definition: ratngs.h:327
void set_unichar_id(UNICHAR_ID unichar_id, unsigned index)
Definition: ratngs.h:344
WERD_CHOICE(const UNICHARSET *unicharset)
Definition: ratngs.h:263
bool unichars_in_script_order() const
Definition: ratngs.h:513
UNICHAR_ID unichar_id(unsigned index) const
Definition: ratngs.h:299
bool empty() const
Definition: ratngs.h:284
uint8_t permuter() const
Definition: ratngs.h:331
static const float kBadRating
Definition: ratngs.h:260
WERD_CHOICE(const UNICHARSET *unicharset, int reserved)
Definition: ratngs.h:266
void make_bad()
Set the fields in this choice to be default (bad) values.
Definition: ratngs.h:419
bool dangerous_ambig_found() const
Definition: ratngs.h:348
WERD_CHOICE(const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uint8_t src_permuter, const UNICHARSET &unicharset)
Definition: ratngs.h:269
void set_certainty(float new_val)
Definition: ratngs.h:357
void set_length(unsigned len)
Definition: ratngs.h:366
WERD_CHOICE(const WERD_CHOICE &word)
Definition: ratngs.h:275
bool ContainsAnyNonSpaceDelimited() const
Definition: ratngs.h:488
void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
Definition: ratngs.h:428
void init(unsigned reserved)
Definition: ratngs.h:386
unsigned state(unsigned index) const
Definition: ratngs.h:303
void double_the_size()
Make more space in unichar_id_ and fragment_lengths_ arrays.
Definition: ratngs.h:372
void set_permuter(uint8_t perm)
Definition: ratngs.h:360
const UNICHARSET * unicharset() const
Definition: ratngs.h:281
float min_x_height() const
Definition: ratngs.h:321
const std::vector< UNICHAR_ID > & unichar_ids() const
Definition: ratngs.h:296
const std::string & unichar_string() const
Definition: ratngs.h:526
unsigned length() const
Definition: ratngs.h:287
void remove_last_unichar_id()
Definition: ratngs.h:455
void set_adjust_factor(float factor)
Definition: ratngs.h:293
void print() const
Definition: ratngs.h:561
const std::string & unichar_lengths() const
Definition: ratngs.h:533
std::string & unichar_string()
Definition: ratngs.h:519
float rating() const
Definition: ratngs.h:312
float adjust_factor() const
Definition: ratngs.h:290
void set_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty, unsigned index)
Definition: ratngs.h:437
void set_rating(float new_val)
Definition: ratngs.h:354
ScriptPos BlobPosition(unsigned index) const
Definition: ratngs.h:306
bool IsAllSpaces() const
Definition: ratngs.h:497
std::string debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:331
#define TESS_API
Definition: export.h:32