tesseract v5.3.3.20231005
|
#include <ratngs.h>
Public Member Functions | |
WERD_CHOICE (const UNICHARSET *unicharset) | |
WERD_CHOICE (const UNICHARSET *unicharset, int reserved) | |
WERD_CHOICE (const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uint8_t src_permuter, const UNICHARSET &unicharset) | |
WERD_CHOICE (const char *src_string, const UNICHARSET &unicharset) | |
WERD_CHOICE (const WERD_CHOICE &word) | |
~WERD_CHOICE () | |
const UNICHARSET * | unicharset () const |
bool | empty () const |
unsigned | length () const |
float | adjust_factor () const |
void | set_adjust_factor (float factor) |
const std::vector< UNICHAR_ID > & | unichar_ids () const |
UNICHAR_ID | unichar_id (unsigned index) const |
unsigned | state (unsigned index) const |
ScriptPos | BlobPosition (unsigned index) const |
float | rating () const |
float | certainty () const |
float | certainty (unsigned index) const |
float | min_x_height () const |
float | max_x_height () const |
void | set_x_heights (float min_height, float max_height) |
uint8_t | permuter () const |
const char * | permuter_name () const |
BLOB_CHOICE_LIST * | blob_choices (unsigned index, MATRIX *ratings) const |
MATRIX_COORD | MatrixCoord (unsigned index) const |
void | set_unichar_id (UNICHAR_ID unichar_id, unsigned index) |
bool | dangerous_ambig_found () const |
void | set_dangerous_ambig_found_ (bool value) |
void | set_rating (float new_val) |
void | set_certainty (float new_val) |
void | set_permuter (uint8_t perm) |
void | set_length (unsigned len) |
void | double_the_size () |
Make more space in unichar_id_ and fragment_lengths_ arrays. More... | |
void | init (unsigned reserved) |
void | init (const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uint8_t src_permuter) |
void | make_bad () |
Set the fields in this choice to be default (bad) values. More... | |
void | append_unichar_id_space_allocated (UNICHAR_ID unichar_id, int blob_count, float rating, float certainty) |
void | append_unichar_id (UNICHAR_ID unichar_id, int blob_count, float rating, float certainty) |
void | set_unichar_id (UNICHAR_ID unichar_id, int blob_count, float rating, float certainty, unsigned index) |
void | set_blob_choice (unsigned index, int blob_count, const BLOB_CHOICE *blob_choice) |
bool | contains_unichar_id (UNICHAR_ID unichar_id) const |
void | remove_unichar_ids (unsigned index, int num) |
void | remove_last_unichar_id () |
void | remove_unichar_id (unsigned index) |
bool | has_rtl_unichar_id () const |
void | reverse_and_mirror_unichar_ids () |
void | punct_stripped (unsigned *start_core, unsigned *end_core) const |
void | GetNonSuperscriptSpan (int *start, int *end) const |
WERD_CHOICE | shallow_copy (unsigned start, unsigned end) const |
void | string_and_lengths (std::string *word_str, std::string *word_lengths_str) const |
std::string | debug_string () const |
bool | ContainsAnyNonSpaceDelimited () const |
bool | IsAllSpaces () const |
bool | set_unichars_in_script_order (bool in_script_order) |
bool | unichars_in_script_order () const |
std::string & | unichar_string () |
const std::string & | unichar_string () const |
const std::string & | unichar_lengths () const |
void | SetScriptPositions (bool small_caps, TWERD *word, int debug=0) |
void | SetAllScriptPositions (ScriptPos position) |
int | GetTopScriptID () const |
void | UpdateStateForSplit (int blob_position) |
unsigned | TotalOfStates () const |
void | print () const |
void | print (const char *msg) const |
void | print_state (const char *msg) const |
void | DisplaySegmentation (TWERD *word) |
WERD_CHOICE & | operator+= (const WERD_CHOICE &second) |
WERD_CHOICE & | operator= (const WERD_CHOICE &source) |
Public Member Functions inherited from tesseract::ELIST_LINK | |
ELIST_LINK () | |
ELIST_LINK (const ELIST_LINK &) | |
void | operator= (const ELIST_LINK &) |
Static Public Member Functions | |
static const char * | permuter_name (uint8_t permuter) |
static ScriptPos | ScriptPositionOf (bool print_debug, const UNICHARSET &unicharset, const TBOX &blob_box, UNICHAR_ID unichar_id) |
Static Public Attributes | |
static const float | kBadRating = 100000.0 |
|
inline |
|
inline |
|
inline |
Definition at line 269 of file ratngs.h.
tesseract::WERD_CHOICE::WERD_CHOICE | ( | const char * | src_string, |
const UNICHARSET & | unicharset | ||
) |
Constructor to build a WERD_CHOICE from the given string. The function assumes that src_string is not nullptr.
Definition at line 213 of file ratngs.cpp.
|
inline |
Definition at line 275 of file ratngs.h.
|
default |
|
inline |
void tesseract::WERD_CHOICE::append_unichar_id | ( | UNICHAR_ID | unichar_id, |
int | blob_count, | ||
float | rating, | ||
float | certainty | ||
) |
append_unichar_id
Make sure there is enough space in the word for the new unichar id and call append_unichar_id_space_allocated().
Definition at line 447 of file ratngs.cpp.
|
inline |
This function assumes that there is enough space reserved in the WERD_CHOICE for adding another unichar. This is an efficient alternative to append_unichar_id().
Definition at line 428 of file ratngs.h.
BLOB_CHOICE_LIST * tesseract::WERD_CHOICE::blob_choices | ( | unsigned | index, |
MATRIX * | ratings | ||
) | const |
Definition at line 274 of file ratngs.cpp.
|
inline |
|
inline |
|
inline |
bool tesseract::WERD_CHOICE::contains_unichar_id | ( | UNICHAR_ID | unichar_id | ) | const |
contains_unichar_id
Returns true if unichar_ids_ contains the given unichar_id, false otherwise.
Definition at line 309 of file ratngs.cpp.
|
inline |
Definition at line 488 of file ratngs.h.
|
inline |
|
inline |
Definition at line 479 of file ratngs.h.
void tesseract::WERD_CHOICE::DisplaySegmentation | ( | TWERD * | word | ) |
Definition at line 728 of file ratngs.cpp.
|
inline |
Make more space in unichar_id_ and fragment_lengths_ arrays.
Definition at line 372 of file ratngs.h.
|
inline |
void tesseract::WERD_CHOICE::GetNonSuperscriptSpan | ( | int * | start, |
int * | end | ||
) | const |
Definition at line 378 of file ratngs.cpp.
int tesseract::WERD_CHOICE::GetTopScriptID | ( | ) | const |
Definition at line 631 of file ratngs.cpp.
bool tesseract::WERD_CHOICE::has_rtl_unichar_id | ( | ) | const |
has_rtl_unichar_id
Returns true if unichar_ids contain at least one "strongly" RTL unichar.
Definition at line 411 of file ratngs.cpp.
void tesseract::WERD_CHOICE::init | ( | const char * | src_string, |
const char * | src_lengths, | ||
float | src_rating, | ||
float | src_certainty, | ||
uint8_t | src_permuter | ||
) |
Helper function to build a WERD_CHOICE from the given string, fragment lengths, rating, certainty and permuter. The function assumes that src_string is not nullptr. src_lengths argument could be nullptr, in which case the unichars in src_string are assumed to all be of length 1.
Helper function to build a WERD_CHOICE from the given string, fragment lengths, rating, certainty and permuter.
The function assumes that src_string is not nullptr. src_lengths argument could be nullptr, in which case the unichars in src_string are assumed to all be of length 1.
Definition at line 238 of file ratngs.cpp.
|
inline |
Initializes WERD_CHOICE - reserves length slots in unichar_ids_ and fragment_length_ arrays. Sets other values to default (blank) values.
Definition at line 386 of file ratngs.h.
|
inline |
|
inline |
|
inline |
Set the fields in this choice to be default (bad) values.
MATRIX_COORD tesseract::WERD_CHOICE::MatrixCoord | ( | unsigned | index | ) | const |
|
inline |
|
inline |
WERD_CHOICE & tesseract::WERD_CHOICE::operator+= | ( | const WERD_CHOICE & | second | ) |
Concatenate a second word rating onto the end of this current one. The ratings are added and the confidence is the min. If the permuters are NOT the same, the permuter is set to COMPOUND_PERM.
Definition at line 462 of file ratngs.cpp.
WERD_CHOICE & tesseract::WERD_CHOICE::operator= | ( | const WERD_CHOICE & | source | ) |
Allocate enough memory to hold a copy of source and copy over all the information from source to this WERD_CHOICE.
Definition at line 499 of file ratngs.cpp.
|
inline |
const char * tesseract::WERD_CHOICE::permuter_name | ( | ) | const |
Definition at line 267 of file ratngs.cpp.
|
static |
Definition at line 189 of file ratngs.cpp.
|
inline |
void tesseract::WERD_CHOICE::print | ( | const char * | msg | ) | const |
Print WERD_CHOICE to stdout.
Definition at line 689 of file ratngs.cpp.
void tesseract::WERD_CHOICE::print_state | ( | const char * | msg | ) | const |
void tesseract::WERD_CHOICE::punct_stripped | ( | unsigned * | start, |
unsigned * | end | ||
) | const |
punct_stripped
Returns the half-open interval of unichar_id indices [start, end) that encloses the core portion of this word – the part after stripping punctuation from the left and right.
Definition at line 367 of file ratngs.cpp.
|
inline |
|
inline |
|
inline |
Definition at line 458 of file ratngs.h.
void tesseract::WERD_CHOICE::remove_unichar_ids | ( | unsigned | start, |
int | num | ||
) |
remove_unichar_ids
Removes num unichar ids starting from index start from unichar_ids_ and updates length_ and fragment_lengths_ to reflect this change. Note: this function does not modify rating_ and certainty_.
Definition at line 325 of file ratngs.cpp.
void tesseract::WERD_CHOICE::reverse_and_mirror_unichar_ids | ( | ) |
reverse_and_mirror_unichar_ids
Reverses and mirrors unichars in unichar_ids.
Definition at line 349 of file ratngs.cpp.
|
static |
Definition at line 599 of file ratngs.cpp.
|
inline |
void tesseract::WERD_CHOICE::set_blob_choice | ( | unsigned | index, |
int | blob_count, | ||
const BLOB_CHOICE * | blob_choice | ||
) |
Definition at line 297 of file ratngs.cpp.
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
void tesseract::WERD_CHOICE::SetAllScriptPositions | ( | tesseract::ScriptPos | position | ) |
void tesseract::WERD_CHOICE::SetScriptPositions | ( | bool | small_caps, |
TWERD * | word, | ||
int | debug = 0 | ||
) |
Definition at line 528 of file ratngs.cpp.
WERD_CHOICE tesseract::WERD_CHOICE::shallow_copy | ( | unsigned | start, |
unsigned | end | ||
) | const |
Definition at line 393 of file ratngs.cpp.
|
inline |
void tesseract::WERD_CHOICE::string_and_lengths | ( | std::string * | word_str, |
std::string * | word_lengths_str | ||
) | const |
string_and_lengths
Populates the given word_str with unichars from unichar_ids and word_lengths_str with the corresponding unichar lengths.
Definition at line 427 of file ratngs.cpp.
unsigned tesseract::WERD_CHOICE::TotalOfStates | ( | ) | const |
|
inline |
|
inline |
|
inline |
Definition at line 533 of file ratngs.h.
|
inline |
Definition at line 519 of file ratngs.h.
|
inline |
Definition at line 526 of file ratngs.h.
|
inline |
|
inline |
void tesseract::WERD_CHOICE::UpdateStateForSplit | ( | int | blob_position | ) |
|
static |