98 prev_word_best_choice =
nullptr;
99 blame_reasons.clear();
108 BLOCK_LIST *block_list,
152 ROW_RES(
bool merge_similar_words,
ROW *the_row);
219 std::vector<std::vector<std::pair<const char *, float>>>
timesteps;
221 std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
226 bool leading_space =
false;
293 bool tess_failed =
false;
301 bool tess_accepted =
false;
302 bool tess_would_adapt =
false;
304 bool small_caps =
false;
305 bool odd_size =
false;
309 int8_t fontinfo_id_count = 0;
310 int8_t fontinfo_id2_count = 0;
311 bool guessed_x_ht =
true;
312 bool guessed_caps_ht =
true;
314 float x_height = 0.0f;
315 float caps_height = 0.0f;
316 float baseline_shift = 0.0f;
319 float space_certainty = 0.0f;
337 bool combination =
false;
338 bool part_of_combo =
false;
339 bool reject_spaces =
false;
361 const char *
BestUTF8(
unsigned blob_index,
bool in_rtl_context)
const {
362 if (best_choice ==
nullptr || blob_index >= best_choice->
length()) {
366 if (
static_cast<unsigned>(
id) >= uch_set->
size()) {
370 if (in_rtl_context && mirrored > 0) {
376 const char *
RawUTF8(
unsigned blob_index)
const {
377 if (blob_index >= raw_choice->
length()) {
381 if (
static_cast<unsigned>(
id) >= uch_set->
size()) {
388 if (best_choice ==
nullptr || blob_index >= best_choice->
length()) {
395 if (uch_set ==
nullptr || best_choice ==
nullptr ||
396 best_choice->
length() < 1) {
399 for (
unsigned id = 0;
id < best_choice->
length();
id++) {
400 unsigned unichar_id = best_choice->
unichar_id(
id);
401 if (unichar_id >= uch_set->
size()) {
414 if (uch_set ==
nullptr || best_choice ==
nullptr ||
415 best_choice->
length() < 1) {
418 for (
unsigned id = 0;
id < best_choice->
length();
id++) {
419 unsigned unichar_id = best_choice->
unichar_id(
id);
420 if (unichar_id >= uch_set->
size()) {
441 void ClearWordChoices();
448 void CopySimpleFields(
const WERD_RES &source);
454 void InitForRetryRecognition(
const WERD_RES &source);
471 bool SetupForRecognition(
const UNICHARSET &unicharset_in,
473 int norm_mode,
const TBOX *norm_box,
474 bool numeric_mode,
bool use_body_size,
475 bool allow_detailed_fx,
ROW *row,
481 void SetupBasicsFromChoppedWord(
const UNICHARSET &unicharset_in);
488 void SetupWordScript(
const UNICHARSET &unicharset_in);
491 void SetupBlamerBundle();
494 void SetupBlobWidthsAndGaps();
499 void InsertSeam(
int blob_number,
SEAM *seam);
503 bool AlternativeChoiceAdjustmentsWorseThan(
float threshold)
const;
511 bool StatesAllValid();
515 void DebugWordChoices(
bool debug,
const char *word_to_debug);
518 void DebugTopChoice(
const char *msg)
const;
522 void FilterWordChoices(
int debug_level);
539 void ComputeAdaptionThresholds(
float certainty_scale,
float min_rating,
540 float max_rating,
float rating_margin,
552 bool LogNewCookedChoice(
int max_num_choices,
bool debug,
556 void PrintBestChoices()
const;
560 int GetBlobsWidth(
int start_blob,
int last_blob)
const;
562 int GetBlobsGap(
unsigned blob_index)
const;
573 BLOB_CHOICE_LIST *GetBlobChoices(
int index)
const;
583 void ConsumeWordResults(
WERD_RES *word);
591 void RebuildBestState();
595 void CloneChoppedToRebuild();
602 void SetScriptPositions();
614 void FakeClassifyWord(
unsigned blob_count,
BLOB_CHOICE **choices);
621 void BestChoiceToCorrectText();
628 bool ConditionalBlobMerge(
630 const std::function<
bool(
const TBOX &,
const TBOX &)> &box_cb);
634 void MergeAdjacentBlobs(
unsigned index);
646 bool HyphenBoxesOverlap(
const TBOX &box1,
const TBOX &box2);
652 void merge_tess_fails();
675 bool PiecesAllNatural(
int start,
int count)
const;
689 page_res = the_page_res;
696 return word_res == other.word_res && row_res == other.row_res &&
697 block_res == other.block_res;
701 return !(*
this == other);
711 return start_page(
false);
714 return start_page(
true);
716 WERD_RES *start_page(
bool empty_ok);
737 void DeleteCurrentWord();
741 void MakeCurrentWordFuzzy();
744 return internal_forward(
false,
false);
748 return internal_forward(
false,
true);
755 return prev_word_res;
761 return prev_block_res;
773 return next_word_res;
779 return next_block_res;
781 void rej_stat_word();
782 void ResetWordIterator();
785 WERD_RES *internal_forward(
bool new_block,
bool empty_ok);
799 BLOCK_RES_IT block_res_it;
800 ROW_RES_IT row_res_it;
801 WERD_RES_IT word_res_it;
804 WERD_RES_IT wr_it_of_current_word;
805 WERD_RES_IT wr_it_of_next_word;
#define ELISTIZEH(CLASSNAME)
#define CLISTIZEH(CLASSNAME)
MATRIX * DeepCopy() const
BLOCK_RES_LIST block_res_list
std::vector< std::string > misadaption_log
WERD_CHOICE ** prev_word_best_choice
std::vector< int > blame_reasons
ROW_RES_LIST row_res_list
WERD_RES_LIST word_res_list
int32_t whole_word_rej_count
void copy_on(WERD_RES *word_res)
bool AnyRtlCharsInWord() const
std::vector< std::string > correct_text
std::vector< std::vector< std::pair< const char *, float > > > timesteps
const char * RawUTF8(unsigned blob_index) const
const char * BestUTF8(unsigned blob_index, bool in_rtl_context) const
WERD_CHOICE_LIST best_choices
bool UnicharsInReadingOrder() const
std::vector< int > best_state
WERD_RES(const WERD_RES &source)
std::vector< int > blob_widths
std::vector< int > blob_gaps
static WERD_RES * deep_copy(const WERD_RES *src)
std::vector< std::vector< std::pair< const char *, float > > > CTC_symbol_choices
UNICHARSET::Direction SymbolDirection(unsigned blob_index) const
std::vector< SEAM * > seam_array
bool AnyLtrCharsInWord() const
std::vector< std::vector< std::vector< std::pair< const char *, float > > > > segmented_timesteps
BLOCK_RES * block() const
BLOCK_RES * next_block() const
WERD_RES * prev_word() const
bool operator==(const PAGE_RES_IT &other) const
BLOCK_RES * prev_block() const
WERD_RES * next_word() const
bool operator!=(const PAGE_RES_IT &other) const
WERD_RES * restart_page()
PAGE_RES_IT(PAGE_RES *the_page_res)
WERD_RES * forward_with_empties()
WERD_RES * restart_page_with_empties()
ROW_RES * next_row() const
ROW_RES * prev_row() const
bool unichars_in_script_order() const
UNICHAR_ID unichar_id(unsigned index) const
bool flag(WERD_FLAGS mask) const
void set_flag(WERD_FLAGS mask, bool value)
void copy_on(WERD *other)
Direction get_direction(UNICHAR_ID unichar_id) const
const char * id_to_unichar(UNICHAR_ID id) const
UNICHAR_ID get_mirror(UNICHAR_ID unichar_id) const
const char * id_to_unichar_ext(UNICHAR_ID id) const