19#ifndef TESSERACT_WORDREC_WORDREC_H_
20#define TESSERACT_WORDREC_WORDREC_H_
23# include "config_auto.h"
26#ifdef DISABLED_LEGACY_ENGINE
41class TESS_API Wordrec :
public Classify {
50 virtual ~Wordrec() =
default;
53 void program_editup(
const std::string &textbase, TessdataManager *init_classifier,
54 TessdataManager *init_dict);
55 void program_editdown(int32_t elasped_time);
57 int dict_word(
const WERD_CHOICE &word);
60 WERD_CHOICE *prev_word_best_choice_;
95struct BestChoiceBundle;
114 : classified_row_(-1), revisit_whole_column_(false), column_classified_(false) {}
119 column_classified_ =
true;
125 classified_row_ = row;
131 revisit_whole_column_ =
true;
136 classified_row_ = -1;
137 revisit_whole_column_ =
false;
138 column_classified_ =
false;
144 return revisit_whole_column_ || column_classified_ || classified_row_ >= 0;
148 return row == classified_row_ || column_classified_;
152 return revisit_whole_column_ || column_classified_ ? -1 : classified_row_;
164 bool revisit_whole_column_;
168 bool column_classified_;
242 (this->*fill_lattice_)(ratings, best_choices, unicharset, blamer_bundle);
246 void program_editup(
const std::string &textbase,
TessdataManager *init_classifier,
249 void program_editdown(int32_t elasped_time);
253 BLOB_CHOICE_LIST *call_matcher(
TBLOB *blob);
318 std::vector<SegSearchPending> *pending,
326 bool is_inside_angle(
EDGEPT *pt);
328 EDGEPT *pick_close_point(
EDGEPT *critical_point,
EDGEPT *vertical_point,
int *best_dist);
332 void vertical_projection_point(
EDGEPT *split_point,
EDGEPT *target_point,
EDGEPT **best_point,
333 EDGEPT_CLIST *new_points);
336 SEAM *attempt_blob_chop(
TWERD *word,
TBLOB *blob, int32_t blob_number,
bool italic_blob,
337 const std::vector<SEAM *> &seams);
338 SEAM *chop_numbered_blob(
TWERD *word, int32_t blob_number,
bool italic_blob,
339 const std::vector<SEAM *> &seams);
340 SEAM *chop_overlapping_blob(
const std::vector<TBOX> &boxes,
bool italic_blob,
WERD_RES *word_res,
341 unsigned *blob_number);
342 SEAM *improve_one_blob(
const std::vector<BLOB_CHOICE *> &blob_choices,
DANGERR *fixpt,
343 bool split_next_to_fragment,
bool italic_blob,
WERD_RES *word,
344 unsigned *blob_number);
345 SEAM *chop_one_blob(
const std::vector<TBOX> &boxes,
346 const std::vector<BLOB_CHOICE *> &blob_choices,
WERD_RES *word_res,
347 unsigned *blob_number);
348 void chop_word_main(
WERD_RES *word);
349 void improve_by_chopping(
float rating_cert_scale,
WERD_RES *word,
351 LMPainPoints *pain_points, std::vector<SegSearchPending> *pending);
352 int select_blob_to_split(
const std::vector<BLOB_CHOICE *> &blob_choices,
float rating_ceiling,
353 bool split_next_to_fragment);
354 int select_blob_to_split_from_fixpt(
DANGERR *fixpt);
357 void add_seam_to_queue(
float new_priority,
SEAM *new_seam,
SeamQueue *seams);
376 virtual BLOB_CHOICE_LIST *classify_piece(
const std::vector<SEAM *> &seams, int16_t start,
377 int16_t end,
const char *description,
TWERD *word,
390 void (
Wordrec::*fill_lattice_)(
const MATRIX &ratings,
const WERD_CHOICE_LIST &best_choices,
395 return (language_model_->AcceptableChoiceFound() ||
396 num_futile_classifications >= segsearch_max_futile_classifications);
424 void UpdateSegSearchNodes(
float rating_cert_scale,
int starting_col,
425 std::vector<SegSearchPending> *pending,
WERD_RES *word_res,
431 void ProcessSegSearchPainPoint(
float pain_point_priority,
const MATRIX_COORD &pain_point,
432 const char *pain_point_type,
433 std::vector<SegSearchPending> *pending,
WERD_RES *word_res,
439 std::vector<SegSearchPending> &pending);
445 BlamerBundle *blamer_bundle, std::string &blamer_debug);
#define ELISTIZEH(CLASSNAME)
BOOL_VAR_H(wordrec_display_splits)
std::vector< DANGERR_INFO > DANGERR
const std::vector< std::string > split(const std::string &s, char c)
Bundle together all the things pertaining to the best choice/state.
void SetBlobClassified(int row)
bool IsRowJustClassified(int row) const
void SetColumnClassified()
void RevisitWholeColumn()
FRAGMENT(EDGEPT *head_pt, EDGEPT *tail_pt)
BOOL_VAR_H(assume_fixed_pitch_char_segment)
INT_VAR_H(segsearch_debug_level)
WERD_CHOICE * prev_word_best_choice_
INT_VAR_H(wordrec_debug_level)
double_VAR_H(chop_width_change_knob)
BOOL_VAR_H(merge_fragments_in_matrix)
void CallFillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
INT_VAR_H(segsearch_max_futile_classifications)
INT_VAR_H(chop_inside_angle)
INT_VAR_H(chop_same_distance)
BOOL_VAR_H(wordrec_skip_no_truth_words)
INT_VAR_H(chop_seam_pile_size)
void SaveAltChoices(const LIST &best_choices, WERD_RES *word)
BOOL_VAR_H(wordrec_debug_blamer)
double_VAR_H(chop_center_knob)
INT_VAR_H(chop_split_length)
void FillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
INT_VAR_H(repair_unchopped_blobs)
INT_VAR_H(chop_x_y_weight)
BOOL_VAR_H(wordrec_run_blamer)
BOOL_VAR_H(force_word_assoc)
double_VAR_H(chop_ok_split)
double_VAR_H(chop_sharpness_knob)
bool SegSearchDone(int num_futile_classifications)
double_VAR_H(segsearch_max_char_wh_ratio)
INT_VAR_H(chop_min_outline_area)
double_VAR_H(chop_good_split)
INT_VAR_H(segsearch_max_pain_points)
BOOL_VAR_H(chop_vertical_creep)
BOOL_VAR_H(wordrec_enable_assoc)
double_VAR_H(tessedit_certainty_threshold)
~Wordrec() override=default
BOOL_VAR_H(save_alt_choices)
BOOL_VAR_H(chop_new_seam_pile)
std::unique_ptr< LanguageModel > language_model_
INT_VAR_H(chop_centered_maxwidth)
double_VAR_H(chop_overlap_knob)
double_VAR_H(chop_split_dist_knob)
INT_VAR_H(wordrec_max_join_chunks)
INT_VAR_H(chop_min_outline_points)