35 LMPainPoints pain_points(segsearch_max_pain_points, segsearch_max_char_wh_ratio,
36 assume_fixed_pitch_char_segment, &
getDict(), segsearch_debug_level);
39 float rating_cert_scale = -1.0 *
getDict().certainty_scale / rating_scale;
40 std::vector<SegSearchPending> pending;
41 InitialSegSearch(word_res, &pain_points, &pending, best_choice_bundle, blamer_bundle);
46 &pain_points, &pending);
59 float pain_point_priority;
60 int num_futile_classifications = 0;
61 std::string blamer_debug;
62 while (wordrec_enable_assoc &&
66 bool found_nothing =
true;
68 while ((pp_type = pain_points.
Deque(&pain_point, &pain_point_priority)) !=
LM_PPTYPE_NUM) {
74 found_nothing =
false;
79 if (segsearch_debug_level > 0) {
80 tprintf(
"Pain points queue is empty\n");
86 &pain_points, blamer_bundle);
89 best_choice_bundle, blamer_bundle);
90 if (!best_choice_bundle->
updated) {
91 ++num_futile_classifications;
94 if (segsearch_debug_level > 0) {
95 tprintf(
"num_futile_classifications %d\n", num_futile_classifications);
98 best_choice_bundle->
updated =
false;
102 if (
SegSearchDone(num_futile_classifications) && blamer_bundle !=
nullptr &&
107 if (blamer_bundle !=
nullptr) {
111 if (segsearch_debug_level > 0) {
112 tprintf(
"Done with SegSearch (AcceptableChoiceFound: %d)\n",
121 std::vector<SegSearchPending> *pending,
123 if (segsearch_debug_level > 0) {
124 tprintf(
"Starting SegSearch on ratings matrix%s:\n",
125 wordrec_enable_assoc ?
" (with assoc)" :
"");
133 float rating_cert_scale = -1.0 *
getDict().certainty_scale / rating_scale;
136 segsearch_max_char_wh_ratio, rating_cert_scale);
142 if (blamer_bundle !=
nullptr) {
157 (*pending)[0].SetColumnClassified();
163 std::vector<SegSearchPending> *pending,
WERD_RES *word_res,
169 for (
int col = starting_col; col < ratings->
dimension(); ++col) {
170 if (!(*pending)[col].WorkToDo()) {
175 if ((*pending)[col].SingleRow() >= 0) {
176 first_row = last_row = (*pending)[col].SingleRow();
178 if (segsearch_debug_level > 0) {
179 tprintf(
"\n\nUpdateSegSearchNodes: col=%d, rows=[%d,%d], alljust=%d\n", col, first_row,
180 last_row, (*pending)[col].IsRowJustClassified(INT32_MAX));
183 for (
int row = first_row; row <= last_row; ++row) {
185 BLOB_CHOICE_LIST *current_node = ratings->
get(col, row);
187 if (current_node !=
nullptr &&
188 language_model_->UpdateState((*pending)[col].IsRowJustClassified(row), col, row,
189 current_node, parent_node, pain_points, word_res,
190 best_choice_bundle, blamer_bundle) &&
194 (*pending)[row + 1].RevisitWholeColumn();
195 if (segsearch_debug_level > 0) {
196 tprintf(
"Added child col=%d to pending\n", row + 1);
201 if (best_choice_bundle->
best_vse !=
nullptr) {
205 if (!best_choice_bundle->
fixpt.empty()) {
213 for (
unsigned col = 0; col < pending->size(); ++col) {
214 (*pending)[col].Clear();
215 ViterbiStateEntry_IT vse_it(&best_choice_bundle->
beam[col]->viterbi_state_entries);
216 for (vse_it.mark_cycle_pt(); !vse_it.cycled_list(); vse_it.forward()) {
217 vse_it.data()->updated =
false;
223 const char *pain_point_type,
224 std::vector<SegSearchPending> *pending,
227 if (segsearch_debug_level > 0) {
228 tprintf(
"Classifying pain point %s priority=%.4f, col=%d, row=%d\n", pain_point_type,
229 pain_point_priority, pain_point.
col, pain_point.
row);
234 if (!pain_point.
Valid(*ratings)) {
238 BLOB_CHOICE_LIST *classified =
241 BLOB_CHOICE_LIST *lst = ratings->
get(pain_point.
col, pain_point.
row);
242 if (lst ==
nullptr) {
243 ratings->
put(pain_point.
col, pain_point.
row, classified);
249 BLOB_CHOICE_IT it(lst);
250 it.add_list_before(classified);
252 classified =
nullptr;
255 if (segsearch_debug_level > 0) {
263 if (classified !=
nullptr && !classified->empty()) {
264 if (pain_point.
col > 0) {
266 segsearch_max_char_wh_ratio, word_res);
270 segsearch_max_char_wh_ratio, word_res);
273 (*pending)[pain_point.
col].SetBlobClassified(pain_point.
row);
280 std::vector<SegSearchPending> &pending) {
283 for (
auto &col : best_choice_bundle->
beam) {
288 best_choice_bundle->
best_vse =
nullptr;
290 pending[0].SetColumnClassified();
291 for (
auto &data : pending) {
297 BlamerBundle *blamer_bundle, std::string &blamer_debug) {
298 pain_points->
Clear();
300 wordrec_debug_blamer, blamer_debug, pain_points,
301 segsearch_max_char_wh_ratio, word_res);
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
void tprintf(const char *format,...)
void put(ICOORD pos, const T &thing)
bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const
bool GuidedSegsearchStillGoing() const
void FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, std::string &debug_str)
void SetChopperBlame(const WERD_RES *word, bool debug)
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
void InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id, bool debug, std::string &debug_str, tesseract::LMPainPoints *pain_points, double max_char_wh_ratio, WERD_RES *word_res)
void SetupCorrectSegmentation(const TWERD *word, bool debug)
bool Classified(int col, int row, int wildcard_id) const
void IncreaseBandSize(int bandwidth)
void print(const UNICHARSET &unicharset) const
bool Valid(const MATRIX &m) const
WERD_CHOICE * best_choice
std::vector< SEAM * > seam_array
static void PrintSeams(const char *label, const std::vector< SEAM * > &seams)
bool GeneratePainPoint(int col, int row, LMPainPointsType pp_type, float special_priority, bool ok_to_extend, float max_char_wh_ratio, WERD_RES *word_res)
void GenerateInitial(WERD_RES *word_res)
void GenerateFromPath(float rating_cert_scale, ViterbiStateEntry *vse, WERD_RES *word_res)
static const char * PainPointDescription(LMPainPointsType type)
void GenerateFromAmbigs(const DANGERR &fixpt, ViterbiStateEntry *vse, WERD_RES *word_res)
LMPainPointsType Deque(MATRIX_COORD *pp, float *priority)
bool updated
Set to true if the entry has just been created/updated.
Struct to store information maintained by various language model components.
Bundle together all the things pertaining to the best choice/state.
std::vector< LanguageModelState * > beam
DANGERR fixpt
Places to try to fix the word suggested by ambiguity checking.
ViterbiStateEntry * best_vse
Best ViterbiStateEntry and BLOB_CHOICE.
bool updated
Flag to indicate whether anything was changed.
void improve_by_chopping(float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, std::vector< SegSearchPending > *pending)
void InitBlamerForSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle, std::string &blamer_debug)
WERD_CHOICE * prev_word_best_choice_
void UpdateSegSearchNodes(float rating_cert_scale, int starting_col, std::vector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
void InitialSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, std::vector< SegSearchPending > *pending, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
void ProcessSegSearchPainPoint(float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, std::vector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
bool SegSearchDone(int num_futile_classifications)
virtual BLOB_CHOICE_LIST * classify_piece(const std::vector< SEAM * > &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
void ResetNGramSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, std::vector< SegSearchPending > &pending)
void SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
std::unique_ptr< LanguageModel > language_model_