21#ifndef TESSERACT_WORDREC_LANGUAGE_MODEL_H_
22#define TESSERACT_WORDREC_LANGUAGE_MODEL_H_
77 float rating_cert_scale);
91 bool UpdateState(
bool just_classified,
int curr_col,
int curr_row, BLOB_CHOICE_LIST *curr_list,
109 if (language_model_use_sigmoidal_certainty) {
113 cert = -cert /
dict_->certainty_scale;
114 return 1.0f / (1.0f + exp(10.0f * cert));
116 return (-1.0f / cert);
121 if (num_problems == 0) {
124 if (num_problems == 1) {
127 return (penalty + (language_model_penalty_increment *
static_cast<float>(num_problems - 1)));
136 if (dawg_info !=
nullptr) {
138 language_model_penalty_case) +
144 language_model_penalty_chartype) +
146 language_model_penalty_spacing) +
176 ViterbiStateEntry_IT *vse_it,
213 int curr_col,
int curr_row,
float outline_length,
223 float ComputeNgramCost(
const char *unichar,
float certainty,
float denom,
const char *context,
224 int *unichar_step_len,
bool *found_small_prob,
float *ngram_prob);
261 col, row, (parent_vse !=
nullptr) ? &(parent_vse->
associate_stats) :
nullptr,
262 (parent_vse !=
nullptr) ? parent_vse->
length : 0,
fixed_pitch_, max_char_wh_ratio, word_res,
263 language_model_debug_level > 2, associate_stats);
295 INT_VAR_H(language_model_viterbi_list_max_num_prunable);
unsigned char LanguageModelFlagsType
Used for expressing various language model flags.
std::vector< DANGERR_INFO > DANGERR
static void ComputeStats(int col, int row, const AssociateStats *parent_stats, int parent_path_length, bool fixed_pitch, float max_char_wh_ratio, WERD_RES *word_res, bool debug, AssociateStats *stats)
BOOL_VAR_H(language_model_ngram_space_delimited_language)
void SetAcceptableChoiceFound(bool val)
LanguageModelDawgInfo * GenerateDawgInfo(bool word_end, int curr_col, int curr_row, const BLOB_CHOICE &b, const ViterbiStateEntry *parent_vse)
BOOL_VAR_H(language_model_use_sigmoidal_certainty)
INT_VAR_H(language_model_viterbi_list_max_num_prunable)
DawgPositionVector beginning_active_dawgs_
bool PrunablePath(const ViterbiStateEntry &vse)
static const LanguageModelFlagsType kXhtConsistentFlag
INT_VAR_H(language_model_ngram_order)
INT_VAR_H(language_model_viterbi_list_max_size)
double_VAR_H(language_model_penalty_font)
float ComputeAdjustment(int num_problems, float penalty)
static const LanguageModelFlagsType kSmallestRatingFlag
double_VAR_H(language_model_penalty_case)
static void ExtractFeaturesFromPath(const ViterbiStateEntry &vse, float features[])
ParamsModel params_model_
static const LanguageModelFlagsType kDigitFlag
double_VAR_H(language_model_penalty_non_freq_dict_word)
bool AcceptableChoiceFound()
float ComputeNgramCost(const char *unichar, float certainty, float denom, const char *context, int *unichar_step_len, bool *found_small_prob, float *ngram_prob)
bool AcceptablePath(const ViterbiStateEntry &vse)
void UpdateBestChoice(ViterbiStateEntry *vse, LMPainPoints *pain_points, WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
INT_VAR_H(wordrec_display_segmentations)
int prev_word_unichar_step_len_
ParamsModel & getParamsModel()
double_VAR_H(language_model_ngram_scale_factor)
bool GetTopLowerUpperDigit(BLOB_CHOICE_LIST *curr_list, BLOB_CHOICE **first_lower, BLOB_CHOICE **first_upper, BLOB_CHOICE **first_digit) const
static const LanguageModelFlagsType kLowerCaseFlag
bool AddViterbiStateEntry(LanguageModelFlagsType top_choice_flags, float denom, bool word_end, int curr_col, int curr_row, BLOB_CHOICE *b, LanguageModelState *curr_state, ViterbiStateEntry *parent_vse, LMPainPoints *pain_points, WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
double_VAR_H(language_model_penalty_increment)
ViterbiStateEntry * GetNextParentVSE(bool just_classified, bool mixed_alnum, const BLOB_CHOICE *bc, LanguageModelFlagsType blob_choice_flags, const UNICHARSET &unicharset, WERD_RES *word_res, ViterbiStateEntry_IT *vse_it, LanguageModelFlagsType *top_choice_flags) const
WERD_CHOICE * ConstructWord(ViterbiStateEntry *vse, WERD_RES *word_res, DANGERR *fixpt, BlamerBundle *blamer_bundle, bool *truth_path)
float ComputeDenom(BLOB_CHOICE_LIST *curr_list)
bool correct_segmentation_explored_
static const float kMaxAvgNgramCost
float ComputeConsistencyAdjustment(const LanguageModelDawgInfo *dawg_info, const LMConsistencyInfo &consistency_info)
LanguageModelNgramInfo * GenerateNgramInfo(const char *unichar, float certainty, float denom, int curr_col, int curr_row, float outline_length, const ViterbiStateEntry *parent_vse)
double_VAR_H(language_model_ngram_small_prob)
double_VAR_H(language_model_penalty_punc)
BOOL_VAR_H(language_model_ngram_use_only_first_uft8_step)
double_VAR_H(language_model_ngram_rating_factor)
bool UpdateState(bool just_classified, int curr_col, int curr_row, BLOB_CHOICE_LIST *curr_list, LanguageModelState *parent_node, LMPainPoints *pain_points, WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
double_VAR_H(language_model_penalty_chartype)
float ComputeAdjustedPathCost(ViterbiStateEntry *vse)
int SetTopParentLowerUpperDigit(LanguageModelState *parent_node) const
std::string prev_word_str_
BOOL_VAR_H(language_model_ngram_on)
DawgPositionVector very_beginning_active_dawgs_
static const LanguageModelFlagsType kUpperCaseFlag
double_VAR_H(language_model_penalty_non_dict_word)
void ComputeAssociateStats(int col, int row, float max_char_wh_ratio, ViterbiStateEntry *parent_vse, WERD_RES *word_res, AssociateStats *associate_stats)
void FillConsistencyInfo(int curr_col, bool word_end, BLOB_CHOICE *b, ViterbiStateEntry *parent_vse, WERD_RES *word_res, LMConsistencyInfo *consistency_info)
void GenerateTopChoiceInfo(ViterbiStateEntry *new_vse, const ViterbiStateEntry *parent_vse, LanguageModelState *lms)
double_VAR_H(language_model_penalty_script)
float CertaintyScore(float cert)
void InitForWord(const WERD_CHOICE *prev_word, bool fixed_pitch, float max_char_wh_ratio, float rating_cert_scale)
const UnicityTable< FontInfo > * fontinfo_table_
double_VAR_H(language_model_penalty_spacing)
double_VAR_H(language_model_ngram_nonmatch_score)
INT_VAR_H(language_model_min_compound_length)
bool acceptable_choice_found_
LanguageModel(const UnicityTable< FontInfo > *fontinfo_table, Dict *dict)
INT_VAR_H(language_model_debug_level)
int NumInconsistentSpaces() const
int NumInconsistentPunc() const
int NumInconsistentCase() const
int NumInconsistentChartype() const
LanguageModelDawgInfo * dawg_info
AssociateStats associate_stats
character widths/gaps/seams
int length
number of characters on the path
LanguageModelNgramInfo * ngram_info
LanguageModelFlagsType top_choice_flags
Struct to store information maintained by various language model components.
Bundle together all the things pertaining to the best choice/state.