20#ifndef TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_
21#define TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_
96 const char *debug_uch)
112 debug_str = (debug_uch ==
nullptr) ?
nullptr :
new std::string();
121 if (debug_uch !=
nullptr) {
125 if (
debug_str !=
nullptr && debug_uch !=
nullptr) {
136 static int Compare(
const void *e1,
const void *e2) {
139 return (ve1->
cost < ve2->
cost) ? -1 : 1;
159 void Print(
const char *msg)
const;
206 : viterbi_state_entries_prunable_length(0)
207 , viterbi_state_entries_prunable_max_cost(FLT_MAX)
208 , viterbi_state_entries_length(0) {}
214 void Print(
const char *msg);
228 beam.reserve(matrix_dimension);
229 for (
int i = 0;
i < matrix_dimension; ++
i) {
234 for (
auto &state :
beam) {
246 std::vector<LanguageModelState *>
beam;
#define ELISTIZEH(CLASSNAME)
unsigned char LanguageModelFlagsType
Used for expressing various language model flags.
std::vector< DANGERR_INFO > DANGERR
std::string Print(const T &value)
UNICHAR_ID unichar_id() const
bool get_isalpha(UNICHAR_ID unichar_id) const
bool get_isdigit(UNICHAR_ID unichar_id) const
int NumInconsistentCase() const
LanguageModelDawgInfo(const DawgPositionVector *a, PermuterType pt)
DawgPositionVector active_dawgs
float ngram_and_classifier_cost
-[ ln(P_classifier(path)) + scale_factor * ln(P_ngram_model(path)) ]
int context_unichar_step_len
float ngram_cost
-ln(P_ngram_model(path))
LanguageModelNgramInfo(const char *c, int l, bool p, float nc, float ncc)
LanguageModelDawgInfo * dawg_info
float outline_length
length of the outline so far
BLOB_CHOICE * curr_b
Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this).
AssociateStats associate_stats
character widths/gaps/seams
ViterbiStateEntry * competing_vse
int length
number of characters on the path
void Print(const char *msg) const
ViterbiStateEntry * parent_vse
LanguageModelNgramInfo * ngram_info
LanguageModelFlagsType top_choice_flags
int adapted
number of BLOB_CHOICES from adapted templates
static int Compare(const void *e1, const void *e2)
float ratings_sum
sum of ratings of characters on the path
bool updated
set to true if the entry has just been created/updated
LMConsistencyInfo consistency_info
path consistency info
ViterbiStateEntry(ViterbiStateEntry *pe, BLOB_CHOICE *b, float c, float ol, const LMConsistencyInfo &ci, const AssociateStats &as, LanguageModelFlagsType tcf, LanguageModelDawgInfo *d, LanguageModelNgramInfo *n, const char *debug_uch)
float min_certainty
minimum certainty on the path
bool HasAlnumChoice(const UNICHARSET &unicharset)
Struct to store information maintained by various language model components.
float viterbi_state_entries_prunable_max_cost
~LanguageModelState()=default
int viterbi_state_entries_length
Total number of entries in viterbi_state_entries.
int viterbi_state_entries_prunable_length
Number and max cost of prunable paths in viterbi_state_entries.
ViterbiStateEntry_LIST viterbi_state_entries
Storage for the Viterbi state.
Bundle together all the things pertaining to the best choice/state.
std::vector< LanguageModelState * > beam
DANGERR fixpt
Places to try to fix the word suggested by ambiguity checking.
ViterbiStateEntry * best_vse
Best ViterbiStateEntry and BLOB_CHOICE.
bool updated
Flag to indicate whether anything was changed.
BestChoiceBundle(int matrix_dimension)