tesseract v5.3.3.20231005
tesseract::ViterbiStateEntry Struct Reference

#include <lm_state.h>

Inheritance diagram for tesseract::ViterbiStateEntry:
tesseract::ELIST_LINK

Public Member Functions

 ViterbiStateEntry (ViterbiStateEntry *pe, BLOB_CHOICE *b, float c, float ol, const LMConsistencyInfo &ci, const AssociateStats &as, LanguageModelFlagsType tcf, LanguageModelDawgInfo *d, LanguageModelNgramInfo *n, const char *debug_uch)
 
 ~ViterbiStateEntry ()
 
bool Consistent () const
 
bool HasAlnumChoice (const UNICHARSET &unicharset)
 
void Print (const char *msg) const
 
- Public Member Functions inherited from tesseract::ELIST_LINK
 ELIST_LINK ()
 
 ELIST_LINK (const ELIST_LINK &)
 
void operator= (const ELIST_LINK &)
 

Static Public Member Functions

static int Compare (const void *e1, const void *e2)
 

Public Attributes

BLOB_CHOICE * curr_b
 Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this). More...
 
ViterbiStateEntry * parent_vse
 
ViterbiStateEntry * competing_vse
 
LanguageModelDawgInfo * dawg_info
 
LanguageModelNgramInfo * ngram_info
 
std::string * debug_str
 
float cost
 
float ratings_sum
 sum of ratings of characters on the path More...
 
float min_certainty
 minimum certainty on the path More...
 
int adapted
 number of BLOB_CHOICES from adapted templates More...
 
int length
 number of characters on the path More...
 
float outline_length
 length of the outline so far More...
 
LMConsistencyInfo consistency_info
 path consistency info More...
 
AssociateStats associate_stats
 character widths/gaps/seams More...
 
LanguageModelFlagsType top_choice_flags
 
bool updated
 set to true if the entry has just been created/updated More...
 

Detailed Description

Struct for storing the information about a path in the segmentation graph explored by Viterbi search.

Definition at line 92 of file lm_state.h.

Constructor & Destructor Documentation

◆ ViterbiStateEntry()

tesseract::ViterbiStateEntry::ViterbiStateEntry ( ViterbiStateEntry * pe,
BLOB_CHOICE * b,
float  c,
float  ol,
const LMConsistencyInfo & ci,
const AssociateStats & as,
LanguageModelFlagsType  tcf,
LanguageModelDawgInfo * d,
LanguageModelNgramInfo * n,
const char *  debug_uch 
)
inline

Definition at line 93 of file lm_state.h.

97 : curr_b(b)
98 , parent_vse(pe)
99 , competing_vse(nullptr)
100 , dawg_info(d)
101 , ngram_info(n)
102 , cost(c)
103 , ratings_sum(b->rating())
104 , min_certainty(b->certainty())
105 , adapted(b->IsAdapted())
106 , length(1)
107 , outline_length(ol)
108 , consistency_info(ci)
109 , associate_stats(as)
110 , top_choice_flags(tcf)
111 , updated(true) {
112 debug_str = (debug_uch == nullptr) ? nullptr : new std::string();
113 if (pe != nullptr) {
114 ratings_sum += pe->ratings_sum;
115 if (pe->min_certainty < min_certainty) {
116 min_certainty = pe->min_certainty;
117 }
118 adapted += pe->adapted;
119 length += pe->length;
120 outline_length += pe->outline_length;
121 if (debug_uch != nullptr) {
122 *debug_str += *(pe->debug_str);
123 }
124 }
125 if (debug_str != nullptr && debug_uch != nullptr) {
126 *debug_str += debug_uch;
127 }
128 }
LanguageModelDawgInfo * dawg_info
Definition: lm_state.h:170
float outline_length
length of the outline so far
Definition: lm_state.h:190
BLOB_CHOICE * curr_b
Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this).
Definition: lm_state.h:162
AssociateStats associate_stats
character widths/gaps/seams
Definition: lm_state.h:192
ViterbiStateEntry * competing_vse
Definition: lm_state.h:166
int length
number of characters on the path
Definition: lm_state.h:189
ViterbiStateEntry * parent_vse
Definition: lm_state.h:163
LanguageModelNgramInfo * ngram_info
Definition: lm_state.h:174
LanguageModelFlagsType top_choice_flags
Definition: lm_state.h:196
int adapted
number of BLOB_CHOICES from adapted templates
Definition: lm_state.h:188
float ratings_sum
sum of ratings of characters on the path
Definition: lm_state.h:186
bool updated
set to true if the entry has just been created/updated
Definition: lm_state.h:198
LMConsistencyInfo consistency_info
path consistency info
Definition: lm_state.h:191
float min_certainty
minimum certainty on the path
Definition: lm_state.h:187

◆ ~ViterbiStateEntry()

tesseract::ViterbiStateEntry::~ViterbiStateEntry ( )
inline

Definition at line 129 of file lm_state.h.

129 {
130 delete dawg_info;
131 delete ngram_info;
132 delete debug_str;
133 }

Member Function Documentation

◆ Compare()

static int tesseract::ViterbiStateEntry::Compare ( const void *  e1,
const void *  e2 
)
inline static

Comparator function for sorting ViterbiStateEntry_LISTs in non-decreasing order of costs (an entry with a lower cost compares as less and therefore sorts first).

Definition at line 136 of file lm_state.h.

136 {
137 const ViterbiStateEntry *ve1 = *static_cast<const ViterbiStateEntry *const *>(e1);
138 const ViterbiStateEntry *ve2 = *static_cast<const ViterbiStateEntry *const *>(e2);
139 return (ve1->cost < ve2->cost) ? -1 : 1;
140 }
ViterbiStateEntry(ViterbiStateEntry *pe, BLOB_CHOICE *b, float c, float ol, const LMConsistencyInfo &ci, const AssociateStats &as, LanguageModelFlagsType tcf, LanguageModelDawgInfo *d, LanguageModelNgramInfo *n, const char *debug_uch)
Definition: lm_state.h:93

◆ Consistent()

bool tesseract::ViterbiStateEntry::Consistent ( ) const
inline

Definition at line 141 of file lm_state.h.

141 {
142 if (dawg_info != nullptr && consistency_info.NumInconsistentCase() == 0) {
143 return true;
144 }
145 return consistency_info.Consistent();
146 }

◆ HasAlnumChoice()

bool tesseract::ViterbiStateEntry::HasAlnumChoice ( const UNICHARSET & unicharset)
inline

Returns true if this VSE has an alphanumeric character as its classifier result.

Definition at line 149 of file lm_state.h.

149 {
150 if (curr_b == nullptr) {
151 return false;
152 }
153 UNICHAR_ID unichar_id = curr_b->unichar_id();
154 if (unicharset.get_isalpha(unichar_id) || unicharset.get_isdigit(unichar_id)) {
155 return true;
156 }
157 return false;
158 }
int UNICHAR_ID
Definition: unichar.h:34
UNICHAR_ID unichar_id() const
Definition: ratngs.h:81

◆ Print()

void tesseract::ViterbiStateEntry::Print ( const char *  msg) const

Definition at line 24 of file lm_state.cpp.

24 {
25 tprintf("%s ViterbiStateEntry", msg);
26 if (updated) {
27 tprintf("(NEW)");
28 }
29 if (this->debug_str != nullptr) {
30 tprintf(" str=%s", this->debug_str->c_str());
31 }
32 tprintf(" with ratings_sum=%.4f length=%d cost=%.6f", this->ratings_sum, this->length,
33 this->cost);
34 if (this->top_choice_flags) {
35 tprintf(" top_choice_flags=0x%x", this->top_choice_flags);
36 }
37 if (!this->Consistent()) {
38 tprintf(" inconsistent=(punc %d case %d chartype %d script %d font %d)",
39 this->consistency_info.NumInconsistentPunc(),
40 this->consistency_info.NumInconsistentCase(),
41 this->consistency_info.NumInconsistentChartype(),
42 this->consistency_info.inconsistent_script, this->consistency_info.inconsistent_font);
43 }
44 if (this->dawg_info) {
45 tprintf(" permuter=%d", this->dawg_info->permuter);
46 }
47 if (this->ngram_info) {
48 tprintf(" ngram_cl_cost=%g context=%s ngram pruned=%d",
49 this->ngram_info->ngram_and_classifier_cost, this->ngram_info->context.c_str(),
50 this->ngram_info->pruned);
51 }
52 if (this->associate_stats.shape_cost > 0.0f) {
53 tprintf(" shape_cost=%g", this->associate_stats.shape_cost);
54 }
55 tprintf(" %s", XHeightConsistencyEnumName[this->consistency_info.xht_decision]);
56
57 tprintf("\n");
58}
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
XHeightConsistencyEnum xht_decision
float ngram_and_classifier_cost
-[ ln(P_classifier(path)) + scale_factor * ln(P_ngram_model(path)) ]
Definition: lm_state.h:87

Member Data Documentation

◆ adapted

int tesseract::ViterbiStateEntry::adapted

number of BLOB_CHOICES from adapted templates

Definition at line 188 of file lm_state.h.

◆ associate_stats

AssociateStats tesseract::ViterbiStateEntry::associate_stats

character widths/gaps/seams

Definition at line 192 of file lm_state.h.

◆ competing_vse

ViterbiStateEntry* tesseract::ViterbiStateEntry::competing_vse

Pointer to a case-competing ViterbiStateEntry in the same list that represents a path ending in the same letter of the opposite case.

Definition at line 166 of file lm_state.h.

◆ consistency_info

LMConsistencyInfo tesseract::ViterbiStateEntry::consistency_info

path consistency info

Definition at line 191 of file lm_state.h.

◆ cost

float tesseract::ViterbiStateEntry::cost

The cost is an adjusted ratings sum, that is adjusted by all the language model components that use Viterbi search.

Definition at line 182 of file lm_state.h.

◆ curr_b

BLOB_CHOICE* tesseract::ViterbiStateEntry::curr_b

Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this).

Definition at line 162 of file lm_state.h.

◆ dawg_info

LanguageModelDawgInfo* tesseract::ViterbiStateEntry::dawg_info

Extra information maintained by Dawg language model component (owned by ViterbiStateEntry).

Definition at line 170 of file lm_state.h.

◆ debug_str

std::string* tesseract::ViterbiStateEntry::debug_str

UTF8 string representing the path corresponding to this vse. Populated only when language_model_debug_level > 0.

Definition at line 178 of file lm_state.h.

◆ length

int tesseract::ViterbiStateEntry::length

number of characters on the path

Definition at line 189 of file lm_state.h.

◆ min_certainty

float tesseract::ViterbiStateEntry::min_certainty

minimum certainty on the path

Definition at line 187 of file lm_state.h.

◆ ngram_info

LanguageModelNgramInfo* tesseract::ViterbiStateEntry::ngram_info

Extra information maintained by Ngram language model component (owned by ViterbiStateEntry).

Definition at line 174 of file lm_state.h.

◆ outline_length

float tesseract::ViterbiStateEntry::outline_length

length of the outline so far

Definition at line 190 of file lm_state.h.

◆ parent_vse

ViterbiStateEntry* tesseract::ViterbiStateEntry::parent_vse

Definition at line 163 of file lm_state.h.

◆ ratings_sum

float tesseract::ViterbiStateEntry::ratings_sum

sum of ratings of characters on the path

Various information about the characters on the path represented by this ViterbiStateEntry.

Definition at line 186 of file lm_state.h.

◆ top_choice_flags

LanguageModelFlagsType tesseract::ViterbiStateEntry::top_choice_flags

Flags for marking the entry as a top choice path (with the smallest rating or lower/upper case letters).

Definition at line 196 of file lm_state.h.

◆ updated

bool tesseract::ViterbiStateEntry::updated

set to true if the entry has just been created/updated

Definition at line 198 of file lm_state.h.


The documentation for this struct was generated from the following files: