tesseract v5.3.3.20231005
lm_state.cpp
Go to the documentation of this file.
1
2// File: lm_state.cpp
3// Description: Structures and functionality for capturing the state of
4// segmentation search guided by the language model.
5// Author: Rika Antonova
6//
7// (C) Copyright 2012, Google Inc.
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11// http://www.apache.org/licenses/LICENSE-2.0
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17//
19
20#include "lm_state.h"
21
22namespace tesseract {
23
24void ViterbiStateEntry::Print(const char *msg) const {
25 tprintf("%s ViterbiStateEntry", msg);
26 if (updated) {
27 tprintf("(NEW)");
28 }
29 if (this->debug_str != nullptr) {
30 tprintf(" str=%s", this->debug_str->c_str());
31 }
32 tprintf(" with ratings_sum=%.4f length=%d cost=%.6f", this->ratings_sum, this->length,
33 this->cost);
34 if (this->top_choice_flags) {
35 tprintf(" top_choice_flags=0x%x", this->top_choice_flags);
36 }
37 if (!this->Consistent()) {
38 tprintf(" inconsistent=(punc %d case %d chartype %d script %d font %d)",
40 this->consistency_info.NumInconsistentCase(),
41 this->consistency_info.NumInconsistentChartype(),
42 this->consistency_info.inconsistent_script, this->consistency_info.inconsistent_font);
43 }
44 if (this->dawg_info) {
45 tprintf(" permuter=%d", this->dawg_info->permuter);
46 }
47 if (this->ngram_info) {
48 tprintf(" ngram_cl_cost=%g context=%s ngram pruned=%d",
49 this->ngram_info->ngram_and_classifier_cost, this->ngram_info->context.c_str(),
50 this->ngram_info->pruned);
51 }
52 if (this->associate_stats.shape_cost > 0.0f) {
53 tprintf(" shape_cost=%g", this->associate_stats.shape_cost);
54 }
55 tprintf(" %s", XHeightConsistencyEnumName[this->consistency_info.xht_decision]);
56
57 tprintf("\n");
58}
59
66}
67
68void LanguageModelState::Print(const char *msg) {
69 tprintf("%s VSEs (max_cost=%g prn_len=%d tot_len=%d):\n", msg,
72 ViterbiStateEntry_IT vit(&viterbi_state_entries);
73 for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) {
74 vit.data()->Print("");
75 }
76}
77
78} // namespace tesseract
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
XHeightConsistencyEnum xht_decision
float ngram_and_classifier_cost
-[ ln(P_classifier(path)) + scale_factor * ln(P_ngram_model(path)) ]
Definition: lm_state.h:87
LanguageModelDawgInfo * dawg_info
Definition: lm_state.h:170
AssociateStats associate_stats
character widths/gaps/seams
Definition: lm_state.h:192
int length
number of characters on the path
Definition: lm_state.h:189
void Print(const char *msg) const
Definition: lm_state.cpp:24
LanguageModelNgramInfo * ngram_info
Definition: lm_state.h:174
LanguageModelFlagsType top_choice_flags
Definition: lm_state.h:196
float ratings_sum
sum of ratings of characters on the path
Definition: lm_state.h:186
bool updated
set to true if the entry has just been created/updated
Definition: lm_state.h:198
LMConsistencyInfo consistency_info
path consistency info
Definition: lm_state.h:191
float viterbi_state_entries_prunable_max_cost
Definition: lm_state.h:220
void Print(const char *msg)
Definition: lm_state.cpp:68
int viterbi_state_entries_length
Total number of entries in viterbi_state_entries.
Definition: lm_state.h:222
int viterbi_state_entries_prunable_length
Number and max cost of prunable paths in viterbi_state_entries.
Definition: lm_state.h:219
ViterbiStateEntry_LIST viterbi_state_entries
Storage for the Viterbi state.
Definition: lm_state.h:217
void Clear()
Clears the viterbi search state back to its initial conditions.
Definition: lm_state.cpp:61