tesseract v5.3.3.20231005
tesseract::LTRResultIterator Class Reference

#include <ltrresultiterator.h>

Inheritance diagram for tesseract::LTRResultIterator:
tesseract::PageIterator tesseract::ResultIterator tesseract::MutableIterator

Public Member Functions

 LTRResultIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
 
 ~LTRResultIterator () override
 
char * GetUTF8Text (PageIteratorLevel level) const
 
void SetLineSeparator (const char *new_line)
 
void SetParagraphSeparator (const char *new_para)
 
float Confidence (PageIteratorLevel level) const
 
const char * WordFontAttributes (bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const
 
const char * WordRecognitionLanguage () const
 
StrongScriptDirection WordDirection () const
 
bool WordIsFromDictionary () const
 
int BlanksBeforeWord () const
 
bool WordIsNumeric () const
 
bool HasBlamerInfo () const
 
const void * GetParamsTrainingBundle () const
 
const char * GetBlamerDebug () const
 
const char * GetBlamerMisadaptionDebug () const
 
bool HasTruthString () const
 
bool EquivalentToTruth (const char *str) const
 
char * WordTruthUTF8Text () const
 
char * WordNormedUTF8Text () const
 
const char * WordLattice (int *lattice_size) const
 
bool SymbolIsSuperscript () const
 
bool SymbolIsSubscript () const
 
bool SymbolIsDropcap () const
 
- Public Member Functions inherited from tesseract::PageIterator
 PageIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
 
virtual ~PageIterator ()
 
 PageIterator (const PageIterator &src)
 
const PageIterator & operator= (const PageIterator &src)
 
bool PositionedAtSameWord (const PAGE_RES_IT *other) const
 
virtual void Begin ()
 
virtual void RestartParagraph ()
 
bool IsWithinFirstTextlineOfParagraph () const
 
virtual void RestartRow ()
 
virtual bool Next (PageIteratorLevel level)
 
virtual bool IsAtBeginningOf (PageIteratorLevel level) const
 
virtual bool IsAtFinalElement (PageIteratorLevel level, PageIteratorLevel element) const
 
int Cmp (const PageIterator &other) const
 
void SetBoundingBoxComponents (bool include_upper_dots, bool include_lower_dots)
 
bool BoundingBox (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
 
bool BoundingBox (PageIteratorLevel level, int padding, int *left, int *top, int *right, int *bottom) const
 
bool BoundingBoxInternal (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
 
bool Empty (PageIteratorLevel level) const
 
PolyBlockType BlockType () const
 
Pta * BlockPolygon () const
 
Pix * GetBinaryImage (PageIteratorLevel level) const
 
Pix * GetImage (PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const
 
bool Baseline (PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const
 
void RowAttributes (float *row_height, float *descenders, float *ascenders) const
 
void Orientation (tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
 
void ParagraphInfo (tesseract::ParagraphJustification *justification, bool *is_list_item, bool *is_crown, int *first_line_indent) const
 
bool SetWordBlamerBundle (BlamerBundle *blamer_bundle)
 

Protected Attributes

const char * line_separator_
 
const char * paragraph_separator_
 
- Protected Attributes inherited from tesseract::PageIterator
PAGE_RES * page_res_
 
Tesseract * tesseract_
 
PAGE_RES_IT * it_
 
WERD * word_
 
int word_length_
 
int blob_index_
 
C_BLOB_IT * cblob_it_
 
bool include_upper_dots_
 
bool include_lower_dots_
 
int scale_
 
int scaled_yres_
 
int rect_left_
 
int rect_top_
 
int rect_width_
 
int rect_height_
 

Friends

class ChoiceIterator
 

Additional Inherited Members

- Protected Member Functions inherited from tesseract::PageIterator
void BeginWord (int offset)
 

Detailed Description

Definition at line 45 of file ltrresultiterator.h.

Constructor & Destructor Documentation

◆ LTRResultIterator()

tesseract::LTRResultIterator::LTRResultIterator ( PAGE_RES * page_res,
Tesseract * tesseract,
int  scale,
int  scaled_yres,
int  rect_left,
int  rect_top,
int  rect_width,
int  rect_height 
)

Definition at line 29 of file ltrresultiterator.cpp.

32 : PageIterator(page_res, tesseract, scale, scaled_yres, rect_left, rect_top, rect_width,
33 rect_height)
34 , line_separator_("\n")
35 , paragraph_separator_("\n") {}
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)

◆ ~LTRResultIterator()

tesseract::LTRResultIterator::~LTRResultIterator ( )
override default

Member Function Documentation

◆ BlanksBeforeWord()

int tesseract::LTRResultIterator::BlanksBeforeWord ( ) const

Definition at line 241 of file ltrresultiterator.cpp.

241 {
242 if (it_->word() == nullptr) {
243 return 1;
244 }
245 return it_->word()->word->space();
246}
WERD_RES * word() const
Definition: pageres.h:763
uint8_t space() const
Definition: werd.h:100

◆ Confidence()

float tesseract::LTRResultIterator::Confidence ( PageIteratorLevel  level) const

Definition at line 97 of file ltrresultiterator.cpp.

97 {
98 if (it_->word() == nullptr) {
99 return 0.0f; // Already at the end!
100 }
101 float mean_certainty = 0.0f;
102 int certainty_count = 0;
103 PAGE_RES_IT res_it(*it_);
104 WERD_CHOICE *best_choice = res_it.word()->best_choice;
105 ASSERT_HOST(best_choice != nullptr);
106 switch (level) {
107 case RIL_BLOCK:
108 do {
109 best_choice = res_it.word()->best_choice;
110 ASSERT_HOST(best_choice != nullptr);
111 mean_certainty += best_choice->certainty();
112 ++certainty_count;
113 res_it.forward();
114 } while (res_it.block() == res_it.prev_block());
115 break;
116 case RIL_PARA:
117 do {
118 best_choice = res_it.word()->best_choice;
119 ASSERT_HOST(best_choice != nullptr);
120 mean_certainty += best_choice->certainty();
121 ++certainty_count;
122 res_it.forward();
123 } while (res_it.block() == res_it.prev_block() &&
124 res_it.row()->row->para() == res_it.prev_row()->row->para());
125 break;
126 case RIL_TEXTLINE:
127 do {
128 best_choice = res_it.word()->best_choice;
129 ASSERT_HOST(best_choice != nullptr);
130 mean_certainty += best_choice->certainty();
131 ++certainty_count;
132 res_it.forward();
133 } while (res_it.row() == res_it.prev_row());
134 break;
135 case RIL_WORD:
136 mean_certainty += best_choice->certainty();
137 ++certainty_count;
138 break;
139 case RIL_SYMBOL:
140 mean_certainty += best_choice->certainty(blob_index_);
141 ++certainty_count;
142 }
143 if (certainty_count > 0) {
144 mean_certainty /= certainty_count;
145 return ClipToRange(100 + 5 * mean_certainty, 0.0f, 100.0f);
146 }
147 return 0.0f;
148}
#define ASSERT_HOST(x)
Definition: errcode.h:54
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:105

◆ EquivalentToTruth()

bool tesseract::LTRResultIterator::EquivalentToTruth ( const char *  str) const

Definition at line 298 of file ltrresultiterator.cpp.

298 {
299 if (!HasTruthString()) {
300 return false;
301 }
302 ASSERT_HOST(it_->word()->uch_set != nullptr);
303 WERD_CHOICE str_wd(str, *(it_->word()->uch_set));
304 return it_->word()->blamer_bundle->ChoiceIsCorrect(&str_wd);
305}
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:116
BlamerBundle * blamer_bundle
Definition: pageres.h:250
const UNICHARSET * uch_set
Definition: pageres.h:201

◆ GetBlamerDebug()

const char * tesseract::LTRResultIterator::GetBlamerDebug ( ) const

Definition at line 275 of file ltrresultiterator.cpp.

275 {
276 return it_->word()->blamer_bundle->debug().c_str();
277}
const std::string & debug() const
Definition: blamer.h:140

◆ GetBlamerMisadaptionDebug()

const char * tesseract::LTRResultIterator::GetBlamerMisadaptionDebug ( ) const

Definition at line 281 of file ltrresultiterator.cpp.

281 {
282 return it_->word()->blamer_bundle->misadaption_debug().c_str();
283}
const std::string & misadaption_debug() const
Definition: blamer.h:143

◆ GetParamsTrainingBundle()

const void * tesseract::LTRResultIterator::GetParamsTrainingBundle ( ) const

Definition at line 266 of file ltrresultiterator.cpp.

266 {
267 return (it_->word() != nullptr && it_->word()->blamer_bundle != nullptr)
268 ? &(it_->word()->blamer_bundle->params_training_bundle())
269 : nullptr;
270}
const tesseract::ParamsTrainingBundle & params_training_bundle() const
Definition: blamer.h:176

◆ GetUTF8Text()

char * tesseract::LTRResultIterator::GetUTF8Text ( PageIteratorLevel  level) const

Definition at line 44 of file ltrresultiterator.cpp.

44 {
45 if (it_->word() == nullptr) {
46 return nullptr; // Already at the end!
47 }
48 std::string text;
49 PAGE_RES_IT res_it(*it_);
50 WERD_CHOICE *best_choice = res_it.word()->best_choice;
51 ASSERT_HOST(best_choice != nullptr);
52 if (level == RIL_SYMBOL) {
53 text = res_it.word()->BestUTF8(blob_index_, false);
54 } else if (level == RIL_WORD) {
55 text = best_choice->unichar_string();
56 } else {
57 bool eol = false; // end of line?
58 bool eop = false; // end of paragraph?
59 do { // for each paragraph in a block
60 do { // for each text line in a paragraph
61 do { // for each word in a text line
62 best_choice = res_it.word()->best_choice;
63 ASSERT_HOST(best_choice != nullptr);
64 text += best_choice->unichar_string();
65 text += " ";
66 res_it.forward();
67 eol = res_it.row() != res_it.prev_row();
68 } while (!eol);
69 text.resize(text.length() - 1);
70 text += line_separator_;
71 eop = res_it.block() != res_it.prev_block() ||
72 res_it.row()->row->para() != res_it.prev_row()->row->para();
73 } while (level != RIL_TEXTLINE && !eop);
74 if (eop) {
75 text += paragraph_separator_;
76 }
77 } while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
78 }
79 int length = text.length() + 1;
80 char *result = new char[length];
81 strncpy(result, text.c_str(), length);
82 return result;
83}

◆ HasBlamerInfo()

bool tesseract::LTRResultIterator::HasBlamerInfo ( ) const

Definition at line 258 of file ltrresultiterator.cpp.

258 {
259 return it_->word() != nullptr && it_->word()->blamer_bundle != nullptr &&
260 it_->word()->blamer_bundle->HasDebugInfo();
261}
bool HasDebugInfo() const
Definition: blamer.h:137

◆ HasTruthString()

bool tesseract::LTRResultIterator::HasTruthString ( ) const

Definition at line 286 of file ltrresultiterator.cpp.

286 {
287 if (it_->word() == nullptr) {
288 return false; // Already at the end!
289 }
290 if (it_->word()->blamer_bundle == nullptr || it_->word()->blamer_bundle->NoTruth()) {
291 return false; // no truth information for this word
292 }
293 return true;
294}
bool NoTruth() const
Definition: blamer.h:134

◆ SetLineSeparator()

void tesseract::LTRResultIterator::SetLineSeparator ( const char *  new_line)

Definition at line 86 of file ltrresultiterator.cpp.

86 {
87 line_separator_ = new_line;
88}

◆ SetParagraphSeparator()

void tesseract::LTRResultIterator::SetParagraphSeparator ( const char *  new_para)

Definition at line 91 of file ltrresultiterator.cpp.

91 {
92 paragraph_separator_ = new_para;
93}

◆ SymbolIsDropcap()

bool tesseract::LTRResultIterator::SymbolIsDropcap ( ) const

Definition at line 375 of file ltrresultiterator.cpp.

375 {
376 if (cblob_it_ == nullptr && it_->word() != nullptr) {
377 return it_->word()->best_choice->BlobPosition(blob_index_) == SP_DROPCAP;
378 }
379 return false;
380}
@ SP_DROPCAP
Definition: ratngs.h:254
WERD_CHOICE * best_choice
Definition: pageres.h:239
ScriptPos BlobPosition(unsigned index) const
Definition: ratngs.h:306

◆ SymbolIsSubscript()

bool tesseract::LTRResultIterator::SymbolIsSubscript ( ) const

Definition at line 365 of file ltrresultiterator.cpp.

365 {
366 if (cblob_it_ == nullptr && it_->word() != nullptr) {
367 return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUBSCRIPT;
368 }
369 return false;
370}
@ SP_SUBSCRIPT
Definition: ratngs.h:254

◆ SymbolIsSuperscript()

bool tesseract::LTRResultIterator::SymbolIsSuperscript ( ) const

Definition at line 355 of file ltrresultiterator.cpp.

355 {
356 if (cblob_it_ == nullptr && it_->word() != nullptr) {
357 return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUPERSCRIPT;
358 }
359 return false;
360}
@ SP_SUPERSCRIPT
Definition: ratngs.h:254

◆ WordDirection()

StrongScriptDirection tesseract::LTRResultIterator::WordDirection ( ) const

Definition at line 213 of file ltrresultiterator.cpp.

213 {
214 if (it_->word() == nullptr) {
215 return DIR_NEUTRAL;
216 }
217 bool has_rtl = it_->word()->AnyRtlCharsInWord();
218 bool has_ltr = it_->word()->AnyLtrCharsInWord();
219 if (has_rtl && !has_ltr) {
220 return DIR_RIGHT_TO_LEFT;
221 }
222 if (has_ltr && !has_rtl) {
223 return DIR_LEFT_TO_RIGHT;
224 }
225 if (!has_ltr && !has_rtl) {
226 return DIR_NEUTRAL;
227 }
228 return DIR_MIX;
229}
@ DIR_MIX
Definition: unichar.h:45
@ DIR_LEFT_TO_RIGHT
Definition: unichar.h:43
@ DIR_RIGHT_TO_LEFT
Definition: unichar.h:44
@ DIR_NEUTRAL
Definition: unichar.h:42
bool AnyRtlCharsInWord() const
Definition: pageres.h:394
bool AnyLtrCharsInWord() const
Definition: pageres.h:413

◆ WordFontAttributes()

const char * tesseract::LTRResultIterator::WordFontAttributes ( bool *  is_bold,
bool *  is_italic,
bool *  is_underlined,
bool *  is_monospace,
bool *  is_serif,
bool *  is_smallcaps,
int *  pointsize,
int *  font_id 
) const

Definition at line 158 of file ltrresultiterator.cpp.

161 {
162 const char *result = nullptr;
163
164 if (it_->word() == nullptr) {
165 // Already at the end!
166 *pointsize = 0;
167 } else {
168 float row_height =
169 it_->row()->row->x_height() + it_->row()->row->ascenders() - it_->row()->row->descenders();
170 // Convert from pixels to printers points.
171 *pointsize =
172 scaled_yres_ > 0 ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5) : 0;
173
174#ifndef DISABLED_LEGACY_ENGINE
175 const FontInfo *font_info = it_->word()->fontinfo;
176 if (font_info) {
177 // Font information available.
178 *font_id = font_info->universal_id;
179 *is_bold = font_info->is_bold();
180 *is_italic = font_info->is_italic();
181 *is_underlined = false; // TODO(rays) fix this!
182 *is_monospace = font_info->is_fixed_pitch();
183 *is_serif = font_info->is_serif();
184 result = font_info->name;
185 }
186#endif // ndef DISABLED_LEGACY_ENGINE
187
188 *is_smallcaps = it_->word()->small_caps;
189 }
190
191 if (!result) {
192 *is_bold = false;
193 *is_italic = false;
194 *is_underlined = false;
195 *is_monospace = false;
196 *is_serif = false;
197 *is_smallcaps = false;
198 *font_id = -1;
199 }
200
201 return result;
202}
constexpr int kPointsPerInch
Definition: publictypes.h:31
int32_t universal_id
Definition: fontinfo.h:140
float x_height() const
Definition: ocrrow.h:66
float ascenders() const
Definition: ocrrow.h:84
float descenders() const
Definition: ocrrow.h:87
const FontInfo * fontinfo
Definition: pageres.h:307
ROW_RES * row() const
Definition: pageres.h:766

◆ WordIsFromDictionary()

bool tesseract::LTRResultIterator::WordIsFromDictionary ( ) const

Definition at line 232 of file ltrresultiterator.cpp.

232 {
233 if (it_->word() == nullptr) {
234 return false; // Already at the end!
235 }
236 int permuter = it_->word()->best_choice->permuter();
237 return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM || permuter == USER_DAWG_PERM;
238}
@ SYSTEM_DAWG_PERM
Definition: ratngs.h:244
@ USER_DAWG_PERM
Definition: ratngs.h:246
@ FREQ_DAWG_PERM
Definition: ratngs.h:247
uint8_t permuter() const
Definition: ratngs.h:331

◆ WordIsNumeric()

bool tesseract::LTRResultIterator::WordIsNumeric ( ) const

Definition at line 249 of file ltrresultiterator.cpp.

249 {
250 if (it_->word() == nullptr) {
251 return false; // Already at the end!
252 }
253 int permuter = it_->word()->best_choice->permuter();
254 return permuter == NUMBER_PERM;
255}
@ NUMBER_PERM
Definition: ratngs.h:242

◆ WordLattice()

const char * tesseract::LTRResultIterator::WordLattice ( int *  lattice_size) const

Definition at line 341 of file ltrresultiterator.cpp.

341 {
342 if (it_->word() == nullptr) {
343 return nullptr; // Already at the end!
344 }
345 if (it_->word()->blamer_bundle == nullptr) {
346 return nullptr;
347 }
348 *lattice_size = it_->word()->blamer_bundle->lattice_size();
349 return it_->word()->blamer_bundle->lattice_data();
350}
int lattice_size() const
Definition: blamer.h:166
const char * lattice_data() const
Definition: blamer.h:163

◆ WordNormedUTF8Text()

char * tesseract::LTRResultIterator::WordNormedUTF8Text ( ) const

Definition at line 322 of file ltrresultiterator.cpp.

322 {
323 if (it_->word() == nullptr) {
324 return nullptr; // Already at the end!
325 }
326 std::string ocr_text;
327 WERD_CHOICE *best_choice = it_->word()->best_choice;
328 const UNICHARSET *unicharset = it_->word()->uch_set;
329 ASSERT_HOST(best_choice != nullptr);
330 for (unsigned i = 0; i < best_choice->length(); ++i) {
331 ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i));
332 }
333 auto length = ocr_text.length() + 1;
334 char *result = new char[length];
335 strncpy(result, ocr_text.c_str(), length);
336 return result;
337}

◆ WordRecognitionLanguage()

const char * tesseract::LTRResultIterator::WordRecognitionLanguage ( ) const

Definition at line 205 of file ltrresultiterator.cpp.

205 {
206 if (it_->word() == nullptr || it_->word()->tesseract == nullptr) {
207 return nullptr;
208 }
209 return it_->word()->tesseract->lang.c_str();
210}
tesseract::Tesseract * tesseract
Definition: pageres.h:278
std::string lang
Definition: ccutil.h:59

◆ WordTruthUTF8Text()

char * tesseract::LTRResultIterator::WordTruthUTF8Text ( ) const

Definition at line 309 of file ltrresultiterator.cpp.

309 {
310 if (!HasTruthString()) {
311 return nullptr;
312 }
313 std::string truth_text = it_->word()->blamer_bundle->TruthString();
314 int length = truth_text.length() + 1;
315 char *result = new char[length];
316 strncpy(result, truth_text.c_str(), length);
317 return result;
318}
std::string TruthString() const
Definition: blamer.h:124

Friends And Related Function Documentation

◆ ChoiceIterator

friend class ChoiceIterator
friend

Definition at line 46 of file ltrresultiterator.h.

Member Data Documentation

◆ line_separator_

const char* tesseract::LTRResultIterator::line_separator_
protected

Definition at line 175 of file ltrresultiterator.h.

◆ paragraph_separator_

const char* tesseract::LTRResultIterator::paragraph_separator_
protected

Definition at line 176 of file ltrresultiterator.h.


The documentation for this class was generated from the following files: