25#include <allheaders.h>
30 int scaled_yres,
int rect_left,
int rect_top,
int rect_width,
34 , line_separator_(
"\n")
35 , paragraph_separator_(
"\n") {}
69 text.resize(text.length() - 1);
79 int length = text.length() + 1;
80 char *result =
new char[length];
81 strncpy(result, text.c_str(), length);
101 float mean_certainty = 0.0f;
102 int certainty_count = 0;
111 mean_certainty += best_choice->
certainty();
120 mean_certainty += best_choice->
certainty();
130 mean_certainty += best_choice->
certainty();
136 mean_certainty += best_choice->
certainty();
143 if (certainty_count > 0) {
144 mean_certainty /= certainty_count;
145 return ClipToRange(100 + 5 * mean_certainty, 0.0f, 100.0f);
159 bool *is_underlined,
bool *is_monospace,
160 bool *is_serif,
bool *is_smallcaps,
161 int *pointsize,
int *font_id)
const {
162 const char *result =
nullptr;
174#ifndef DISABLED_LEGACY_ENGINE
179 *is_bold = font_info->is_bold();
180 *is_italic = font_info->is_italic();
181 *is_underlined =
false;
182 *is_monospace = font_info->is_fixed_pitch();
183 *is_serif = font_info->is_serif();
184 result = font_info->name;
194 *is_underlined =
false;
195 *is_monospace =
false;
197 *is_smallcaps =
false;
219 if (has_rtl && !has_ltr) {
222 if (has_ltr && !has_rtl) {
225 if (!has_ltr && !has_rtl) {
263#ifndef DISABLED_LEGACY_ENGINE
314 int length = truth_text.length() + 1;
315 char *result =
new char[length];
316 strncpy(result, truth_text.c_str(), length);
326 std::string ocr_text;
330 for (
unsigned i = 0;
i < best_choice->
length(); ++
i) {
333 auto length = ocr_text.length() + 1;
334 char *result =
new char[length];
335 strncpy(result, ocr_text.c_str(), length);
384 word_res_ = result_it.
it_->
word();
389 bool lstm_choice_mode = word_res_->
tesseract->lstm_choice_mode;
390 rating_coefficient_ = word_res_->
tesseract->lstm_rating_coefficient;
392 BLOB_CHOICE_LIST *choices =
nullptr;
397 blanks_before_word_ = 0;
399 unsigned index = *tstep_index_;
400 index += blanks_before_word_;
401 if (index < word_res_->CTC_symbol_choices.size()) {
406 if ((oemLegacy || !lstm_choice_mode) && word_res_->
ratings !=
nullptr) {
409 if (choices !=
nullptr && !choices->empty()) {
410 choice_it_ =
new BLOB_CHOICE_IT(choices);
411 choice_it_->mark_cycle_pt();
413 choice_it_ =
nullptr;
415 if (LSTM_choices_ !=
nullptr && !LSTM_choices_->empty()) {
416 LSTM_choice_it_ = LSTM_choices_->begin();
426 if (oemLSTM_ && LSTM_choices_ !=
nullptr && !LSTM_choices_->empty()) {
427 if (LSTM_choice_it_ == LSTM_choices_->end() ||
428 next(LSTM_choice_it_) == LSTM_choices_->end()) {
435 if (choice_it_ ==
nullptr) {
438 choice_it_->forward();
439 return !choice_it_->cycled_list();
446 if (oemLSTM_ && LSTM_choices_ !=
nullptr && !LSTM_choices_->empty()) {
447 std::pair<const char *, float> choice = *LSTM_choice_it_;
450 if (choice_it_ ==
nullptr) {
453 UNICHAR_ID id = choice_it_->data()->unichar_id();
466 if (oemLSTM_ && LSTM_choices_ !=
nullptr && !LSTM_choices_->empty()) {
467 std::pair<const char *, float> choice = *LSTM_choice_it_;
468 confidence = 100 - rating_coefficient_ * choice.second;
470 if (choice_it_ ==
nullptr) {
473 confidence = 100 + 5 * choice_it_->data()->certainty();
480 unsigned offset = *tstep_index_ + blanks_before_word_;
487void ChoiceIterator::filterSpaces() {
488 if (LSTM_choices_->empty()) {
491 std::vector<std::pair<const char *, float>>::iterator it;
492 for (it = LSTM_choices_->begin(); it != LSTM_choices_->end();) {
493 if (!strcmp(it->first,
" ")) {
494 it = LSTM_choices_->erase(it);
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
constexpr int kPointsPerInch
const char * GetBlamerDebug() const
char * WordNormedUTF8Text() const
bool HasTruthString() const
char * GetUTF8Text(PageIteratorLevel level) const
const void * GetParamsTrainingBundle() const
StrongScriptDirection WordDirection() const
const char * paragraph_separator_
const char * WordFontAttributes(bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const
bool HasBlamerInfo() const
const char * GetBlamerMisadaptionDebug() const
bool SymbolIsDropcap() const
bool SymbolIsSubscript() const
~LTRResultIterator() override
const char * WordRecognitionLanguage() const
void SetLineSeparator(const char *new_line)
const char * WordLattice(int *lattice_size) const
LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
bool SymbolIsSuperscript() const
void SetParagraphSeparator(const char *new_para)
float Confidence(PageIteratorLevel level) const
bool WordIsFromDictionary() const
int BlanksBeforeWord() const
bool WordIsNumeric() const
const char * line_separator_
bool EquivalentToTruth(const char *str) const
char * WordTruthUTF8Text() const
const char * GetUTF8Text() const
ChoiceIterator(const LTRResultIterator &result_it)
std::vector< std::vector< std::pair< const char *, float > > > * Timesteps() const
std::string TruthString() const
const std::string & misadaption_debug() const
const tesseract::ParamsTrainingBundle & params_training_bundle() const
bool HasDebugInfo() const
const std::string & debug() const
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
const char * lattice_data() const
tesseract::Tesseract * tesseract
WERD_CHOICE * best_choice
bool AnyRtlCharsInWord() const
BlamerBundle * blamer_bundle
const UNICHARSET * uch_set
const char * BestUTF8(unsigned blob_index, bool in_rtl_context) const
const FontInfo * fontinfo
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
std::vector< std::vector< std::pair< const char *, float > > > CTC_symbol_choices
bool AnyLtrCharsInWord() const
std::vector< std::vector< std::vector< std::pair< const char *, float > > > > segmented_timesteps
BLOCK_RES * block() const
BLOCK_RES * prev_block() const
ROW_RES * prev_row() const
UNICHAR_ID unichar_id(unsigned index) const
std::string & unichar_string()
ScriptPos BlobPosition(unsigned index) const
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
const char * id_to_unichar_ext(UNICHAR_ID id) const