23# include "config_auto.h"
28#ifndef DISABLED_LEGACY_ENGINE
96#ifndef DISABLED_LEGACY_ENGINE
97 const std::vector<ScoredFont> &
fonts()
const {
102 int score1 = 0, score2 = 0;
105 for (
auto &f : fonts_) {
106 if (f.score > score1) {
108 fontinfo_id2_ = fontinfo_id_;
110 fontinfo_id_ = f.fontinfo_id;
111 }
else if (f.score > score2) {
113 fontinfo_id2_ = f.fontinfo_id;
145 unichar_id_ = newunichar_id;
154 script_id_ = newscript_id;
157 matrix_cell_.
col = col;
158 matrix_cell_.
row = row;
173 tprintf(
"r%.2f c%.2f x[%g,%g]: %d %s",
174 static_cast<double>(rating_),
175 static_cast<double>(certainty_),
176 static_cast<double>(min_xheight_),
177 static_cast<double>(max_xheight_),
178 unichar_id_, (unicharset ==
nullptr) ?
"" : unicharset->
debug_str(unichar_id_).c_str());
182 tprintf(
" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n", script_id_, fontinfo_id_,
183 fontinfo_id2_,
static_cast<double>(yshift_), classifier_);
189 return (bc1->rating_ < bc2->rating_) ? -1 : 1;
197#ifndef DISABLED_LEGACY_ENGINE
199 std::vector<ScoredFont> fonts_;
201 int16_t fontinfo_id_;
202 int16_t fontinfo_id2_;
261 static const char *permuter_name(uint8_t permuter);
267 this->init(reserved);
269 WERD_CHOICE(
const char *src_string,
const char *src_lengths,
float src_rating,
270 float src_certainty, uint8_t src_permuter,
const UNICHARSET &unicharset)
271 : unicharset_(&unicharset) {
272 this->init(src_string, src_lengths, src_rating, src_certainty, src_permuter);
276 this->init(word.
length());
277 this->operator=(word);
291 return adjust_factor_;
294 adjust_factor_ = factor;
300 assert(index < length_);
301 return unichar_ids_[index];
303 inline unsigned state(
unsigned index)
const {
304 return state_[index];
307 if (index >= length_) {
310 return script_pos_[index];
319 return certainties_[index];
322 return min_x_height_;
325 return max_x_height_;
328 min_x_height_ = min_height;
329 max_x_height_ = max_height;
334 const char *permuter_name()
const;
338 BLOB_CHOICE_LIST *blob_choices(
unsigned index,
MATRIX *ratings)
const;
345 assert(index < length_);
346 unichar_ids_[index] = unichar_id;
349 return dangerous_ambig_found_;
352 dangerous_ambig_found_ =
value;
358 certainty_ = new_val;
378 unichar_ids_.resize(reserved_);
379 script_pos_.resize(reserved_);
380 state_.resize(reserved_);
381 certainties_.resize(reserved_);
386 inline void init(
unsigned reserved) {
387 reserved_ = reserved;
389 unichar_ids_.resize(reserved);
390 script_pos_.resize(reserved);
391 state_.resize(reserved);
392 certainties_.resize(reserved);
394 unichar_ids_.clear();
397 certainties_.clear();
400 adjust_factor_ = 1.0f;
402 certainty_ = FLT_MAX;
403 min_x_height_ = 0.0f;
404 max_x_height_ = FLT_MAX;
406 unichars_in_script_order_ =
false;
407 dangerous_ambig_found_ =
false;
415 void init(
const char *src_string,
const char *src_lengths,
float src_rating,
float src_certainty,
416 uint8_t src_permuter);
421 rating_ = kBadRating;
422 certainty_ = -FLT_MAX;
430 assert(reserved_ > length_);
432 this->set_unichar_id(unichar_id, blob_count, rating, certainty, length_ - 1);
435 void append_unichar_id(
UNICHAR_ID unichar_id,
int blob_count,
float rating,
float certainty);
439 assert(index < length_);
440 unichar_ids_[index] = unichar_id;
441 state_[index] = blob_count;
442 certainties_[index] = certainty;
445 if (certainty < certainty_) {
446 certainty_ = certainty;
451 void set_blob_choice(
unsigned index,
int blob_count,
const BLOB_CHOICE *blob_choice);
453 bool contains_unichar_id(
UNICHAR_ID unichar_id)
const;
454 void remove_unichar_ids(
unsigned index,
int num);
459 this->remove_unichar_ids(index, 1);
461 bool has_rtl_unichar_id()
const;
462 void reverse_and_mirror_unichar_ids();
467 void punct_stripped(
unsigned *start_core,
unsigned *end_core)
const;
472 void GetNonSuperscriptSpan(
int *start,
int *end)
const;
476 WERD_CHOICE shallow_copy(
unsigned start,
unsigned end)
const;
478 void string_and_lengths(std::string *word_str, std::string *word_lengths_str)
const;
480 std::string word_str;
481 for (
unsigned i = 0;
i < length_; ++
i) {
482 word_str += unicharset_->debug_str(unichar_ids_[
i]);
489 for (
unsigned i = 0;
i < length_; ++
i) {
490 if (!unicharset_->IsSpaceDelimited(unichar_ids_[
i])) {
498 for (
unsigned i = 0;
i < length_; ++
i) {
510 return unichars_in_script_order_ = in_script_order;
514 return unichars_in_script_order_;
520 this->string_and_lengths(&unichar_string_, &unichar_lengths_);
521 return unichar_string_;
527 this->string_and_lengths(&unichar_string_, &unichar_lengths_);
528 return unichar_string_;
534 this->string_and_lengths(&unichar_string_, &unichar_lengths_);
535 return unichar_lengths_;
543 void SetScriptPositions(
bool small_caps,
TWERD *word,
int debug = 0);
545 void SetAllScriptPositions(
ScriptPos position);
553 int GetTopScriptID()
const;
556 void UpdateStateForSplit(
int blob_position);
559 unsigned TotalOfStates()
const;
564 void print(
const char *msg)
const;
566 void print_state(
const char *msg)
const;
570 void DisplaySegmentation(
TWERD *word);
594 std::vector<UNICHAR_ID> unichar_ids_;
595 std::vector<ScriptPos> script_pos_;
596 std::vector<int> state_;
597 std::vector<float> certainties_;
601 float adjust_factor_;
615 bool unichars_in_script_order_;
617 bool dangerous_ambig_found_;
621 mutable std::string unichar_string_;
622 mutable std::string unichar_lengths_;
635 BLOB_CHOICE_LIST *ratings,
#define ELISTIZEH(CLASSNAME)
ICOORD & operator+=(ICOORD &op1, const ICOORD &op2)
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
void tprintf(const char *format,...)
const char * ScriptPosToString(enum ScriptPos script_pos)
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, const WERD_CHOICE &word2)
std::vector< BLOB_CHOICE_LIST * > BLOB_CHOICE_LIST_VECTOR
int16_t fontinfo_id2() const
const std::vector< ScoredFont > & fonts() const
void set_certainty(float newrat)
void set_unichar_id(UNICHAR_ID newunichar_id)
UNICHAR_ID unichar_id() const
void set_classifier(BlobChoiceClassifier classifier)
void print(const UNICHARSET *unicharset) const
void set_matrix_cell(int col, int row)
void set_fonts(const std::vector< ScoredFont > &fonts)
static int SortByRating(const void *p1, const void *p2)
void set_script(int newscript_id)
float min_xheight() const
float max_xheight() const
BlobChoiceClassifier classifier() const
static BLOB_CHOICE * deep_copy(const BLOB_CHOICE *src)
bool IsClassified() const
int16_t fontinfo_id() const
const MATRIX_COORD & matrix_cell()
bool PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const
void set_rating(float newrat)
float max_x_height() const
std::string debug_string() const
bool set_unichars_in_script_order(bool in_script_order)
float certainty(unsigned index) const
void set_dangerous_ambig_found_(bool value)
void remove_unichar_id(unsigned index)
void set_x_heights(float min_height, float max_height)
void set_unichar_id(UNICHAR_ID unichar_id, unsigned index)
WERD_CHOICE(const UNICHARSET *unicharset)
bool unichars_in_script_order() const
UNICHAR_ID unichar_id(unsigned index) const
static const float kBadRating
WERD_CHOICE(const UNICHARSET *unicharset, int reserved)
void make_bad()
Set the fields in this choice to be default (bad) values.
bool dangerous_ambig_found() const
WERD_CHOICE(const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uint8_t src_permuter, const UNICHARSET &unicharset)
void set_certainty(float new_val)
void set_length(unsigned len)
WERD_CHOICE(const WERD_CHOICE &word)
bool ContainsAnyNonSpaceDelimited() const
void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
void init(unsigned reserved)
unsigned state(unsigned index) const
void double_the_size()
Make more space in unichar_id_ and fragment_lengths_ arrays.
void set_permuter(uint8_t perm)
const UNICHARSET * unicharset() const
float min_x_height() const
const std::vector< UNICHAR_ID > & unichar_ids() const
const std::string & unichar_string() const
void remove_last_unichar_id()
void set_adjust_factor(float factor)
const std::string & unichar_lengths() const
std::string & unichar_string()
float adjust_factor() const
void set_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty, unsigned index)
void set_rating(float new_val)
ScriptPos BlobPosition(unsigned index) const
std::string debug_str(UNICHAR_ID id) const