tesseract v5.3.3.20231005
tesseract::ROW_RES Class Reference

#include <pageres.h>

Inheritance diagram for tesseract::ROW_RES:
tesseract::ELIST_LINK

Public Member Functions

 ROW_RES ()=default
 
 ROW_RES (bool merge_similar_words, ROW *the_row)
 
 ~ROW_RES ()=default
 
- Public Member Functions inherited from tesseract::ELIST_LINK
 ELIST_LINK ()
 
 ELIST_LINK (const ELIST_LINK &)
 
void operator= (const ELIST_LINK &)
 

Public Attributes

ROW * row
 
int32_t char_count
 
int32_t rej_count
 
int32_t whole_word_rej_count
 
WERD_RES_LIST word_res_list
 

Detailed Description

Definition at line 142 of file pageres.h.

Constructor & Destructor Documentation

◆ ROW_RES() [1/2]

tesseract::ROW_RES::ROW_RES ( )
default

◆ ROW_RES() [2/2]

tesseract::ROW_RES::ROW_RES ( bool  merge_similar_words,
ROW * the_row
)

Definition at line 119 of file pageres.cpp.

119 {
120 WERD_IT word_it(the_row->word_list());
121 WERD_RES_IT word_res_it(&word_res_list);
122 WERD_RES *combo = nullptr; // current combination of fuzzies
123 WERD *copy_word;
124
125 char_count = 0;
126 rej_count = 0;
127 whole_word_rej_count = 0;
128
129 row = the_row;
130 bool add_next_word = false;
131 TBOX union_box;
132 float line_height =
133 the_row->x_height() + the_row->ascenders() - the_row->descenders();
134 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
135 auto *word_res = new WERD_RES(word_it.data());
136 word_res->x_height = the_row->x_height();
137 if (add_next_word) {
138 ASSERT_HOST(combo != nullptr);
139 // We are adding this word to the combination.
140 word_res->part_of_combo = true;
141 combo->copy_on(word_res);
142 } else if (merge_similar_words) {
143 union_box = word_res->word->bounding_box();
144 add_next_word = !word_res->word->flag(W_REP_CHAR) &&
145 union_box.height() <= line_height * kMaxWordSizeRatio;
146 word_res->odd_size = !add_next_word;
147 }
148 WERD *next_word = word_it.data_relative(1);
149 if (merge_similar_words) {
150 if (add_next_word && !next_word->flag(W_REP_CHAR)) {
151 // Next word will be added on if all of the following are true:
152 // Not a rep char.
153 // Box height small enough.
154 // Union box height small enough.
155 // Horizontal gap small enough.
156 TBOX next_box = next_word->bounding_box();
157 int prev_right = union_box.right();
158 union_box += next_box;
159 if (next_box.height() > line_height * kMaxWordSizeRatio ||
160 union_box.height() > line_height * kMaxLineSizeRatio ||
161 next_box.left() > prev_right + line_height * kMaxWordGapRatio) {
162 add_next_word = false;
163 }
164 }
165 next_word->set_flag(W_FUZZY_NON, add_next_word);
166 } else {
167 add_next_word = next_word->flag(W_FUZZY_NON);
168 }
169 if (add_next_word) {
170 if (combo == nullptr) {
171 copy_word = new WERD;
172 *copy_word = *(word_it.data()); // deep copy
173 combo = new WERD_RES(copy_word);
174 combo->x_height = the_row->x_height();
175 combo->combination = true;
176 word_res_it.add_to_end(combo);
177 }
178 word_res->part_of_combo = true;
179 } else {
180 combo = nullptr;
181 }
182 word_res_it.add_to_end(word_res);
183 }
184}
#define ASSERT_HOST(x)
Definition: errcode.h:54
@ TBOX
@ W_REP_CHAR
repeated character
Definition: werd.h:40
@ W_FUZZY_NON
fuzzy nonspace
Definition: werd.h:42
const double kMaxWordSizeRatio
Definition: pageres.cpp:58
const double kMaxLineSizeRatio
Definition: pageres.cpp:60
const double kMaxWordGapRatio
Definition: pageres.cpp:62
WERD_RES_LIST word_res_list
Definition: pageres.h:148
int32_t whole_word_rej_count
Definition: pageres.h:147
int32_t rej_count
Definition: pageres.h:146
int32_t char_count
Definition: pageres.h:145

◆ ~ROW_RES()

tesseract::ROW_RES::~ROW_RES ( )
default

Member Data Documentation

◆ char_count

int32_t tesseract::ROW_RES::char_count

Definition at line 145 of file pageres.h.

◆ rej_count

int32_t tesseract::ROW_RES::rej_count

Definition at line 146 of file pageres.h.

◆ row

ROW* tesseract::ROW_RES::row

Definition at line 144 of file pageres.h.

◆ whole_word_rej_count

int32_t tesseract::ROW_RES::whole_word_rej_count

Definition at line 147 of file pageres.h.

◆ word_res_list

WERD_RES_LIST tesseract::ROW_RES::word_res_list

Definition at line 148 of file pageres.h.


The documentation for this class was generated from the following files: