tesseract v5.3.3.20231005
makerow.h
Go to the documentation of this file.
1/**********************************************************************
2 * File: makerow.h (Formerly makerows.h)
3 * Description: Code to arrange blobs into rows of text.
4 * Author: Ray Smith
5 *
6 * (C) Copyright 1992, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19#ifndef MAKEROW_H
20#define MAKEROW_H
21
22#include "blobbox.h"
23#include "blobs.h"
24#include "ocrblock.h"
25#include "params.h"
26#include "statistc.h"
27
28namespace tesseract {
29
31 ASSIGN, // assign it to row
32 REJECT, // reject it - dual overlap
34};
35
41};
42
85
86inline void get_min_max_xheight(int block_linesize, int *min_height, int *max_height) {
87 *min_height = static_cast<int32_t>(floor(block_linesize * textord_minxh));
88 if (*min_height < textord_min_xheight) {
89 *min_height = textord_min_xheight;
90 }
91 *max_height = static_cast<int32_t>(ceil(block_linesize * 3.0));
92}
93
95 if (row->xheight <= 0) {
96 return ROW_INVALID;
97 }
98 return (row->ascrise > 0) ? ROW_ASCENDERS_FOUND
100}
101
// Returns true when `test` lies in the closed interval
// [num * (1 - margin), num * (1 + margin)], false otherwise.
// Both interval ends are inclusive. If any comparison involves NaN the
// result is false.
inline bool within_error_margin(float test, float num, float margin) {
  // Reject anything not at or above the lower edge (written as a negated >=
  // so that NaN operands are rejected too).
  if (!(test >= num * (1 - margin))) {
    return false;
  }
  // Lower edge satisfied; the answer now depends only on the upper edge.
  return test <= num * (1 + margin);
}
105
106void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights,
107 STATS *floating_heights);
108
109float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks);
110float make_rows(ICOORD page_tr, // top right
111 TO_BLOCK_LIST *port_blocks);
112void make_initial_textrows(ICOORD page_tr,
113 TO_BLOCK *block, // block to do
114 FCOORD rotation, // for drawing
115 bool testing_on); // correct orientation
116void fit_lms_line(TO_ROW *row);
117void compute_page_skew(TO_BLOCK_LIST *blocks, // list of blocks
118 float &page_m, // average gradient
119 float &page_err); // average error
120void vigorous_noise_removal(TO_BLOCK *block);
121void cleanup_rows_making(ICOORD page_tr, // top right
122 TO_BLOCK *block, // block to do
123 float gradient, // gradient to fit
124 FCOORD rotation, // for drawing
125 int32_t block_edge, // edge of block
126 bool testing_on); // correct orientation
127void delete_non_dropout_rows( // find lines
128 TO_BLOCK *block, // block to do
129 float gradient, // global skew
130 FCOORD rotation, // deskew vector
131 int32_t block_edge, // left edge
132 bool testing_on // correct orientation
133);
134bool find_best_dropout_row( // find neighbours
135 TO_ROW *row, // row to test
136 int32_t distance, // dropout dist
137 float dist_limit, // threshold distance
138 int32_t line_index, // index of row
139 TO_ROW_IT *row_it, // current position
140 bool testing_on // correct orientation
141);
142TBOX deskew_block_coords( // block box
143 TO_BLOCK *block, // block to do
144 float gradient // global skew
145);
146void compute_line_occupation( // project blobs
147 TO_BLOCK *block, // block to do
148 float gradient, // global skew
149 int32_t min_y, // min coord in block
150 int32_t max_y, // in block
151 int32_t *occupation, // output projection
152 int32_t *deltas // derivative
153);
154void compute_occupation_threshold( // project blobs
155 int32_t low_window, // below result point
156 int32_t high_window, // above result point
157 int32_t line_count, // array sizes
158 int32_t *occupation, // input projection
159 int32_t *thresholds // output thresholds
160);
161void compute_dropout_distances( // project blobs
162 int32_t *occupation, // input projection
163 int32_t *thresholds, // output thresholds
164 int32_t line_count // array sizes
165);
166void expand_rows( // find lines
167 ICOORD page_tr, // top right
168 TO_BLOCK *block, // block to do
169 float gradient, // gradient to fit
170 FCOORD rotation, // for drawing
171 int32_t block_edge, // edge of block
172 bool testing_on // correct orientation
173);
174void adjust_row_limits( // tidy limits
175 TO_BLOCK *block // block to do
176);
177void compute_row_stats( // find lines
178 TO_BLOCK *block, // block to do
179 bool testing_on // correct orientation
180);
// NOTE(review): the generated cross-reference for this header lists a
// definition location for every other function declared here but none for
// median_block_xheight — confirm it is still implemented and needed.
181float median_block_xheight( // find lines
182 TO_BLOCK *block, // block to do
183 float gradient // global skew
184);
185
186int compute_xheight_from_modes(STATS *heights, STATS *floating_heights, bool cap_only,
187 int min_height, int max_height, float *xheight, float *ascrise);
188
// NOTE(review): the definition in makerow.cpp names the last parameter
// `asc_heights`, while this declaration calls it `heights` — confirm which
// name describes the expected contents and align the two.
189int32_t compute_row_descdrop(TO_ROW *row, // row to do
190 float gradient, // global skew
191 int xheight_blob_count, STATS *heights);
192int32_t compute_height_modes(STATS *heights, // stats to search
193 int32_t min_height, // bottom of range
194 int32_t max_height, // top of range
195 int32_t *modes, // output array
196 int32_t maxmodes); // size of modes
197void correct_row_xheight(TO_ROW *row, // row to fix
198 float xheight, // average values
199 float ascrise, float descdrop);
200void separate_underlines(TO_BLOCK *block, // block to do
201 float gradient, // skew angle
202 FCOORD rotation, // inverse landscape
203 bool testing_on); // correct orientation
204void pre_associate_blobs(ICOORD page_tr, // top right
205 TO_BLOCK *block, // block to do
206 FCOORD rotation, // inverse landscape
207 bool testing_on); // correct orientation
208void fit_parallel_rows(TO_BLOCK *block, // block to do
209 float gradient, // gradient to fit
210 FCOORD rotation, // for drawing
211 int32_t block_edge, // edge of block
212 bool testing_on); // correct orientation
213void fit_parallel_lms(float gradient, // forced gradient
214 TO_ROW *row); // row to fit
215void make_baseline_spline(TO_ROW *row, // row to fit
216 TO_BLOCK *block); // block it came from
217bool segment_baseline( // split baseline
218 TO_ROW *row, // row to fit
219 TO_BLOCK *block, // block it came from
220 int32_t &segments, // number of segments
221 int32_t *xstarts // coords of segments
222);
223double *linear_spline_baseline( // split baseline
224 TO_ROW *row, // row to fit
225 TO_BLOCK *block, // block it came from
226 int32_t &segments, // number of segments
227 int32_t xstarts[] // coords of segments
228);
229void assign_blobs_to_rows( // find lines
230 TO_BLOCK *block, // block to do
231 float *gradient, // block skew
232 int pass, // identification
233 bool reject_misses, // chuck big ones out
234 bool make_new_rows, // add rows for unmatched
235 bool drawing_skew // draw smoothed skew
236);
237// find best row
238OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, // iterator
239 TO_ROW *&best_row, // output row
240 float top, // top of blob
241 float bottom, // bottom of blob
242 float rowsize, // max row size
243 bool testing_blob // test stuff
244);
245int blob_x_order( // sort function
246 const void *item1, // items to compare
247 const void *item2);
248
249void mark_repeated_chars(TO_ROW *row);
250
251} // namespace tesseract
252
253#endif
@ TBOX
UnicodeText::const_iterator::difference_type distance(const UnicodeText::const_iterator &first, const UnicodeText::const_iterator &last)
Definition: unicodetext.cc:44
bool textord_old_baselines
Definition: makerow.cpp:55
void expand_rows(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:976
bool textord_show_final_rows
Definition: makerow.cpp:50
void delete_non_dropout_rows(TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:612
void get_min_max_xheight(int block_linesize, int *min_height, int *max_height)
Definition: makerow.h:86
double textord_xheight_error_margin
Definition: makerow.cpp:93
void make_baseline_spline(TO_ROW *row, TO_BLOCK *block)
Definition: makerow.cpp:2053
void fit_parallel_lms(float gradient, TO_ROW *row)
Definition: makerow.cpp:1970
double textord_minxh
Definition: makerow.cpp:79
int textord_spline_minblobs
Definition: makerow.cpp:67
void pre_associate_blobs(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:1846
void cleanup_rows_making(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:563
void compute_row_stats(TO_BLOCK *block, bool testing_on)
Definition: makerow.cpp:1163
double_VAR_H(classify_min_slope)
int32_t compute_height_modes(STATS *heights, int32_t min_height, int32_t max_height, int32_t *modes, int32_t maxmodes)
Definition: makerow.cpp:1629
int textord_min_xheight
Definition: makerow.cpp:70
double textord_chop_width
Definition: makerow.cpp:76
bool textord_show_initial_rows
Definition: makerow.cpp:47
bool textord_show_expanded_rows
Definition: makerow.cpp:49
int textord_test_y
Definition: makerow.cpp:65
double textord_ascheight_mode_fraction
Definition: makerow.cpp:87
int textord_test_x
Definition: makerow.cpp:64
double textord_spline_shift_fraction
Definition: makerow.cpp:71
double textord_descx_ratio_min
Definition: makerow.cpp:91
ROW_CATEGORY
Definition: makerow.h:36
@ ROW_ASCENDERS_FOUND
Definition: makerow.h:37
@ ROW_DESCENDERS_FOUND
Definition: makerow.h:38
@ ROW_INVALID
Definition: makerow.h:40
@ ROW_UNKNOWN
Definition: makerow.h:39
bool textord_debug_xheights
Definition: makerow.cpp:59
void adjust_row_limits(TO_BLOCK *block)
Definition: makerow.cpp:1129
int textord_spline_medianwin
Definition: makerow.cpp:68
double textord_skew_lag
Definition: makerow.cpp:73
double textord_excess_blobsize
Definition: makerow.cpp:81
void mark_repeated_chars(TO_ROW *row)
Definition: makerow.cpp:2565
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, bool reject_misses, bool make_new_rows, bool drawing_skew)
Definition: makerow.cpp:2272
void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
Definition: makerow.cpp:1418
bool textord_parallel_baselines
Definition: makerow.cpp:53
void make_initial_textrows(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:254
void fit_lms_line(TO_ROW *row)
Definition: makerow.cpp:296
double textord_min_blob_height_fraction
Definition: makerow.cpp:85
bool textord_show_parallel_rows
Definition: makerow.cpp:48
bool textord_fix_makerow_bug
Definition: makerow.cpp:58
void vigorous_noise_removal(TO_BLOCK *block)
Definition: makerow.cpp:508
void compute_page_skew(TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
Definition: makerow.cpp:315
double textord_occupancy_threshold
Definition: makerow.cpp:82
double textord_underline_width
Definition: makerow.cpp:83
double textord_skew_ile
Definition: makerow.cpp:72
double * linear_spline_baseline(TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t xstarts[])
Definition: makerow.cpp:2180
int textord_lms_line_trials
Definition: makerow.cpp:94
double textord_ascx_ratio_max
Definition: makerow.cpp:90
bool within_error_margin(float test, float num, float margin)
Definition: makerow.h:102
double textord_ascx_ratio_min
Definition: makerow.cpp:89
TBOX deskew_block_coords(TO_BLOCK *block, float gradient)
Definition: makerow.cpp:765
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: makerow.cpp:229
BOOL_VAR_H(wordrec_display_splits)
bool textord_heavy_nr
Definition: makerow.cpp:46
float median_block_xheight(TO_BLOCK *block, float gradient)
bool textord_test_landscape
Definition: makerow.cpp:52
bool textord_new_initial_xheight
Definition: makerow.cpp:95
INT_VAR_H(editor_image_xpos)
void separate_underlines(TO_BLOCK *block, float gradient, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:1781
void fit_parallel_rows(TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:1928
OVERLAP_STATE
Definition: makerow.h:30
@ ASSIGN
Definition: makerow.h:31
@ NEW_ROW
Definition: makerow.h:33
@ REJECT
Definition: makerow.h:32
double textord_linespace_iqrlimit
Definition: makerow.cpp:74
int compute_xheight_from_modes(STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
Definition: makerow.cpp:1480
double textord_min_linesize
Definition: makerow.cpp:80
void correct_row_xheight(TO_ROW *row, float xheight, float ascrise, float descdrop)
Definition: makerow.cpp:1690
int textord_min_blobs_in_row
Definition: makerow.cpp:66
OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, bool testing_blob)
Definition: makerow.cpp:2451
double textord_width_limit
Definition: makerow.cpp:75
int32_t compute_row_descdrop(TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights)
Definition: makerow.cpp:1576
double textord_xheight_mode_fraction
Definition: makerow.cpp:86
bool textord_fix_xheight_bug
Definition: makerow.cpp:57
ROW_CATEGORY get_row_category(const TO_ROW *row)
Definition: makerow.h:94
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2542
bool textord_debug_blob
Definition: makerow.cpp:96
void compute_occupation_threshold(int32_t low_window, int32_t high_window, int32_t line_count, int32_t *occupation, int32_t *thresholds)
Definition: makerow.cpp:852
bool textord_show_final_blobs
Definition: makerow.cpp:51
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
Definition: makerow.cpp:190
void compute_dropout_distances(int32_t *occupation, int32_t *thresholds, int32_t line_count)
Definition: makerow.cpp:933
bool textord_straight_baselines
Definition: makerow.cpp:54
void compute_line_occupation(TO_BLOCK *block, float gradient, int32_t min_y, int32_t max_y, int32_t *occupation, int32_t *deltas)
Definition: makerow.cpp:799
bool segment_baseline(TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t *xstarts)
Definition: makerow.cpp:2083
double textord_descx_ratio_max
Definition: makerow.cpp:92
bool find_best_dropout_row(TO_ROW *row, int32_t distance, float dist_limit, int32_t line_index, TO_ROW_IT *row_it, bool testing_on)
Definition: makerow.cpp:696
bool textord_old_xheight
Definition: makerow.cpp:56
integer coordinate
Definition: points.h:36