tesseract v5.3.3.20231005
textord.cpp
Go to the documentation of this file.
1
2// File: textord.cpp
3// Description: The top-level text line and word finding functionality.
4// Author: Ray Smith
5// Created: Fri Mar 13 14:43:01 PDT 2009
6//
7// (C) Copyright 2009, Google Inc.
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11// http://www.apache.org/licenses/LICENSE-2.0
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17//
19
20// Include automatically generated configuration file if running autoconf.
21#ifdef HAVE_CONFIG_H
22# include "config_auto.h"
23#endif
24
25#include "baselinedetect.h"
26#include "drawtord.h"
27#include "makerow.h"
28#include "pageres.h"
29#include "textord.h"
30#include "tordmain.h"
31#include "wordseg.h"
32
33namespace tesseract {
34
36 : ccstruct_(ccstruct)
37 , use_cjk_fp_model_(false)
38 ,
39 // makerow.cpp ///////////////////////////////////////////
40 BOOL_MEMBER(textord_single_height_mode, false, "Script has no xheight, so use a single mode",
41 ccstruct_->params())
42 ,
43 // tospace.cpp ///////////////////////////////////////////
44 BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?", ccstruct_->params())
45 , BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false,
46 "Constrain relative values of inter and intra-word gaps for "
47 "old_to_method.",
48 ccstruct_->params())
49 , BOOL_MEMBER(tosp_only_use_prop_rows, true, "Block stats to use fixed pitch rows?",
50 ccstruct_->params())
51 , BOOL_MEMBER(tosp_force_wordbreak_on_punct, false,
52 "Force word breaks on punct to break long lines in non-space "
53 "delimited langs",
54 ccstruct_->params())
55 , BOOL_MEMBER(tosp_use_pre_chopping, false, "Space stats use prechopping?", ccstruct_->params())
56 , BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code", ccstruct_->params())
57 , BOOL_MEMBER(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces", ccstruct_->params())
58 , BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces", ccstruct_->params())
59 , BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces", ccstruct_->params())
60 , BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces", ccstruct_->params())
61 , BOOL_MEMBER(tosp_recovery_isolated_row_stats, true,
62 "Use row alone when inadequate cert spaces", ccstruct_->params())
63 , BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess", ccstruct_->params())
64 , BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?", ccstruct_->params())
65 , BOOL_MEMBER(tosp_fuzzy_limit_all, true, "Don't restrict kn->sp fuzzy limit to tables",
66 ccstruct_->params())
67 , BOOL_MEMBER(tosp_stats_use_xht_gaps, true, "Use within xht gap for wd breaks",
68 ccstruct_->params())
69 , BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks", ccstruct_->params())
70 , BOOL_MEMBER(tosp_only_use_xht_gaps, false, "Only use within xht gap for wd breaks",
71 ccstruct_->params())
72 , BOOL_MEMBER(tosp_rule_9_test_punct, false, "Don't chng kn to space next to punct",
73 ccstruct_->params())
74 , BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip", ccstruct_->params())
75 , BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip", ccstruct_->params())
76 , BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic", ccstruct_->params())
77 , INT_MEMBER(tosp_debug_level, 0, "Debug data", ccstruct_->params())
78 , INT_MEMBER(tosp_enough_space_samples_for_median, 3, "or should we use mean",
79 ccstruct_->params())
80 , INT_MEMBER(tosp_redo_kern_limit, 10, "No.samples reqd to reestimate for row",
81 ccstruct_->params())
82 , INT_MEMBER(tosp_few_samples, 40, "No.gaps reqd with 1 large gap to treat as a table",
83 ccstruct_->params())
84 , INT_MEMBER(tosp_short_row, 20, "No.gaps reqd with few cert spaces to use certs",
85 ccstruct_->params())
86 , INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly", ccstruct_->params())
87 , double_MEMBER(tosp_old_sp_kn_th_factor, 2.0,
88 "Factor for defining space threshold in terms of space and "
89 "kern sizes",
90 ccstruct_->params())
91 , double_MEMBER(tosp_threshold_bias1, 0, "how far between kern and space?", ccstruct_->params())
92 , double_MEMBER(tosp_threshold_bias2, 0, "how far between kern and space?", ccstruct_->params())
93 , double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow", ccstruct_->params())
94 , double_MEMBER(tosp_narrow_aspect_ratio, 0.48, "narrow if w/h less than this",
95 ccstruct_->params())
96 , double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide", ccstruct_->params())
97 , double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this", ccstruct_->params())
98 , double_MEMBER(tosp_fuzzy_space_factor, 0.6, "Fract of xheight for fuzz sp",
99 ccstruct_->params())
100 , double_MEMBER(tosp_fuzzy_space_factor1, 0.5, "Fract of xheight for fuzz sp",
101 ccstruct_->params())
102 , double_MEMBER(tosp_fuzzy_space_factor2, 0.72, "Fract of xheight for fuzz sp",
103 ccstruct_->params())
104 , double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern", ccstruct_->params())
105 , double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp", ccstruct_->params())
106 , double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp", ccstruct_->params())
107 , double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp", ccstruct_->params())
108 , double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier", ccstruct_->params())
109 , double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier", ccstruct_->params())
110 , double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space", ccstruct_->params())
111 , double_MEMBER(tosp_enough_small_gaps, 0.65, "Fract of kerns reqd for isolated row stats",
112 ccstruct_->params())
113 , double_MEMBER(tosp_table_kn_sp_ratio, 2.25, "Min difference of kn & sp in table",
114 ccstruct_->params())
115 , double_MEMBER(tosp_table_xht_sp_ratio, 0.33, "Expect spaces bigger than this",
116 ccstruct_->params())
117 , double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0, "Fuzzy if less than this",
118 ccstruct_->params())
119 , double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg", ccstruct_->params())
120 , double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg", ccstruct_->params())
121 , double_MEMBER(tosp_min_sane_kn_sp, 1.5, "Don't trust spaces less than this time kn",
122 ccstruct_->params())
123 , double_MEMBER(tosp_init_guess_kn_mult, 2.2, "Thresh guess - mult kn by this",
124 ccstruct_->params())
125 , double_MEMBER(tosp_init_guess_xht_mult, 0.28, "Thresh guess - mult xht by this",
126 ccstruct_->params())
127 , double_MEMBER(tosp_max_sane_kn_thresh, 5.0, "Multiplier on kn to limit thresh",
128 ccstruct_->params())
129 , double_MEMBER(tosp_flip_caution, 0.0, "Don't autoflip kn to sp when large separation",
130 ccstruct_->params())
131 , double_MEMBER(tosp_large_kerning, 0.19, "Limit use of xht gap with large kns",
132 ccstruct_->params())
133 , double_MEMBER(tosp_dont_fool_with_small_kerns, -1, "Limit use of xht gap with odd small kns",
134 ccstruct_->params())
135 , double_MEMBER(tosp_near_lh_edge, 0, "Don't reduce box if the top left is non blank",
136 ccstruct_->params())
137 , double_MEMBER(tosp_silly_kn_sp_gap, 0.2, "Don't let sp minus kn get too small",
138 ccstruct_->params())
139 , double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75, "How wide fuzzies need context",
140 ccstruct_->params())
141 ,
142 // tordmain.cpp ///////////////////////////////////////////
143 BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs", ccstruct_->params())
144 , BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs", ccstruct_->params())
145 , BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs", ccstruct_->params())
146 , INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise", ccstruct_->params())
147 , INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level", ccstruct_->params())
148 , double_MEMBER(textord_noise_area_ratio, 0.7, "Fraction of bounding box for noise",
149 ccstruct_->params())
150 , double_MEMBER(textord_initialx_ile, 0.75, "Ile of sizes for xheight guess",
151 ccstruct_->params())
152 , double_MEMBER(textord_initialasc_ile, 0.90, "Ile of sizes for xheight guess",
153 ccstruct_->params())
154 , INT_MEMBER(textord_noise_sizefraction, 10, "Fraction of size for maxima", ccstruct_->params())
155 , double_MEMBER(textord_noise_sizelimit, 0.5, "Fraction of x for big t count",
156 ccstruct_->params())
157 , INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob", ccstruct_->params())
158 , double_MEMBER(textord_noise_normratio, 2.0, "Dot to norm ratio for deletion",
159 ccstruct_->params())
160 , BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words", ccstruct_->params())
161 , BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows", ccstruct_->params())
162 , double_MEMBER(textord_noise_syfract, 0.2, "xh fract height error for norm blobs",
163 ccstruct_->params())
164 , double_MEMBER(textord_noise_sxfract, 0.4, "xh fract width error for norm blobs",
165 ccstruct_->params())
166 , double_MEMBER(textord_noise_hfract, 1.0 / 64,
167 "Height fraction to discard outlines as speckle noise", ccstruct_->params())
168 , INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row", ccstruct_->params())
169 , double_MEMBER(textord_noise_rowratio, 6.0, "Dot to norm ratio for deletion",
170 ccstruct_->params())
171 , BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector", ccstruct_->params())
172 , double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift", ccstruct_->params())
173 , double_MEMBER(textord_blshift_xfraction, 9.99, "Min size of baseline shift",
174 ccstruct_->params()) {}
175
176// Make the textlines and words inside each block.
177void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height,
178 Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms,
179 BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks,
180 TO_BLOCK_LIST *to_blocks) {
181 page_tr_.set_x(width);
182 page_tr_.set_y(height);
183 if (to_blocks->empty()) {
184 // AutoPageSeg was not used, so we need to find_components first.
185 find_components(binary_pix, blocks, to_blocks);
186 TO_BLOCK_IT it(to_blocks);
187 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
188 TO_BLOCK *to_block = it.data();
189 // Compute the edge offsets whether or not there is a grey_pix.
190 // We have by-passed auto page seg, so we have to run it here.
191 // By page segmentation mode there is no non-text to avoid running on.
192 to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
193 }
194 } else if (!PSM_SPARSE(pageseg_mode)) {
195 // AutoPageSeg does not need to find_components as it did that already.
196 // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
197 filter_blobs(page_tr_, to_blocks, true);
198 }
199
200 ASSERT_HOST(!to_blocks->empty());
201 if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
202 const FCOORD anticlockwise90(0.0f, 1.0f);
203 const FCOORD clockwise90(0.0f, -1.0f);
204 TO_BLOCK_IT it(to_blocks);
205 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
206 TO_BLOCK *to_block = it.data();
207 BLOCK *block = to_block->block;
208 // Create a fake poly_block in block from its bounding box.
210 // Rotate the to_block along with its contained block and blobnbox lists.
211 to_block->rotate(anticlockwise90);
212 // Set the block's rotation values to obey the convention followed in
213 // layout analysis for vertical text.
214 block->set_re_rotation(clockwise90);
215 block->set_classify_rotation(clockwise90);
216 }
217 }
218
219 TO_BLOCK_IT to_block_it(to_blocks);
220 TO_BLOCK *to_block = to_block_it.data();
221 // Make the rows in the block.
222 float gradient;
223 // Do it the old fashioned way.
224 if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
225 gradient = make_rows(page_tr_, to_blocks);
226 } else if (!PSM_SPARSE(pageseg_mode)) {
227 // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
228 gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE, to_block, to_blocks);
229 } else {
230 gradient = 0.0f;
231 }
232 BaselineDetect baseline_detector(textord_baseline_debug, reskew, to_blocks);
233 baseline_detector.ComputeStraightBaselines(use_box_bottoms);
234 baseline_detector.ComputeBaselineSplinesAndXheights(
235 page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr, textord_show_final_rows, this);
236 // Now make the words in the lines.
237 if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
238 // SINGLE_LINE uses the old word maker on the single line.
239 make_words(this, page_tr_, gradient, blocks, to_blocks);
240 } else {
241 // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
242 // single word, and in SINGLE_CHAR mode, all the outlines
243 // go in a single blob.
244 TO_BLOCK *to_block = to_block_it.data();
245 make_single_word(pageseg_mode == PSM_SINGLE_CHAR, to_block->get_rows(),
246 to_block->block->row_list());
247 }
248 // Remove empties.
249 cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks);
250 TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
251 // Compute the margins for each row in the block, to be used later for
252 // paragraph detection.
253 BLOCK_IT b_it(blocks);
254 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
255 b_it.data()->compute_row_margins();
256 }
257#ifndef GRAPHICS_DISABLED
258 close_to_win();
259#endif
260}
261
262// If we were supposed to return only a single textline, and there is more
263// than one, clean up and leave only the best.
265 if (PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode)) {
266 return; // No cleanup required.
267 }
268 PAGE_RES_IT it(page_res);
269 // Find the best row, being the greatest mean word conf.
270 float row_total_conf = 0.0f;
271 int row_word_count = 0;
272 ROW_RES *best_row = nullptr;
273 float best_conf = 0.0f;
274 for (it.restart_page(); it.word() != nullptr; it.forward()) {
275 WERD_RES *word = it.word();
276 row_total_conf += word->best_choice->certainty();
277 ++row_word_count;
278 if (it.next_row() != it.row()) {
279 row_total_conf /= row_word_count;
280 if (best_row == nullptr || best_conf < row_total_conf) {
281 best_row = it.row();
282 best_conf = row_total_conf;
283 }
284 row_total_conf = 0.0f;
285 row_word_count = 0;
286 }
287 }
288 // Now eliminate any word not in the best row.
289 for (it.restart_page(); it.word() != nullptr; it.forward()) {
290 if (it.row() != best_row) {
292 }
293 }
294}
295
296} // namespace tesseract.
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:369
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:375
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:371
#define ASSERT_HOST(x)
Definition: errcode.h:54
bool textord_show_final_rows
Definition: makerow.cpp:50
@ PSM_SINGLE_BLOCK_VERT_TEXT
Definition: publictypes.h:164
@ PSM_SINGLE_CHAR
Treat the image as a single character.
Definition: publictypes.h:170
bool PSM_LINE_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:201
void close_to_win()
Definition: drawtord.cpp:56
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: makerow.cpp:229
bool PSM_SPARSE(int pageseg_mode)
Definition: publictypes.h:195
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition: wordseg.cpp:99
bool textord_heavy_nr
Definition: makerow.cpp:46
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
Definition: wordseg.cpp:53
bool PSM_WORD_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:204
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
Definition: makerow.cpp:190
@ PT_VERTICAL_TEXT
Definition: publictypes.h:59
void rotate(const FCOORD &rotation)
Definition: blobbox.h:715
TO_ROW_LIST * get_rows()
Definition: blobbox.h:709
void ComputeEdgeOffsets(Image thresholds, Image grey)
Definition: blobbox.cpp:1042
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:185
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:111
void set_classify_rotation(const FCOORD &rotation)
Definition: ocrblock.h:138
void set_re_rotation(const FCOORD &rotation)
Definition: ocrblock.h:132
WERD_CHOICE * best_choice
Definition: pageres.h:239
WERD_RES * forward()
Definition: pageres.h:743
WERD_RES * word() const
Definition: pageres.h:763
WERD_RES * restart_page()
Definition: pageres.h:710
ROW_RES * next_row() const
Definition: pageres.h:775
ROW_RES * row() const
Definition: pageres.h:766
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:63
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67
void set_x(TDimension xin)
rewrite function
Definition: points.h:67
void set_y(TDimension yin)
rewrite function
Definition: points.h:71
float certainty() const
Definition: ratngs.h:315
void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, bool remove_noise, bool show_final_rows, Textord *textord)
void ComputeStraightBaselines(bool use_box_bottoms)
void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: textord.cpp:177
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on)
Definition: tordmain.cpp:238
void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res)
Definition: textord.cpp:264
void find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: tordmain.cpp:211
Textord(CCStruct *ccstruct)
Definition: textord.cpp:35