All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
textord.cpp
Go to the documentation of this file.
1 // File: textord.cpp
3 // Description: The top-level text line and word finding functionality.
4 // Author: Ray Smith
5 // Created: Fri Mar 13 14:43:01 PDT 2009
6 //
7 // (C) Copyright 2009, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #include "baselinedetect.h"
26 #include "drawtord.h"
27 #include "textord.h"
28 #include "makerow.h"
29 #include "pageres.h"
30 #include "tordmain.h"
31 #include "wordseg.h"
32 
33 namespace tesseract {
34 
36  : ccstruct_(ccstruct), use_cjk_fp_model_(false),
37  // makerow.cpp ///////////////////////////////////////////
38  BOOL_MEMBER(textord_single_height_mode, false,
39  "Script has no xheight, so use a single mode",
40  ccstruct_->params()),
41  // tospace.cpp ///////////////////////////////////////////
42  BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?",
43  ccstruct_->params()),
44  BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false,
45  "Constrain relative values of inter and intra-word gaps for "
46  "old_to_method.",
47  ccstruct_->params()),
48  BOOL_MEMBER(tosp_only_use_prop_rows, true,
49  "Block stats to use fixed pitch rows?",
50  ccstruct_->params()),
51  BOOL_MEMBER(tosp_force_wordbreak_on_punct, false,
52  "Force word breaks on punct to break long lines in non-space "
53  "delimited langs",
54  ccstruct_->params()),
55  BOOL_MEMBER(tosp_use_pre_chopping, false,
56  "Space stats use prechopping?",
57  ccstruct_->params()),
58  BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code",
59  ccstruct_->params()),
60  BOOL_MEMBER(tosp_block_use_cert_spaces, true,
61  "Only stat OBVIOUS spaces",
62  ccstruct_->params()),
63  BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces",
64  ccstruct_->params()),
65  BOOL_MEMBER(tosp_narrow_blobs_not_cert, true,
66  "Only stat OBVIOUS spaces",
67  ccstruct_->params()),
68  BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces",
69  ccstruct_->params()),
70  BOOL_MEMBER(tosp_recovery_isolated_row_stats, true,
71  "Use row alone when inadequate cert spaces",
72  ccstruct_->params()),
73  BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess",
74  ccstruct_->params()),
75  BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?",
76  ccstruct_->params()),
77  BOOL_MEMBER(tosp_fuzzy_limit_all, true,
78  "Dont restrict kn->sp fuzzy limit to tables",
79  ccstruct_->params()),
80  BOOL_MEMBER(tosp_stats_use_xht_gaps, true,
81  "Use within xht gap for wd breaks",
82  ccstruct_->params()),
83  BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks",
84  ccstruct_->params()),
85  BOOL_MEMBER(tosp_only_use_xht_gaps, false,
86  "Only use within xht gap for wd breaks",
87  ccstruct_->params()),
88  BOOL_MEMBER(tosp_rule_9_test_punct, false,
89  "Dont chng kn to space next to punct",
90  ccstruct_->params()),
91  BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip",
92  ccstruct_->params()),
93  BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip",
94  ccstruct_->params()),
95  BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic",
96  ccstruct_->params()),
97  INT_MEMBER(tosp_debug_level, 0, "Debug data",
98  ccstruct_->params()),
99  INT_MEMBER(tosp_enough_space_samples_for_median, 3,
100  "or should we use mean",
101  ccstruct_->params()),
102  INT_MEMBER(tosp_redo_kern_limit, 10,
103  "No.samples reqd to reestimate for row",
104  ccstruct_->params()),
105  INT_MEMBER(tosp_few_samples, 40,
106  "No.gaps reqd with 1 large gap to treat as a table",
107  ccstruct_->params()),
108  INT_MEMBER(tosp_short_row, 20,
109  "No.gaps reqd with few cert spaces to use certs",
110  ccstruct_->params()),
111  INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly",
112  ccstruct_->params()),
113  double_MEMBER(tosp_old_sp_kn_th_factor, 2.0,
114  "Factor for defining space threshold in terms of space and "
115  "kern sizes",
116  ccstruct_->params()),
117  double_MEMBER(tosp_threshold_bias1, 0,
118  "how far between kern and space?",
119  ccstruct_->params()),
120  double_MEMBER(tosp_threshold_bias2, 0,
121  "how far between kern and space?",
122  ccstruct_->params()),
123  double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow",
124  ccstruct_->params()),
125  double_MEMBER(tosp_narrow_aspect_ratio, 0.48,
126  "narrow if w/h less than this",
127  ccstruct_->params()),
128  double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide",
129  ccstruct_->params()),
130  double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this",
131  ccstruct_->params()),
132  double_MEMBER(tosp_fuzzy_space_factor, 0.6,
133  "Fract of xheight for fuzz sp",
134  ccstruct_->params()),
135  double_MEMBER(tosp_fuzzy_space_factor1, 0.5,
136  "Fract of xheight for fuzz sp",
137  ccstruct_->params()),
138  double_MEMBER(tosp_fuzzy_space_factor2, 0.72,
139  "Fract of xheight for fuzz sp",
140  ccstruct_->params()),
141  double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern",
142  ccstruct_->params()),
143  double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp",
144  ccstruct_->params()),
145  double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp",
146  ccstruct_->params()),
147  double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp",
148  ccstruct_->params()),
149  double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier",
150  ccstruct_->params()),
151  double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier",
152  ccstruct_->params()),
153  double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space",
154  ccstruct_->params()),
155  double_MEMBER(tosp_enough_small_gaps, 0.65,
156  "Fract of kerns reqd for isolated row stats",
157  ccstruct_->params()),
158  double_MEMBER(tosp_table_kn_sp_ratio, 2.25,
159  "Min difference of kn & sp in table",
160  ccstruct_->params()),
161  double_MEMBER(tosp_table_xht_sp_ratio, 0.33,
162  "Expect spaces bigger than this",
163  ccstruct_->params()),
164  double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0,
165  "Fuzzy if less than this",
166  ccstruct_->params()),
167  double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg",
168  ccstruct_->params()),
169  double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg",
170  ccstruct_->params()),
171  double_MEMBER(tosp_min_sane_kn_sp, 1.5,
172  "Dont trust spaces less than this time kn",
173  ccstruct_->params()),
174  double_MEMBER(tosp_init_guess_kn_mult, 2.2,
175  "Thresh guess - mult kn by this",
176  ccstruct_->params()),
177  double_MEMBER(tosp_init_guess_xht_mult, 0.28,
178  "Thresh guess - mult xht by this",
179  ccstruct_->params()),
180  double_MEMBER(tosp_max_sane_kn_thresh, 5.0,
181  "Multiplier on kn to limit thresh",
182  ccstruct_->params()),
183  double_MEMBER(tosp_flip_caution, 0.0,
184  "Dont autoflip kn to sp when large separation",
185  ccstruct_->params()),
186  double_MEMBER(tosp_large_kerning, 0.19,
187  "Limit use of xht gap with large kns",
188  ccstruct_->params()),
189  double_MEMBER(tosp_dont_fool_with_small_kerns, -1,
190  "Limit use of xht gap with odd small kns",
191  ccstruct_->params()),
192  double_MEMBER(tosp_near_lh_edge, 0,
193  "Dont reduce box if the top left is non blank",
194  ccstruct_->params()),
195  double_MEMBER(tosp_silly_kn_sp_gap, 0.2,
196  "Dont let sp minus kn get too small",
197  ccstruct_->params()),
198  double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75,
199  "How wide fuzzies need context",
200  ccstruct_->params()),
201  // tordmain.cpp ///////////////////////////////////////////
202  BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs",
203  ccstruct_->params()),
204  BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs",
205  ccstruct_->params()),
206  BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs",
207  ccstruct_->params()),
208  INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise",
209  ccstruct_->params()),
210  INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level",
211  ccstruct_->params()),
212  double_MEMBER(textord_blob_size_bigile, 95, "Percentile for large blobs",
213  ccstruct_->params()),
214  double_MEMBER(textord_noise_area_ratio, 0.7,
215  "Fraction of bounding box for noise",
216  ccstruct_->params()),
217  double_MEMBER(textord_blob_size_smallile, 20,
218  "Percentile for small blobs",
219  ccstruct_->params()),
220  double_MEMBER(textord_initialx_ile, 0.75,
221  "Ile of sizes for xheight guess",
222  ccstruct_->params()),
223  double_MEMBER(textord_initialasc_ile, 0.90,
224  "Ile of sizes for xheight guess",
225  ccstruct_->params()),
226  INT_MEMBER(textord_noise_sizefraction, 10,
227  "Fraction of size for maxima",
228  ccstruct_->params()),
229  double_MEMBER(textord_noise_sizelimit, 0.5,
230  "Fraction of x for big t count",
231  ccstruct_->params()),
232  INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob",
233  ccstruct_->params()),
234  double_MEMBER(textord_noise_normratio, 2.0,
235  "Dot to norm ratio for deletion",
236  ccstruct_->params()),
237  BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words",
238  ccstruct_->params()),
239  BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows",
240  ccstruct_->params()),
241  double_MEMBER(textord_noise_syfract, 0.2,
242  "xh fract height error for norm blobs",
243  ccstruct_->params()),
244  double_MEMBER(textord_noise_sxfract, 0.4,
245  "xh fract width error for norm blobs",
246  ccstruct_->params()),
247  double_MEMBER(textord_noise_hfract, 1.0/64,
248  "Height fraction to discard outlines as speckle noise",
249  ccstruct_->params()),
250  INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row",
251  ccstruct_->params()),
252  double_MEMBER(textord_noise_rowratio, 6.0,
253  "Dot to norm ratio for deletion",
254  ccstruct_->params()),
255  BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector",
256  ccstruct_->params()),
257  double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift",
258  ccstruct_->params()),
259  double_MEMBER(textord_blshift_xfraction, 9.99,
260  "Min size of baseline shift",
261  ccstruct_->params()) {
262 }
263 
265 }
266 
267 // Make the textlines and words inside each block.
268 void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD& reskew,
269  int width, int height, Pix* binary_pix,
270  Pix* thresholds_pix, Pix* grey_pix,
271  bool use_box_bottoms, BLOBNBOX_LIST* diacritic_blobs,
272  BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) {
273  page_tr_.set_x(width);
274  page_tr_.set_y(height);
275  if (to_blocks->empty()) {
276  // AutoPageSeg was not used, so we need to find_components first.
277  find_components(binary_pix, blocks, to_blocks);
278  TO_BLOCK_IT it(to_blocks);
279  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
280  TO_BLOCK* to_block = it.data();
281  // Compute the edge offsets whether or not there is a grey_pix.
282  // We have by-passed auto page seg, so we have to run it here.
283  // By page segmentation mode there is no non-text to avoid running on.
284  to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
285  }
286  } else if (!PSM_SPARSE(pageseg_mode)) {
287  // AutoPageSeg does not need to find_components as it did that already.
288  // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
289  filter_blobs(page_tr_, to_blocks, true);
290  }
291 
292  ASSERT_HOST(!to_blocks->empty());
293  if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
294  const FCOORD anticlockwise90(0.0f, 1.0f);
295  const FCOORD clockwise90(0.0f, -1.0f);
296  TO_BLOCK_IT it(to_blocks);
297  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
298  TO_BLOCK* to_block = it.data();
299  BLOCK* block = to_block->block;
300  // Create a fake poly_block in block from its bounding box.
301  block->set_poly_block(new POLY_BLOCK(block->bounding_box(),
303  // Rotate the to_block along with its contained block and blobnbox lists.
304  to_block->rotate(anticlockwise90);
305  // Set the block's rotation values to obey the convention followed in
306  // layout analysis for vertical text.
307  block->set_re_rotation(clockwise90);
308  block->set_classify_rotation(clockwise90);
309  }
310  }
311 
312  TO_BLOCK_IT to_block_it(to_blocks);
313  TO_BLOCK* to_block = to_block_it.data();
314  // Make the rows in the block.
315  float gradient;
316  // Do it the old fashioned way.
317  if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
318  gradient = make_rows(page_tr_, to_blocks);
319  } else if (!PSM_SPARSE(pageseg_mode)) {
320  // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
321  gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE,
322  to_block, to_blocks);
323  }
324  BaselineDetect baseline_detector(textord_baseline_debug,
325  reskew, to_blocks);
326  baseline_detector.ComputeStraightBaselines(use_box_bottoms);
327  baseline_detector.ComputeBaselineSplinesAndXheights(page_tr_, true,
330  this);
331  // Now make the words in the lines.
332  if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
333  // SINGLE_LINE uses the old word maker on the single line.
334  make_words(this, page_tr_, gradient, blocks, to_blocks);
335  } else {
336  // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
337  // single word, and in SINGLE_CHAR mode, all the outlines
338  // go in a single blob.
339  TO_BLOCK* to_block = to_block_it.data();
340  make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
341  to_block->get_rows(), to_block->block->row_list());
342  }
343  // Remove empties.
344  cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks);
345  TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
346  // Compute the margins for each row in the block, to be used later for
347  // paragraph detection.
348  BLOCK_IT b_it(blocks);
349  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
350  b_it.data()->compute_row_margins();
351  }
352 #ifndef GRAPHICS_DISABLED
353  close_to_win();
354 #endif
355 }
356 
357 // If we were supposed to return only a single textline, and there is more
358 // than one, clean up and leave only the best.
360  PAGE_RES* page_res) {
361  if (PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode))
362  return; // No cleanup required.
363  PAGE_RES_IT it(page_res);
364  // Find the best row, being the greatest mean word conf.
365  float row_total_conf = 0.0f;
366  int row_word_count = 0;
367  ROW_RES* best_row = NULL;
368  float best_conf = 0.0f;
369  for (it.restart_page(); it.word() != NULL; it.forward()) {
370  WERD_RES* word = it.word();
371  row_total_conf += word->best_choice->certainty();
372  ++row_word_count;
373  if (it.next_row() != it.row()) {
374  row_total_conf /= row_word_count;
375  if (best_row == NULL || best_conf < row_total_conf) {
376  best_row = it.row();
377  best_conf = row_total_conf;
378  }
379  row_total_conf = 0.0f;
380  row_word_count = 0;
381  }
382  }
383  // Now eliminate any word not in the best row.
384  for (it.restart_page(); it.word() != NULL; it.forward()) {
385  if (it.row() != best_row)
386  it.DeleteCurrentWord();
387  }
388 }
389 
390 } // namespace tesseract.
void set_x(inT16 xin)
rewrite function
Definition: points.h:61
bool textord_heavy_nr
Definition: makerow.cpp:44
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
Definition: wordseg.cpp:60
bool PSM_SPARSE(int pageseg_mode)
Definition: publictypes.h:188
WERD_CHOICE * best_choice
Definition: pageres.h:219
ROW_RES * next_row() const
Definition: pageres.h:745
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:63
int textord_baseline_debug
Definition: textord.h:377
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:304
void set_re_rotation(const FCOORD &rotation)
Definition: ocrblock.h:141
void DeleteCurrentWord()
Definition: pageres.cpp:1449
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
Definition: makerow.cpp:164
#define ASSERT_HOST(x)
Definition: errcode.h:84
void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: textord.cpp:268
bool PSM_LINE_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:194
WERD_RES * forward()
Definition: pageres.h:713
Treat the image as a single character.
Definition: publictypes.h:164
void ComputeStraightBaselines(bool use_box_bottoms)
WERD_RES * restart_page()
Definition: pageres.h:680
void ComputeEdgeOffsets(Pix *thresholds, Pix *grey)
Definition: blobbox.cpp:1049
float certainty() const
Definition: ratngs.h:327
Definition: ocrblock.h:30
void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, bool remove_noise, bool show_final_rows, Textord *textord)
bool PSM_WORD_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:197
ROW_RES * row() const
Definition: pageres.h:736
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:301
void rotate(const FCOORD &rotation)
Definition: blobbox.h:706
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: makerow.cpp:201
void close_to_win()
Definition: drawtord.cpp:56
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on)
Definition: tordmain.cpp:239
bool textord_show_final_rows
Definition: makerow.cpp:48
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
void set_y(inT16 yin)
rewrite function
Definition: points.h:65
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:310
void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res)
Definition: textord.cpp:359
void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: tordmain.cpp:208
#define NULL
Definition: host.h:144
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:120
Definition: points.h:189
Textord(CCStruct *ccstruct)
Definition: textord.cpp:35
void set_classify_rotation(const FCOORD &rotation)
Definition: ocrblock.h:147
BLOCK * block
Definition: blobbox.h:773
WERD_RES * word() const
Definition: pageres.h:733
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition: wordseg.cpp:104