All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tesseract::Textord Class Reference

#include <textord.h>

Public Member Functions

 Textord (CCStruct *ccstruct)
 
 ~Textord ()
 
void TextordPage (PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
void CleanupSingleRowResult (PageSegMode pageseg_mode, PAGE_RES *page_res)
 
bool use_cjk_fp_model () const
 
void set_use_cjk_fp_model (bool flag)
 
void to_spacing (ICOORD page_tr, TO_BLOCK_LIST *blocks)
 
ROW * make_prop_words (TO_ROW *row, FCOORD rotation)
 
ROW * make_blob_words (TO_ROW *row, FCOORD rotation)
 
void find_components (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
void filter_blobs (ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on)
 
void compute_block_xheight (TO_BLOCK *block, float gradient)
 
void make_spline_rows (TO_BLOCK *block, float gradient, BOOL8 testing_on)
 
compute_row_xheight

Estimate the xheight of this row. Compute the ascender rise and descender drop at the same time. Set xheight_evidence to the number of blobs with the chosen xheight that appear in this row.

void compute_row_xheight (TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
 

Public Attributes

bool textord_single_height_mode = false
 
bool tosp_old_to_method = false
 
bool tosp_old_to_constrain_sp_kn = false
 
bool tosp_only_use_prop_rows = true
 
bool tosp_force_wordbreak_on_punct = false
 
bool tosp_use_pre_chopping = false
 
bool tosp_old_to_bug_fix = false
 
bool tosp_block_use_cert_spaces = true
 
bool tosp_row_use_cert_spaces = true
 
bool tosp_narrow_blobs_not_cert = true
 
bool tosp_row_use_cert_spaces1 = true
 
bool tosp_recovery_isolated_row_stats = true
 
bool tosp_only_small_gaps_for_kern = false
 
bool tosp_all_flips_fuzzy = false
 
bool tosp_fuzzy_limit_all = true
 
bool tosp_stats_use_xht_gaps = true
 
bool tosp_use_xht_gaps = true
 
bool tosp_only_use_xht_gaps = false
 
bool tosp_rule_9_test_punct = false
 
bool tosp_flip_fuzz_kn_to_sp = true
 
bool tosp_flip_fuzz_sp_to_kn = true
 
bool tosp_improve_thresh = false
 
int tosp_debug_level = 0
 
int tosp_enough_space_samples_for_median = 3
 
int tosp_redo_kern_limit = 10
 
int tosp_few_samples = 40
 
int tosp_short_row = 20
 
int tosp_sanity_method = 1
 
double tosp_old_sp_kn_th_factor = 2.0
 
double tosp_threshold_bias1 = 0
 
double tosp_threshold_bias2 = 0
 
double tosp_narrow_fraction = 0.3
 
double tosp_narrow_aspect_ratio = 0.48
 
double tosp_wide_fraction = 0.52
 
double tosp_wide_aspect_ratio = 0.0
 
double tosp_fuzzy_space_factor = 0.6
 
double tosp_fuzzy_space_factor1 = 0.5
 
double tosp_fuzzy_space_factor2 = 0.72
 
double tosp_gap_factor = 0.83
 
double tosp_kern_gap_factor1 = 2.0
 
double tosp_kern_gap_factor2 = 1.3
 
double tosp_kern_gap_factor3 = 2.5
 
double tosp_ignore_big_gaps = -1
 
double tosp_ignore_very_big_gaps = 3.5
 
double tosp_rep_space = 1.6
 
double tosp_enough_small_gaps = 0.65
 
double tosp_table_kn_sp_ratio = 2.25
 
double tosp_table_xht_sp_ratio = 0.33
 
double tosp_table_fuzzy_kn_sp_ratio = 3.0
 
double tosp_fuzzy_kn_fraction = 0.5
 
double tosp_fuzzy_sp_fraction = 0.5
 
double tosp_min_sane_kn_sp = 1.5
 
double tosp_init_guess_kn_mult = 2.2
 
double tosp_init_guess_xht_mult = 0.28
 
double tosp_max_sane_kn_thresh = 5.0
 
double tosp_flip_caution = 0.0
 
double tosp_large_kerning = 0.19
 
double tosp_dont_fool_with_small_kerns = -1
 
double tosp_near_lh_edge = 0
 
double tosp_silly_kn_sp_gap = 0.2
 
double tosp_pass_wide_fuzz_sp_to_context = 0.75
 
bool textord_no_rejects = false
 
bool textord_show_blobs = false
 
bool textord_show_boxes = false
 
int textord_max_noise_size = 7
 
int textord_baseline_debug = 0
 
double textord_blob_size_bigile = 95
 
double textord_noise_area_ratio = 0.7
 
double textord_blob_size_smallile = 20
 
double textord_initialx_ile = 0.75
 
double textord_initialasc_ile = 0.90
 
int textord_noise_sizefraction = 10
 
double textord_noise_sizelimit = 0.5
 
int textord_noise_translimit = 16
 
double textord_noise_normratio = 2.0
 
bool textord_noise_rejwords = true
 
bool textord_noise_rejrows = true
 
double textord_noise_syfract = 0.2
 
double textord_noise_sxfract = 0.4
 
double textord_noise_hfract = 1.0/64
 
int textord_noise_sncount = 1
 
double textord_noise_rowratio = 6.0
 
bool textord_noise_debug = FALSE
 
double textord_blshift_maxshift = 0.00
 
double textord_blshift_xfraction = 9.99
 

Detailed Description

Definition at line 68 of file textord.h.

Constructor & Destructor Documentation

tesseract::Textord::Textord ( CCStruct *  ccstruct)
explicit

Definition at line 35 of file textord.cpp.

36  : ccstruct_(ccstruct), use_cjk_fp_model_(false),
37  // makerow.cpp ///////////////////////////////////////////
39  "Script has no xheight, so use a single mode",
40  ccstruct_->params()),
41  // tospace.cpp ///////////////////////////////////////////
42  BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?",
43  ccstruct_->params()),
45  "Constrain relative values of inter and intra-word gaps for "
46  "old_to_method.",
47  ccstruct_->params()),
49  "Block stats to use fixed pitch rows?",
50  ccstruct_->params()),
52  "Force word breaks on punct to break long lines in non-space "
53  "delimited langs",
54  ccstruct_->params()),
56  "Space stats use prechopping?",
57  ccstruct_->params()),
58  BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code",
59  ccstruct_->params()),
61  "Only stat OBVIOUS spaces",
62  ccstruct_->params()),
63  BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces",
64  ccstruct_->params()),
66  "Only stat OBVIOUS spaces",
67  ccstruct_->params()),
68  BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces",
69  ccstruct_->params()),
71  "Use row alone when inadequate cert spaces",
72  ccstruct_->params()),
73  BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess",
74  ccstruct_->params()),
75  BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?",
76  ccstruct_->params()),
78  "Dont restrict kn->sp fuzzy limit to tables",
79  ccstruct_->params()),
81  "Use within xht gap for wd breaks",
82  ccstruct_->params()),
83  BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks",
84  ccstruct_->params()),
86  "Only use within xht gap for wd breaks",
87  ccstruct_->params()),
89  "Dont chng kn to space next to punct",
90  ccstruct_->params()),
91  BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip",
92  ccstruct_->params()),
93  BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip",
94  ccstruct_->params()),
95  BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic",
96  ccstruct_->params()),
97  INT_MEMBER(tosp_debug_level, 0, "Debug data",
98  ccstruct_->params()),
100  "or should we use mean",
101  ccstruct_->params()),
103  "No.samples reqd to reestimate for row",
104  ccstruct_->params()),
106  "No.gaps reqd with 1 large gap to treat as a table",
107  ccstruct_->params()),
109  "No.gaps reqd with few cert spaces to use certs",
110  ccstruct_->params()),
111  INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly",
112  ccstruct_->params()),
114  "Factor for defining space threshold in terms of space and "
115  "kern sizes",
116  ccstruct_->params()),
118  "how far between kern and space?",
119  ccstruct_->params()),
121  "how far between kern and space?",
122  ccstruct_->params()),
123  double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow",
124  ccstruct_->params()),
126  "narrow if w/h less than this",
127  ccstruct_->params()),
128  double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide",
129  ccstruct_->params()),
130  double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this",
131  ccstruct_->params()),
133  "Fract of xheight for fuzz sp",
134  ccstruct_->params()),
136  "Fract of xheight for fuzz sp",
137  ccstruct_->params()),
139  "Fract of xheight for fuzz sp",
140  ccstruct_->params()),
141  double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern",
142  ccstruct_->params()),
143  double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp",
144  ccstruct_->params()),
145  double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp",
146  ccstruct_->params()),
147  double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp",
148  ccstruct_->params()),
149  double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier",
150  ccstruct_->params()),
151  double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier",
152  ccstruct_->params()),
153  double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space",
154  ccstruct_->params()),
156  "Fract of kerns reqd for isolated row stats",
157  ccstruct_->params()),
159  "Min difference of kn & sp in table",
160  ccstruct_->params()),
162  "Expect spaces bigger than this",
163  ccstruct_->params()),
165  "Fuzzy if less than this",
166  ccstruct_->params()),
167  double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg",
168  ccstruct_->params()),
169  double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg",
170  ccstruct_->params()),
172  "Dont trust spaces less than this time kn",
173  ccstruct_->params()),
175  "Thresh guess - mult kn by this",
176  ccstruct_->params()),
178  "Thresh guess - mult xht by this",
179  ccstruct_->params()),
181  "Multiplier on kn to limit thresh",
182  ccstruct_->params()),
184  "Dont autoflip kn to sp when large separation",
185  ccstruct_->params()),
187  "Limit use of xht gap with large kns",
188  ccstruct_->params()),
190  "Limit use of xht gap with odd small kns",
191  ccstruct_->params()),
193  "Dont reduce box if the top left is non blank",
194  ccstruct_->params()),
196  "Dont let sp minus kn get too small",
197  ccstruct_->params()),
199  "How wide fuzzies need context",
200  ccstruct_->params()),
201  // tordmain.cpp ///////////////////////////////////////////
202  BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs",
203  ccstruct_->params()),
204  BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs",
205  ccstruct_->params()),
206  BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs",
207  ccstruct_->params()),
208  INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise",
209  ccstruct_->params()),
210  INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level",
211  ccstruct_->params()),
212  double_MEMBER(textord_blob_size_bigile, 95, "Percentile for large blobs",
213  ccstruct_->params()),
215  "Fraction of bounding box for noise",
216  ccstruct_->params()),
218  "Percentile for small blobs",
219  ccstruct_->params()),
221  "Ile of sizes for xheight guess",
222  ccstruct_->params()),
224  "Ile of sizes for xheight guess",
225  ccstruct_->params()),
227  "Fraction of size for maxima",
228  ccstruct_->params()),
230  "Fraction of x for big t count",
231  ccstruct_->params()),
232  INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob",
233  ccstruct_->params()),
235  "Dot to norm ratio for deletion",
236  ccstruct_->params()),
237  BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words",
238  ccstruct_->params()),
239  BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows",
240  ccstruct_->params()),
242  "xh fract height error for norm blobs",
243  ccstruct_->params()),
245  "xh fract width error for norm blobs",
246  ccstruct_->params()),
248  "Height fraction to discard outlines as speckle noise",
249  ccstruct_->params()),
250  INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row",
251  ccstruct_->params()),
253  "Dot to norm ratio for deletion",
254  ccstruct_->params()),
255  BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector",
256  ccstruct_->params()),
257  double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift",
258  ccstruct_->params()),
260  "Min size of baseline shift",
261  ccstruct_->params()) {
262 }
bool tosp_use_xht_gaps
Definition: textord.h:293
int textord_max_noise_size
Definition: textord.h:376
bool textord_noise_rejrows
Definition: textord.h:389
double tosp_old_sp_kn_th_factor
Definition: textord.h:314
double tosp_fuzzy_space_factor1
Definition: textord.h:329
bool tosp_fuzzy_limit_all
Definition: textord.h:289
bool tosp_improve_thresh
Definition: textord.h:301
double tosp_near_lh_edge
Definition: textord.h:367
double textord_noise_syfract
Definition: textord.h:390
double tosp_min_sane_kn_sp
Definition: textord.h:353
bool tosp_stats_use_xht_gaps
Definition: textord.h:291
bool tosp_only_use_xht_gaps
Definition: textord.h:295
bool textord_noise_debug
Definition: textord.h:397
double tosp_pass_wide_fuzz_sp_to_context
Definition: textord.h:371
int tosp_sanity_method
Definition: textord.h:311
bool tosp_use_pre_chopping
Definition: textord.h:273
bool tosp_old_to_method
Definition: textord.h:263
double textord_blob_size_smallile
Definition: textord.h:381
bool tosp_old_to_constrain_sp_kn
Definition: textord.h:266
int textord_baseline_debug
Definition: textord.h:377
int tosp_enough_space_samples_for_median
Definition: textord.h:304
bool tosp_row_use_cert_spaces1
Definition: textord.h:283
bool tosp_flip_fuzz_sp_to_kn
Definition: textord.h:299
double tosp_init_guess_kn_mult
Definition: textord.h:355
bool tosp_only_small_gaps_for_kern
Definition: textord.h:286
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:304
bool tosp_block_use_cert_spaces
Definition: textord.h:277
double tosp_gap_factor
Definition: textord.h:332
double tosp_ignore_big_gaps
Definition: textord.h:339
double textord_noise_hfract
Definition: textord.h:394
double textord_blshift_maxshift
Definition: textord.h:398
double tosp_table_kn_sp_ratio
Definition: textord.h:345
bool tosp_only_use_prop_rows
Definition: textord.h:268
int textord_noise_sncount
Definition: textord.h:395
double textord_noise_normratio
Definition: textord.h:387
double tosp_kern_gap_factor3
Definition: textord.h:338
double tosp_ignore_very_big_gaps
Definition: textord.h:340
double tosp_enough_small_gaps
Definition: textord.h:343
double tosp_threshold_bias1
Definition: textord.h:316
bool textord_noise_rejwords
Definition: textord.h:388
double textord_initialasc_ile
Definition: textord.h:383
double textord_blob_size_bigile
Definition: textord.h:378
double tosp_silly_kn_sp_gap
Definition: textord.h:369
bool tosp_old_to_bug_fix
Definition: textord.h:275
double textord_noise_rowratio
Definition: textord.h:396
bool textord_no_rejects
Definition: textord.h:373
double textord_initialx_ile
Definition: textord.h:382
double tosp_fuzzy_sp_fraction
Definition: textord.h:351
double tosp_wide_fraction
Definition: textord.h:323
bool tosp_narrow_blobs_not_cert
Definition: textord.h:281
int tosp_redo_kern_limit
Definition: textord.h:306
bool tosp_all_flips_fuzzy
Definition: textord.h:287
double tosp_rep_space
Definition: textord.h:341
double textord_blshift_xfraction
Definition: textord.h:399
bool textord_show_blobs
Definition: textord.h:374
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:301
double textord_noise_sxfract
Definition: textord.h:392
bool tosp_flip_fuzz_kn_to_sp
Definition: textord.h:298
int textord_noise_sizefraction
Definition: textord.h:384
double tosp_fuzzy_kn_fraction
Definition: textord.h:350
ParamsVectors * params()
Definition: ccutil.h:65
bool textord_show_boxes
Definition: textord.h:375
bool tosp_force_wordbreak_on_punct
Definition: textord.h:271
double tosp_table_fuzzy_kn_sp_ratio
Definition: textord.h:349
bool tosp_row_use_cert_spaces
Definition: textord.h:279
double tosp_init_guess_xht_mult
Definition: textord.h:357
double textord_noise_sizelimit
Definition: textord.h:385
double tosp_fuzzy_space_factor
Definition: textord.h:327
double tosp_large_kerning
Definition: textord.h:363
double tosp_kern_gap_factor2
Definition: textord.h:336
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:310
double tosp_table_xht_sp_ratio
Definition: textord.h:347
double tosp_fuzzy_space_factor2
Definition: textord.h:331
bool tosp_rule_9_test_punct
Definition: textord.h:297
double tosp_threshold_bias2
Definition: textord.h:318
double tosp_wide_aspect_ratio
Definition: textord.h:325
double tosp_narrow_fraction
Definition: textord.h:320
double tosp_kern_gap_factor1
Definition: textord.h:334
double tosp_max_sane_kn_thresh
Definition: textord.h:359
double tosp_flip_caution
Definition: textord.h:361
bool tosp_recovery_isolated_row_stats
Definition: textord.h:285
double textord_noise_area_ratio
Definition: textord.h:380
double tosp_narrow_aspect_ratio
Definition: textord.h:322
bool textord_single_height_mode
Definition: textord.h:261
double tosp_dont_fool_with_small_kerns
Definition: textord.h:365
int textord_noise_translimit
Definition: textord.h:386
tesseract::Textord::~Textord ( )

Definition at line 264 of file textord.cpp.

264  {
265 }

Member Function Documentation

void tesseract::Textord::CleanupSingleRowResult ( PageSegMode  pageseg_mode,
PAGE_RES *  page_res 
)

Definition at line 359 of file textord.cpp.

360  {
361  if (PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode))
362  return; // No cleanup required.
363  PAGE_RES_IT it(page_res);
364  // Find the best row, being the greatest mean word conf.
365  float row_total_conf = 0.0f;
366  int row_word_count = 0;
367  ROW_RES* best_row = NULL;
368  float best_conf = 0.0f;
369  for (it.restart_page(); it.word() != NULL; it.forward()) {
370  WERD_RES* word = it.word();
371  row_total_conf += word->best_choice->certainty();
372  ++row_word_count;
373  if (it.next_row() != it.row()) {
374  row_total_conf /= row_word_count;
375  if (best_row == NULL || best_conf < row_total_conf) {
376  best_row = it.row();
377  best_conf = row_total_conf;
378  }
379  row_total_conf = 0.0f;
380  row_word_count = 0;
381  }
382  }
383  // Now eliminate any word not in the best row.
384  for (it.restart_page(); it.word() != NULL; it.forward()) {
385  if (it.row() != best_row)
386  it.DeleteCurrentWord();
387  }
388 }
bool PSM_SPARSE(int pageseg_mode)
Definition: publictypes.h:188
WERD_CHOICE * best_choice
Definition: pageres.h:219
bool PSM_LINE_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:194
float certainty() const
Definition: ratngs.h:327
WERD * word
Definition: pageres.h:175
ROW * row
Definition: pageres.h:127
#define NULL
Definition: host.h:144
void tesseract::Textord::compute_block_xheight ( TO_BLOCK *  block,
float  gradient 
)

Definition at line 1285 of file makerow.cpp.

1285  {
1286  TO_ROW *row; // current row
1287  float asc_frac_xheight = CCStruct::kAscenderFraction /
1289  float desc_frac_xheight = CCStruct::kDescenderFraction /
1291  inT32 min_height, max_height; // limits on xheight
1292  TO_ROW_IT row_it = block->get_rows();
1293  if (row_it.empty()) return; // no rows
1294 
1295  // Compute the best guess of xheight of each row individually.
1296  // Use xheight and ascrise values of the rows where ascenders were found.
1297  get_min_max_xheight(block->line_size, &min_height, &max_height);
1298  STATS row_asc_xheights(min_height, max_height + 1);
1299  STATS row_asc_ascrise(static_cast<int>(min_height * asc_frac_xheight),
1300  static_cast<int>(max_height * asc_frac_xheight) + 1);
1301  int min_desc_height = static_cast<int>(min_height * desc_frac_xheight);
1302  int max_desc_height = static_cast<int>(max_height * desc_frac_xheight);
1303  STATS row_asc_descdrop(min_desc_height, max_desc_height + 1);
1304  STATS row_desc_xheights(min_height, max_height + 1);
1305  STATS row_desc_descdrop(min_desc_height, max_desc_height + 1);
1306  STATS row_cap_xheights(min_height, max_height + 1);
1307  STATS row_cap_floating_xheights(min_height, max_height + 1);
1308  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1309  row = row_it.data();
1310  // Compute the xheight of this row if it has not been computed before.
1311  if (row->xheight <= 0.0) {
1313  gradient, block->line_size);
1314  }
1315  ROW_CATEGORY row_category = get_row_category(row);
1316  if (row_category == ROW_ASCENDERS_FOUND) {
1317  row_asc_xheights.add(static_cast<inT32>(row->xheight),
1318  row->xheight_evidence);
1319  row_asc_ascrise.add(static_cast<inT32>(row->ascrise),
1320  row->xheight_evidence);
1321  row_asc_descdrop.add(static_cast<inT32>(-row->descdrop),
1322  row->xheight_evidence);
1323  } else if (row_category == ROW_DESCENDERS_FOUND) {
1324  row_desc_xheights.add(static_cast<inT32>(row->xheight),
1325  row->xheight_evidence);
1326  row_desc_descdrop.add(static_cast<inT32>(-row->descdrop),
1327  row->xheight_evidence);
1328  } else if (row_category == ROW_UNKNOWN) {
1329  fill_heights(row, gradient, min_height, max_height,
1330  &row_cap_xheights, &row_cap_floating_xheights);
1331  }
1332  }
1333 
1334  float xheight = 0.0;
1335  float ascrise = 0.0;
1336  float descdrop = 0.0;
1337  // Compute our best guess of xheight of this block.
1338  if (row_asc_xheights.get_total() > 0) {
1339  // Determine xheight from rows where ascenders were found.
1340  xheight = row_asc_xheights.median();
1341  ascrise = row_asc_ascrise.median();
1342  descdrop = -row_asc_descdrop.median();
1343  } else if (row_desc_xheights.get_total() > 0) {
1344  // Determine xheight from rows where descenders were found.
1345  xheight = row_desc_xheights.median();
1346  descdrop = -row_desc_descdrop.median();
1347  } else if (row_cap_xheights.get_total() > 0) {
1348  // All the rows in the block were (a/de)scenderless.
1349  // Try to search for two modes in row_cap_heights that could
1350  // be the xheight and the capheight (e.g. some of the rows
1351  // were lowercase, but did not have enough (a/de)scenders.
1352  // If such two modes can not be found, this block is most
1353  // likely all caps (or all small caps, in which case the code
1354  // still works as intended).
1355  compute_xheight_from_modes(&row_cap_xheights, &row_cap_floating_xheights,
1357  block->block->classify_rotation().y() == 0.0,
1358  min_height, max_height, &(xheight), &(ascrise));
1359  if (ascrise == 0) { // assume only caps in the whole block
1360  xheight = row_cap_xheights.median() * CCStruct::kXHeightCapRatio;
1361  }
1362  } else { // default block sizes
1363  xheight = block->line_size * CCStruct::kXHeightFraction;
1364  }
1365  // Correct xheight, ascrise and descdrop if necessary.
1366  bool corrected_xheight = false;
1367  if (xheight < textord_min_xheight) {
1368  xheight = static_cast<float>(textord_min_xheight);
1369  corrected_xheight = true;
1370  }
1371  if (corrected_xheight || ascrise <= 0.0) {
1372  ascrise = xheight * asc_frac_xheight;
1373  }
1374  if (corrected_xheight || descdrop >= 0.0) {
1375  descdrop = -(xheight * desc_frac_xheight);
1376  }
1377  block->xheight = xheight;
1378 
1379  if (textord_debug_xheights) {
1380  tprintf("Block average xheight=%.4f, ascrise=%.4f, descdrop=%.4f\n",
1381  xheight, ascrise, descdrop);
1382  }
1383  // Correct xheight, ascrise, descdrop of rows based on block averages.
1384  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1385  correct_row_xheight(row_it.data(), xheight, ascrise, descdrop);
1386  }
1387 }
static const double kXHeightCapRatio
Definition: ccstruct.h:37
#define tprintf(...)
Definition: tprintf.h:31
void get_min_max_xheight(int block_linesize, int *min_height, int *max_height)
Definition: makerow.h:116
Definition: statistc.h:33
static const double kDescenderFraction
Definition: ccstruct.h:33
int compute_xheight_from_modes(STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
Definition: makerow.cpp:1498
int xheight_evidence
Definition: blobbox.h:654
float xheight
Definition: blobbox.h:784
bool textord_debug_xheights
Definition: makerow.cpp:57
FCOORD classify_rotation() const
Definition: ocrblock.h:144
static const double kAscenderFraction
Definition: ccstruct.h:35
ROW_CATEGORY
Definition: makerow.h:36
float ascrise
Definition: blobbox.h:655
void compute_row_xheight(TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
Definition: makerow.cpp:1397
int textord_min_xheight
Definition: makerow.cpp:69
void correct_row_xheight(TO_ROW *row, float xheight, float ascrise, float descdrop)
Definition: makerow.cpp:1716
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
float y() const
Definition: points.h:212
static const double kXHeightFraction
Definition: ccstruct.h:34
void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
Definition: makerow.cpp:1437
ROW_CATEGORY get_row_category(const TO_ROW *row)
Definition: makerow.h:123
float xheight
Definition: blobbox.h:653
float descdrop
Definition: blobbox.h:656
bool textord_single_height_mode
Definition: textord.h:261
BLOCK * block
Definition: blobbox.h:773
float line_size
Definition: blobbox.h:781
int inT32
Definition: host.h:102
void tesseract::Textord::compute_row_xheight ( TO_ROW *  row,
const FCOORD &  rotation,
float  gradient,
int  block_line_size 
)

Definition at line 1397 of file makerow.cpp.

1400  {
1401  // Find blobs representing repeated characters in rows and mark them.
1402  // This information is used for computing row xheight and at a later
1403  // stage when words are formed by make_words.
1404  if (!row->rep_chars_marked()) {
1405  mark_repeated_chars(row);
1406  }
1407 
1408  int min_height, max_height;
1409  get_min_max_xheight(block_line_size, &min_height, &max_height);
1410  STATS heights(min_height, max_height + 1);
1411  STATS floating_heights(min_height, max_height + 1);
1412  fill_heights(row, gradient, min_height, max_height,
1413  &heights, &floating_heights);
1414  row->ascrise = 0.0f;
1415  row->xheight = 0.0f;
1416  row->xheight_evidence =
1417  compute_xheight_from_modes(&heights, &floating_heights,
1419  rotation.y() == 0.0,
1420  min_height, max_height,
1421  &(row->xheight), &(row->ascrise));
1422  row->descdrop = 0.0f;
1423  if (row->xheight > 0.0) {
1424  row->descdrop = static_cast<float>(
1425  compute_row_descdrop(row, gradient, row->xheight_evidence, &heights));
1426  }
1427 }
void get_min_max_xheight(int block_linesize, int *min_height, int *max_height)
Definition: makerow.h:116
Definition: statistc.h:33
inT32 compute_row_descdrop(TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights)
Definition: makerow.cpp:1594
int compute_xheight_from_modes(STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
Definition: makerow.cpp:1498
int xheight_evidence
Definition: blobbox.h:654
bool rep_chars_marked() const
Definition: blobbox.h:627
float ascrise
Definition: blobbox.h:655
void mark_repeated_chars(TO_ROW *row)
Definition: makerow.cpp:2671
float y() const
Definition: points.h:212
void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
Definition: makerow.cpp:1437
float xheight
Definition: blobbox.h:653
float descdrop
Definition: blobbox.h:656
bool textord_single_height_mode
Definition: textord.h:261
void tesseract::Textord::filter_blobs ( ICOORD  page_tr,
TO_BLOCK_LIST *  blocks,
BOOL8  testing_on 
)

Definition at line 239 of file tordmain.cpp.

241  { // for plotting
242  TO_BLOCK_IT block_it = blocks; // destination iterator
243  TO_BLOCK *block; // created block
244 
245  #ifndef GRAPHICS_DISABLED
246  if (to_win != NULL)
247  to_win->Clear();
248  #endif // GRAPHICS_DISABLED
249 
250  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
251  block_it.forward()) {
252  block = block_it.data();
253  block->line_size = filter_noise_blobs(&block->blobs,
254  &block->noise_blobs,
255  &block->small_blobs,
256  &block->large_blobs);
257  block->line_spacing = block->line_size *
264 
265  #ifndef GRAPHICS_DISABLED
266  if (textord_show_blobs && testing_on) {
267  if (to_win == NULL)
268  create_to_win(page_tr);
269  block->plot_graded_blobs(to_win);
270  }
271  if (textord_show_boxes && testing_on) {
272  if (to_win == NULL)
273  create_to_win(page_tr);
278  }
279  #endif // GRAPHICS_DISABLED
280  }
281 }
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
static const double kDescenderFraction
Definition: ccstruct.h:33
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:771
void Clear()
Definition: scrollview.cpp:595
static const double kAscenderFraction
Definition: ccstruct.h:35
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
double textord_excess_blobsize
Definition: makerow.cpp:85
bool textord_show_blobs
Definition: textord.h:374
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:770
void plot_box_list(ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour)
Definition: drawtord.cpp:70
bool textord_show_boxes
Definition: textord.h:375
void plot_graded_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1065
float line_spacing
Definition: blobbox.h:775
static const double kXHeightFraction
Definition: ccstruct.h:34
double textord_min_linesize
Definition: makerow.cpp:83
#define NULL
Definition: host.h:144
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:772
float max_blob_size
Definition: blobbox.h:782
float line_size
Definition: blobbox.h:781
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
void tesseract::Textord::find_components ( Pix *  pix,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 208 of file tordmain.cpp.

209  {
210  int width = pixGetWidth(pix);
211  int height = pixGetHeight(pix);
212  if (width > MAX_INT16 || height > MAX_INT16) {
213  tprintf("Input image too large! (%d, %d)\n", width, height);
214  return; // Can't handle it.
215  }
216 
218 
219  BLOCK_IT block_it(blocks); // iterator
220  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
221  block_it.forward()) {
222  BLOCK* block = block_it.data();
223  if (block->poly_block() == NULL || block->poly_block()->IsText()) {
224  extract_edges(pix, block);
225  }
226  }
227 
228  assign_blobs_to_blocks2(pix, blocks, to_blocks);
229  ICOORD page_tr(width, height);
230  filter_blobs(page_tr, to_blocks, !textord_test_landscape);
231 }
void set_global_loc_code(int loc_code)
Definition: globaloc.cpp:79
void extract_edges(Pix *pix, BLOCK *block)
Definition: edgblob.cpp:334
#define tprintf(...)
Definition: tprintf.h:31
#define LOC_EDGE_PROG
Definition: errcode.h:44
bool IsText() const
Definition: polyblk.h:52
Definition: ocrblock.h:30
bool textord_test_landscape
Definition: makerow.cpp:50
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on)
Definition: tordmain.cpp:239
integer coordinate
Definition: points.h:30
#define MAX_INT16
Definition: host.h:119
#define NULL
Definition: host.h:144
POLY_BLOCK * poly_block() const
Definition: pdblock.h:59
void assign_blobs_to_blocks2(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition: tordmain.cpp:157
ROW * tesseract::Textord::make_blob_words ( TO_ROW *  row,
FCOORD  rotation 
)

Definition at line 1177 of file tospace.cpp.

1180  {
1181  bool bol; // start of line
1182  ROW *real_row; // output row
1183  C_OUTLINE_IT cout_it;
1184  C_BLOB_LIST cblobs;
1185  C_BLOB_IT cblob_it = &cblobs;
1186  WERD_LIST words;
1187  WERD_IT word_it; // new words
1188  WERD *word; // new word
1189  BLOBNBOX *bblob; // current blob
1190  TBOX blob_box; // bounding box
1191  BLOBNBOX_IT box_it; // iterator
1192  inT16 word_count = 0;
1193 
1194  cblob_it.set_to_list(&cblobs);
1195  box_it.set_to_list(row->blob_list());
1196  word_it.set_to_list(&words);
1197  bol = TRUE;
1198  if (!box_it.empty()) {
1199 
1200  do {
1201  bblob = box_it.data();
1202  blob_box = bblob->bounding_box();
1203  if (bblob->joined_to_prev()) {
1204  if (bblob->cblob() != NULL) {
1205  cout_it.set_to_list(cblob_it.data()->out_list());
1206  cout_it.move_to_last();
1207  cout_it.add_list_after(bblob->cblob()->out_list());
1208  delete bblob->cblob();
1209  }
1210  } else {
1211  if (bblob->cblob() != NULL)
1212  cblob_it.add_after_then_move(bblob->cblob());
1213  }
1214  box_it.forward(); // next one
1215  bblob = box_it.data();
1216  blob_box = bblob->bounding_box();
1217 
1218  if (!bblob->joined_to_prev() && !cblobs.empty()) {
1219  word = new WERD(&cblobs, 1, NULL);
1220  word_count++;
1221  word_it.add_after_then_move(word);
1222  if (bol) {
1223  word->set_flag(W_BOL, TRUE);
1224  bol = FALSE;
1225  }
1226  if (box_it.at_first()) { // at end of line
1227  word->set_flag(W_EOL, TRUE);
1228  }
1229  }
1230  }
1231  while (!box_it.at_first()); // until back at start
1232  /* Setup the row with created words. */
1233  real_row = new ROW(row, (inT16) row->kern_size, (inT16) row->space_size);
1234  word_it.set_to_list(real_row->word_list());
1235  //put words in row
1236  word_it.add_list_after(&words);
1237  real_row->recalc_bounding_box();
1238  if (tosp_debug_level > 4) {
1239  tprintf ("Row:Made %d words in row ((%d,%d)(%d,%d))\n",
1240  word_count,
1241  real_row->bounding_box().left(),
1242  real_row->bounding_box().bottom(),
1243  real_row->bounding_box().right(),
1244  real_row->bounding_box().top());
1245  }
1246  return real_row;
1247  }
1248  return NULL;
1249 }
float kern_size
Definition: blobbox.h:662
bool joined_to_prev() const
Definition: blobbox.h:241
#define tprintf(...)
Definition: tprintf.h:31
inT16 right() const
Definition: rect.h:75
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
Definition: ocrrow.h:32
Definition: werd.h:35
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:64
Definition: werd.h:36
inT16 left() const
Definition: rect.h:68
C_BLOB * cblob() const
Definition: blobbox.h:253
TBOX bounding_box() const
Definition: ocrrow.h:85
Definition: werd.h:60
inT16 bottom() const
Definition: rect.h:61
float space_size
Definition: blobbox.h:663
#define FALSE
Definition: capi.h:29
Definition: rect.h:30
#define TRUE
Definition: capi.h:28
#define NULL
Definition: host.h:144
const TBOX & bounding_box() const
Definition: blobbox.h:215
inT16 top() const
Definition: rect.h:54
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:129
WERD_LIST * word_list()
Definition: ocrrow.h:52
void recalc_bounding_box()
Definition: ocrrow.cpp:101
short inT16
Definition: host.h:100
ROW * tesseract::Textord::make_prop_words ( TO_ROW *  row,
FCOORD  rotation 
)

Definition at line 884 of file tospace.cpp.

887  {
888  BOOL8 bol; // start of line
889  /* prev_ values are for start of word being built. non prev_ values are for
890  the gap between the word being built and the next one. */
891  BOOL8 prev_fuzzy_sp; // probably space
892  BOOL8 prev_fuzzy_non; // probably not
893  uinT8 prev_blanks; // in front of word
894  BOOL8 fuzzy_sp = false; // probably space
895  BOOL8 fuzzy_non = false; // probably not
896  uinT8 blanks = 0; // in front of word
897  BOOL8 prev_gap_was_a_space = FALSE;
898  BOOL8 break_at_next_gap = FALSE;
899  ROW *real_row; // output row
900  C_OUTLINE_IT cout_it;
901  C_BLOB_LIST cblobs;
902  C_BLOB_IT cblob_it = &cblobs;
903  WERD_LIST words;
904  WERD_IT word_it; // new words
905  WERD *word; // new word
906  WERD_IT rep_char_it; // repeated char words
907  inT32 next_rep_char_word_right = MAX_INT32;
908  float repetition_spacing; // gap between repetitions
909  inT32 xstarts[2]; // row ends
910  inT32 prev_x; // end of prev blob
911  BLOBNBOX *bblob; // current blob
912  TBOX blob_box; // bounding box
913  BLOBNBOX_IT box_it; // iterator
914  TBOX prev_blob_box;
915  TBOX next_blob_box;
916  inT16 prev_gap = MAX_INT16;
917  inT16 current_gap = MAX_INT16;
918  inT16 next_gap = MAX_INT16;
919  inT16 prev_within_xht_gap = MAX_INT16;
920  inT16 current_within_xht_gap = MAX_INT16;
921  inT16 next_within_xht_gap = MAX_INT16;
922  inT16 word_count = 0;
923 
924  rep_char_it.set_to_list (&(row->rep_words));
925  if (!rep_char_it.empty ()) {
926  next_rep_char_word_right =
927  rep_char_it.data ()->bounding_box ().right ();
928  }
929 
930  prev_x = -MAX_INT16;
931  cblob_it.set_to_list (&cblobs);
932  box_it.set_to_list (row->blob_list ());
933  word_it.set_to_list (&words);
934  bol = TRUE;
935  prev_blanks = 0;
936  prev_fuzzy_sp = FALSE;
937  prev_fuzzy_non = FALSE;
938  if (!box_it.empty ()) {
939  xstarts[0] = box_it.data ()->bounding_box ().left ();
940  if (xstarts[0] > next_rep_char_word_right) {
941  /* We need to insert a repeated char word at the start of the row */
942  word = rep_char_it.extract ();
943  word_it.add_after_then_move (word);
944  /* Set spaces before repeated char word */
945  word->set_flag (W_BOL, TRUE);
946  bol = FALSE;
947  word->set_blanks (0);
948  //NO uncertainty
949  word->set_flag (W_FUZZY_SP, FALSE);
950  word->set_flag (W_FUZZY_NON, FALSE);
951  xstarts[0] = word->bounding_box ().left ();
952  /* Set spaces after repeated char word (and leave current word set) */
953  repetition_spacing = find_mean_blob_spacing (word);
954  current_gap = box_it.data ()->bounding_box ().left () -
955  next_rep_char_word_right;
956  current_within_xht_gap = current_gap;
957  if (current_gap > tosp_rep_space * repetition_spacing) {
958  prev_blanks = (uinT8) floor (current_gap / row->space_size);
959  if (prev_blanks < 1)
960  prev_blanks = 1;
961  }
962  else
963  prev_blanks = 0;
964  if (tosp_debug_level > 5)
965  tprintf ("Repch wd at BOL(%d, %d). rep spacing %5.2f; Rgap:%d ",
966  box_it.data ()->bounding_box ().left (),
967  box_it.data ()->bounding_box ().bottom (),
968  repetition_spacing, current_gap);
969  prev_fuzzy_sp = FALSE;
970  prev_fuzzy_non = FALSE;
971  if (rep_char_it.empty ()) {
972  next_rep_char_word_right = MAX_INT32;
973  }
974  else {
975  rep_char_it.forward ();
976  next_rep_char_word_right =
977  rep_char_it.data ()->bounding_box ().right ();
978  }
979  }
980 
981  peek_at_next_gap(row,
982  box_it,
983  next_blob_box,
984  next_gap,
985  next_within_xht_gap);
986  do {
987  bblob = box_it.data ();
988  blob_box = bblob->bounding_box ();
989  if (bblob->joined_to_prev ()) {
990  if (bblob->cblob () != NULL) {
991  cout_it.set_to_list (cblob_it.data ()->out_list ());
992  cout_it.move_to_last ();
993  cout_it.add_list_after (bblob->cblob ()->out_list ());
994  delete bblob->cblob ();
995  }
996  } else {
997  if (bblob->cblob() != NULL)
998  cblob_it.add_after_then_move (bblob->cblob ());
999  prev_x = blob_box.right ();
1000  }
1001  box_it.forward (); //next one
1002  bblob = box_it.data ();
1003  blob_box = bblob->bounding_box ();
1004 
1005  if (!bblob->joined_to_prev() && bblob->cblob() != NULL) {
1006  /* Real Blob - not multiple outlines or pre-chopped */
1007  prev_gap = current_gap;
1008  prev_within_xht_gap = current_within_xht_gap;
1009  prev_blob_box = next_blob_box;
1010  current_gap = next_gap;
1011  current_within_xht_gap = next_within_xht_gap;
1012  peek_at_next_gap(row,
1013  box_it,
1014  next_blob_box,
1015  next_gap,
1016  next_within_xht_gap);
1017 
1018  inT16 prev_gap_arg = prev_gap;
1019  inT16 next_gap_arg = next_gap;
1020  if (tosp_only_use_xht_gaps) {
1021  prev_gap_arg = prev_within_xht_gap;
1022  next_gap_arg = next_within_xht_gap;
1023  }
1024  // Decide if a word-break should be inserted
1025  if (blob_box.left () > next_rep_char_word_right ||
1026  make_a_word_break(row, blob_box, prev_gap_arg, prev_blob_box,
1027  current_gap, current_within_xht_gap,
1028  next_blob_box, next_gap_arg,
1029  blanks, fuzzy_sp, fuzzy_non,
1030  prev_gap_was_a_space,
1031  break_at_next_gap) ||
1032  box_it.at_first()) {
1033  /* Form a new word out of the blobs collected */
1034  word = new WERD (&cblobs, prev_blanks, NULL);
1035  word_count++;
1036  word_it.add_after_then_move (word);
1037  if (bol) {
1038  word->set_flag (W_BOL, TRUE);
1039  bol = FALSE;
1040  }
1041  if (prev_fuzzy_sp)
1042  //probably space
1043  word->set_flag (W_FUZZY_SP, TRUE);
1044  else if (prev_fuzzy_non)
1045  word->set_flag (W_FUZZY_NON, TRUE);
1046  //probably not
1047 
1048  if (blob_box.left () > next_rep_char_word_right) {
1049  /* We need to insert a repeated char word */
1050  word = rep_char_it.extract ();
1051  word_it.add_after_then_move (word);
1052 
1053  /* Set spaces before repeated char word */
1054  repetition_spacing = find_mean_blob_spacing (word);
1055  current_gap = word->bounding_box ().left () - prev_x;
1056  current_within_xht_gap = current_gap;
1057  if (current_gap > tosp_rep_space * repetition_spacing) {
1058  blanks =
1059  (uinT8) floor (current_gap / row->space_size);
1060  if (blanks < 1)
1061  blanks = 1;
1062  }
1063  else
1064  blanks = 0;
1065  if (tosp_debug_level > 5)
1066  tprintf
1067  ("Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);",
1068  word->bounding_box ().left (),
1069  word->bounding_box ().bottom (),
1070  repetition_spacing, current_gap, blanks);
1071  word->set_blanks (blanks);
1072  //NO uncertainty
1073  word->set_flag (W_FUZZY_SP, FALSE);
1074  word->set_flag (W_FUZZY_NON, FALSE);
1075 
1076  /* Set spaces after repeated char word (and leave current word set) */
1077  current_gap =
1078  blob_box.left () - next_rep_char_word_right;
1079  if (current_gap > tosp_rep_space * repetition_spacing) {
1080  blanks = (uinT8) (current_gap / row->space_size);
1081  if (blanks < 1)
1082  blanks = 1;
1083  }
1084  else
1085  blanks = 0;
1086  if (tosp_debug_level > 5)
1087  tprintf (" Rgap:%d (%d blanks)\n",
1088  current_gap, blanks);
1089  fuzzy_sp = FALSE;
1090  fuzzy_non = FALSE;
1091 
1092  if (rep_char_it.empty ()) {
1093  next_rep_char_word_right = MAX_INT32;
1094  }
1095  else {
1096  rep_char_it.forward ();
1097  next_rep_char_word_right =
1098  rep_char_it.data ()->bounding_box ().right ();
1099  }
1100  }
1101 
1102  if (box_it.at_first () && rep_char_it.empty ()) {
1103  //at end of line
1104  word->set_flag (W_EOL, TRUE);
1105  xstarts[1] = prev_x;
1106  }
1107  else {
1108  prev_blanks = blanks;
1109  prev_fuzzy_sp = fuzzy_sp;
1110  prev_fuzzy_non = fuzzy_non;
1111  }
1112  }
1113  }
1114  }
1115  while (!box_it.at_first ()); //until back at start
1116 
1117  /* Insert any further repeated char words */
1118  while (!rep_char_it.empty ()) {
1119  word = rep_char_it.extract ();
1120  word_it.add_after_then_move (word);
1121 
1122  /* Set spaces before repeated char word */
1123  repetition_spacing = find_mean_blob_spacing (word);
1124  current_gap = word->bounding_box ().left () - prev_x;
1125  if (current_gap > tosp_rep_space * repetition_spacing) {
1126  blanks = (uinT8) floor (current_gap / row->space_size);
1127  if (blanks < 1)
1128  blanks = 1;
1129  }
1130  else
1131  blanks = 0;
1132  if (tosp_debug_level > 5)
1133  tprintf
1134  ("Repch wd at EOL (%d,%d). rep spacing %d; Lgap:%d (%d blanks)\n",
1135  word->bounding_box ().left (), word->bounding_box ().bottom (),
1136  repetition_spacing, current_gap, blanks);
1137  word->set_blanks (blanks);
1138  //NO uncertainty
1139  word->set_flag (W_FUZZY_SP, FALSE);
1140  word->set_flag (W_FUZZY_NON, FALSE);
1141  prev_x = word->bounding_box ().right ();
1142  if (rep_char_it.empty ()) {
1143  //at end of line
1144  word->set_flag (W_EOL, TRUE);
1145  xstarts[1] = prev_x;
1146  }
1147  else {
1148  rep_char_it.forward ();
1149  }
1150  }
1151  real_row = new ROW (row,
1152  (inT16) row->kern_size, (inT16) row->space_size);
1153  word_it.set_to_list (real_row->word_list ());
1154  //put words in row
1155  word_it.add_list_after (&words);
1156  real_row->recalc_bounding_box ();
1157 
1158  if (tosp_debug_level > 4) {
1159  tprintf ("Row: Made %d words in row ((%d,%d)(%d,%d))\n",
1160  word_count,
1161  real_row->bounding_box ().left (),
1162  real_row->bounding_box ().bottom (),
1163  real_row->bounding_box ().right (),
1164  real_row->bounding_box ().top ());
1165  }
1166  return real_row;
1167  }
1168  return NULL;
1169 }
float kern_size
Definition: blobbox.h:662
bool tosp_only_use_xht_gaps
Definition: textord.h:295
bool joined_to_prev() const
Definition: blobbox.h:241
#define tprintf(...)
Definition: tprintf.h:31
unsigned char BOOL8
Definition: host.h:113
TBOX bounding_box() const
Definition: werd.cpp:160
inT16 right() const
Definition: rect.h:75
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
Definition: ocrrow.h:32
Definition: werd.h:35
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:64
Definition: werd.h:36
inT16 left() const
Definition: rect.h:68
C_BLOB * cblob() const
Definition: blobbox.h:253
double tosp_rep_space
Definition: textord.h:341
TBOX bounding_box() const
Definition: ocrrow.h:85
WERD_LIST rep_words
Definition: blobbox.h:664
#define MAX_INT32
Definition: host.h:120
Definition: werd.h:60
inT16 bottom() const
Definition: rect.h:61
float space_size
Definition: blobbox.h:663
#define FALSE
Definition: capi.h:29
Definition: rect.h:30
#define TRUE
Definition: capi.h:28
#define MAX_INT16
Definition: host.h:119
#define NULL
Definition: host.h:144
const TBOX & bounding_box() const
Definition: blobbox.h:215
inT16 top() const
Definition: rect.h:54
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:129
void set_blanks(uinT8 new_blanks)
Definition: werd.h:107
WERD_LIST * word_list()
Definition: ocrrow.h:52
void recalc_bounding_box()
Definition: ocrrow.cpp:101
short inT16
Definition: host.h:100
int inT32
Definition: host.h:102
unsigned char uinT8
Definition: host.h:99
void tesseract::Textord::make_spline_rows ( TO_BLOCK *  block,
float  gradient,
BOOL8  testing_on 
)

Definition at line 2034 of file makerow.cpp.

2036  {
2037 #ifndef GRAPHICS_DISABLED
2038  ScrollView::Color colour; //of row
2039 #endif
2040  TO_ROW_IT row_it = block->get_rows ();
2041 
2042  row_it.move_to_first ();
2043  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
2044  if (row_it.data ()->blob_list ()->empty ())
2045  delete row_it.extract (); //nothing in it
2046  else
2047  make_baseline_spline (row_it.data (), block);
2048  }
2049  if (textord_old_baselines) {
2050 #ifndef GRAPHICS_DISABLED
2051  if (testing_on) {
2052  colour = ScrollView::RED;
2053  for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
2054  row_it.forward ()) {
2055  row_it.data ()->baseline.plot (to_win, colour);
2056  colour = (ScrollView::Color) (colour + 1);
2057  if (colour > ScrollView::MAGENTA)
2058  colour = ScrollView::RED;
2059  }
2060  }
2061 #endif
2062  make_old_baselines(block, testing_on, gradient);
2063  }
2064 #ifndef GRAPHICS_DISABLED
2065  if (testing_on) {
2066  colour = ScrollView::RED;
2067  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
2068  row_it.data ()->baseline.plot (to_win, colour);
2069  colour = (ScrollView::Color) (colour + 1);
2070  if (colour > ScrollView::MAGENTA)
2071  colour = ScrollView::RED;
2072  }
2073  }
2074 #endif
2075 }
bool textord_old_baselines
Definition: makerow.cpp:53
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
void make_baseline_spline(TO_ROW *row, TO_BLOCK *block)
Definition: makerow.cpp:2087
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
void tesseract::Textord::set_use_cjk_fp_model ( bool  flag)
inline

Definition at line 95 of file textord.h.

95  {
96  use_cjk_fp_model_ = flag;
97  }
void tesseract::Textord::TextordPage ( PageSegMode  pageseg_mode,
const FCOORD &  reskew,
int  width,
int  height,
Pix *  binary_pix,
Pix *  thresholds_pix,
Pix *  grey_pix,
bool  use_box_bottoms,
BLOBNBOX_LIST *  diacritic_blobs,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 268 of file textord.cpp.

272  {
273  page_tr_.set_x(width);
274  page_tr_.set_y(height);
275  if (to_blocks->empty()) {
276  // AutoPageSeg was not used, so we need to find_components first.
277  find_components(binary_pix, blocks, to_blocks);
278  TO_BLOCK_IT it(to_blocks);
279  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
280  TO_BLOCK* to_block = it.data();
281  // Compute the edge offsets whether or not there is a grey_pix.
282  // We have by-passed auto page seg, so we have to run it here.
283  // By page segmentation mode there is no non-text to avoid running on.
284  to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
285  }
286  } else if (!PSM_SPARSE(pageseg_mode)) {
287  // AutoPageSeg does not need to find_components as it did that already.
288  // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
289  filter_blobs(page_tr_, to_blocks, true);
290  }
291 
292  ASSERT_HOST(!to_blocks->empty());
293  if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
294  const FCOORD anticlockwise90(0.0f, 1.0f);
295  const FCOORD clockwise90(0.0f, -1.0f);
296  TO_BLOCK_IT it(to_blocks);
297  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
298  TO_BLOCK* to_block = it.data();
299  BLOCK* block = to_block->block;
300  // Create a fake poly_block in block from its bounding box.
301  block->set_poly_block(new POLY_BLOCK(block->bounding_box(),
302  PT_VERTICAL_TEXT));
303  // Rotate the to_block along with its contained block and blobnbox lists.
304  to_block->rotate(anticlockwise90);
305  // Set the block's rotation values to obey the convention followed in
306  // layout analysis for vertical text.
307  block->set_re_rotation(clockwise90);
308  block->set_classify_rotation(clockwise90);
309  }
310  }
311 
312  TO_BLOCK_IT to_block_it(to_blocks);
313  TO_BLOCK* to_block = to_block_it.data();
314  // Make the rows in the block.
315  float gradient;
316  // Do it the old fashioned way.
317  if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
318  gradient = make_rows(page_tr_, to_blocks);
319  } else if (!PSM_SPARSE(pageseg_mode)) {
320  // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
321  gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE,
322  to_block, to_blocks);
323  }
324  BaselineDetect baseline_detector(textord_baseline_debug,
325  reskew, to_blocks);
326  baseline_detector.ComputeStraightBaselines(use_box_bottoms);
327  baseline_detector.ComputeBaselineSplinesAndXheights(page_tr_, true,
328  textord_heavy_nr,
329  textord_show_final_rows,
330  this);
331  // Now make the words in the lines.
332  if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
333  // SINGLE_LINE uses the old word maker on the single line.
334  make_words(this, page_tr_, gradient, blocks, to_blocks);
335  } else {
336  // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
337  // single word, and in SINGLE_CHAR mode, all the outlines
338  // go in a single blob.
339  TO_BLOCK* to_block = to_block_it.data();
340  make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
341  to_block->get_rows(), to_block->block->row_list());
342  }
343  // Remove empties.
344  cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks);
345  TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
346  // Compute the margins for each row in the block, to be used later for
347  // paragraph detection.
348  BLOCK_IT b_it(blocks);
349  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
350  b_it.data()->compute_row_margins();
351  }
352 #ifndef GRAPHICS_DISABLED
353  close_to_win();
354 #endif
355 }
void set_x(inT16 xin)
rewrite function
Definition: points.h:61
bool textord_heavy_nr
Definition: makerow.cpp:44
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
Definition: wordseg.cpp:60
bool PSM_SPARSE(int pageseg_mode)
Definition: publictypes.h:188
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:63
int textord_baseline_debug
Definition: textord.h:377
void set_re_rotation(const FCOORD &rotation)
Definition: ocrblock.h:141
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
Definition: makerow.cpp:164
#define ASSERT_HOST(x)
Definition: errcode.h:84
bool PSM_LINE_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:194
Treat the image as a single character.
Definition: publictypes.h:164
void ComputeEdgeOffsets(Pix *thresholds, Pix *grey)
Definition: blobbox.cpp:1049
Definition: ocrblock.h:30
bool PSM_WORD_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:197
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67
void rotate(const FCOORD &rotation)
Definition: blobbox.h:706
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: makerow.cpp:201
void close_to_win()
Definition: drawtord.cpp:56
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on)
Definition: tordmain.cpp:239
bool textord_show_final_rows
Definition: makerow.cpp:48
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
void set_y(inT16 yin)
rewrite function
Definition: points.h:65
void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: tordmain.cpp:208
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:120
Definition: points.h:189
void set_classify_rotation(const FCOORD &rotation)
Definition: ocrblock.h:147
BLOCK * block
Definition: blobbox.h:773
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition: wordseg.cpp:104
void tesseract::Textord::to_spacing ( ICOORD  page_tr,
TO_BLOCK_LIST *  blocks 
)

Definition at line 33 of file tospace.cpp.

36  {
37  TO_BLOCK_IT block_it; //iterator
38  TO_BLOCK *block; //current block;
39  TO_ROW_IT row_it; //row iterator
40  TO_ROW *row; //current row
41  int block_index; //block number
42  int row_index; //row number
43  //estimated width of real spaces for whole block
44  inT16 block_space_gap_width;
45  //estimated width of non space gaps for whole block
46  inT16 block_non_space_gap_width;
47  BOOL8 old_text_ord_proportional;//old fixed/prop result
48  GAPMAP *gapmap = NULL; //map of big vert gaps in blk
49 
50  block_it.set_to_list (blocks);
51  block_index = 1;
52  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
53  block_it.forward ()) {
54  block = block_it.data ();
55  gapmap = new GAPMAP (block);
56  block_spacing_stats(block,
57  gapmap,
58  old_text_ord_proportional,
59  block_space_gap_width,
60  block_non_space_gap_width);
61  // Make sure relative values of block-level space and non-space gap
62  // widths are reasonable. The ratio of 1:3 is also used in
63  // block_spacing_stats, to corrrect the block_space_gap_width
64  // Useful for arabic and hindi, when the non-space gap width is
65  // often over-estimated and should not be trusted. A similar ratio
66  // is found in block_spacing_stats.
67  if (tosp_old_to_method && tosp_old_to_constrain_sp_kn &&
68  (float) block_space_gap_width / block_non_space_gap_width < 3.0) {
69  block_non_space_gap_width = (inT16) floor (block_space_gap_width / 3.0);
70  }
71  row_it.set_to_list (block->get_rows ());
72  row_index = 1;
73  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
74  row = row_it.data ();
75  if ((row->pitch_decision == PITCH_DEF_PROP) ||
76  (row->pitch_decision == PITCH_CORR_PROP)) {
77  if ((tosp_debug_level > 0) && !old_text_ord_proportional)
78  tprintf ("Block %d Row %d: Now Proportional\n",
79  block_index, row_index);
80  row_spacing_stats(row,
81  gapmap,
82  block_index,
83  row_index,
84  block_space_gap_width,
85  block_non_space_gap_width);
86  }
87  else {
88  if ((tosp_debug_level > 0) && old_text_ord_proportional)
89  tprintf
90  ("Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n",
91  block_index, row_index, row->pitch_decision,
92  row->fixed_pitch);
93  }
94 #ifndef GRAPHICS_DISABLED
95  if (textord_show_initial_words)
96  plot_word_decisions (to_win, (inT16) row->fixed_pitch, row);
97 #endif
98  row_index++;
99  }
100  delete gapmap;
101  block_index++;
102  }
103 }
#define tprintf(...)
Definition: tprintf.h:31
bool tosp_old_to_method
Definition: textord.h:263
bool tosp_old_to_constrain_sp_kn
Definition: textord.h:266
unsigned char BOOL8
Definition: host.h:113
float fixed_pitch
Definition: blobbox.h:647
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
Definition: gap_map.h:6
EXTERN bool textord_show_initial_words
Definition: tovars.cpp:25
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
void plot_word_decisions(ScrollView *win, inT16 pitch, TO_ROW *row)
Definition: drawtord.cpp:250
PITCH_TYPE pitch_decision
Definition: blobbox.h:646
#define NULL
Definition: host.h:144
short inT16
Definition: host.h:100
bool tesseract::Textord::use_cjk_fp_model ( ) const
inline

Definition at line 92 of file textord.h.

92  {
93  return use_cjk_fp_model_;
94  }

Member Data Documentation

int tesseract::Textord::textord_baseline_debug = 0

"Baseline debug level"

Definition at line 377 of file textord.h.

double tesseract::Textord::textord_blob_size_bigile = 95

"Percentile for large blobs"

Definition at line 378 of file textord.h.

double tesseract::Textord::textord_blob_size_smallile = 20

"Percentile for small blobs"

Definition at line 381 of file textord.h.

double tesseract::Textord::textord_blshift_maxshift = 0.00

"Max baseline shift"

Definition at line 398 of file textord.h.

double tesseract::Textord::textord_blshift_xfraction = 9.99

"Min size of baseline shift"

Definition at line 399 of file textord.h.

double tesseract::Textord::textord_initialasc_ile = 0.90

"Ile of sizes for xheight guess"

Definition at line 383 of file textord.h.

double tesseract::Textord::textord_initialx_ile = 0.75

"Ile of sizes for xheight guess"

Definition at line 382 of file textord.h.

int tesseract::Textord::textord_max_noise_size = 7

"Pixel size of noise"

Definition at line 376 of file textord.h.

bool tesseract::Textord::textord_no_rejects = false

"Don't remove noise blobs"

Definition at line 373 of file textord.h.

double tesseract::Textord::textord_noise_area_ratio = 0.7

"Fraction of bounding box for noise"

Definition at line 380 of file textord.h.

bool tesseract::Textord::textord_noise_debug = FALSE

"Debug row garbage detector"

Definition at line 397 of file textord.h.

double tesseract::Textord::textord_noise_hfract = 1.0/64

"Height fraction to discard outlines as speckle noise"

Definition at line 394 of file textord.h.

double tesseract::Textord::textord_noise_normratio = 2.0

"Dot to norm ratio for deletion"

Definition at line 387 of file textord.h.

bool tesseract::Textord::textord_noise_rejrows = true

"Reject noise-like rows"

Definition at line 389 of file textord.h.

bool tesseract::Textord::textord_noise_rejwords = true

"Reject noise-like words"

Definition at line 388 of file textord.h.

double tesseract::Textord::textord_noise_rowratio = 6.0

"Dot to norm ratio for deletion"

Definition at line 396 of file textord.h.

int tesseract::Textord::textord_noise_sizefraction = 10

"Fraction of size for maxima"

Definition at line 384 of file textord.h.

double tesseract::Textord::textord_noise_sizelimit = 0.5

"Fraction of x for big t count"

Definition at line 385 of file textord.h.

int tesseract::Textord::textord_noise_sncount = 1

"super norm blobs to save row"

Definition at line 395 of file textord.h.

double tesseract::Textord::textord_noise_sxfract = 0.4

"xh fract width error for norm blobs"

Definition at line 392 of file textord.h.

double tesseract::Textord::textord_noise_syfract = 0.2

"xh fract error for norm blobs"

Definition at line 390 of file textord.h.

int tesseract::Textord::textord_noise_translimit = 16

"Transitions for normal blob"

Definition at line 386 of file textord.h.

bool tesseract::Textord::textord_show_blobs = false

"Display unsorted blobs"

Definition at line 374 of file textord.h.

bool tesseract::Textord::textord_show_boxes = false

"Display boxes"

Definition at line 375 of file textord.h.

bool tesseract::Textord::textord_single_height_mode = false

"Script has no xheight, so use a single mode for horizontal text"

Definition at line 261 of file textord.h.

bool tesseract::Textord::tosp_all_flips_fuzzy = false

"Pass ANY flip to context?"

Definition at line 287 of file textord.h.

bool tesseract::Textord::tosp_block_use_cert_spaces = true

"Only stat OBVIOUS spaces"

Definition at line 277 of file textord.h.

int tesseract::Textord::tosp_debug_level = 0

"Debug data"

Definition at line 302 of file textord.h.

double tesseract::Textord::tosp_dont_fool_with_small_kerns = -1

"Limit use of xht gap with odd small kns"

Definition at line 365 of file textord.h.

double tesseract::Textord::tosp_enough_small_gaps = 0.65

"Fract of kerns reqd for isolated row stats"

Definition at line 343 of file textord.h.

int tesseract::Textord::tosp_enough_space_samples_for_median = 3

"or should we use mean"

Definition at line 304 of file textord.h.

int tesseract::Textord::tosp_few_samples = 40

"No.gaps reqd with 1 large gap to treat as a table"

Definition at line 308 of file textord.h.

double tesseract::Textord::tosp_flip_caution = 0.0

"Dont autoflip kn to sp when large separation"

Definition at line 361 of file textord.h.

bool tesseract::Textord::tosp_flip_fuzz_kn_to_sp = true

"Default flip"

Definition at line 298 of file textord.h.

bool tesseract::Textord::tosp_flip_fuzz_sp_to_kn = true

"Default flip"

Definition at line 299 of file textord.h.

bool tesseract::Textord::tosp_force_wordbreak_on_punct = false

"Force word breaks on punct to break long lines in non-space " "delimited langs"

Definition at line 271 of file textord.h.

double tesseract::Textord::tosp_fuzzy_kn_fraction = 0.5

"New fuzzy kn alg"

Definition at line 350 of file textord.h.

bool tesseract::Textord::tosp_fuzzy_limit_all = true

"Dont restrict kn->sp fuzzy limit to tables"

Definition at line 289 of file textord.h.

double tesseract::Textord::tosp_fuzzy_sp_fraction = 0.5

"New fuzzy sp alg"

Definition at line 351 of file textord.h.

double tesseract::Textord::tosp_fuzzy_space_factor = 0.6

"Fract of xheight for fuzz sp"

Definition at line 327 of file textord.h.

double tesseract::Textord::tosp_fuzzy_space_factor1 = 0.5

"Fract of xheight for fuzz sp"

Definition at line 329 of file textord.h.

double tesseract::Textord::tosp_fuzzy_space_factor2 = 0.72

"Fract of xheight for fuzz sp"

Definition at line 331 of file textord.h.

double tesseract::Textord::tosp_gap_factor = 0.83

"gap ratio to flip sp->kern"

Definition at line 332 of file textord.h.

double tesseract::Textord::tosp_ignore_big_gaps = -1

"xht multiplier"

Definition at line 339 of file textord.h.

double tesseract::Textord::tosp_ignore_very_big_gaps = 3.5

"xht multiplier"

Definition at line 340 of file textord.h.

bool tesseract::Textord::tosp_improve_thresh = false

"Enable improvement heuristic"

Definition at line 301 of file textord.h.

double tesseract::Textord::tosp_init_guess_kn_mult = 2.2

"Thresh guess - mult kn by this"

Definition at line 355 of file textord.h.

double tesseract::Textord::tosp_init_guess_xht_mult = 0.28

"Thresh guess - mult xht by this"

Definition at line 357 of file textord.h.

double tesseract::Textord::tosp_kern_gap_factor1 = 2.0

"gap ratio to flip kern->sp"

Definition at line 334 of file textord.h.

double tesseract::Textord::tosp_kern_gap_factor2 = 1.3

"gap ratio to flip kern->sp"

Definition at line 336 of file textord.h.

double tesseract::Textord::tosp_kern_gap_factor3 = 2.5

"gap ratio to flip kern->sp"

Definition at line 338 of file textord.h.

double tesseract::Textord::tosp_large_kerning = 0.19

"Limit use of xht gap with large kns"

Definition at line 363 of file textord.h.

double tesseract::Textord::tosp_max_sane_kn_thresh = 5.0

"Multiplier on kn to limit thresh"

Definition at line 359 of file textord.h.

double tesseract::Textord::tosp_min_sane_kn_sp = 1.5

"Don't trust spaces less than this times kn"

Definition at line 353 of file textord.h.

double tesseract::Textord::tosp_narrow_aspect_ratio = 0.48

"narrow if w/h less than this"

Definition at line 322 of file textord.h.

bool tesseract::Textord::tosp_narrow_blobs_not_cert = true

"Only stat OBVIOUS spaces"

Definition at line 281 of file textord.h.

double tesseract::Textord::tosp_narrow_fraction = 0.3

"Fract of xheight for narrow"

Definition at line 320 of file textord.h.

double tesseract::Textord::tosp_near_lh_edge = 0

"Don't reduce box if the top left is non-blank"

Definition at line 367 of file textord.h.

double tesseract::Textord::tosp_old_sp_kn_th_factor = 2.0

"Factor for defining space threshold in terms of space and kern sizes"

Definition at line 314 of file textord.h.

bool tesseract::Textord::tosp_old_to_bug_fix = false

"Fix suspected bug in old code"

Definition at line 275 of file textord.h.

bool tesseract::Textord::tosp_old_to_constrain_sp_kn = false

"Constrain relative values of inter and intra-word gaps for old_to_method."

Definition at line 266 of file textord.h.

bool tesseract::Textord::tosp_old_to_method = false

"Space stats use prechopping?"

Definition at line 263 of file textord.h.

bool tesseract::Textord::tosp_only_small_gaps_for_kern = false

"Better guess"

Definition at line 286 of file textord.h.

bool tesseract::Textord::tosp_only_use_prop_rows = true

"Block stats to use fixed pitch rows?"

Definition at line 268 of file textord.h.

bool tesseract::Textord::tosp_only_use_xht_gaps = false

"Only use within xht gap for wd breaks"

Definition at line 295 of file textord.h.

double tesseract::Textord::tosp_pass_wide_fuzz_sp_to_context = 0.75

"How wide fuzzies need context"

Definition at line 371 of file textord.h.

bool tesseract::Textord::tosp_recovery_isolated_row_stats = true

"Use row alone when inadequate cert spaces"

Definition at line 285 of file textord.h.

int tesseract::Textord::tosp_redo_kern_limit = 10

"No. of samples required to re-estimate for row"

Definition at line 306 of file textord.h.

double tesseract::Textord::tosp_rep_space = 1.6

"rep gap multiplier for space"

Definition at line 341 of file textord.h.

bool tesseract::Textord::tosp_row_use_cert_spaces = true

"Only stat OBVIOUS spaces"

Definition at line 279 of file textord.h.

bool tesseract::Textord::tosp_row_use_cert_spaces1 = true

"Only stat OBVIOUS spaces"

Definition at line 283 of file textord.h.

bool tesseract::Textord::tosp_rule_9_test_punct = false

"Don't change kn to space next to punct"

Definition at line 297 of file textord.h.

int tesseract::Textord::tosp_sanity_method = 1

"How to avoid being silly"

Definition at line 311 of file textord.h.

int tesseract::Textord::tosp_short_row = 20

"No. of gaps required with few cert spaces to use certs"

Definition at line 310 of file textord.h.

double tesseract::Textord::tosp_silly_kn_sp_gap = 0.2

"Don't let sp minus kn get too small"

Definition at line 369 of file textord.h.

bool tesseract::Textord::tosp_stats_use_xht_gaps = true

"Use within xht gap for wd breaks"

Definition at line 291 of file textord.h.

double tesseract::Textord::tosp_table_fuzzy_kn_sp_ratio = 3.0

"Fuzzy if less than this"

Definition at line 349 of file textord.h.

double tesseract::Textord::tosp_table_kn_sp_ratio = 2.25

"Min difference of kn & sp in table"

Definition at line 345 of file textord.h.

double tesseract::Textord::tosp_table_xht_sp_ratio = 0.33

"Expect spaces bigger than this"

Definition at line 347 of file textord.h.

double tesseract::Textord::tosp_threshold_bias1 = 0

"how far between kern and space?"

Definition at line 316 of file textord.h.

double tesseract::Textord::tosp_threshold_bias2 = 0

"how far between kern and space?"

Definition at line 318 of file textord.h.

bool tesseract::Textord::tosp_use_pre_chopping = false

"Space stats use prechopping?"

Definition at line 273 of file textord.h.

bool tesseract::Textord::tosp_use_xht_gaps = true

"Use within xht gap for wd breaks"

Definition at line 293 of file textord.h.

double tesseract::Textord::tosp_wide_aspect_ratio = 0.0

"wide if w/h more than this"

Definition at line 325 of file textord.h.

double tesseract::Textord::tosp_wide_fraction = 0.52

"Fract of xheight for wide"

Definition at line 323 of file textord.h.


The documentation for this class was generated from the following files: