22# include "config_auto.h"
37 , use_cjk_fp_model_(false)
40 BOOL_MEMBER(textord_single_height_mode, false,
"Script has no xheight, so use a single mode",
44 BOOL_MEMBER(tosp_old_to_method, false,
"Space stats use prechopping?", ccstruct_->params())
46 "Constrain relative values of inter and intra-word gaps for "
49 ,
BOOL_MEMBER(tosp_only_use_prop_rows, true,
"Block stats to use fixed pitch rows?",
52 "Force word breaks on punct to break long lines in non-space "
55 ,
BOOL_MEMBER(tosp_use_pre_chopping, false,
"Space stats use prechopping?", ccstruct_->params())
56 ,
BOOL_MEMBER(tosp_old_to_bug_fix, false,
"Fix suspected bug in old code", ccstruct_->params())
57 ,
BOOL_MEMBER(tosp_block_use_cert_spaces, true,
"Only stat OBVIOUS spaces", ccstruct_->params())
58 ,
BOOL_MEMBER(tosp_row_use_cert_spaces, true,
"Only stat OBVIOUS spaces", ccstruct_->params())
59 ,
BOOL_MEMBER(tosp_narrow_blobs_not_cert, true,
"Only stat OBVIOUS spaces", ccstruct_->params())
60 ,
BOOL_MEMBER(tosp_row_use_cert_spaces1, true,
"Only stat OBVIOUS spaces", ccstruct_->params())
61 ,
BOOL_MEMBER(tosp_recovery_isolated_row_stats, true,
62 "Use row alone when inadequate cert spaces", ccstruct_->params())
63 ,
BOOL_MEMBER(tosp_only_small_gaps_for_kern, false,
"Better guess", ccstruct_->params())
64 ,
BOOL_MEMBER(tosp_all_flips_fuzzy, false,
"Pass ANY flip to context?", ccstruct_->params())
65 ,
BOOL_MEMBER(tosp_fuzzy_limit_all, true,
"Don't restrict kn->sp fuzzy limit to tables",
67 ,
BOOL_MEMBER(tosp_stats_use_xht_gaps, true,
"Use within xht gap for wd breaks",
69 ,
BOOL_MEMBER(tosp_use_xht_gaps, true,
"Use within xht gap for wd breaks", ccstruct_->params())
70 ,
BOOL_MEMBER(tosp_only_use_xht_gaps, false,
"Only use within xht gap for wd breaks",
72 ,
BOOL_MEMBER(tosp_rule_9_test_punct, false,
"Don't chng kn to space next to punct",
74 ,
BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true,
"Default flip", ccstruct_->params())
75 ,
BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true,
"Default flip", ccstruct_->params())
76 ,
BOOL_MEMBER(tosp_improve_thresh, false,
"Enable improvement heuristic", ccstruct_->params())
77 ,
INT_MEMBER(tosp_debug_level, 0,
"Debug data", ccstruct_->params())
78 ,
INT_MEMBER(tosp_enough_space_samples_for_median, 3,
"or should we use mean",
80 ,
INT_MEMBER(tosp_redo_kern_limit, 10,
"No.samples reqd to reestimate for row",
82 ,
INT_MEMBER(tosp_few_samples, 40,
"No.gaps reqd with 1 large gap to treat as a table",
84 ,
INT_MEMBER(tosp_short_row, 20,
"No.gaps reqd with few cert spaces to use certs",
86 ,
INT_MEMBER(tosp_sanity_method, 1,
"How to avoid being silly", ccstruct_->params())
88 "Factor for defining space threshold in terms of space and "
91 ,
double_MEMBER(tosp_threshold_bias1, 0,
"how far between kern and space?", ccstruct_->params())
92 ,
double_MEMBER(tosp_threshold_bias2, 0,
"how far between kern and space?", ccstruct_->params())
93 ,
double_MEMBER(tosp_narrow_fraction, 0.3,
"Fract of xheight for narrow", ccstruct_->params())
94 ,
double_MEMBER(tosp_narrow_aspect_ratio, 0.48,
"narrow if w/h less than this",
96 ,
double_MEMBER(tosp_wide_fraction, 0.52,
"Fract of xheight for wide", ccstruct_->params())
97 ,
double_MEMBER(tosp_wide_aspect_ratio, 0.0,
"wide if w/h less than this", ccstruct_->params())
98 ,
double_MEMBER(tosp_fuzzy_space_factor, 0.6,
"Fract of xheight for fuzz sp",
100 ,
double_MEMBER(tosp_fuzzy_space_factor1, 0.5,
"Fract of xheight for fuzz sp",
102 ,
double_MEMBER(tosp_fuzzy_space_factor2, 0.72,
"Fract of xheight for fuzz sp",
104 ,
double_MEMBER(tosp_gap_factor, 0.83,
"gap ratio to flip sp->kern", ccstruct_->params())
105 ,
double_MEMBER(tosp_kern_gap_factor1, 2.0,
"gap ratio to flip kern->sp", ccstruct_->params())
106 ,
double_MEMBER(tosp_kern_gap_factor2, 1.3,
"gap ratio to flip kern->sp", ccstruct_->params())
107 ,
double_MEMBER(tosp_kern_gap_factor3, 2.5,
"gap ratio to flip kern->sp", ccstruct_->params())
108 ,
double_MEMBER(tosp_ignore_big_gaps, -1,
"xht multiplier", ccstruct_->params())
109 ,
double_MEMBER(tosp_ignore_very_big_gaps, 3.5,
"xht multiplier", ccstruct_->params())
110 ,
double_MEMBER(tosp_rep_space, 1.6,
"rep gap multiplier for space", ccstruct_->params())
111 ,
double_MEMBER(tosp_enough_small_gaps, 0.65,
"Fract of kerns reqd for isolated row stats",
113 ,
double_MEMBER(tosp_table_kn_sp_ratio, 2.25,
"Min difference of kn & sp in table",
115 ,
double_MEMBER(tosp_table_xht_sp_ratio, 0.33,
"Expect spaces bigger than this",
117 ,
double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0,
"Fuzzy if less than this",
119 ,
double_MEMBER(tosp_fuzzy_kn_fraction, 0.5,
"New fuzzy kn alg", ccstruct_->params())
120 ,
double_MEMBER(tosp_fuzzy_sp_fraction, 0.5,
"New fuzzy sp alg", ccstruct_->params())
121 ,
double_MEMBER(tosp_min_sane_kn_sp, 1.5,
"Don't trust spaces less than this time kn",
123 ,
double_MEMBER(tosp_init_guess_kn_mult, 2.2,
"Thresh guess - mult kn by this",
125 ,
double_MEMBER(tosp_init_guess_xht_mult, 0.28,
"Thresh guess - mult xht by this",
127 ,
double_MEMBER(tosp_max_sane_kn_thresh, 5.0,
"Multiplier on kn to limit thresh",
129 ,
double_MEMBER(tosp_flip_caution, 0.0,
"Don't autoflip kn to sp when large separation",
131 ,
double_MEMBER(tosp_large_kerning, 0.19,
"Limit use of xht gap with large kns",
133 ,
double_MEMBER(tosp_dont_fool_with_small_kerns, -1,
"Limit use of xht gap with odd small kns",
135 ,
double_MEMBER(tosp_near_lh_edge, 0,
"Don't reduce box if the top left is non blank",
137 ,
double_MEMBER(tosp_silly_kn_sp_gap, 0.2,
"Don't let sp minus kn get too small",
139 ,
double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75,
"How wide fuzzies need context",
143 BOOL_MEMBER(textord_no_rejects, false,
"Don't remove noise blobs", ccstruct_->params())
144 ,
BOOL_MEMBER(textord_show_blobs, false,
"Display unsorted blobs", ccstruct_->params())
145 ,
BOOL_MEMBER(textord_show_boxes, false,
"Display unsorted blobs", ccstruct_->params())
146 ,
INT_MEMBER(textord_max_noise_size, 7,
"Pixel size of noise", ccstruct_->params())
147 ,
INT_MEMBER(textord_baseline_debug, 0,
"Baseline debug level", ccstruct_->params())
148 ,
double_MEMBER(textord_noise_area_ratio, 0.7,
"Fraction of bounding box for noise",
150 ,
double_MEMBER(textord_initialx_ile, 0.75,
"Ile of sizes for xheight guess",
152 ,
double_MEMBER(textord_initialasc_ile, 0.90,
"Ile of sizes for xheight guess",
154 ,
INT_MEMBER(textord_noise_sizefraction, 10,
"Fraction of size for maxima", ccstruct_->params())
155 ,
double_MEMBER(textord_noise_sizelimit, 0.5,
"Fraction of x for big t count",
157 ,
INT_MEMBER(textord_noise_translimit, 16,
"Transitions for normal blob", ccstruct_->params())
158 ,
double_MEMBER(textord_noise_normratio, 2.0,
"Dot to norm ratio for deletion",
160 ,
BOOL_MEMBER(textord_noise_rejwords, true,
"Reject noise-like words", ccstruct_->params())
161 ,
BOOL_MEMBER(textord_noise_rejrows, true,
"Reject noise-like rows", ccstruct_->params())
162 ,
double_MEMBER(textord_noise_syfract, 0.2,
"xh fract height error for norm blobs",
164 ,
double_MEMBER(textord_noise_sxfract, 0.4,
"xh fract width error for norm blobs",
167 "Height fraction to discard outlines as speckle noise", ccstruct_->params())
168 ,
INT_MEMBER(textord_noise_sncount, 1,
"super norm blobs to save row", ccstruct_->params())
169 ,
double_MEMBER(textord_noise_rowratio, 6.0,
"Dot to norm ratio for deletion",
171 ,
BOOL_MEMBER(textord_noise_debug, false,
"Debug row garbage detector", ccstruct_->params())
172 ,
double_MEMBER(textord_blshift_maxshift, 0.00,
"Max baseline shift", ccstruct_->params())
173 ,
double_MEMBER(textord_blshift_xfraction, 9.99,
"Min size of baseline shift",
174 ccstruct_->params()) {}
178 Image binary_pix,
Image thresholds_pix,
Image grey_pix,
bool use_box_bottoms,
179 BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks,
180 TO_BLOCK_LIST *to_blocks) {
181 page_tr_.
set_x(width);
182 page_tr_.
set_y(height);
183 if (to_blocks->empty()) {
186 TO_BLOCK_IT it(to_blocks);
187 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
202 const FCOORD anticlockwise90(0.0f, 1.0f);
203 const FCOORD clockwise90(0.0f, -1.0f);
204 TO_BLOCK_IT it(to_blocks);
205 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
211 to_block->
rotate(anticlockwise90);
219 TO_BLOCK_IT to_block_it(to_blocks);
220 TO_BLOCK *to_block = to_block_it.data();
225 gradient =
make_rows(page_tr_, to_blocks);
232 BaselineDetect baseline_detector(textord_baseline_debug, reskew, to_blocks);
239 make_words(
this, page_tr_, gradient, blocks, to_blocks);
244 TO_BLOCK *to_block = to_block_it.data();
250 TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
253 BLOCK_IT b_it(blocks);
254 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
255 b_it.data()->compute_row_margins();
257#ifndef GRAPHICS_DISABLED
270 float row_total_conf = 0.0f;
271 int row_word_count = 0;
273 float best_conf = 0.0f;
279 row_total_conf /= row_word_count;
280 if (best_row ==
nullptr || best_conf < row_total_conf) {
282 best_conf = row_total_conf;
284 row_total_conf = 0.0f;
290 if (it.
row() != best_row) {
#define INT_MEMBER(name, val, comment, vec)
#define double_MEMBER(name, val, comment, vec)
#define BOOL_MEMBER(name, val, comment, vec)
bool textord_show_final_rows
@ PSM_SINGLE_BLOCK_VERT_TEXT
@ PSM_SINGLE_CHAR
Treat the image as a single character.
bool PSM_LINE_FIND_ENABLED(int pageseg_mode)
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
bool PSM_SPARSE(int pageseg_mode)
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
bool PSM_WORD_FIND_ENABLED(int pageseg_mode)
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
void rotate(const FCOORD &rotation)
void ComputeEdgeOffsets(Image thresholds, Image grey)
PDBLK pdblk
Page Description Block.
ROW_LIST * row_list()
get rows
void set_classify_rotation(const FCOORD &rotation)
void set_re_rotation(const FCOORD &rotation)
WERD_CHOICE * best_choice
WERD_RES * restart_page()
ROW_RES * next_row() const
void set_poly_block(POLY_BLOCK *blk)
set the poly block
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
void set_x(TDimension xin)
rewrite function
void set_y(TDimension yin)
rewrite function
void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, bool remove_noise, bool show_final_rows, Textord *textord)
void ComputeStraightBaselines(bool use_box_bottoms)
void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on)
void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res)
void find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Textord(CCStruct *ccstruct)