21#ifndef TESSERACT_TEXTORD_TEXTORD_H_
22#define TESSERACT_TEXTORD_TEXTORD_H_
46 int height = bounding_box_.
height();
47 bounding_box_.
pad(height, height);
90 void TextordPage(
PageSegMode pageseg_mode,
const FCOORD &reskew,
int width,
int height,
91 Image binary_pix,
Image thresholds_pix,
Image grey_pix,
bool use_box_bottoms,
92 BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
99 return use_cjk_fp_model_;
102 use_cjk_fp_model_ = flag;
106 void to_spacing(
ICOORD page_tr,
107 TO_BLOCK_LIST *blocks
116 void find_components(
Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
117 void filter_blobs(
ICOORD page_tr, TO_BLOCK_LIST *blocks,
bool testing_on);
126 bool use_cjk_fp_model_;
130 void MakeRows(
PageSegMode pageseg_mode,
const FCOORD &skew,
int width,
int height,
131 TO_BLOCK_LIST *to_blocks);
133 void MakeBlockRows(
int min_spacing,
int max_spacing,
const FCOORD &skew,
TO_BLOCK *block,
137 void compute_block_xheight(
TO_BLOCK *block,
float gradient);
138 void compute_row_xheight(
TO_ROW *row,
141 int block_line_size);
142 void make_spline_rows(
TO_BLOCK *block,
148 void make_old_baselines(
TO_BLOCK *block,
151 void correlate_lines(
TO_BLOCK *block,
float gradient);
152 void correlate_neighbours(
TO_BLOCK *block,
155 int correlate_with_stats(
TO_ROW **rows,
158 void find_textlines(
TO_BLOCK *block,
164 void block_spacing_stats(
TO_BLOCK *block,
GAPMAP *gapmap,
bool &old_text_ord_proportional,
166 int16_t &block_space_gap_width,
168 int16_t &block_non_space_gap_width);
169 void row_spacing_stats(
TO_ROW *row,
GAPMAP *gapmap, int16_t block_idx, int16_t row_idx,
171 int16_t block_space_gap_width,
173 int16_t block_non_space_gap_width);
174 void old_to_method(
TO_ROW *row,
STATS *all_gap_stats,
STATS *space_gap_stats,
175 STATS *small_gap_stats, int16_t block_space_gap_width,
177 int16_t block_non_space_gap_width);
178 bool isolated_row_stats(
TO_ROW *row,
GAPMAP *gapmap,
STATS *all_gap_stats,
bool suspected_table,
179 int16_t block_idx, int16_t row_idx);
180 int16_t stats_count_under(
STATS *stats, int16_t threshold);
181 void improve_row_threshold(
TO_ROW *row,
STATS *all_gap_stats);
182 bool make_a_word_break(
TO_ROW *row,
184 int16_t prev_gap,
TBOX prev_blob_box, int16_t real_current_gap,
185 int16_t within_xht_current_gap,
TBOX next_blob_box, int16_t next_gap,
186 uint8_t &blanks,
bool &fuzzy_sp,
bool &fuzzy_non,
187 bool &prev_gap_was_a_space,
bool &break_at_next_gap);
190 bool suspected_punct_blob(
TO_ROW *row,
TBOX box);
191 void peek_at_next_gap(
TO_ROW *row, BLOBNBOX_IT box_it,
TBOX &next_blob_box, int16_t &next_gap,
192 int16_t &next_within_xht_gap);
193 void mark_gap(
TBOX blob,
195 int16_t prev_gap, int16_t prev_blob_width, int16_t current_gap,
196 int16_t next_blob_width, int16_t next_gap);
197 float find_mean_blob_spacing(
WERD *word);
198 bool ignore_big_gap(
TO_ROW *row, int32_t row_length,
GAPMAP *gapmap, int16_t left, int16_t right);
205 float filter_noise_blobs(BLOBNBOX_LIST *src_list, BLOBNBOX_LIST *noise_list,
206 BLOBNBOX_LIST *small_list, BLOBNBOX_LIST *large_list);
211 void cleanup_nontext_block(
BLOCK *block);
212 void cleanup_blocks(
bool clean_noise, BLOCK_LIST *blocks);
213 bool clean_noise_from_row(
ROW *row);
214 void clean_noise_from_words(
ROW *row);
217 void clean_small_noise_from_words(
ROW *row);
221 void TransferDiacriticsToBlockGroups(BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks);
225 void TransferDiacriticsToWords(BLOBNBOX_LIST *diacritic_blobs,
const FCOORD &rotation,
#define CLISTIZEH(CLASSNAME)
TDimension height() const
void pad(int xpad, int ypad)
TBOX true_bounding_box() const
C_BLOB_LIST * rej_cblob_list()
const TBOX & bounding_box() const
C_BLOB_LIST * RejBlobs() const
const WERD * word() const
TBOX true_bounding_box() const
INT_VAR_H(textord_noise_sizefraction)
double_VAR_H(tosp_enough_small_gaps)
double_VAR_H(tosp_pass_wide_fuzz_sp_to_context)
INT_VAR_H(tosp_short_row)
BOOL_VAR_H(textord_single_height_mode)
BOOL_VAR_H(tosp_old_to_bug_fix)
BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp)
double_VAR_H(tosp_fuzzy_sp_fraction)
double_VAR_H(tosp_threshold_bias1)
INT_VAR_H(tosp_sanity_method)
BOOL_VAR_H(tosp_narrow_blobs_not_cert)
double_VAR_H(tosp_max_sane_kn_thresh)
double_VAR_H(tosp_kern_gap_factor2)
bool use_cjk_fp_model() const
double_VAR_H(tosp_table_fuzzy_kn_sp_ratio)
INT_VAR_H(tosp_debug_level)
INT_VAR_H(textord_noise_translimit)
BOOL_VAR_H(tosp_block_use_cert_spaces)
BOOL_VAR_H(textord_noise_rejrows)
double_VAR_H(tosp_dont_fool_with_small_kerns)
BOOL_VAR_H(tosp_fuzzy_limit_all)
BOOL_VAR_H(tosp_row_use_cert_spaces1)
double_VAR_H(tosp_near_lh_edge)
BOOL_VAR_H(tosp_recovery_isolated_row_stats)
double_VAR_H(tosp_wide_aspect_ratio)
BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn)
double_VAR_H(tosp_fuzzy_space_factor)
double_VAR_H(tosp_wide_fraction)
double_VAR_H(tosp_rep_space)
INT_VAR_H(tosp_few_samples)
double_VAR_H(tosp_large_kerning)
double_VAR_H(textord_noise_hfract)
BOOL_VAR_H(textord_no_rejects)
BOOL_VAR_H(textord_show_blobs)
BOOL_VAR_H(tosp_improve_thresh)
double_VAR_H(tosp_threshold_bias2)
BOOL_VAR_H(textord_noise_rejwords)
double_VAR_H(tosp_kern_gap_factor3)
BOOL_VAR_H(textord_noise_debug)
BOOL_VAR_H(tosp_only_use_prop_rows)
double_VAR_H(textord_noise_sxfract)
INT_VAR_H(textord_max_noise_size)
INT_VAR_H(tosp_enough_space_samples_for_median)
double_VAR_H(tosp_ignore_big_gaps)
void set_use_cjk_fp_model(bool flag)
INT_VAR_H(textord_baseline_debug)
BOOL_VAR_H(textord_show_boxes)
double_VAR_H(tosp_table_kn_sp_ratio)
BOOL_VAR_H(tosp_use_pre_chopping)
double_VAR_H(tosp_table_xht_sp_ratio)
BOOL_VAR_H(tosp_old_to_method)
BOOL_VAR_H(tosp_only_use_xht_gaps)
double_VAR_H(tosp_init_guess_kn_mult)
INT_VAR_H(tosp_redo_kern_limit)
double_VAR_H(textord_noise_syfract)
BOOL_VAR_H(tosp_use_xht_gaps)
double_VAR_H(tosp_fuzzy_space_factor1)
BOOL_VAR_H(tosp_stats_use_xht_gaps)
double_VAR_H(tosp_kern_gap_factor1)
double_VAR_H(textord_blshift_maxshift)
BOOL_VAR_H(tosp_only_small_gaps_for_kern)
double_VAR_H(textord_blshift_xfraction)
double_VAR_H(tosp_fuzzy_space_factor2)
double_VAR_H(tosp_min_sane_kn_sp)
double_VAR_H(textord_initialasc_ile)
BOOL_VAR_H(tosp_force_wordbreak_on_punct)
double_VAR_H(tosp_flip_caution)
double_VAR_H(tosp_gap_factor)
BOOL_VAR_H(tosp_row_use_cert_spaces)
double_VAR_H(textord_noise_rowratio)
double_VAR_H(tosp_init_guess_xht_mult)
BOOL_VAR_H(tosp_old_to_constrain_sp_kn)
double_VAR_H(tosp_old_sp_kn_th_factor)
double_VAR_H(tosp_silly_kn_sp_gap)
double_VAR_H(textord_noise_area_ratio)
double_VAR_H(tosp_fuzzy_kn_fraction)
double_VAR_H(tosp_ignore_very_big_gaps)
INT_VAR_H(textord_noise_sncount)
double_VAR_H(tosp_narrow_aspect_ratio)
double_VAR_H(tosp_narrow_fraction)
double_VAR_H(textord_noise_normratio)
double_VAR_H(textord_noise_sizelimit)
BOOL_VAR_H(tosp_rule_9_test_punct)
double_VAR_H(textord_initialx_ile)
BOOL_VAR_H(tosp_all_flips_fuzzy)