21# include "config_auto.h"
44#define BLOCK_STATS_CLUSTERS 10
54 TO_ROW_IT to_row_it(rows);
55 ROW_IT row_it(real_rows);
56 for (to_row_it.mark_cycle_pt(); !to_row_it.cycled_list(); to_row_it.forward()) {
57 TO_ROW *row = to_row_it.data();
61 C_BLOB_IT cblob_it(&cblobs);
63 for (; !box_it.empty(); box_it.forward()) {
67 if (cblob !=
nullptr) {
68 C_OUTLINE_IT cout_it(cblob_it.data()->out_list());
69 cout_it.move_to_last();
70 cout_it.add_list_after(cblob->out_list());
75 if (cblob !=
nullptr) {
76 cblob_it.add_after_then_move(cblob);
85 WERD *word =
new WERD(&cblobs, 0,
nullptr);
89 word_it.add_after_then_move(word);
90 row_it.add_after_then_move(real_row);
103 TO_BLOCK_LIST *port_blocks) {
104 TO_BLOCK_IT block_it;
114 block_it.set_to_list(port_blocks);
115 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
116 block = block_it.data();
134 TO_ROW_IT row_it = block->
get_rows();
136 if (row_it.empty()) {
139 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
154#ifndef GRAPHICS_DISABLED
178 int32_t cluster_count;
180 int32_t smooth_factor;
188 STATS gap_stats(0, maxwidth - 1);
189 STATS cluster_stats[4];
198 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
199 blob = blob_it.data();
207 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
208 blob = blob_it.data();
211 if (prev_valid && blob_box.
left() - prev_x < maxwidth) {
212 gap_stats.
add(blob_box.
left() - prev_x, 1);
215 prev_x = blob_box.
right();
223 gap_stats.
smooth(smooth_factor);
227 while (cluster_count < 2 && std::ceil(lower) < std::floor(upper)) {
229 upper = (upper * 3 + lower) / 4;
230 lower = (lower * 3 + upper) / 4;
233 if (cluster_count < 2) {
238 for (gap_index = 0; gap_index < cluster_count; gap_index++) {
239 gaps[gap_index] = cluster_stats[gap_index + 1].
ile(0.5);
242 if (cluster_count > 2) {
245 cluster_stats[1].
ile(0.5), cluster_stats[2].
ile(0.5), cluster_stats[3].
ile(0.5));
248 if (gaps[1] > lower) {
259 tprintf(
"Had to switch most common from lower to upper!!\n");
268 if (gaps[1] < gaps[0]) {
270 tprintf(
"Had to switch most common from lower to upper!!\n");
289 tprintf(
"Lower=%g, upper=%g, Stats:\n", lower, upper);
334 int32_t cluster_count;
337 int32_t smooth_factor;
344 STATS gap_stats(0, maxwidth - 1);
356 const bool testing_row =
false;
358 min_width =
static_cast<int32_t
>(block->
pr_space);
360 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
361 blob = blob_it.data();
364 this_valid = blob_box.
width() >= min_width;
365 if (this_valid && prev_valid && blob_box.
left() - prev_x < maxwidth) {
366 gap_stats.
add(blob_box.
left() - prev_x, 1);
369 prev_x = blob_box.
right();
370 prev_valid = this_valid;
377 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
378 blob = blob_it.data();
381 if (blob_box.
left() - prev_x < maxwidth) {
382 gap_stats.
add(blob_box.
left() - prev_x, 1);
384 prev_x = blob_box.
right();
397 gap_stats.
smooth(smooth_factor);
399 prev_count = cluster_count;
403 if (cluster_count < 1) {
408 for (gap_index = 0; gap_index < cluster_count; gap_index++) {
409 gaps[gap_index] = cluster_stats[gap_index + 1].
ile(0.5);
413 tprintf(
"cluster_count=%d:", cluster_count);
414 for (gap_index = 0; gap_index < cluster_count; gap_index++) {
415 tprintf(
" %g(%d)", gaps[gap_index], cluster_stats[gap_index + 1].get_total());
421 for (gap_index = 0; gap_index < cluster_count && gaps[gap_index] > block->
max_nonspace;
425 if (gap_index < cluster_count) {
426 lower = gaps[gap_index];
429 tprintf(
"No cluster below block threshold!, using default=%g\n", block->
pr_nonsp);
433 for (gap_index = 0; gap_index < cluster_count && gaps[gap_index] <= block->
max_nonspace;
437 if (gap_index < cluster_count) {
438 upper = gaps[gap_index];
441 tprintf(
"No cluster above block threshold!, using default=%g\n", block->
pr_space);
478 TO_ROW_IT row_it = block->
get_rows();
479 ROW *real_row =
nullptr;
482 if (row_it.empty()) {
485 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
508 if (real_row !=
nullptr) {
510 real_row_it.add_after_then_move(real_row);
535 if (word_it.empty()) {
538 word_box = word_it.data()->bounding_box();
539 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
540 word_box += word_it.data()->bounding_box();
545 word_it.set_to_list(real_row->
word_list());
564 C_OUTLINE_IT cout_it;
566 C_BLOB_IT cblob_it = &cblobs;
568 for (
int blobindex = 0; blobindex < blobcount; blobindex++) {
569 auto bblob = box_it->extract();
570 if (bblob->joined_to_prev()) {
571 auto cblob = bblob->remove_cblob();
572 if (cblob !=
nullptr) {
573 cout_it.set_to_list(cblob_it.data()->out_list());
574 cout_it.move_to_last();
575 cout_it.add_list_after(cblob->out_list());
579 auto cblob = bblob->remove_cblob();
580 if (cblob !=
nullptr) {
581 cblob_it.add_after_then_move(cblob);
592 auto word =
new WERD(&cblobs, blanks,
nullptr);
595 word->set_flag(
W_BOL,
true);
597 if (box_it->at_first()) {
598 word->set_flag(
W_EOL,
true);
#define BOOL_VAR(name, val, comment)
#define BLOCK_STATS_CLUSTERS
@ W_DONT_CHOP
fixed pitch chopped
void compute_fixed_pitch(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, bool testing_on)
double words_initial_upper
double textord_wordstats_smooth_factor
double words_initial_lower
bool textord_force_make_prop_words
void tprintf(const char *format,...)
WERD * make_real_word(BLOBNBOX_IT *box_it, int32_t blobcount, bool bol, uint8_t blanks)
ROW * fixed_pitch_words(TO_ROW *row, FCOORD rotation)
void set_row_spaces(TO_BLOCK *block, FCOORD rotation, bool testing_on)
double textord_words_min_minspace
double textord_words_initial_upper
void make_real_words(tesseract::Textord *textord, TO_BLOCK *block, FCOORD rotation)
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
bool textord_chopper_test
bool textord_test_landscape
void plot_word_decisions(ScrollView *win, int16_t pitch, TO_ROW *row)
double textord_words_initial_lower
double textord_words_definite_spread
void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
bool textord_show_initial_words
int32_t row_words2(TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on)
ROW * make_rep_words(TO_ROW *row, TO_BLOCK *block)
double textord_spacesize_ratioprop
int32_t row_words(TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on)
double textord_words_minlarge
const TBOX & bounding_box() const
bool joined_to_prev() const
BLOBNBOX_LIST * blob_list()
PITCH_TYPE pitch_decision
void check_pitch()
check proportional
PDBLK pdblk
Page Description Block.
ROW_LIST * row_list()
get rows
void set_stats(bool prop, int16_t kern, int16_t space, int16_t ch_pitch)
void recalc_bounding_box()
POLY_BLOCK * poly_block() const
bool contains(const FCOORD pt) const
void print_summary() const
void add(int32_t value, int32_t count)
int32_t get_total() const
int32_t cluster(float lower, float upper, float multiple, int32_t max_clusters, STATS *clusters)
void smooth(int32_t factor)
double ile(double frac) const
void set_flag(WERD_FLAGS mask, bool value)
bool use_cjk_fp_model() const
ROW * make_prop_words(TO_ROW *row, FCOORD rotation)
void to_spacing(ICOORD page_tr, TO_BLOCK_LIST *blocks)
ROW * make_blob_words(TO_ROW *row, FCOORD rotation)