21# include "config_auto.h"
41static BOOL_VAR(textord_all_prop,
false,
"All doc is proportial text");
43static BOOL_VAR(textord_disable_pitch_test,
false,
"Turn off dp fixed pitch algorithm");
52#define BLOCK_STATS_CLUSTERS 10
53#define MAX_ALLOWED_PITCH 100
56static int sort_floats(
const void *arg1,
const void *arg2) {
57 float diff = *
reinterpret_cast<const float *
>(arg1) - *
reinterpret_cast<const float *
>(arg2);
60 }
else if (diff < 0) {
76 TO_BLOCK_LIST *port_blocks,
86#ifndef GRAPHICS_DISABLED
94 block_it.set_to_list(port_blocks);
96 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
97 block = block_it.data();
104 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
105 block = block_it.data();
114 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
115 block = block_it.data();
117 if (pb !=
nullptr && !pb->
IsText()) {
121 TO_ROW_IT row_it(block->
get_rows());
123 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
125 fix_row_pitch(row, block, port_blocks, row_index, block_index);
130#ifndef GRAPHICS_DISABLED
146 TO_BLOCK_LIST *blocks,
148 int32_t block_target) {
156 TO_BLOCK_IT block_it = blocks;
163 block_votes = like_votes = other_votes = 0;
169 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
170 block = block_it.data();
172 if (pb !=
nullptr && !pb->
IsText()) {
176 TO_ROW_IT row_it(block->
get_rows());
177 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
187 if (block_index == block_target) {
236 }
else if (block_votes <= textord_words_veto_power && like_votes > 0) {
241 if (block_votes == 0 && like_votes == 0 && other_votes > 0 &&
244 "Warning:row %d of block %d set prop with no like rows against "
246 row_target, block_target);
251 tprintf(
":b_votes=%d:l_votes=%d:o_votes=%d", block_votes, like_votes, other_votes);
256 if (block_votes > 0) {
258 }
else if (block_votes == 0 && like_votes > 0) {
261 tprintf(
"Warning:guessing pitch as xheight on row %d, block %d\n", row_target,
278 sp_sd, mid_cuts, &bad_row->
char_cells,
false);
301 tprintf(
"Block %d at (%d,%d)->(%d,%d)\n", block_index, block_box.
left(), block_box.
bottom(),
302 block_box.
right(), block_box.
top());
314#ifndef GRAPHICS_DISABLED
339 TO_ROW_IT row_it = block->
get_rows();
342 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
373 TO_BLOCK_LIST *port_blocks,
383 TO_BLOCK_IT block_it = port_blocks;
386 int16_t projection_left;
387 int16_t projection_right;
402 block_it.empty() || block_it.data()->get_rows()->empty()) {
405 shift_factor = gradient / (gradient * gradient + 1);
407 TO_ROW_IT row_it(block_it.data()->get_rows());
408 master_x = row_it.data()->projection_left;
409 master_y = row_it.data()->baseline.y(master_x);
410 projection_left = INT16_MAX;
411 projection_right = -INT16_MAX;
416 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
417 block = block_it.data();
418 row_it.set_to_list(block->
get_rows());
419 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
427 row_left =
static_cast<int16_t
>(row->
projection_left - shift_factor * (master_y - row_y));
428 row_right =
static_cast<int16_t
>(row->
projection_right - shift_factor * (master_y - row_y));
429 if (row_left < projection_left) {
430 projection_left = row_left;
432 if (row_right > projection_right) {
433 projection_right = row_right;
440 projection.
set_range(projection_left, projection_right - 1);
442 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
443 block = block_it.data();
444 row_it.set_to_list(block->
get_rows());
445 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
448 row_left =
static_cast<int16_t
>(row->
projection_left - shift_factor * (master_y - row_y));
455 row_it.set_to_list(block_it.data()->get_rows());
457#ifndef GRAPHICS_DISABLED
462 final_pitch = pitches.
ile(0.5);
463 pitch =
static_cast<int16_t
>(final_pitch);
464 pitch_sd =
tune_row_pitch(row, &projection, projection_left, projection_right, pitch * 0.75,
465 final_pitch, sp_sd, mid_cuts, &row->
char_cells,
false);
469 "try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%"
470 "g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n",
471 prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd, pitch_sd / total_row_count,
472 pitch_sd / pitch, pitch_sd / total_row_count / pitch);
475#ifndef GRAPHICS_DISABLED
478 ICOORDELT_LIST *master_cells;
480 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
481 block = block_it.data();
482 row_it.set_to_list(block->
get_rows());
483 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
486 row_shift = shift_factor * (master_y - row_y);
522 int32_t def_fixed = 0;
523 int32_t def_prop = 0;
524 int32_t maybe_fixed = 0;
525 int32_t maybe_prop = 0;
527 int32_t corr_fixed = 0;
528 int32_t corr_prop = 0;
530 TO_ROW_IT row_it = block->
get_rows();
533 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
546 count_block_votes(block, def_fixed, def_prop, maybe_fixed, maybe_prop, corr_fixed, corr_prop,
557 }
else if (def_fixed > 0 || def_prop > 0) {
579 int32_t def_fixed = 0;
580 int32_t def_prop = 0;
581 int32_t maybe_fixed = 0;
582 int32_t maybe_prop = 0;
584 int32_t corr_fixed = 0;
585 int32_t corr_prop = 0;
587 count_block_votes(block, def_fixed, def_prop, maybe_fixed, maybe_prop, corr_fixed, corr_prop,
589 tprintf(
"Block %d has (%d,%d,%d)", block_index, def_fixed, maybe_fixed, corr_fixed);
593 tprintf(
" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop);
597 tprintf(
" prop, %d dunno\n", dunno);
609 int32_t &def_prop, int32_t &maybe_fixed, int32_t &maybe_prop, int32_t &corr_fixed,
610 int32_t &corr_prop, int32_t &dunno) {
612 TO_ROW_IT row_it = block->
get_rows();
614 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
656 int32_t cluster_count;
658 int32_t smooth_factor;
665 STATS gap_stats(0, maxwidth - 1);
670 if (!blob_it.empty()) {
671 prev_x = blob_it.data()->bounding_box().right();
673 while (!blob_it.at_first()) {
674 blob = blob_it.data();
677 if (blob_box.
left() - prev_x < maxwidth) {
678 gap_stats.
add(blob_box.
left() - prev_x, 1);
680 prev_x = blob_box.
right();
691 gap_stats.
smooth(smooth_factor);
693 prev_count = cluster_count;
697 if (cluster_count < 1) {
700 for (gap_index = 0; gap_index < cluster_count; gap_index++) {
701 gaps[gap_index] = cluster_stats[gap_index + 1].
ile(0.5);
705 tprintf(
"cluster_count=%d:", cluster_count);
706 for (gap_index = 0; gap_index < cluster_count; gap_index++) {
707 tprintf(
" %g(%d)", gaps[gap_index], cluster_stats[gap_index + 1].get_total());
711 qsort(gaps, cluster_count,
sizeof(
float), sort_floats);
716 for (gap_index = 0; gap_index < cluster_count && gaps[gap_index] < lower; gap_index++) {
719 if (gap_index == 0) {
721 tprintf(
"No clusters below nonspace threshold!!\n");
723 if (cluster_count > 1) {
731 row->
pr_nonsp = gaps[gap_index - 1];
732 while (gap_index < cluster_count && gaps[gap_index] < upper) {
735 if (gap_index == cluster_count) {
737 tprintf(
"No clusters above nonspace threshold!!\n");
747 for (gap_index = 0; gap_index < cluster_count && gaps[gap_index] < upper; gap_index++) {
750 if (gap_index == 0) {
752 tprintf(
"No clusters below space threshold!!\n");
757 row->
fp_nonsp = gaps[gap_index - 1];
758 if (gap_index == cluster_count) {
760 tprintf(
"No clusters above space threshold!!\n");
769 "Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, "
803 STATS gap_stats(0, maxwidth - 1);
805 STATS pitch_stats(0, maxwidth - 1);
813 if (non_space > initial_pitch) {
814 non_space = initial_pitch;
816 min_space = (initial_pitch + non_space) / 2;
818 if (!
count_pitch_stats(row, &gap_stats, &pitch_stats, initial_pitch, min_space,
true,
false,
820 dm_gap_iqr = 0.0001f;
821 dm_pitch_iqr = maxwidth * 2.0f;
822 dm_pitch = initial_pitch;
824 dm_gap_iqr = gap_stats.
ile(0.75) - gap_stats.
ile(0.25);
825 dm_pitch_iqr = pitch_stats.
ile(0.75) - pitch_stats.
ile(0.25);
826 dm_pitch = pitch_stats.
ile(0.5);
830 if (!
count_pitch_stats(row, &gap_stats, &pitch_stats, initial_pitch, min_space,
true,
false, 0)) {
832 pitch_iqr = maxwidth * 3.0f;
834 gap_iqr = gap_stats.
ile(0.75) - gap_stats.
ile(0.25);
835 pitch_iqr = pitch_stats.
ile(0.75) - pitch_stats.
ile(0.25);
838 "First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, "
840 initial_pitch, gap_iqr, pitch_iqr, pitch_stats.
ile(0.5));
842 initial_pitch = pitch_stats.
ile(0.5);
843 if (min_space > initial_pitch &&
count_pitch_stats(row, &gap_stats, &pitch_stats, initial_pitch,
844 initial_pitch,
true,
false, 0)) {
845 min_space = initial_pitch;
846 gap_iqr = gap_stats.
ile(0.75) - gap_stats.
ile(0.25);
847 pitch_iqr = pitch_stats.
ile(0.75) - pitch_stats.
ile(0.25);
850 "Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, "
852 initial_pitch, gap_iqr, pitch_iqr, pitch_stats.
ile(0.5));
854 initial_pitch = pitch_stats.
ile(0.5);
858 tprintf(
"Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:", block_index,
859 row_index,
'X', pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr,
860 pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth
862 : (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ?
'S' :
'M'));
864 if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) {
871 if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) {
874 "Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, "
876 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
878 gap_iqr = gap_stats.
ile(0.75) - gap_stats.
ile(0.25);
879 pitch_iqr = pitch_stats.
ile(0.75) - pitch_stats.
ile(0.25);
880 pitch = pitch_stats.
ile(0.5);
881 used_dm_model =
false;
885 "Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, "
887 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
889 gap_iqr = dm_gap_iqr;
890 pitch_iqr = dm_pitch_iqr;
892 used_dm_model =
true;
895 tprintf(
"rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:", pitch_iqr, gap_iqr, pitch);
896 tprintf(
"p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:", pitch_iqr / gap_iqr, pitch_iqr / block->
xheight,
935 const char *res_string;
946 if (textord_all_prop || (pb !=
nullptr && !pb->
IsText())) {
954 if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch &&
957 (row->
used_dm_model || sp_sd > 20 || (pitch_sd == 0 && sp_sd > 10))))) {
958 if (pitch_sd < textord_words_def_fixed * row->fixed_pitch && !row->
all_caps &&
966 if (pitch_sd < textord_words_def_prop * row->fixed_pitch) {
994 tprintf(
":sd/p=%g:occ=%g:init_res=%s\n", pitch_sd / row->
fixed_pitch, sp_sd, res_string);
1012 float initial_pitch,
1014 bool ignore_outsize,
1023 int32_t prev_centre;
1026 int32_t width_units;
1032 pitch_stats->
clear();
1033 if (blob_it.empty()) {
1039 joined_box = blob_it.data()->bounding_box();
1042 blob = blob_it.data();
1045 if ((blob_box.
left() - joined_box.
right() < dm_gap && !blob_it.at_first()) ||
1046 blob->
cblob() ==
nullptr) {
1047 joined_box += blob_box;
1049 blob_width = joined_box.
width();
1050 if (split_outsize) {
1052 static_cast<int32_t
>(floor(
static_cast<float>(blob_width) / initial_pitch + 0.5));
1053 if (width_units < 1) {
1057 }
else if (ignore_outsize) {
1058 width =
static_cast<float>(blob_width) / initial_pitch;
1065 x_centre =
static_cast<int32_t
>(joined_box.
left() +
1066 (blob_width - width_units * initial_pitch) / 2);
1067 if (prev_valid && width_units >= 0) {
1076 gap_stats->
add(joined_box.
left() - prev_right, 1);
1077 pitch_stats->
add(x_centre - prev_centre, 1);
1079 prev_centre =
static_cast<int32_t
>(x_centre + width_units * initial_pitch);
1080 prev_right = joined_box.
right();
1081 prev_valid = blob_box.
left() - joined_box.
right() < min_space;
1082 prev_valid = prev_valid && width_units >= 0;
1083 joined_box = blob_box;
1086 }
while (!blob_it.at_first());
1100 int16_t projection_left,
1101 int16_t projection_right,
1103 float &initial_pitch,
1105 int16_t &best_mid_cuts,
1106 ICOORDELT_LIST *best_cells,
1116 ICOORDELT_LIST test_cells;
1117 ICOORDELT_IT best_it;
1120 return tune_row_pitch2(row, projection, projection_left, projection_right, space_size,
1121 initial_pitch, best_sp_sd,
1123 best_mid_cuts, best_cells, testing_on);
1125 if (textord_disable_pitch_test) {
1126 best_sp_sd = initial_pitch;
1127 return initial_pitch;
1129 initial_sd =
compute_pitch_sd(row, projection, projection_left, projection_right, space_size,
1130 initial_pitch, best_sp_sd, best_mid_cuts, best_cells, testing_on);
1131 best_sd = initial_sd;
1132 best_pitch = initial_pitch;
1134 tprintf(
"tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd);
1138 compute_pitch_sd(row, projection, projection_left, projection_right, space_size,
1139 initial_pitch + pitch_delta, sp_sd, mid_cuts, &test_cells, testing_on);
1141 tprintf(
"testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta, pitch_sd);
1143 if (pitch_sd < best_sd) {
1145 best_mid_cuts = mid_cuts;
1147 best_pitch = initial_pitch + pitch_delta;
1148 best_cells->clear();
1149 best_it.set_to_list(best_cells);
1150 best_it.add_list_after(&test_cells);
1154 if (pitch_sd > initial_sd) {
1160 compute_pitch_sd(row, projection, projection_left, projection_right, space_size,
1161 initial_pitch - pitch_delta, sp_sd, mid_cuts, &test_cells, testing_on);
1163 tprintf(
"testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta, pitch_sd);
1165 if (pitch_sd < best_sd) {
1167 best_mid_cuts = mid_cuts;
1169 best_pitch = initial_pitch - pitch_delta;
1170 best_cells->clear();
1171 best_it.set_to_list(best_cells);
1172 best_it.add_list_after(&test_cells);
1176 if (pitch_sd > initial_sd) {
1180 initial_pitch = best_pitch;
1183 print_pitch_sd(row, projection, projection_left, projection_right, space_size, best_pitch);
1199 int16_t projection_left,
1200 int16_t projection_right,
1202 float &initial_pitch,
1204 int16_t &best_mid_cuts,
1205 ICOORDELT_LIST *best_cells,
1218 best_sp_sd = initial_pitch;
1220 best_pitch =
static_cast<int>(initial_pitch);
1222 return initial_pitch;
1229 for (pixel = projection_left; pixel <= projection_right; pixel++) {
1232 (pixel - projection_left) % (best_pitch + pitch_delta), projection->
pile_count(pixel));
1239 for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) {
1242 best_delta = pitch_delta;
1248 tprintf(
"tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n", initial_pitch, best_delta,
1251 best_pitch += best_delta;
1252 initial_pitch = best_pitch;
1254 best_count += best_count;
1255 for (start = best_pixel - 2;
1256 start > best_pixel - best_pitch &&
1261 for (end = best_pixel + 2;
1262 end < best_pixel + best_pitch &&
1268 best_sd =
compute_pitch_sd(row, projection, projection_left, projection_right, space_size,
1269 initial_pitch, best_sp_sd, best_mid_cuts, best_cells, testing_on,
1272 tprintf(
"tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch, best_sd);
1276 print_pitch_sd(row, projection, projection_left, projection_right, space_size, initial_pitch);
1292 int16_t projection_left,
1293 int16_t projection_right,
1295 float initial_pitch,
1298 ICOORDELT_LIST *row_cells,
1306 BLOBNBOX_IT start_it;
1307 BLOBNBOX_IT plot_it;
1314 FPSEGPT_LIST seg_list;
1319 ICOORDELT_IT cell_it = row_cells;
1325 int32_t total_count;
1328 word_sync =
compute_pitch_sd2(row, projection, projection_left, projection_right, initial_pitch,
1329 occupation, mid_cuts, row_cells, testing_on, start, end);
1340 if (blob_it.empty()) {
1341 return space_size * 10;
1343#ifndef GRAPHICS_DISABLED
1344 if (testing_on &&
to_win !=
nullptr) {
1345 blob_box = blob_it.data()->bounding_box();
1352 blob_it.mark_cycle_pt();
1354 for (; blob_count > 0; blob_count--) {
1358 prev_box = blob_box;
1361 }
while (!blob_it.cycled_list() && blob_box.
left() - prev_box.
right() < space_size);
1364 word_sync =
check_pitch_sync2(&start_it, blob_count,
static_cast<int16_t
>(initial_pitch), 2,
1365 projection, projection_left, projection_right,
1369 word_sync =
check_pitch_sync(&start_it, blob_count,
static_cast<int16_t
>(initial_pitch), 2,
1370 projection, &seg_list);
1373 tprintf(
"Word ending at (%d,%d), len=%d, sync rating=%g, ", prev_box.
right(), prev_box.
top(),
1374 seg_list.length() - 1, word_sync);
1375 seg_it.set_to_list(&seg_list);
1376 for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
1377 if (seg_it.data()->faked) {
1380 tprintf(
"%d, ", seg_it.data()->position());
1388#ifndef GRAPHICS_DISABLED
1393 seg_it.set_to_list(&seg_list);
1394 if (prev_right >= 0) {
1395 sp_var = seg_it.data()->position() - prev_right;
1396 sp_var -= floor(sp_var / initial_pitch + 0.5) * initial_pitch;
1401 for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
1402 segpos = seg_it.data()->position();
1403 if (cell_it.empty() || segpos > cellpos + initial_pitch / 2) {
1405 while (!cell_it.empty() && segpos > cellpos + initial_pitch * 3 / 2) {
1406 cell =
new ICOORDELT(cellpos +
static_cast<int16_t
>(initial_pitch), 0);
1407 cell_it.add_after_then_move(cell);
1408 cellpos +=
static_cast<int16_t
>(initial_pitch);
1412 cell_it.add_after_then_move(cell);
1414 }
else if (segpos > cellpos - initial_pitch / 2) {
1415 cell = cell_it.data();
1417 cell->
set_x((cellpos + segpos) / 2);
1418 cellpos = cell->
x();
1421 seg_it.move_to_last();
1422 prev_right = seg_it.data()->position();
1424 scale_factor = (seg_list.length() - 2) / 2;
1425 if (scale_factor < 1) {
1431 sqsum += word_sync * scale_factor;
1432 total_count += (seg_list.length() - 1) * scale_factor;
1434 }
while (!blob_it.cycled_list());
1435 sp_sd = sp_count > 0 ? sqrt(spsum / sp_count) : 0;
1436 return total_count > 0 ? sqrt(sqsum / total_count) : space_size * 10;
1449 int16_t projection_left,
1450 int16_t projection_right,
1451 float initial_pitch,
1452 int16_t &occupation,
1454 ICOORDELT_LIST *row_cells,
1461 BLOBNBOX_IT plot_it;
1464 FPSEGPT_LIST seg_list;
1468 ICOORDELT_IT cell_it = row_cells;
1473 if (blob_it.empty()) {
1475 return initial_pitch * 10;
1477#ifndef GRAPHICS_DISABLED
1478 if (testing_on &&
to_win !=
nullptr) {
1483 blob_it.mark_cycle_pt();
1488 }
while (!blob_it.cycled_list());
1491 &blob_it, blob_count,
static_cast<int16_t
>(initial_pitch), 2, projection, projection_left,
1494 tprintf(
"Row ending at (%d,%d), len=%d, sync rating=%g, ", blob_box.
right(), blob_box.
top(),
1495 seg_list.length() - 1, word_sync);
1496 seg_it.set_to_list(&seg_list);
1497 for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
1498 if (seg_it.data()->faked) {
1501 tprintf(
"%d, ", seg_it.data()->position());
1509#ifndef GRAPHICS_DISABLED
1514 seg_it.set_to_list(&seg_list);
1515 for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
1516 segpos = seg_it.data()->position();
1519 cell_it.add_after_then_move(cell);
1520 if (seg_it.at_last()) {
1521 mid_cuts = seg_it.data()->cheap_cuts();
1525 return occupation > 0 ? sqrt(word_sync / occupation) : initial_pitch * 10;
1538 int16_t projection_left,
1539 int16_t projection_right,
float space_size,
1547 BLOBNBOX_IT start_it;
1548 BLOBNBOX_IT row_start;
1550 int16_t total_blob_count;
1556 FPSEGPT_LIST seg_list;
1564 if (blob_it.empty()) {
1567 row_start = blob_it;
1568 total_blob_count = 0;
1575 blob_it = row_start;
1579 blob_it.mark_cycle_pt();
1581 for (; blob_count > 0; blob_count--) {
1585 prev_box = blob_box;
1588 }
while (!blob_it.cycled_list() && blob_box.
left() - prev_box.
right() < space_size);
1590 &start_it, blob_count,
static_cast<int16_t
>(initial_pitch), 2, projection, projection_left,
1592 total_blob_count += blob_count;
1593 seg_it.set_to_list(&seg_list);
1594 if (prev_right >= 0) {
1595 sp_var = seg_it.data()->position() - prev_right;
1596 sp_var -= floor(sp_var / initial_pitch + 0.5) * initial_pitch;
1601 seg_it.move_to_last();
1602 prev_right = seg_it.data()->position();
1604 scale_factor = (seg_list.length() - 2) / 2;
1605 if (scale_factor < 1) {
1611 sqsum += word_sync * scale_factor;
1612 total_count += (seg_list.length() - 1) * scale_factor;
1614 }
while (!blob_it.cycled_list());
1615 sp_sd = sp_count > 0 ? sqrt(spsum / sp_count) : 0;
1616 word_sync = total_count > 0 ? sqrt(sqsum / total_count) : space_size * 10;
1617 tprintf(
"new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:", word_sync, word_sync / initial_pitch, sp_sd,
1620 start_it = row_start;
1621 blob_it = row_start;
1623 check_pitch_sync2(&blob_it, total_blob_count,
static_cast<int16_t
>(initial_pitch), 2,
1624 projection, projection_left, projection_right,
1626 if (occupation > 1) {
1627 word_sync /= occupation;
1629 word_sync = sqrt(word_sync);
1631#ifndef GRAPHICS_DISABLED
1638 if (word_sync < textord_words_def_fixed * initial_pitch && !row->all_caps) {
1647 "row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, "
1649 word_sync, word_sync / initial_pitch,
1663 if (pb !=
nullptr && !pb->
IsText()) {
1669 BLOBNBOX_IT search_it;
1672 int blobcount, repeated_set;
1674 TO_ROW_IT row_it = block->
get_rows();
1675 if (row_it.empty()) {
1678 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1679 row = row_it.data();
1681 if (box_it.empty()) {
1693 if (box_it.data()->repeated_set() != 0 && !box_it.data()->joined_to_prev()) {
1695 repeated_set = box_it.data()->repeated_set();
1697 search_it.forward();
1698 while (!search_it.at_first() && search_it.data()->repeated_set() == repeated_set) {
1700 search_it.forward();
1706 if (!box_it.empty() && box_it.data()->joined_to_prev()) {
1707 tprintf(
"Bad box joined to prev at");
1708 box_it.data()->bounding_box().print();
1709 tprintf(
"After repeated word:");
1712 ASSERT_HOST(box_it.empty() || !box_it.data()->joined_to_prev());
1715 word_it.add_after_then_move(word);
1719 }
while (!box_it.at_first());
1729#ifndef GRAPHICS_DISABLED
1736 TO_ROW_IT row_it = block->
get_rows();
1738 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1739 row = row_it.data();
1740 row->
min_space =
static_cast<int32_t
>((pitch + nonspace) / 2);
#define BOOL_VAR(name, val, comment)
#define double_VAR(name, val, comment)
#define MAX_ALLOWED_PITCH
#define BLOCK_STATS_CLUSTERS
@ W_DONT_CHOP
fixed pitch chopped
@ W_REP_CHAR
repeated character
int textord_dotmatrix_gap
void compute_fixed_pitch(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, bool testing_on)
bool try_block_fixed(TO_BLOCK *block, int32_t block_index)
double words_initial_upper
void compute_block_pitch(TO_BLOCK *block, FCOORD rotation, int32_t block_index, bool testing_on)
bool textord_blocksall_prop
void plot_fp_cells2(ScrollView *win, ScrollView::Color colour, TO_ROW *row, FPSEGPT_LIST *seg_list)
double textord_wordstats_smooth_factor
double words_initial_lower
int textord_words_veto_power
bool fixed_pitch_row(TO_ROW *row, BLOCK *block, int32_t block_index)
void plot_fp_word(TO_BLOCK *block, float pitch, float nonspace)
double textord_words_default_nonspace
void tprintf(const char *format,...)
bool textord_show_fixed_cuts
float compute_pitch_sd2(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float initial_pitch, int16_t &occupation, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)
double words_default_fixed_space
int pitsync_linear_version
void mark_repeated_chars(TO_ROW *row)
WERD * make_real_word(BLOBNBOX_IT *box_it, int32_t blobcount, bool bol, uint8_t blanks)
float compute_pitch_sd(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch, float &sp_sd, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)
bool try_rows_fixed(TO_BLOCK *block, int32_t block_index, bool testing_on)
double textord_words_default_maxspace
void find_repeated_chars(TO_BLOCK *block, bool testing_on)
double textord_projection_scale
void print_pitch_sd(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch)
bool textord_blockndoc_fixed
double check_pitch_sync2(BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, int16_t projection_left, int16_t projection_right, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)
bool count_pitch_stats(TO_ROW *row, STATS *gap_stats, STATS *pitch_stats, float initial_pitch, float min_space, bool ignore_outsize, bool split_outsize, int32_t dm_gap)
bool textord_pitch_scalebigwords
double textord_words_min_minspace
bool find_row_pitch(TO_ROW *row, int32_t maxwidth, int32_t dm_gap, TO_BLOCK *block, int32_t block_index, int32_t row_index, bool testing_on)
void fix_row_pitch(TO_ROW *bad_row, TO_BLOCK *bad_block, TO_BLOCK_LIST *blocks, int32_t row_target, int32_t block_target)
bool textord_blocksall_fixed
bool textord_debug_pitch_metric
double textord_words_maxspace
float tune_row_pitch2(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
void print_block_counts(TO_BLOCK *block, int32_t block_index)
bool textord_debug_pitch_test
bool row_pitch_stats(TO_ROW *row, int32_t maxwidth, bool testing_on)
double textord_balance_factor
bool textord_show_row_cuts
bool try_doc_fixed(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient)
void plot_word_decisions(ScrollView *win, int16_t pitch, TO_ROW *row)
double words_default_prop_nonspace
bool textord_fast_pitch_test
double textord_fpiqr_ratio
ScrollView * create_to_win(ICOORD page_tr)
double textord_words_pitchsd_threshold
float tune_row_pitch(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
void plot_row_cells(ScrollView *win, ScrollView::Color colour, TO_ROW *row, float xshift, ICOORDELT_LIST *cells)
bool textord_show_initial_words
void count_block_votes(TO_BLOCK *block, int32_t &def_fixed, int32_t &def_prop, int32_t &maybe_fixed, int32_t &maybe_prop, int32_t &corr_fixed, int32_t &corr_prop, int32_t &dunno)
double words_default_fixed_limit
double check_pitch_sync(BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, FPSEGPT_LIST *seg_list)
double textord_max_pitch_iqr
double textord_words_def_prop
double textord_words_default_minspace
bool textord_show_page_cuts
double textord_spacesize_ratioprop
bool compute_rows_pitch(TO_BLOCK *block, int32_t block_index, bool testing_on)
TBOX box_next(BLOBNBOX_IT *it)
double textord_pitch_rowsimilarity
const TBOX & bounding_box() const
bool joined_to_prev() const
bool rep_chars_marked() const
ICOORDELT_LIST char_cells
BLOBNBOX_LIST * blob_list()
int num_repeated_sets() const
void compute_vertical_projection()
PITCH_TYPE pitch_decision
PITCH_TYPE pitch_decision
PDBLK pdblk
Page Description Block.
POLY_BLOCK * poly_block() const
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
void set_x(TDimension xin)
rewrite function
TDimension x() const
access function
TDimension bottom() const
void add(int32_t value, int32_t count)
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
int32_t pile_count(int32_t value) const
int32_t get_total() const
int32_t cluster(float lower, float upper, float multiple, int32_t max_clusters, STATS *clusters)
void smooth(int32_t factor)
double ile(double frac) const
bool set_range(int32_t min_bucket_value, int32_t max_bucket_value)
void set_flag(WERD_FLAGS mask, bool value)
TBOX bounding_box() const