21# include "config_auto.h"
92 : left_margin_(-INT32_MAX),
93 right_margin_(INT32_MAX),
94 median_bottom_(INT32_MAX),
95 median_top_(-INT32_MAX),
96 median_left_(INT32_MAX),
97 median_right_(-INT32_MAX),
98 blob_type_(blob_type),
100 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
113 part->set_type(block_type);
114 part->set_flow(
flow);
116 part->set_left_margin(box.
left());
117 part->set_right_margin(box.
right());
118 part->SetBlobTypes();
119 part->ComputeLimits();
130 ColPartition_LIST *big_part_list) {
135 single->ComputeLimits();
136 single->ClaimBoxes();
137 single->SetBlobTypes();
138 single->set_block_owned(
true);
139 if (big_part_list !=
nullptr) {
140 ColPartition_IT part_it(big_part_list);
141 part_it.add_to_end(single);
149 ColPartition_C_IT it(&upper_partners_);
150 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
151 it.data()->RemovePartner(
false,
this);
153 it.set_to_list(&lower_partners_);
154 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
155 it.data()->RemovePartner(
true,
this);
162 const ICOORD &vertical,
int left,
163 int bottom,
int right,
int top) {
165 part->bounding_box_ =
TBOX(left, bottom, right, top);
166 part->median_bottom_ = bottom;
167 part->median_top_ = top;
168 part->median_height_ = top - bottom;
169 part->median_left_ = left;
170 part->median_right_ = right;
171 part->median_width_ = right - left;
172 part->left_key_ = part->BoxLeftKey();
173 part->right_key_ = part->BoxRightKey();
183 if (boxes_.empty()) {
186 bounding_box_ += box;
190 if (!last_add_was_vertical_) {
191 boxes_.sort(SortByBoxBottom<BLOBNBOX>);
192 last_add_was_vertical_ =
true;
194 boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>,
true, bbox);
196 if (last_add_was_vertical_) {
197 boxes_.sort(SortByBoxLeft<BLOBNBOX>);
198 last_add_was_vertical_ =
false;
200 boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>,
true, bbox);
202 if (!left_key_tab_) {
205 if (!right_key_tab_) {
209 tprintf(
"Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
211 bounding_box_.
left(), bounding_box_.
right());
217 BLOBNBOX_C_IT bb_it(&boxes_);
218 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
219 if (box == bb_it.data()) {
231 BLOBNBOX_C_IT bb_it(&boxes_);
232 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
235 if (biggest ==
nullptr ||
240 if (biggest ==
nullptr ||
252 BLOBNBOX_C_IT bb_it(&boxes_);
253 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
254 if (box != bb_it.data()) {
255 result += bb_it.data()->bounding_box();
264 BLOBNBOX_C_IT bb_it(&boxes_);
265 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
268 if (other ==
nullptr) {
280 BLOBNBOX_C_IT bb_it(&boxes_);
281 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
293 BLOBNBOX_C_IT bb_it(&boxes_);
294 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
296 if (bblob->
owner() ==
this) {
307 BLOBNBOX_C_IT bb_it(&boxes_);
308 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
311 if (bblob->
owner() ==
this) {
330 for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) {
343 BLOBNBOX_CLIST reversed_boxes;
344 BLOBNBOX_C_IT reversed_it(&reversed_boxes);
346 BLOBNBOX_C_IT bb_it(&boxes_);
347 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
348 reversed_it.add_before_then_move(bb_it.extract());
350 bb_it.add_list_after(&reversed_boxes);
352 int tmp = left_margin_;
353 left_margin_ = -right_margin_;
354 right_margin_ = -tmp;
365 if (bounding_box_.
left() > bounding_box_.
right()) {
367 tprintf(
"Bounding box invalid\n");
372 if (left_margin_ > bounding_box_.
left() ||
373 right_margin_ < bounding_box_.
right()) {
438 if (bounding_box_.
right() < other.bounding_box_.
left() &&
442 if (other.bounding_box_.
right() < bounding_box_.
left() &&
446 if (bounding_box_.
left() > other.bounding_box_.
right() &&
450 if (other.bounding_box_.
left() > bounding_box_.
right() &&
459 double fractional_tolerance,
460 double constant_tolerance)
const {
462 int nonmatch_count = 0;
463 BLOBNBOX_C_IT box_it(
const_cast<BLOBNBOX_CLIST *
>(&boxes_));
464 BLOBNBOX_C_IT other_it(
const_cast<BLOBNBOX_CLIST *
>(&other.boxes_));
465 box_it.mark_cycle_pt();
466 other_it.mark_cycle_pt();
467 while (!box_it.cycled_list() && !other_it.cycled_list()) {
468 if (box_it.data()->MatchingStrokeWidth(
469 *other_it.data(), fractional_tolerance, constant_tolerance)) {
477 return match_count > nonmatch_count;
488 BLOBNBOX_C_IT it(
const_cast<BLOBNBOX_CLIST *
>(&boxes_));
489 int min_top = INT32_MAX;
490 int max_bottom = -INT32_MAX;
491 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
495 tprintf(
"Blob is not a diacritic:");
510 min_top > candidate.median_bottom_ && max_bottom < candidate.median_top_;
515 tprintf(
"y ranges don\'t overlap: %d-%d / %d-%d\n", max_bottom, min_top,
516 median_bottom_, median_top_);
526 if (tab_vector !=
nullptr) {
530 left_key_tab_ =
false;
532 if (!left_key_tab_) {
539 if (tab_vector !=
nullptr) {
540 right_key_ = tab_vector->
sort_key();
543 right_key_tab_ =
false;
545 if (!right_key_tab_) {
553 left_key_tab_ = take_box ? false : src.left_key_tab_;
555 left_key_ = src.left_key_;
560 if (left_margin_ > bounding_box_.
left()) {
561 left_margin_ = src.left_margin_;
567 right_key_tab_ = take_box ? false : src.right_key_tab_;
568 if (right_key_tab_) {
569 right_key_ = src.right_key_;
574 if (right_margin_ < bounding_box_.
right()) {
575 right_margin_ = src.right_margin_;
581 BLOBNBOX_C_IT it(
const_cast<BLOBNBOX_CLIST *
>(&boxes_));
582 return it.data()->left_rule();
586 BLOBNBOX_C_IT it(
const_cast<BLOBNBOX_CLIST *
>(&boxes_));
588 return it.data()->right_rule();
593 return special_blobs_densities_[
type];
598 BLOBNBOX_C_IT blob_it(&boxes_);
600 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
612 const float density) {
614 special_blobs_densities_[
type] = density;
618 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
619 if (boxes_.empty()) {
623 BLOBNBOX_C_IT blob_it(&boxes_);
624 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
627 special_blobs_densities_[
type]++;
630 for (
float &special_blobs_density : special_blobs_densities_) {
631 special_blobs_density /= boxes_.length();
640 partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
true,
642 upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
true, partner);
644 partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
true,
646 lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
true, partner);
654 ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_);
655 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
656 if (it.data() == partner) {
665 ColPartition_CLIST *partners = upper ? &upper_partners_ : &lower_partners_;
666 if (!partners->singleton()) {
669 ColPartition_C_IT it(partners);
681 bounding_box_.
bottom()) ||
683 other->bounding_box_.
bottom())) {
690 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
692 unsigned w1 = boxes_.length();
693 unsigned w2 = other->boxes_.length();
694 float new_val = special_blobs_densities_[
type] * w1 +
695 other->special_blobs_densities_[
type] * w2;
698 special_blobs_densities_[
type] = new_val / (w1 + w2);
703 BLOBNBOX_C_IT it(&boxes_);
704 BLOBNBOX_C_IT it2(&other->boxes_);
705 for (; !it2.empty(); it2.forward()) {
708 if (prev_owner != other && prev_owner !=
nullptr) {
712 ASSERT_HOST(prev_owner == other || prev_owner ==
nullptr);
713 if (prev_owner == other) {
716 it.add_to_end(bbox2);
718 left_margin_ = std::min(left_margin_, other->left_margin_);
719 right_margin_ = std::max(right_margin_, other->right_margin_);
720 if (other->left_key_ < left_key_) {
721 left_key_ = other->left_key_;
722 left_key_tab_ = other->left_key_tab_;
724 if (other->right_key_ > right_key_) {
725 right_key_ = other->right_key_;
726 right_key_tab_ = other->right_key_tab_;
731 flow_ = other->flow_;
732 blob_type_ = other->blob_type_;
736 boxes_.sort(SortByBoxBottom<BLOBNBOX>);
737 last_add_was_vertical_ =
true;
739 boxes_.sort(SortByBoxLeft<BLOBNBOX>);
740 last_add_was_vertical_ =
false;
745 for (
int upper = 0; upper < 2; ++upper) {
746 ColPartition_CLIST partners;
747 ColPartition_C_IT part_it(&partners);
748 part_it.add_list_after(upper ? &other->upper_partners_
749 : &other->lower_partners_);
750 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
775 int ok_box_overlap,
bool debug) {
779 tprintf(
"Vertical partition\n");
794 if (merged_box.
bottom() < median_top_ && merged_box.
top() > median_bottom_ &&
795 merged_box.
bottom() < bounding_box_.
top() - ok_box_overlap &&
796 merged_box.
top() > bounding_box_.
bottom() + ok_box_overlap) {
798 tprintf(
"Excessive box overlap\n");
809 if (boxes_.empty() || boxes_.singleton()) {
812 BLOBNBOX_C_IT it(&boxes_);
813 TBOX left_box(it.data()->bounding_box());
814 for (it.forward(); !it.at_first(); it.forward()) {
831 BLOBNBOX_C_IT it(&boxes_);
832 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
836 if (bbox == split_blob || !split_part->boxes_.empty()) {
837 split_part->
AddBox(it.extract());
850 right_key_tab_ =
false;
851 split_part->left_key_tab_ =
false;
866 if (split_x <= bounding_box_.
left() || split_x >= bounding_box_.
right()) {
871 BLOBNBOX_C_IT it(&boxes_);
872 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
877 if (box.
left() >= split_x) {
878 split_part->
AddBox(it.extract());
886 it.add_list_after(&split_part->boxes_);
895 right_key_tab_ =
false;
896 split_part->left_key_tab_ =
false;
897 right_margin_ = split_x;
898 split_part->left_margin_ = split_x;
906 bounding_box_ =
TBOX();
907 BLOBNBOX_C_IT it(&boxes_);
909 int non_leader_count = 0;
911 bounding_box_.
set_left(left_margin_);
916 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
924 if (!left_key_tab_) {
930 tprintf(
"Computed left-illegal partition\n");
933 if (!right_key_tab_) {
937 tprintf(
"Computed right-illegal partition\n");
945 median_top_ = bounding_box_.
top();
946 median_bottom_ = bounding_box_.
bottom();
947 median_height_ = bounding_box_.
height();
948 median_left_ = bounding_box_.
left();
949 median_right_ = bounding_box_.
right();
950 median_width_ = bounding_box_.
width();
958 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
962 int area = box.
area();
963 top_stats.
add(box.
top(), area);
966 left_stats.
add(box.
left(), area);
971 median_top_ =
static_cast<int>(top_stats.
median() + 0.5);
972 median_bottom_ =
static_cast<int>(bottom_stats.
median() + 0.5);
973 median_height_ =
static_cast<int>(height_stats.
median() + 0.5);
974 median_left_ =
static_cast<int>(left_stats.
median() + 0.5);
975 median_right_ =
static_cast<int>(right_stats.
median() + 0.5);
976 median_width_ =
static_cast<int>(width_stats.
median() + 0.5);
980 tprintf(
"Made partition with bad right coords, %d < %d\n", right_margin_,
981 bounding_box_.
right());
985 tprintf(
"Made partition with bad left coords, %d > %d\n", left_margin_,
986 bounding_box_.
left());
992 for (
int upper = 0; upper < 2; ++upper) {
993 ColPartition_CLIST partners;
994 ColPartition_C_IT part_it(&partners);
995 part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
996 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
1003 bounding_box_.
bottom())) {
1004 tprintf(
"Recomputed box for partition %p\n",
static_cast<void *
>(
this));
1011 BLOBNBOX_C_IT it(&boxes_);
1012 int overlap_count = 0;
1013 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1019 return overlap_count;
1025 int first_spanned_col = -1;
1027 resolution, bounding_box_.
left(), bounding_box_.
right(),
1029 left_margin_, right_margin_, &first_column_, &last_column_,
1030 &first_spanned_col);
1031 column_set_ = columns;
1032 if (first_column_ < last_column_ && span_type ==
CST_PULLOUT &&
1036 if (first_spanned_col >= 0) {
1037 first_column_ = first_spanned_col;
1038 last_column_ = first_spanned_col;
1040 if ((first_column_ & 1) == 0) {
1041 last_column_ = first_column_;
1042 }
else if ((last_column_ & 1) == 0) {
1043 first_column_ = last_column_;
1045 first_column_ = last_column_ = (first_column_ + last_column_) / 2;
1065 switch (blob_type_) {
1108 int *first_col,
int *last_col) {
1109 int first_spanned_col = -1;
1111 resolution, bounding_box_.
left(), bounding_box_.
right(),
1113 left_margin_, right_margin_, first_col, last_col, &first_spanned_col);
1121 good_width_ = cb(width);
1122 good_column_ = blob_type_ ==
BRT_TEXT && left_key_tab_ && right_key_tab_;
1132 bool result =
false;
1134 int part_width = bounding_box_.
width();
1135 STATS gap_stats(0, part_width - 1);
1136 STATS width_stats(0, part_width - 1);
1137 BLOBNBOX_C_IT it(&boxes_);
1142 for (it.forward(); !it.at_first(); it.forward()) {
1147 width_stats.
add(right - left, 1);
1152 double median_gap = gap_stats.
median();
1156 double gap_iqr = gap_stats.
ile(0.75f) - gap_stats.
ile(0.25f);
1158 tprintf(
"gap iqr = %g, blob_count=%d, limits=%g,%g\n", gap_iqr, blob_count,
1168 int offset =
static_cast<int>(ceil(gap_iqr * 2));
1169 int min_step =
static_cast<int>(median_gap +
median_width + 0.5);
1170 int max_step = min_step + offset;
1173 int part_left = bounding_box_.
left() - min_step / 2;
1174 part_width += min_step;
1175 auto *projection =
new DPPoint[part_width];
1176 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1181 for (
int x = left;
x < right; ++
x) {
1182 projection[left - part_left].AddLocalCost(height);
1187 part_width, projection);
1188 if (best_end !=
nullptr && best_end->
total_cost() < blob_count) {
1191 bool modified_blob_list =
false;
1192 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1195 if (it.at_first()) {
1200 modified_blob_list =
true;
1206 it.data_relative(-1)->bounding_box().right();
1209 modified_blob_list =
true;
1216 if (modified_blob_list) {
1222 if (best_end ==
nullptr) {
1229 delete[] projection;
1243 int good_blob_score_ = 0;
1244 int noisy_count = 0;
1245 int hline_count = 0;
1246 int vline_count = 0;
1247 BLOBNBOX_C_IT it(&boxes_);
1248 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1262 if (hline_count > vline_count) {
1265 }
else if (vline_count > hline_count) {
1272 long_side = bounding_box_.
width();
1273 short_side = bounding_box_.
height();
1276 long_side = bounding_box_.
height();
1277 short_side = bounding_box_.
width();
1298 if (flow_ ==
BTFT_CHAIN && strong_score == 3) {
1308 if (noisy_count >= blob_count) {
1314 bounding_box_.
bottom())) {
1315 tprintf(
"RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
1316 blob_count, noisy_count, good_blob_score_);
1317 tprintf(
" Projection value=%d, flow=%d, blob_type=%d\n",
value, flow_,
1330 BLOBNBOX_C_IT it(&boxes_);
1331 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1349 int total_height = 0;
1351 int height_count = 0;
1353 BLOBNBOX_C_IT it(&boxes_);
1354 TBOX box(it.data()->bounding_box());
1363 linepoints.
Add(first_pt);
1364 for (it.forward(); !it.at_last(); it.forward()) {
1368 linepoints.
Add(box_pt);
1369 total_height += box.
width();
1370 coverage += box.
height();
1373 box = it.data()->bounding_box();
1375 linepoints.
Add(last_pt);
1376 width = last_pt.
y() - first_pt.
y();
1380 TBOX box(it.data()->bounding_box());
1384 linepoints.
Add(first_pt);
1385 for (it.forward(); !it.at_last(); it.forward()) {
1389 linepoints.
Add(box_pt);
1390 total_height += box.
height();
1391 coverage += box.
width();
1394 box = it.data()->bounding_box();
1396 linepoints.
Add(last_pt);
1397 width = last_pt.
x() - first_pt.
x();
1400 if (height_count == 0) {
1405 double error = linepoints.
Fit(&start_pt, &end_pt);
1413 ColPartition_LIST *used_parts,
1414 WorkingPartSet_LIST *working_sets) {
1418 block_owned_ =
true;
1419 WorkingPartSet_IT it(working_sets);
1422 if (partner !=
nullptr && partner->working_set_ !=
nullptr) {
1423 working_set_ = partner->working_set_;
1428 tprintf(
"Partition with partner has no working set!:");
1436 for (it.mark_cycle_pt(); !it.cycled_list() && col_index != first_column_;
1437 it.forward(), ++col_index) {
1441 tprintf(
"Match is %s for:", (col_index & 1) ?
"Real" :
"Between");
1445 tprintf(
"Target column=%d, only had %d\n", first_column_, col_index);
1448 work_set = it.data();
1451 if (!it.cycled_list() && last_column_ != first_column_ && !
IsPulloutType()) {
1453 BLOCK_LIST completed_blocks;
1454 TO_BLOCK_LIST to_blocks;
1455 for (; !it.cycled_list() && col_index <= last_column_;
1456 it.forward(), ++col_index) {
1459 &completed_blocks, &to_blocks);
1463 working_set_ = work_set;
1475 ColPartition_LIST *block_parts,
1476 ColPartition_LIST *used_parts,
1477 BLOCK_LIST *completed_blocks,
1478 TO_BLOCK_LIST *to_blocks) {
1479 int page_height = tright.
y() - bleft.
y();
1481 ColPartition_IT it(block_parts);
1483 int max_line_height = 0;
1489 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1496 BLOBNBOX_C_IT blob_it(part->
boxes());
1497 int prev_bottom = blob_it.data()->bounding_box().bottom();
1498 for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
1501 int step = bottom - prev_bottom;
1505 side_steps.
add(step, 1);
1506 prev_bottom = bottom;
1509 if (!it.at_last()) {
1520 tprintf(
"side step = %.2f, top spacing = %d, bottom spacing=%d\n",
1525 if (part_count == 0) {
1529 SmoothSpacings(resolution, page_height, block_parts);
1532 BLOCK_IT block_it(completed_blocks);
1533 TO_BLOCK_IT to_block_it(to_blocks);
1534 ColPartition_LIST spacing_parts;
1535 ColPartition_IT sp_block_it(&spacing_parts);
1537 for (it.mark_cycle_pt(); !it.empty();) {
1539 sp_block_it.add_to_end(part);
1541 if (it.empty() || part->
bottom_spacing() > same_block_threshold ||
1542 !part->SpacingsEqual(*it.data(), resolution)) {
1545 if (!it.empty() && part->
bottom_spacing() <= same_block_threshold) {
1549 ColPartition *third_part = it.at_last() ? nullptr : it.data_relative(1);
1552 "Spacings unequal: upper:%d/%d, lower:%d/%d,"
1553 " sizes %d %d %d\n",
1561 if (part->SizesSimilar(*next_part) &&
1568 if (third_part ==
nullptr || !next_part->SizesSimilar(*third_part) ||
1575 sp_block_it.add_to_end(it.extract());
1578 tprintf(
"Added line to current block.\n");
1584 if (to_block !=
nullptr) {
1585 to_block_it.add_to_end(to_block);
1586 block_it.add_to_end(to_block->
block);
1588 sp_block_it.set_to_list(&spacing_parts);
1592 tprintf(
"Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
1603 if (pos->
x() < bleft.
x()) {
1606 if (pos->
x() > tright.
x()) {
1609 if (pos->
y() < bleft.
y()) {
1612 if (pos->
y() > tright.
y()) {
1621static TO_BLOCK *MoveBlobsToBlock(
bool vertical_text,
int line_spacing,
1622 BLOCK *block, ColPartition_LIST *block_parts,
1623 ColPartition_LIST *used_parts) {
1628 TBOX block_box(block->pdblk.bounding_box());
1629 STATS sizes(0, std::max(block_box.width(), block_box.height()) - 1);
1630 bool text_type = block->pdblk.poly_block()->IsText();
1631 ColPartition_IT it(block_parts);
1632 auto *to_block =
new TO_BLOCK(block);
1633 BLOBNBOX_IT blob_it(&to_block->blobs);
1634 ColPartition_IT used_it(used_parts);
1635 for (it.move_to_first(); !it.empty(); it.forward()) {
1636 ColPartition *part = it.extract();
1640 for (BLOBNBOX_C_IT bb_it(part->boxes()); !bb_it.empty(); bb_it.forward()) {
1641 BLOBNBOX *bblob = bb_it.extract();
1642 if (bblob->owner() != part) {
1643 tprintf(
"Ownership incorrect for blob:");
1644 bblob->bounding_box().print();
1647 if (bblob->owner() ==
nullptr) {
1651 bblob->owner()->Print();
1659 C_OUTLINE_LIST *outlines = bblob->cblob()->out_list();
1660 C_OUTLINE_IT ol_it(outlines);
1661 ASSERT_HOST(!text_type || ol_it.data()->pathlength() > 0);
1662 if (vertical_text) {
1663 sizes.add(bblob->bounding_box().width(), 1);
1665 sizes.add(bblob->bounding_box().height(), 1);
1667 blob_it.add_after_then_move(bblob);
1669 used_it.add_to_end(part);
1671 if (text_type && blob_it.empty()) {
1676 to_block->line_size = sizes.median();
1677 if (vertical_text) {
1678 int block_width = block->pdblk.bounding_box().width();
1679 if (block_width < line_spacing) {
1680 line_spacing = block_width;
1682 to_block->line_spacing =
static_cast<float>(line_spacing);
1683 to_block->max_blob_size =
static_cast<float>(block_width + 1);
1685 int block_height = block->pdblk.bounding_box().height();
1686 if (block_height < line_spacing) {
1687 line_spacing = block_height;
1689 to_block->line_spacing =
static_cast<float>(line_spacing);
1690 to_block->max_blob_size =
static_cast<float>(block_height + 1);
1698 ColPartition_LIST *block_parts,
1699 ColPartition_LIST *used_parts) {
1700 if (block_parts->empty()) {
1707 ColPartition_IT it(block_parts);
1720 ICOORDELT_LIST vertices;
1721 ICOORDELT_IT vert_it(&vertices);
1723 int min_x = INT32_MAX;
1724 int max_x = -INT32_MAX;
1725 int min_y = INT32_MAX;
1726 int max_y = -INT32_MAX;
1729 if (iteration == 0) {
1730 ColPartition::LeftEdgeRun(&it, &start, &end);
1732 ColPartition::RightEdgeRun(&it, &start, &end);
1734 ClipCoord(bleft, tright, &start);
1735 ClipCoord(bleft, tright, &end);
1736 vert_it.add_after_then_move(
new ICOORDELT(start));
1737 vert_it.add_after_then_move(
new ICOORDELT(end));
1742 if ((iteration == 0 && it.at_first()) || (iteration == 1 && it.at_last())) {
1746 }
while (iteration < 2);
1748 tprintf(
"Making block at (%d,%d)->(%d,%d)\n", min_x, min_y, max_x, max_y);
1750 auto *block =
new BLOCK(
"",
true, 0, 0, min_x, min_y, max_x, max_y);
1752 return MoveBlobsToBlock(
false, line_spacing, block, block_parts, used_parts);
1759 ColPartition_LIST *block_parts,
1760 ColPartition_LIST *used_parts) {
1761 if (block_parts->empty()) {
1764 ColPartition_IT it(block_parts);
1767 int line_spacing = block_box.
width();
1769 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1770 block_box += it.data()->bounding_box();
1776 auto *block =
new BLOCK(
"",
true, 0, 0, block_box.
left(), block_box.
bottom(),
1777 block_box.
right(), block_box.
top());
1779 return MoveBlobsToBlock(
true, line_spacing, block, block_parts, used_parts);
1785 BLOBNBOX_C_IT blob_it(&boxes_);
1787 int line_size =
IsVerticalType() ? median_width_ : median_height_;
1789 for (; !blob_it.empty(); blob_it.forward()) {
1790 BLOBNBOX *blob = blob_it.extract();
1794 if (row ==
nullptr) {
1796 new TO_ROW(blob,
static_cast<float>(top),
static_cast<float>(bottom),
1797 static_cast<float>(line_size));
1799 row->
add_blob(blob,
static_cast<float>(top),
static_cast<float>(bottom),
1800 static_cast<float>(line_size));
1810 part->left_margin_ = left_margin_;
1811 part->right_margin_ = right_margin_;
1812 part->bounding_box_ = bounding_box_;
1813 memcpy(part->special_blobs_densities_, special_blobs_densities_,
1814 sizeof(special_blobs_densities_));
1815 part->median_bottom_ = median_bottom_;
1816 part->median_top_ = median_top_;
1817 part->median_height_ = median_height_;
1818 part->median_left_ = median_left_;
1819 part->median_right_ = median_right_;
1820 part->median_width_ = median_width_;
1821 part->good_width_ = good_width_;
1822 part->good_column_ = good_column_;
1823 part->left_key_tab_ = left_key_tab_;
1824 part->right_key_tab_ = right_key_tab_;
1825 part->type_ = type_;
1826 part->flow_ = flow_;
1827 part->left_key_ = left_key_;
1828 part->right_key_ = right_key_;
1829 part->first_column_ = first_column_;
1830 part->last_column_ = last_column_;
1831 part->owns_blobs_ =
false;
1838 BLOBNBOX_C_IT inserter(copy->
boxes());
1839 BLOBNBOX_C_IT traverser(
boxes());
1840 for (traverser.mark_cycle_pt(); !traverser.cycled_list();
1841 traverser.forward()) {
1842 inserter.add_after_then_move(traverser.data());
1847#ifndef GRAPHICS_DISABLED
1859static char kBlobTypes[
BRT_COUNT + 1] =
"NHSRIUVT";
1865 "ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
1866 " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
1867 " ts=%d bs=%d ls=%d rs=%d\n",
1868 boxes_.empty() ?
'E' :
' ', left_margin_, left_key_tab_ ?
'T' :
'B',
1871 right_key_tab_ ?
'T' :
'B', right_margin_, median_right_,
1872 bounding_box_.
top(), median_top_, good_width_, good_column_, type_,
1873 kBlobTypes[blob_type_], flow_, first_column_, last_column_,
1874 boxes_.length(), space_above_, space_below_, space_to_left_,
1880 tprintf(
"Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n", color1_[COLOR_RED],
1881 color1_[COLOR_GREEN], color1_[COLOR_BLUE], color1_[L_ALPHA_CHANNEL],
1882 color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
1887 STATS left_stats(0, working_set_count - 1);
1888 STATS right_stats(0, working_set_count - 1);
1893 if (partner->type_ > max_type) {
1894 max_type = partner->type_;
1896 if (column_set_ == partner->column_set_) {
1897 left_stats.
add(partner->first_column_, 1);
1898 right_stats.
add(partner->last_column_, 1);
1906 first_column_ = left_stats.
mode();
1907 last_column_ = right_stats.
mode();
1908 if (last_column_ < first_column_)
1909 last_column_ = first_column_;
1914 partner->type_ = max_type;
1916 if (column_set_ == partner->column_set_) {
1917 partner->first_column_ = first_column_;
1918 partner->last_column_ = last_column_;
1959 RefinePartnersInternal(
true, get_desperate, grid);
1960 RefinePartnersInternal(
false, get_desperate, grid);
1964 RefinePartnersByType(
true, &upper_partners_);
1965 RefinePartnersByType(
false, &lower_partners_);
1969 if (!upper_partners_.empty() && !upper_partners_.singleton()) {
1970 RefinePartnersByOverlap(
true, &upper_partners_);
1972 if (!lower_partners_.empty() && !lower_partners_.singleton()) {
1973 RefinePartnersByOverlap(
false, &lower_partners_);
1983void ColPartition::RefinePartnersInternal(
bool upper,
bool get_desperate,
1985 ColPartition_CLIST *partners = upper ? &upper_partners_ : &lower_partners_;
1986 if (!partners->empty() && !partners->singleton()) {
1987 RefinePartnersByType(upper, partners);
1988 if (!partners->empty() && !partners->singleton()) {
1990 RefinePartnerShortcuts(upper, partners);
1991 if (!partners->empty() && !partners->singleton()) {
1995 RefineTextPartnersByMerge(upper,
false, partners, grid);
1996 if (!partners->empty() && !partners->singleton()) {
1997 RefineTextPartnersByMerge(upper,
true, partners, grid);
2001 if (!partners->empty() && !partners->singleton()) {
2002 RefinePartnersByOverlap(upper, partners);
2012void ColPartition::RefinePartnersByType(
bool upper,
2013 ColPartition_CLIST *partners) {
2017 tprintf(
"Refining %d %s partners by type for:\n", partners->length(),
2018 upper ?
"Upper" :
"Lower");
2021 ColPartition_C_IT it(partners);
2027 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2034 partner->RemovePartner(!upper,
this);
2043 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2051 partner->RemovePartner(!upper,
this);
2066void ColPartition::RefinePartnerShortcuts(
bool upper,
2067 ColPartition_CLIST *partners) {
2068 bool done_any =
false;
2071 ColPartition_C_IT it(partners);
2072 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2076 ColPartition_C_IT it1(upper ? &a->upper_partners_ : &a->lower_partners_);
2077 for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
2082 a->RemovePartner(!upper,
this);
2085 ColPartition_C_IT it2(partners);
2086 for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) {
2091 b2->RemovePartner(!upper,
this);
2106 }
while (done_any && !partners->empty() && !partners->singleton());
2117void ColPartition::RefineTextPartnersByMerge(
bool upper,
bool desperate,
2118 ColPartition_CLIST *partners,
2119 ColPartitionGrid *grid) {
2123 tprintf(
"Refining %d %s partners by merge for:\n", partners->length(),
2124 upper ?
"Upper" :
"Lower");
2127 while (!partners->empty() && !partners->singleton()) {
2130 ColPartition_C_IT it(partners);
2134 ColPartition_CLIST candidates;
2135 ColPartition_C_IT cand_it(&candidates);
2136 for (it.forward(); !it.at_first(); it.forward()) {
2138 if (part->first_column_ == candidate->last_column_ &&
2139 part->last_column_ == candidate->first_column_) {
2140 cand_it.add_after_then_move(it.data());
2143 int overlap_increase;
2145 part, &candidates, debug,
nullptr, &overlap_increase);
2146 if (candidate !=
nullptr && (overlap_increase <= 0 || desperate)) {
2148 tprintf(
"Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
2149 part->HCoreOverlap(*candidate), part->VCoreOverlap(*candidate),
2153 grid->RemoveBBox(candidate);
2154 grid->RemoveBBox(part);
2155 part->Absorb(candidate,
nullptr);
2157 grid->InsertBBox(
true,
true, part);
2158 if (overlap_increase > 0) {
2159 part->desperately_merged_ =
true;
2169void ColPartition::RefinePartnersByOverlap(
bool upper,
2170 ColPartition_CLIST *partners) {
2174 tprintf(
"Refining %d %s partners by overlap for:\n", partners->length(),
2175 upper ?
"Upper" :
"Lower");
2178 ColPartition_C_IT it(partners);
2181 int best_overlap = 0;
2182 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2185 std::min(bounding_box_.
right(), partner->bounding_box_.right()) -
2186 std::max(bounding_box_.
left(), partner->bounding_box_.left());
2187 if (overlap > best_overlap) {
2188 best_overlap = overlap;
2189 best_partner = partner;
2193 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2195 if (partner != best_partner) {
2200 partner->RemovePartner(!upper,
this);
2207bool ColPartition::ThisPartitionBetter(BLOBNBOX *bbox,
2208 const ColPartition &other) {
2209 const TBOX &box = bbox->bounding_box();
2211 int left = box.left();
2212 int right = box.right();
2213 if (left < left_margin_ || right > right_margin_) {
2216 if (left < other.left_margin_ || right > other.right_margin_) {
2219 int top = box.top();
2220 int bottom = box.bottom();
2222 std::min(top, median_top_) - std::max(bottom, median_bottom_);
2224 std::min(top, other.median_top_) - std::max(bottom, other.median_bottom_);
2225 int this_miss = median_top_ - median_bottom_ - this_overlap;
2226 int other_miss = other.median_top_ - other.median_bottom_ - other_overlap;
2228 tprintf(
"Unique on (%d,%d)->(%d,%d) overlap %d/%d, miss %d/%d, mt=%d/%d\n",
2229 box.left(), box.bottom(), box.right(), box.top(), this_overlap,
2230 other_overlap, this_miss, other_miss, median_top_,
2233 if (this_miss < other_miss) {
2236 if (this_miss > other_miss) {
2239 if (this_overlap > other_overlap) {
2242 if (this_overlap < other_overlap) {
2245 return median_top_ >= other.median_top_;
2252static int MedianSpacing(
int page_height, ColPartition_IT it) {
2253 STATS stats(0, page_height - 1);
2254 while (!it.cycled_list()) {
2255 ColPartition *part = it.data();
2257 stats.add(part->bottom_spacing(), 1);
2258 stats.add(part->top_spacing(), 1);
2260 return static_cast<int>(stats.median() + 0.5);
2274 return (last_column_ >= part.first_column_) &&
2275 (first_column_ <= part.last_column_);
// Scans the partitions in `parts` for runs of consistent line spacing and,
// at the end of each run, computes rounded average top/bottom spacings over
// the partitions of that run.
// NOTE(review): this listing omits some original lines (loop headers, closing
// braces, a few statements). The comments describe only what is shown.
//   resolution  - forwarded to OKSpacingBlip() and SpacingEqual().
//   page_height - forwarded to MedianSpacing().
//   parts       - the partition list that is iterated.
2281void ColPartition::SmoothSpacings(
int resolution,
int page_height,
2282 ColPartition_LIST *parts) {
2290 ColPartition_IT it(parts);
// Initial median spacing estimate, obtained from the iterator position `it`.
2297 int median_space = MedianSpacing(page_height, it);
// start_it and end_it begin as copies of `it`; start_it later seeds the
// summing pass and end_it is used to refresh the median.
2298 ColPartition_IT start_it(it);
2299 ColPartition_IT end_it(it);
// The neighbourhood window is first cleared to nullptr, then filled from the
// list iterator (the enclosing loops are not shown).
2302 neighbourhood[
i] =
nullptr;
2307 neighbourhood[
i] = it.data();
// Main scan: continues for as long as the PN_UPPER slot holds a partition.
2311 while (neighbourhood[
PN_UPPER] !=
nullptr) {
// End-of-run test. It fires when there is no PN_LOWER neighbour, or when the
// remaining terms all hold. The visible terms require, for each of the
// offsets 0, -1 and +1, that a needed neighbour is missing or that
// OKSpacingBlip() rejects that offset; for -1 and +1 a SpacingEqual()
// failure against the median also suffices.
// NOTE(review): original lines 2334-2335 are absent here, so at least one
// term of this condition is not visible.
2333 if (neighbourhood[
PN_LOWER] ==
nullptr ||
2336 (neighbourhood[
PN_UPPER] ==
nullptr ||
2337 neighbourhood[
PN_LOWER] ==
nullptr ||
2338 !OKSpacingBlip(resolution, median_space, neighbourhood, 0)) &&
2339 (neighbourhood[
PN_UPPER - 1] ==
nullptr ||
2340 neighbourhood[
PN_LOWER - 1] ==
nullptr ||
2341 !OKSpacingBlip(resolution, median_space, neighbourhood, -1) ||
2342 !neighbourhood[
PN_LOWER]->SpacingEqual(median_space, resolution)) &&
2343 (neighbourhood[
PN_UPPER + 1] ==
nullptr ||
2344 neighbourhood[
PN_LOWER + 1] ==
nullptr ||
2345 !OKSpacingBlip(resolution, median_space, neighbourhood, 1) ||
2346 !neighbourhood[
PN_UPPER]->SpacingEqual(median_space, resolution)))) {
// Summing pass: starting from start_it, accumulate the bottom and top
// spacings of each partition until last_part is reached.
2349 ColPartition_IT sum_it(start_it);
2351 double total_bottom = 0.0;
2352 double total_top = 0.0;
2353 int total_count = 0;
2356 while (upper != last_part) {
2357 total_bottom += upper->bottom_spacing();
2358 total_top += upper->top_spacing();
2361 upper = sum_it.data();
// Only average when something was counted; this also avoids dividing by 0.
2363 if (total_count > 0) {
// Adding 0.5 before the cast rounds the mean to the nearest integer
// (for the non-negative case).
2365 int top_spacing =
static_cast<int>(total_top / total_count + 0.5);
2366 int bottom_spacing =
static_cast<int>(total_bottom / total_count + 0.5);
// Trace output describing why the run ended and the averaged spacings.
2368 tprintf(
"Spacing run ended. Cause:");
2369 if (neighbourhood[
PN_LOWER] ==
nullptr) {
2372 tprintf(
"Spacing change. Spacings:\n");
2374 if (neighbourhood[
i] ==
nullptr) {
2376 if (
i > 0 && neighbourhood[
i - 1] !=
nullptr) {
2382 tprintf(
" nullptr lower partner:\n");
2388 tprintf(
"Top = %d, bottom = %d\n",
// Second pass over the same run, again walking sum_it up to last_part.
// NOTE(review): the loop body is not shown; presumably it stores the averaged
// spacings back into each partition - confirm against the full source.
2397 upper = sum_it.data();
2398 while (upper != last_part) {
2406 upper = sum_it.data();
// Refresh the median spacing from the end_it position.
2413 median_space = MedianSpacing(page_height, end_it);
// Slide the neighbourhood window down by one slot...
2416 for (
int j = 1; j <
PN_COUNT; ++j) {
2417 neighbourhood[j - 1] = neighbourhood[j];
// ...and fill the freed last slot: nullptr once the list has cycled,
// otherwise the partition currently under `it`.
2419 if (it.cycled_list()) {
2420 neighbourhood[
PN_COUNT - 1] =
nullptr;
2422 neighbourhood[
PN_COUNT - 1] = it.data();
// Decides whether a spacing irregularity in the window `parts` is acceptable
// relative to median_spacing.
//   resolution     - forwarded to SpacingEqual().
//   median_spacing - the spacing value the neighbours are compared against.
//   parts          - the neighbourhood window of partitions.
//   offset         - SmoothSpacings() calls this with 0, -1 and 1; how the
//                    offset is applied is on lines not shown here.
// NOTE(review): only part of the return expression is visible. It involves
// parts[PN_ABOVE1] and parts[PN_BELOW1] each passing SpacingEqual(), combined
// with terms that are not shown.
2432bool ColPartition::OKSpacingBlip(
int resolution,
int median_spacing,
2433 ColPartition **parts,
int offset) {
2440 parts[
PN_ABOVE1]->SpacingEqual(median_spacing, resolution)) ||
2442 parts[
PN_BELOW1]->SpacingEqual(median_spacing, resolution)));
// Compares this partition's spacing with the single value `spacing`.
// Tolerances are taken from BottomSpacingMargin() and TopSpacingMargin().
// The visible part of the result requires bottom_spacing_ to be NearlyEqual
// to `spacing` within bottom_error.
// NOTE(review): the rest of the conjunction is on a line not shown;
// presumably it checks top_spacing_ with top_error - confirm.
2447bool ColPartition::SpacingEqual(
int spacing,
int resolution)
const {
2448 int bottom_error = BottomSpacingMargin(resolution);
2449 int top_error = TopSpacingMargin(resolution);
2450 return NearlyEqual(bottom_spacing_, spacing, bottom_error) &&
// Compares the spacings of this partition with those of `other`.
// Each tolerance is the larger of the two partitions' margins.
// From the visible lines, the bottom spacings must agree, and either the top
// spacings agree or the sum of the two top spacings is close to twice this
// partition's bottom spacing.
// NOTE(review): the tolerance argument of the last NearlyEqual() and the end
// of the expression are on lines not shown.
2456bool ColPartition::SpacingsEqual(
const ColPartition &other,
2457 int resolution)
const {
2458 int bottom_error = std::max(BottomSpacingMargin(resolution),
2459 other.BottomSpacingMargin(resolution));
2460 int top_error = std::max(TopSpacingMargin(resolution),
2461 other.TopSpacingMargin(resolution));
2462 return NearlyEqual(bottom_spacing_, other.bottom_spacing_, bottom_error) &&
2463 (
NearlyEqual(top_spacing_, other.top_spacing_, top_error) ||
2464 NearlyEqual(top_spacing_ + other.top_spacing_, bottom_spacing_ * 2,
// Tests the combined spacings of this partition and `other` against
// `spacing`.
// Tolerances are the larger of the two partitions' margins. The sums of the
// two bottom spacings and of the two top spacings are formed first.
// The visible comparisons check bottom_total against `spacing` and against
// `spacing * 2`.
// NOTE(review): the second half of each conjunction (presumably the matching
// top_total checks) and the operator joining the two groups are on lines not
// shown - confirm.
2471bool ColPartition::SummedSpacingOK(
const ColPartition &other,
int spacing,
2472 int resolution)
const {
2473 int bottom_error = std::max(BottomSpacingMargin(resolution),
2474 other.BottomSpacingMargin(resolution));
2475 int top_error = std::max(TopSpacingMargin(resolution),
2476 other.TopSpacingMargin(resolution));
2477 int bottom_total = bottom_spacing_ + other.bottom_spacing_;
2478 int top_total = top_spacing_ + other.top_spacing_;
2479 return (
NearlyEqual(spacing, bottom_total, bottom_error) &&
2481 (
NearlyEqual(spacing * 2, bottom_total, bottom_error) &&
// Returns an int margin derived from `resolution`. Callers in this file pass
// the result as the tolerance when comparing bottom spacings with
// NearlyEqual().
// NOTE(review): the body is not shown in this listing.
2487int ColPartition::BottomSpacingMargin(
int resolution)
const {
// Returns an int margin derived from `resolution`. Callers in this file pass
// the result as the tolerance when comparing top spacings with NearlyEqual().
// NOTE(review): only the tail of the body is shown; the expression ends with
// a call to BottomSpacingMargin(resolution), so the top margin depends on the
// bottom margin. The leading part of the expression is not visible.
2493int ColPartition::TopSpacingMargin(
int resolution)
const {
2495 BottomSpacingMargin(resolution);
// Compares the size of this partition with that of `other`.
// The visible condition requires this partition's median_height_ to be at
// most other.median_height_ scaled by kMaxSizeRatio.
// NOTE(review): further conjunct(s) follow on lines not shown; presumably the
// symmetric check - confirm.
2500bool ColPartition::SizesSimilar(
const ColPartition &other)
const {
2501 return median_height_ <= other.median_height_ *
kMaxSizeRatio &&
// Narrows the running left-margin interval [*margin_left, *margin_right] with
// the interval contributed by `part`.
// The part's interval runs from its left margin to the left edge of its
// bounding box, expressed as sort keys evaluated at both the top and the
// bottom of the box.
//   part         - partition whose left margin/box edge are examined.
//   margin_left  - in/out: lower bound of the running interval.
//   margin_right - in/out: upper bound of the running interval.
// NOTE(review): the return statements are not shown. Callers loop while this
// returns true, so it presumably returns true when the intervals overlap and
// the running interval was updated - confirm.
2508static bool UpdateLeftMargin(
const ColPartition &part,
int *margin_left,
2509 int *margin_right) {
2510 const TBOX &part_box = part.bounding_box();
2511 int top = part_box.top();
2512 int bottom = part_box.bottom();
// Sort keys of the margin (left end) and box edge (right end) at top/bottom.
2513 int tl_key = part.SortKey(part.left_margin(), top);
2514 int tr_key = part.SortKey(part_box.left(), top);
2515 int bl_key = part.SortKey(part.left_margin(), bottom);
2516 int br_key = part.SortKey(part_box.left(), bottom);
// Take the tighter value at each end: the larger of the two left keys and the
// smaller of the two right keys.
2517 int left_key = std::max(tl_key, bl_key);
2518 int right_key = std::min(tr_key, br_key);
// Intersect with the running interval only if the two intervals overlap.
2519 if (left_key <= *margin_right && right_key >= *margin_left) {
2521 *margin_right = std::min(*margin_right, right_key);
2522 *margin_left = std::max(*margin_left, left_key);
// Starting at the partition under *part_it, follows a run of partitions whose
// left-margin intervals keep overlapping (as judged by UpdateLeftMargin) and
// reports the run through *start and *end.
// NOTE(review): the end of the signature, the declarations of `part` and
// `start_part`, the `do` loop headers and some statements are not shown in
// this listing. The comments describe only the visible lines.
2533void ColPartition::LeftEdgeRun(ColPartition_IT *part_it, ICOORD *start,
// The run starts at the top of the current partition. When there is a
// previous partition, its bottom replaces start_y if it is lower, or is
// averaged with start_y if it is higher.
2537 int start_y = part->bounding_box_.top();
2538 if (!part_it->at_first()) {
2539 int prev_bottom = part_it->data_relative(-1)->bounding_box_.bottom();
2540 if (prev_bottom < start_y) {
2541 start_y = prev_bottom;
2542 }
else if (prev_bottom > start_y) {
2543 start_y = (start_y + prev_bottom) / 2;
2546 int end_y = part->bounding_box_.bottom();
// Begin with the widest possible interval and narrow it with the first part.
2547 int margin_right = INT32_MAX;
2548 int margin_left = -INT32_MAX;
2549 UpdateLeftMargin(*part, &margin_left, &margin_right);
// Keep taking partitions until the iterator wraps to the first element or a
// partition no longer fits the running interval.
2552 part = part_it->data();
2553 }
while (!part_it->at_first() &&
2554 UpdateLeftMargin(*part, &margin_left, &margin_right));
// Compute a fresh interval for the partition that ended the run.
2558 int next_margin_right = INT32_MAX;
2559 int next_margin_left = -INT32_MAX;
2560 UpdateLeftMargin(*part, &next_margin_left, &next_margin_right);
// If the next run lies entirely to the right of the current one, look ahead
// along the next run with a separate iterator...
2561 if (next_margin_left > margin_right) {
2562 ColPartition_IT next_it(*part_it);
2565 part = next_it.data();
2566 }
while (!next_it.at_first() &&
2567 UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
// ...then step part_it backward for as long as earlier partitions also fit
// the next run's interval, stopping at the partition the run started from.
2571 part_it->backward();
2572 part = part_it->data();
2573 }
while (part != start_part &&
2574 UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
// The run ends at the bottom of the partition just before the iterator. If
// the partition under the iterator has its top below that, use the midpoint.
2578 part = part_it->data_relative(-1);
2579 end_y = part->bounding_box_.bottom();
2580 if (!part_it->at_first() && part_it->data()->bounding_box_.top() < end_y) {
2581 end_y = (end_y + part_it->data()->bounding_box_.top()) / 2;
// Both x coordinates are taken on the margin_right key, evaluated at the
// respective y.
2583 start->set_y(start_y);
2584 start->set_x(part->XAtY(margin_right, start_y));
2586 end->set_x(part->XAtY(margin_right, end_y));
// Trace output: extent of the run, the summed interval and the margins of the
// partition that ended it.
2588 tprintf(
"Left run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
2589 start_y, end_y, part->XAtY(margin_left, end_y), end->x(),
2590 part->left_margin_, part->bounding_box_.left());
// Narrows the running right-margin interval [*margin_left, *margin_right]
// with the interval contributed by `part`.
// The part's interval runs from the right edge of its bounding box to its
// right margin, expressed as sort keys evaluated at both the top and the
// bottom of the box.
//   part         - partition whose right box edge/margin are examined.
//   margin_left  - in/out: lower bound of the running interval.
//   margin_right - in/out: upper bound of the running interval.
// NOTE(review): the return statements are not shown. Callers loop while this
// returns true, so it presumably returns true when the intervals overlap and
// the running interval was updated - confirm.
2597static bool UpdateRightMargin(
const ColPartition &part,
int *margin_left,
2598 int *margin_right) {
2599 const TBOX &part_box = part.bounding_box();
2600 int top = part_box.top();
2601 int bottom = part_box.bottom();
// Sort keys of the box edge (left end) and margin (right end) at top/bottom.
2602 int tl_key = part.SortKey(part_box.right(), top);
2603 int tr_key = part.SortKey(part.right_margin(), top);
2604 int bl_key = part.SortKey(part_box.right(), bottom);
2605 int br_key = part.SortKey(part.right_margin(), bottom);
// Take the tighter value at each end: the larger of the two left keys and the
// smaller of the two right keys.
2606 int left_key = std::max(tl_key, bl_key);
2607 int right_key = std::min(tr_key, br_key);
// Intersect with the running interval only if the two intervals overlap.
2608 if (left_key <= *margin_right && right_key >= *margin_left) {
2610 *margin_right = std::min(*margin_right, right_key);
2611 *margin_left = std::max(*margin_left, left_key);
// Starting at the partition under *part_it, follows a run of partitions whose
// right-margin intervals keep overlapping (as judged by UpdateRightMargin)
// and reports the run through *start and *end. In the visible lines the main
// scan steps the iterator backward, so the run proceeds from bottom to top.
// NOTE(review): the end of the signature, the declarations of `part` and
// `start_part`, the `do` loop headers and some statements are not shown in
// this listing. The comments describe only the visible lines.
2623void ColPartition::RightEdgeRun(ColPartition_IT *part_it, ICOORD *start,
// The run starts at the bottom of the current partition. When there is a
// following partition, its top is compared with start_y; if it is lower, the
// midpoint is used. The statement for the "higher" case is not shown.
2627 int start_y = part->bounding_box_.bottom();
2628 if (!part_it->at_last()) {
2629 int next_y = part_it->data_relative(1)->bounding_box_.top();
2630 if (next_y > start_y) {
2632 }
else if (next_y < start_y) {
2633 start_y = (start_y + next_y) / 2;
2636 int end_y = part->bounding_box_.top();
// Begin with the widest possible interval and narrow it with the first part.
2637 int margin_right = INT32_MAX;
2638 int margin_left = -INT32_MAX;
2639 UpdateRightMargin(*part, &margin_left, &margin_right);
// Keep stepping backward until the iterator wraps to the last element or a
// partition no longer fits the running interval.
2641 part_it->backward();
2642 part = part_it->data();
2643 }
while (!part_it->at_last() &&
2644 UpdateRightMargin(*part, &margin_left, &margin_right));
// Compute a fresh interval for the partition that ended the run.
2647 int next_margin_right = INT32_MAX;
2648 int next_margin_left = -INT32_MAX;
2649 UpdateRightMargin(*part, &next_margin_left, &next_margin_right);
// If the next run lies entirely to the left of the current one, look ahead
// along the next run with a separate iterator...
2650 if (next_margin_right < margin_left) {
2651 ColPartition_IT next_it(*part_it);
2654 part = next_it.data();
2655 }
while (!next_it.at_last() &&
2656 UpdateRightMargin(*part, &next_margin_left, &next_margin_right));
// ...then re-walk the original run for as long as its partitions also fit the
// next run's interval, stopping at the partition the run started from, and
// finally step the iterator back by one.
2661 part = part_it->data();
2662 }
while (part != start_part &&
2663 UpdateRightMargin(*part, &next_margin_left, &next_margin_right));
2664 part_it->backward();
// The run ends at the top of the partition just after the iterator. If the
// partition under the iterator has its bottom above that, use the midpoint.
2667 part = part_it->data_relative(1);
2668 end_y = part->bounding_box().top();
2669 if (!part_it->at_last() && part_it->data()->bounding_box_.bottom() > end_y) {
2670 end_y = (end_y + part_it->data()->bounding_box_.bottom()) / 2;
// Both x coordinates are taken on the margin_left key, evaluated at the
// respective y.
2672 start->set_y(start_y);
2673 start->set_x(part->XAtY(margin_left, start_y));
2675 end->set_x(part->XAtY(margin_left, end_y));
// Trace output: extent of the run, the summed interval and the margins of the
// partition that ended it.
2677 tprintf(
"Right run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
2678 start_y, end_y, end->x(), part->XAtY(margin_right, end_y),
2679 part->bounding_box_.right(), part->right_margin_);
const double kMaxLeaderGapFractionOfMin
const int kColumnWidthFactor
const int kMinChainTextValue
const double kMaxSizeRatio
bool NearlyEqual(T x, T y, T tolerance)
const int kHorzStrongTextlineCount
void tprintf(const char *format,...)
const int kMaxColorDistance
std::function< bool(int)> WidthCallback
const int kHorzStrongTextlineHeight
const double kMinBaselineCoverage
bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2)
const double kMaxBaselineError
const int kHorzStrongTextlineAspect
int textord_debug_tabfind
const int kMinLeaderCount
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
const double kMaxSameBlockLineSpacing
const double kMaxTopSpacingFraction
const int kMaxRMSColorNoise
const int kMinStrongTextValue
const double kMaxSpacingDrift
const double kMaxLeaderGapFractionOfMax
int base_char_bottom() const
int base_char_top() const
const TBOX & bounding_box() const
BlobRegionType region_type() const
int NoisyNeighbours() const
void set_flow(BlobTextFlowType value)
void set_owner(tesseract::ColPartition *new_owner)
BlobTextFlowType flow() const
BlobSpecialTextType special_text_type() const
tesseract::ColPartition * owner() const
void set_region_type(BlobRegionType new_type)
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
void Add(const ICOORD &pt)
double Fit(ICOORD *pt1, ICOORD *pt2)
static DPPoint * Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size, DPPoint *points)
int64_t CostWithVariance(const DPPoint *prev)
void set_x(TDimension xin)
rewrite function
TDimension y() const
access function
void set_y(TDimension yin)
rewrite function
TDimension x() const
access function
static ScrollView::Color ColorForPolyBlockType(PolyBlockType type)
Returns a color to draw the given type.
TDimension height() const
bool overlap(const TBOX &box) const
TDimension bottom() const
void add(int32_t value, int32_t count)
double ile(double frac) const
static C_BLOB * FakeBlob(const TBOX &box)
static bool WithinTestRegion(int detail_level, int x, int y)
bool ReleaseNonLeaderBoxes()
void SetSpecialBlobsDensity(const BlobSpecialTextType type, const float density)
BlobTextFlowType flow() const
static void LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks)
bool MatchingStrokeWidth(const ColPartition &other, double fractional_tolerance, double constant_tolerance) const
float SpecialBlobsDensity(const BlobSpecialTextType type) const
static ColPartition * FakePartition(const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
static TO_BLOCK * MakeBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
TBOX BoundsWithoutBox(BLOBNBOX *box)
int SpecialBlobsCount(const BlobSpecialTextType type)
PolyBlockType type() const
static ColPartition * MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list)
void set_side_step(int step)
ColPartition * CopyButDontOwnBlobs()
ColPartition * SplitAt(int split_x)
void AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
void SetColumnGoodness(const WidthCallback &cb)
void RefinePartners(PolyBlockType type, bool get_desperate, ColPartitionGrid *grid)
int median_bottom() const
void SetRightTab(const TabVector *tab_vector)
static ColPartition * MakeLinePartition(BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top)
int bottom_spacing() const
void SetRegionAndFlowTypesFromProjectionValue(int value)
bool IsPulloutType() const
bool VSignificantCoreOverlap(const ColPartition &other) const
BlobRegionType blob_type() const
void ColumnRange(int resolution, ColPartitionSet *columns, int *first_col, int *last_col)
void AddBox(BLOBNBOX *box)
void SmoothPartnerRun(int working_set_count)
BLOBNBOX * OverlapSplitBlob(const TBOX &box)
int VCoreOverlap(const ColPartition &other) const
const TBOX & bounding_box() const
int XAtY(int sort_key, int y) const
static TO_BLOCK * MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
bool IsVerticalType() const
void DisownBoxesNoAssert()
void SetLeftTab(const TabVector *tab_vector)
ScrollView::Color BoxColor() const
int CountOverlappingBoxes(const TBOX &box)
bool MatchingTextColor(const ColPartition &other) const
bool OKDiacriticMerge(const ColPartition &candidate, bool debug) const
bool OKMergeOverlap(const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug)
void set_owns_blobs(bool owns_blobs)
bool MatchingSizes(const ColPartition &other) const
void ComputeSpecialBlobsDensity()
void RemovePartner(bool upper, ColPartition *partner)
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2)
ColPartition * ShallowCopy() const
PolyBlockType PartitionType(ColumnSpanningType flow) const
bool IsInSameColumnAs(const ColPartition &part) const
void CopyRightTab(const ColPartition &src, bool take_box)
ColPartition * SplitAtBlob(BLOBNBOX *split_blob)
ColPartition * SingletonPartner(bool upper)
bool MarkAsLeaderIfMonospaced()
bool ConfirmNoTabViolation(const ColPartition &other) const
void set_bottom_spacing(int spacing)
void SetPartitionType(int resolution, ColPartitionSet *columns)
int RightBlobRule() const
void set_top_spacing(int spacing)
int RightAtY(int y) const
void Absorb(ColPartition *other, const WidthCallback &cb)
static int SortByBBox(const void *p1, const void *p2)
bool MatchingColumns(const ColPartition &other) const
void RemoveBox(BLOBNBOX *box)
void AddPartner(bool upper, ColPartition *partner)
int median_height() const
void CopyLeftTab(const ColPartition &src, bool take_box)
ColumnSpanningType SpanningType(int resolution, int left, int right, int height, int y, int left_margin, int right_margin, int *first_col, int *last_col, int *first_spanned_col)
static double ColorDistanceFromLine(const uint8_t *line1, const uint8_t *line2, const uint8_t *point)
static bool DifferentSizes(int size1, int size2)
void AddPartition(ColPartition *part)
void ExtractCompletedBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
void InsertCompletedBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)