20# include "config_auto.h"
62static BOOL_VAR(textord_tabfind_show_initialtabs,
false,
"Show tab candidates");
63static BOOL_VAR(textord_tabfind_show_finaltabs,
false,
"Show tab vectors");
66 int vertical_x,
int vertical_y,
int resolution)
68 , resolution_(resolution)
69 , image_origin_(0, tright.
y() - 1)
72 v_it_.add_list_after(vlines);
73 SetVerticalSkewAndParallelize(vertical_x, vertical_y);
74 using namespace std::placeholders;
91 BLOBNBOX_IT blob_it(blobs);
94 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
97 if (
InsertBlob(h_spread, v_spread, blob, grid)) {
104 tprintf(
"Inserted %d blobs into grid, %d rejected.\n", b_count, reject_count);
138 BLOBNBOX_IT blob_it(blobs);
139 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
157 int max_gutter_width,
int *required_shift) {
159 int bottom_x = v.
XAtY(bottom_y);
160 int top_x = v.
XAtY(top_y);
161 int start_x = right_to_left ? std::max(top_x, bottom_x) : std::min(top_x, bottom_x);
164 int min_gap = max_gutter_width;
167 while ((blob = sidesearch.
NextSideSearch(right_to_left)) !=
nullptr) {
169 if (box.
bottom() >= top_y || box.
top() <= bottom_y) {
179 int mid_y = (box.
bottom() + box.
top()) / 2;
184 int tab_x = v.
XAtY(mid_y);
187 gap = tab_x - box.
right();
188 if (gap < 0 && box.
left() - tab_x < *required_shift) {
189 *required_shift = box.
left() - tab_x;
192 gap = box.
left() - tab_x;
193 if (gap < 0 && box.
right() - tab_x > *required_shift) {
194 *required_shift = box.
right() - tab_x;
197 if (gap > 0 && gap < min_gap) {
202 return min_gap - abs(*required_shift);
207 BLOBNBOX *bbox,
int *gutter_width,
int *neighbour_gap) {
210 int gutter_x = left ? box.
left() : box.
right();
211 int internal_x = left ? box.
right() : box.
left();
213 int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x;
214 *gutter_width = max_gutter;
218 *gutter_width += tab_gap;
222 tprintf(
"Looking in gutter\n");
227 if (gutter_bbox !=
nullptr) {
229 *gutter_width = left ? tab_x - gutter_box.
right() : gutter_box.
left() - tab_x;
231 if (*gutter_width >= max_gutter) {
233 TBOX gutter_box(box);
235 gutter_box.
set_left(tab_x - max_gutter - 1);
236 gutter_box.
set_right(tab_x - max_gutter);
238 if (tab_gutter < tab_x - 1) {
239 *gutter_width = tab_x - tab_gutter;
242 gutter_box.
set_left(tab_x + max_gutter);
243 gutter_box.
set_right(tab_x + max_gutter + 1);
245 if (tab_gutter > tab_x + 1) {
246 *gutter_width = tab_gutter - tab_x;
250 if (*gutter_width > max_gutter) {
251 *gutter_width = max_gutter;
255 tprintf(
"Looking for neighbour\n");
260 if (neighbour !=
nullptr) {
266 if (left && n_box.
left() < neighbour_edge) {
267 neighbour_edge = n_box.
left();
268 }
else if (!left && n_box.
right() > neighbour_edge) {
269 neighbour_edge = n_box.
right();
272 *neighbour_gap = left ? neighbour_edge - internal_x : internal_x - neighbour_edge;
306 int top_y = box.
top();
307 int bottom_y = box.
bottom();
308 int mid_y = (top_y + bottom_y) / 2;
309 int right = crossing ? (box.
left() + box.
right()) / 2 : box.
right();
310 int min_key, max_key;
313 while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key) {
316 while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key) {
325 int x = v->
XAtY(mid_y);
326 if (
x >= right && (v->
VOverlap(top_y, bottom_y) > 0 ||
328 if (best_v ==
nullptr ||
x < best_x) {
333 key_limit = v->
sort_key() + max_key - min_key;
338 if (v_it_.at_last() || (best_v !=
nullptr && v->
sort_key() > key_limit)) {
342 }
while (!v_it_.at_first());
351 int top_y = box.
top();
352 int bottom_y = box.
bottom();
353 int mid_y = (top_y + bottom_y) / 2;
354 int left = crossing ? (box.
left() + box.
right()) / 2 : box.
left();
355 int min_key, max_key;
358 while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key) {
361 while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
370 int x = v->
XAtY(mid_y);
371 if (
x <= left && (v->
VOverlap(top_y, bottom_y) > 0 ||
373 if (best_v ==
nullptr ||
x > best_x) {
378 key_limit = v->
sort_key() - (max_key - min_key);
383 if (v_it_.at_first() || (best_v !=
nullptr && v->
sort_key() < key_limit)) {
387 }
while (!v_it_.at_last());
395 ICOORDELT_IT it(&column_widths_);
396 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
398 if (w->
x() - 1 <= width && width <= w->
y() + 1) {
408 return size1 > size2 * 2 || size2 > size1 * 2;
414 return size1 > size2 * 5 || size2 > size1 * 5;
423 int min_gutter_width,
double tabfind_aligned_gap_fraction,
427 ComputeColumnWidths(tab_win, part_grid);
431 if (!Deskew(hlines, image_blobs, block, deskew, reskew)) {
434 part_grid->
Deskew(*deskew);
435 ApplyTabConstraints();
436#ifndef GRAPHICS_DISABLED
437 if (textord_tabfind_show_finaltabs) {
464 BLOBNBOX_IT blob_it = &block->
blobs;
466 for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
467 BLOBNBOX *large_blob = large_it.data();
468 if (large_blob->
owner() !=
nullptr) {
469 blob_it.add_to_end(large_it.extract());
474 tprintf(
"Moved %d large blobs to normal list\n", b_count);
475#ifndef GRAPHICS_DISABLED
489 *min_key = std::min(key1, key2);
490 *max_key = std::max(key1, key2);
493#ifndef GRAPHICS_DISABLED
497 TabVector_IT it(&vectors_);
498 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
513 double tabfind_aligned_gap_fraction,
TO_BLOCK *block) {
514#ifndef GRAPHICS_DISABLED
515 if (textord_tabfind_show_initialtabs) {
521 if (image_blobs !=
nullptr) {
525 ScrollView *initial_win = FindTabBoxes(min_gutter_width, tabfind_aligned_gap_fraction);
526 FindAllTabVectors(min_gutter_width);
531#ifndef GRAPHICS_DISABLED
532 if (textord_tabfind_show_initialtabs && initial_win !=
nullptr) {
540#ifndef GRAPHICS_DISABLED
// Debug-display helper: for every blob in `boxes`, reads the four extents of
// its bounding box and draws the corresponding rectangle in `win`.
// `win` is dereferenced unconditionally in the visible code, so callers must
// pass a non-null window.
// NOTE(review): the lines between the embedded numbers 549 and 552 are not
// part of this excerpt; presumably they select the pen colour - confirm.
543static void DisplayBoxVector(
const std::vector<BLOBNBOX *> &boxes,
ScrollView *win) {
544 for (
auto boxe : boxes) {
545 TBOX box = boxe->bounding_box();
546 int left_x = box.
left();
547 int right_x = box.
right();
548 int top_y = box.
top();
549 int bottom_y = box.
bottom();
// Rectangle takes (x1, y1, x2, y2): bottom-left corner first, then top-right.
552 win->
Rectangle(left_x, bottom_y, right_x, top_y);
// Rebuilds left_tab_boxes_ and right_tab_boxes_ from the blobs in this grid.
// Every blob returned by a full grid search is passed to TestBoxForTabs with
// the given min_gutter_width and tabfind_aligned_gap_fraction. For blobs where
// that test returns true, the blob is appended to left_tab_boxes_ if its left
// tab type is not TT_NONE and to right_tab_boxes_ if its right tab type is not
// TT_NONE (a blob can land in both lists). Both lists are then sorted.
// NOTE(review): the return statement and the code that assigns tab_win are not
// part of this excerpt; presumably tab_win (initialised to nullptr) is what is
// returned - confirm.
561ScrollView *TabFind::FindTabBoxes(
int min_gutter_width,
double tabfind_aligned_gap_fraction) {
// Start from empty candidate lists so repeated calls do not accumulate.
562 left_tab_boxes_.clear();
563 right_tab_boxes_.clear();
565 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(
this);
566 gsearch.StartFullSearch();
568 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
569 if (TestBoxForTabs(bbox, min_gutter_width, tabfind_aligned_gap_fraction)) {
571 if (bbox->left_tab_type() !=
TT_NONE) {
572 left_tab_boxes_.push_back(bbox);
574 if (bbox->right_tab_type() !=
TT_NONE) {
575 right_tab_boxes_.push_back(bbox);
// Left candidates are ordered with StdSortByBoxLeft, right candidates with
// StdSortRightToLeft.
581 std::sort(left_tab_boxes_.begin(), left_tab_boxes_.end(), StdSortByBoxLeft<BLOBNBOX>);
582 std::sort(right_tab_boxes_.begin(), right_tab_boxes_.end(), StdSortRightToLeft<BLOBNBOX>);
583 ScrollView *tab_win =
nullptr;
// Optional debug display, compiled out when GRAPHICS_DISABLED is defined and
// gated at run time by textord_tabfind_show_initialtabs.
584#ifndef GRAPHICS_DISABLED
585 if (textord_tabfind_show_initialtabs) {
590 DisplayBoxVector(left_tab_boxes_, tab_win);
591 DisplayBoxVector(right_tab_boxes_, tab_win);
598bool TabFind::TestBoxForTabs(BLOBNBOX *bbox,
int min_gutter_width,
599 double tabfind_aligned_gap_fraction) {
600 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> radsearch(
this);
601 TBOX box = bbox->bounding_box();
603 int left_column_edge = bbox->left_rule();
604 int right_column_edge = bbox->right_rule();
606 int left_x = box.
left();
607 int right_x = box.right();
608 int top_y = box.top();
609 int bottom_y = box.bottom();
610 int height = box.height();
613 tprintf(
"Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n", left_x, top_y, right_x,
614 bottom_y, left_column_edge, right_column_edge);
618 radsearch.StartRadSearch((left_x + right_x) / 2, (top_y + bottom_y) / 2, radius);
623 int min_spacing =
static_cast<int>(height * tabfind_aligned_gap_fraction);
624 if (min_gutter_width > min_spacing) {
625 min_spacing = min_gutter_width;
628 if (min_gutter_width > min_ragged_gutter) {
629 min_ragged_gutter = min_gutter_width;
631 int target_right = left_x - min_spacing;
632 int target_left = right_x + min_spacing;
648 bool is_left_tab =
true;
649 bool is_right_tab =
true;
650 bool maybe_ragged_left =
true;
651 bool maybe_ragged_right =
true;
652 int maybe_left_tab_up = 0;
653 int maybe_right_tab_up = 0;
654 int maybe_left_tab_down = 0;
655 int maybe_right_tab_down = 0;
656 if (bbox->leader_on_left()) {
658 maybe_ragged_left =
false;
659 maybe_left_tab_up = -INT32_MAX;
660 maybe_left_tab_down = -INT32_MAX;
662 if (bbox->leader_on_right()) {
663 is_right_tab =
false;
664 maybe_ragged_right =
false;
665 maybe_right_tab_up = -INT32_MAX;
666 maybe_right_tab_down = -INT32_MAX;
669 BLOBNBOX *neighbour =
nullptr;
670 while ((neighbour = radsearch.NextRadSearch()) !=
nullptr) {
671 if (neighbour == bbox) {
674 TBOX nbox = neighbour->bounding_box();
675 int n_left = nbox.left();
676 int n_right = nbox.right();
678 tprintf(
"Neighbour at (%d,%d)->(%d,%d)\n", n_left, nbox.bottom(), n_right, nbox.top());
682 if (n_right > right_column_edge || n_left < left_column_edge ||
683 left_x < neighbour->left_rule() || right_x > neighbour->right_rule()) {
686 int n_mid_x = (n_left + n_right) / 2;
687 int n_mid_y = (nbox.top() + nbox.bottom()) / 2;
688 if (n_mid_x <= left_x && n_right >= target_right) {
693 if (n_mid_y < top_y) {
694 maybe_left_tab_down = -INT32_MAX;
696 if (n_mid_y > bottom_y) {
697 maybe_left_tab_up = -INT32_MAX;
699 }
else if (
NearlyEqual(left_x, n_left, alignment_tolerance)) {
703 if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX) {
706 if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX) {
707 ++maybe_left_tab_down;
709 }
else if (n_left < left_x && n_right >= left_x) {
712 tprintf(
"Maybe Not a left tab\n");
714 if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX) {
717 if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX) {
718 --maybe_left_tab_down;
721 if (n_left < left_x && nbox.y_overlap(box) && n_right >= target_right) {
722 maybe_ragged_left =
false;
724 tprintf(
"Not a ragged left\n");
727 if (n_mid_x >= right_x && n_left <= target_left) {
731 is_right_tab =
false;
732 if (n_mid_y < top_y) {
733 maybe_right_tab_down = -INT32_MAX;
735 if (n_mid_y > bottom_y) {
736 maybe_right_tab_up = -INT32_MAX;
738 }
else if (
NearlyEqual(right_x, n_right, alignment_tolerance)) {
740 tprintf(
"Maybe a right tab\n");
742 if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX) {
743 ++maybe_right_tab_up;
745 if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX) {
746 ++maybe_right_tab_down;
748 }
else if (n_right > right_x && n_left <= right_x) {
751 tprintf(
"Maybe Not a right tab\n");
753 if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX) {
754 --maybe_right_tab_up;
756 if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX) {
757 --maybe_right_tab_down;
760 if (n_right > right_x && nbox.y_overlap(box) && n_left <= target_left) {
761 maybe_ragged_right =
false;
763 tprintf(
"Not a ragged right\n");
766 if (maybe_left_tab_down == -INT32_MAX && maybe_left_tab_up == -INT32_MAX &&
767 maybe_right_tab_down == -INT32_MAX && maybe_right_tab_up == -INT32_MAX) {
771 if (is_left_tab || maybe_left_tab_up > 1 || maybe_left_tab_down > 1) {
773 }
else if (maybe_ragged_left && ConfirmRaggedLeft(bbox, min_ragged_gutter)) {
776 bbox->set_left_tab_type(
TT_NONE);
778 if (is_right_tab || maybe_right_tab_up > 1 || maybe_right_tab_down > 1) {
780 }
else if (maybe_ragged_right && ConfirmRaggedRight(bbox, min_ragged_gutter)) {
783 bbox->set_right_tab_type(
TT_NONE);
786 tprintf(
"Left result = %s, Right result=%s\n",
794 return bbox->left_tab_type() !=
TT_NONE || bbox->right_tab_type() !=
TT_NONE;
// Tests the region directly to the left of bbox for a ragged-left tab.
// Builds a box with the same vertical extent as bbox's bounding box that spans
// horizontally from (left - min_gutter) to the blob's left edge, and returns
// the result of NothingYOverlapsInBox for that box, with bbox's own bounding
// box as the target.
799bool TabFind::ConfirmRaggedLeft(BLOBNBOX *bbox,
int min_gutter) {
// Copy the blob's box, then collapse it onto the blob's left edge.
800 TBOX search_box(bbox->bounding_box());
801 search_box.set_right(search_box.left());
// left() is still the blob's left edge here, so this widens the box by
// min_gutter to the left.
802 search_box.set_left(search_box.left() - min_gutter);
803 return NothingYOverlapsInBox(search_box, bbox->bounding_box());
// Mirror image of ConfirmRaggedLeft for the right side of bbox.
// Builds a box with the same vertical extent as bbox's bounding box that spans
// horizontally from the blob's right edge to (right + min_gutter), and returns
// the result of NothingYOverlapsInBox for that box, with bbox's own bounding
// box as the target.
808bool TabFind::ConfirmRaggedRight(BLOBNBOX *bbox,
int min_gutter) {
// Copy the blob's box, then collapse it onto the blob's right edge.
809 TBOX search_box(bbox->bounding_box());
810 search_box.set_left(search_box.right());
// right() is still the blob's right edge here, so this widens the box by
// min_gutter to the right.
811 search_box.set_right(search_box.right() + min_gutter);
812 return NothingYOverlapsInBox(search_box, bbox->bounding_box());
// Runs a rectangle search over search_box and examines each blob found,
// looking for one whose bounding box y-overlaps target_box and is not equal to
// target_box itself (so the blob that owns target_box does not count against
// itself).
// NOTE(review): the declarations of rsearch and blob, and both return
// statements, are not part of this excerpt. Presumably the function returns
// false as soon as such a blob is found and true otherwise - confirm.
817bool TabFind::NothingYOverlapsInBox(
const TBOX &search_box,
const TBOX &target_box) {
819 rsearch.StartRectSearch(search_box);
821 while ((blob = rsearch.NextRectSearch()) !=
nullptr) {
822 const TBOX &box = blob->bounding_box();
// A box identical to the target is excluded from the overlap test.
823 if (box.y_overlap(target_box) && !(box == target_box)) {
830void TabFind::FindAllTabVectors(
int min_gutter_width) {
832 TabVector_LIST dummy_vectors;
841 &dummy_vectors, &vertical_x, &vertical_y);
843 &vertical_x, &vertical_y);
844 if (vector_count > 0) {
849 dummy_vectors.clear();
850 for (
auto bbox : left_tab_boxes_) {
855 for (
auto bbox : right_tab_boxes_) {
861 tprintf(
"Beginning real tab search with vertical = %d,%d...\n", vertical_x, vertical_y);
869 &vertical_x, &vertical_y);
875 TabVector_IT v_it(&vectors_);
876 v_it.add_list_after(&dummy_vectors);
878 SetVerticalSkewAndParallelize(vertical_x, vertical_y);
883 TabVector_LIST *vectors,
int *vertical_x,
int *vertical_y) {
884 TabVector_IT vector_it(
vectors);
885 int vector_count = 0;
888 const std::vector<BLOBNBOX *> &boxes = right ? right_tab_boxes_ : left_tab_boxes_;
889 for (
auto bbox : boxes) {
892 TabVector *vector = FindTabVector(search_size_multiple, min_gutter_width, alignment, bbox,
893 vertical_x, vertical_y);
894 if (vector !=
nullptr) {
896 vector_it.add_to_end(vector);
910TabVector *TabFind::FindTabVector(
int search_size_multiple,
int min_gutter_width,
911 TabAlignment alignment, BLOBNBOX *bbox,
int *vertical_x,
913 int height = std::max(
static_cast<int>(bbox->bounding_box().height()),
gridsize());
914 AlignedBlobParams align_params(*vertical_x, *vertical_y, height, search_size_multiple,
922void TabFind::SetVerticalSkewAndParallelize(
int vertical_x,
int vertical_y) {
928 v_it_.set_to_list(&vectors_);
929 for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
930 TabVector *v = v_it_.data();
938void TabFind::SortVectors() {
940 v_it_.set_to_list(&vectors_);
// Walks every vector in vectors_ and processes those that are not separators.
// On the path that prints "Too few boxes" the vector is extracted from the
// list and deleted, and v_it_ is re-pointed at vectors_ (it may have been
// referring to the element just removed). The other visible path prints
// "Evaluated tab".
// NOTE(review): the evaluation call and the condition that selects between
// the two paths are not part of this excerpt, and neither are any guards
// around the Print calls.
944void TabFind::EvaluateTabs() {
945 TabVector_IT rule_it(&vectors_);
946 for (rule_it.mark_cycle_pt(); !rule_it.cycled_list(); rule_it.forward()) {
947 TabVector *tab = rule_it.data();
948 if (!tab->IsSeparator()) {
952 tab->Print(
"Too few boxes");
// extract() unlinks the current element; the caller owns it, hence the delete.
954 delete rule_it.extract();
955 v_it_.set_to_list(&vectors_);
957 tab->Print(
"Evaluated tab");
// Derives the set of common column widths from the partitions in part_grid.
// Visible sequence of work:
//   1. ApplyPartitionsToColumnWidths(part_grid, &col_widths) - called with a
//      STATS accumulator.
//   2. MakeColumnWidths(col_widths_size, &col_widths) - consumes that
//      accumulator.
//   3. ApplyPartitionsToColumnWidths(part_grid, nullptr) - a second pass with
//      no accumulator, which selects that function's other branch.
// tab_win is only consulted inside the GRAPHICS_DISABLED-guarded debug
// sections, and only when non-null.
// NOTE(review): the definition of col_widths_size and the bodies of the debug
// sections are not part of this excerpt.
966void TabFind::ComputeColumnWidths(ScrollView *tab_win, ColPartitionGrid *part_grid) {
967#ifndef GRAPHICS_DISABLED
968 if (tab_win !=
nullptr) {
974 STATS col_widths(0, col_widths_size);
975 ApplyPartitionsToColumnWidths(part_grid, &col_widths);
976#ifndef GRAPHICS_DISABLED
977 if (tab_win !=
nullptr) {
985 MakeColumnWidths(col_widths_size, &col_widths);
// Second pass: a null STATS pointer switches ApplyPartitionsToColumnWidths to
// its non-accumulating branch.
987 ApplyPartitionsToColumnWidths(part_grid,
nullptr);
// Visits every partition in part_grid and measures the distance between the
// tab vector to the left of its first blob and the tab vector to the right of
// its last blob.
//   col_widths != nullptr: the two vectors are linked via AddPartnerVector.
//     NOTE(review): presumably the measured width is also added to
//     *col_widths on this path; that line is not part of this excerpt.
//   col_widths == nullptr: the entries of column_widths_ are scanned for one
//     whose y() matches the measured width via NearlyEqual<int>(..., 1), and
//     its x() may be raised to true_width.
// NOTE(review): the declarations of gsearch, part and true_width, and the
// bodies of the early-exit `if`s (presumably `continue`), are not part of this
// excerpt.
996void TabFind::ApplyPartitionsToColumnWidths(ColPartitionGrid *part_grid, STATS *col_widths) {
1000 gsearch.StartFullSearch();
1002 while ((part = gsearch.NextFullSearch()) !=
nullptr) {
1003 BLOBNBOX_C_IT blob_it(part->boxes());
1004 if (blob_it.empty()) {
// left_blob is the blob at the iterator's initial position; right_blob is the
// last blob in the partition's list.
1007 BLOBNBOX *left_blob = blob_it.data();
1008 blob_it.move_to_last();
1009 BLOBNBOX *right_blob = blob_it.data();
// Both lookups pass crossing=true, extended=false.
1010 TabVector *left_vector =
LeftTabForBox(left_blob->bounding_box(),
true,
false);
// A missing vector, or a right tab found on the left side, disqualifies the
// partition.
1011 if (left_vector ==
nullptr || left_vector->IsRightTab()) {
1014 TabVector *right_vector =
RightTabForBox(right_blob->bounding_box(),
true,
false);
1015 if (right_vector ==
nullptr || right_vector->IsLeftTab()) {
// Evaluate each vector's x at the bottom of the corresponding blob.
1019 int line_left = left_vector->XAtY(left_blob->bounding_box().bottom());
1020 int line_right = right_vector->XAtY(right_blob->bounding_box().bottom());
1022 int width = line_right - line_left;
1023 if (col_widths !=
nullptr) {
1024 AddPartnerVector(left_blob, right_blob, left_vector, right_vector);
1030 ICOORDELT_IT it(&column_widths_);
1031 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1032 ICOORDELT *w = it.data();
1033 if (NearlyEqual<int>(width, w->y(), 1)) {
// Only raise x(): true_width must not exceed y() and must exceed the current
// x().
1035 if (true_width <= w->
y() && true_width > w->x()) {
1036 w->set_x(true_width);
// Turns the histogram in *col_widths into entries of column_widths_.
// While the histogram still holds samples, the loop takes the current mode,
// removes its pile, then absorbs the contiguous run of non-empty piles just
// below it (down to, but not including, 0) and just above it (up to, but not
// including, col_widths_size), summing all removed counts into col_count.
// A new ICOORDELT(0, width) is appended to column_widths_ for the mode.
// The histogram is consumed (emptied) by this function.
// NOTE(review): the condition guarding the allocation, the increment clause of
// the second `for`, and the start of the statement ending at embedded line
// 1073 are not part of this excerpt.
1048void TabFind::MakeColumnWidths(
int col_widths_size, STATS *col_widths) {
1049 ICOORDELT_IT w_it(&column_widths_);
// Captured before the histogram is drained; used as the denominator below.
1050 int total_col_count = col_widths->get_total();
1051 while (col_widths->get_total() > 0) {
1052 int width = col_widths->mode();
1053 int col_count = col_widths->pile_count(width);
// Adding a negative count removes the pile, so the next mode() differs.
1054 col_widths->add(width, -col_count);
// Absorb neighbouring piles below the mode.
1056 for (
int left = width - 1; left > 0 && col_widths->pile_count(left) > 0; --left) {
1057 int new_count = col_widths->pile_count(left);
1058 col_count += new_count;
1059 col_widths->add(left, -new_count);
// Absorb neighbouring piles above the mode.
1061 for (
int right = width + 1; right < col_widths_size && col_widths->pile_count(right) > 0;
1063 int new_count = col_widths->pile_count(right);
1064 col_count += new_count;
1065 col_widths->add(right, -new_count);
// x is initialised to 0 and y to the mode width.
1069 auto *w =
new ICOORDELT(0, width);
1070 w_it.add_after_then_move(w);
1073 100.0 * col_count / total_col_count);
1081void TabFind::MarkVerticalText() {
1083 tprintf(
"Checking for vertical lines\n");
1086 gsearch.StartFullSearch();
1087 BLOBNBOX *blob =
nullptr;
1088 while ((blob = gsearch.NextFullSearch()) !=
nullptr) {
1092 if (blob->UniquelyVertical()) {
// Estimates the median gutter width from the vectors in `lines`.
// Only left-tab, non-separator vectors that have a single partner are
// considered. For each such vector, the x-distance from the previous
// partner's start point to this vector's start point is recorded in `gaps`
// (when that distance is positive and a previous partner exists), and the
// partner's start x then becomes the new prev_right.
// Returns the median of `gaps`, truncated to int.
// NOTE(review): despite its name, `heights` accumulates the horizontal
// distance (difference of startpt().x()) between a vector and its partner,
// i.e. a width; it is only used in the debug print here.
// NOTE(review): the definition of max_gap, the body of the filtering `if`
// (presumably `continue`) and any guard around the tprintf are not part of
// this excerpt.
1098int TabFind::FindMedianGutterWidth(TabVector_LIST *lines) {
1099 TabVector_IT it(lines);
// -1 means no previous partner has been seen yet.
1100 int prev_right = -1;
1102 STATS gaps(0, max_gap - 1);
1103 STATS heights(0, max_gap - 1);
1104 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1105 TabVector *v = it.data();
1106 TabVector *partner = v->GetSinglePartner();
1107 if (!v->IsLeftTab() || v->IsSeparator() || !partner) {
1110 heights.add(partner->startpt().x() - v->startpt().x(), 1);
1111 if (prev_right > 0 && v->startpt().x() > prev_right) {
1112 gaps.add(v->startpt().x() - prev_right, 1);
1114 prev_right = partner->startpt().x();
1117 tprintf(
"TabGutter total %d median_gap %.2f median_hgt %.2f\n", gaps.get_total(),
1118 gaps.median(), heights.median());
1123 return static_cast<int>(gaps.median());
1131BLOBNBOX *TabFind::AdjacentBlob(
const BLOBNBOX *bbox,
bool look_left,
bool ignore_images,
1132 double min_overlap_fraction,
int gap_limit,
int top_y,
1134 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> sidesearch(
this);
1135 const TBOX &box = bbox->bounding_box();
1136 int left = box.left();
1137 int right = box.right();
1138 int mid_x = (left + right) / 2;
1139 sidesearch.StartSideSearch(mid_x, bottom_y, top_y);
1142 BLOBNBOX *result =
nullptr;
1143 BLOBNBOX *neighbour =
nullptr;
1144 while ((neighbour = sidesearch.NextSideSearch(look_left)) !=
nullptr) {
1146 tprintf(
"Adjacent blob: considering box:");
1147 neighbour->bounding_box().print();
1149 if (neighbour == bbox || (ignore_images && neighbour->region_type() <
BRT_UNKNOWN)) {
1152 const TBOX &nbox = neighbour->bounding_box();
1153 int n_top_y = nbox.top();
1154 int n_bottom_y = nbox.bottom();
1155 int v_overlap = std::min(n_top_y, top_y) - std::max(n_bottom_y, bottom_y);
1156 int height = top_y - bottom_y;
1157 int n_height = n_top_y - n_bottom_y;
1158 if (v_overlap > min_overlap_fraction * std::min(height, n_height) &&
1159 (min_overlap_fraction == 0.0 || !
DifferentSizes(height, n_height))) {
1160 int n_left = nbox.left();
1161 int n_right = nbox.right();
1162 int h_gap = std::max(n_left, left) - std::min(n_right, right);
1163 int n_mid_x = (n_left + n_right) / 2;
1164 if (look_left == (n_mid_x < mid_x) && n_mid_x != mid_x) {
1165 if (h_gap > gap_limit) {
1168 tprintf(
"Giving up due to big gap = %d vs %d\n", h_gap, gap_limit);
1172 if (h_gap > 0 && (look_left ? neighbour->right_tab_type() : neighbour->left_tab_type()) >=
1177 tprintf(
"Collision with like tab of type %d at %d,%d\n",
1178 look_left ? neighbour->right_tab_type() : neighbour->left_tab_type(), n_left,
1185 if (result ==
nullptr || h_gap < best_gap) {
1199 tprintf(
"Insufficient overlap\n");
1203 tprintf(
"Giving up due to end of search\n");
1212void TabFind::AddPartnerVector(BLOBNBOX *left_blob, BLOBNBOX *right_blob, TabVector *left,
1214 const TBOX &left_box = left_blob->bounding_box();
1215 const TBOX &right_box = right_blob->bounding_box();
1216 if (left->IsSeparator()) {
1219 if (v !=
nullptr && v != left && v->IsLeftTab() &&
1220 v->XAtY(left_box.top()) > left->XAtY(left_box.top())) {
1222 left->ExtendToBox(left_blob);
1227 v_it_.move_to_first();
1230 if (right->IsSeparator()) {
1233 tprintf(
"Box edge (%d,%d-%d)", right_box.right(), right_box.bottom(), right_box.top());
1234 right->Print(
" looking for improvement for");
1237 if (v !=
nullptr && v != right && v->IsRightTab() &&
1238 v->XAtY(right_box.top()) < right->XAtY(right_box.top())) {
1240 right->ExtendToBox(right_blob);
1242 right->Print(
"Extended vector");
1248 v_it_.move_to_first();
1250 right->Print(
"Created new vector");
1254 left->AddPartner(right);
1255 right->AddPartner(left);
// Retires vectors that are separators or have no partner: each such vector is
// extracted from vectors_ and appended to dead_vectors_ (not deleted), after
// which v_it_ is re-pointed at vectors_ because it may have referred to the
// moved element.
// NOTE(review): the handling of vectors that do not match the condition (any
// `else` branch) is not part of this excerpt.
1260void TabFind::CleanupTabs() {
1264 TabVector_IT it(&vectors_);
1265 TabVector_IT dead_it(&dead_vectors_);
1266 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1267 TabVector *v = it.data();
1268 if (v->IsSeparator() || v->Partnerless()) {
// Ownership moves from vectors_ to dead_vectors_.
1269 dead_it.add_after_then_move(it.extract());
1270 v_it_.set_to_list(&vectors_);
1279 BLOBNBOX_IT it(blobs);
1280 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1281 it.data()->rotate_box(rotation);
1287bool TabFind::Deskew(TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs,
TO_BLOCK *block,
1289 ComputeDeskewVectors(deskew, reskew);
1300 TabVector_IT h_it(hlines);
1301 for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1302 TabVector *h = h_it.data();
1305 TabVector_IT d_it(&dead_vectors_);
1306 for (d_it.mark_cycle_pt(); !d_it.cycled_list(); d_it.forward()) {
1307 TabVector *d = d_it.data();
1310 SetVerticalSkewAndParallelize(0, 1);
1313 grid_box.rotate_large(*deskew);
1314 Init(
gridsize(), grid_box.botleft(), grid_box.topright());
1324 TabVector_LIST *horizontal_lines,
int *min_gutter_width) {
1328 TabVector_LIST ex_verticals;
1329 TabVector_IT ex_v_it(&ex_verticals);
1330 TabVector_LIST vlines;
1331 TabVector_IT v_it(&vlines);
1332 while (!v_it_.empty()) {
1336 ex_v_it.add_after_then_move(v);
1338 v_it.add_after_then_move(v);
1345 int median_gutter = FindMedianGutterWidth(&vlines);
1346 if (median_gutter > *min_gutter_width) {
1347 *min_gutter_width = median_gutter;
1350 TabVector_IT h_it(horizontal_lines);
1351 for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1355 v_it_.add_list_after(horizontal_lines);
1356 v_it_.move_to_first();
1357 h_it.set_to_list(horizontal_lines);
1358 h_it.add_list_after(&ex_verticals);
1369 v_it_.move_to_first();
1370 for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
1371 if (!v_it_.data()->IsSeparator()) {
1372 delete v_it_.extract();
1381 TabVector_LIST temp_list;
1382 TabVector_IT temp_it(&temp_list);
1383 v_it_.move_to_first();
1387 while (!v_it_.empty()) {
1391 temp_it.add_before_then_move(v);
1393 v_it_.add_list_after(&temp_list);
1394 v_it_.move_to_first();
1397 int tmp = grid_box.
left();
// Fills *deskew and *reskew. In the visible code, *reskew is set to *deskew
// with the y component negated (same x, opposite y).
// NOTE(review): the declaration of `length` and the assignment of *deskew are
// not part of this excerpt. Presumably *deskew is a unit rotation vector
// derived from vertical_skew_, which would make *reskew the inverse rotation -
// confirm.
1404void TabFind::ComputeDeskewVectors(
FCOORD *deskew,
FCOORD *reskew) {
1406 length = sqrt(length);
1409 reskew->
set_x(deskew->
x());
1410 reskew->
set_y(-deskew->
y());
// Sets up and applies constraints on the vectors in vectors_ in four passes
// over the list:
//   1. SetupConstraints() on every vector.
//   2. SetupPartnerConstraints() (no-argument overload) on vectors.
//   3. For each right tab, scan the vectors that follow it in list order
//      (stopping when the iterator wraps to the first element) and call
//      SetupPartnerConstraints(partner) for a following left tab that
//      vertically overlaps it.
//   4. ApplyConstraints() on every vector that is not a separator.
// NOTE(review): the bodies of the filtering `if`s (presumably `continue`),
// any filter in pass 2, and what follows the call in pass 3 (e.g. a `break`)
// are not part of this excerpt.
1415void TabFind::ApplyTabConstraints() {
1416 TabVector_IT it(&vectors_);
// Pass 1.
1417 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1418 TabVector *v = it.data();
1419 v->SetupConstraints();
// Pass 2.
1421 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1422 TabVector *v = it.data();
1426 v->SetupPartnerConstraints();
// Pass 3.
1431 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1432 TabVector *v = it.data();
1433 if (!v->IsRightTab()) {
// Start from a copy of the outer iterator so the scan begins just after v.
1437 TabVector_IT partner_it(it);
1438 for (partner_it.forward(); !partner_it.at_first(); partner_it.forward()) {
1439 TabVector *partner = partner_it.data();
1440 if (!partner->IsLeftTab() || !v->VOverlap(*partner)) {
1443 v->SetupPartnerConstraints(partner);
// Pass 4.
1447 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1448 TabVector *v = it.data();
1449 if (!v->IsSeparator()) {
1450 v->ApplyConstraints();
#define BOOL_VAR(name, val, comment)
const int kColumnWidthFactor
const int kTabRadiusFactor
bool NearlyEqual(T x, T y, T tolerance)
const int kMinVerticalSearch
const int kRaggedGutterMultiple
void tprintf(const char *format,...)
const double kMaxGutterWidthAbsolute
const double kCosMaxSkewAngle
const int kMaxVerticalSearch
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
const int kMaxRaggedSearch
const double kLineFragmentAspectRatio
int textord_debug_tabfind
const double kMinFractionalLinesInColumn
const double kAlignedFraction
const int kMinLinesInColumn
const double kMinColumnWidth
const int kMinEvaluatedTabs
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
const TBOX & bounding_box() const
BlobRegionType region_type() const
void set_left_rule(int new_left)
void set_left_crossing_rule(int new_left)
BlobTextFlowType flow() const
void set_right_crossing_rule(int new_right)
tesseract::ColPartition * owner() const
void set_right_rule(int new_right)
bool joined_to_prev() const
static bool UnMergeableType(BlobRegionType type)
BLOBNBOX_LIST small_blobs
void plot_graded_blobs(ScrollView *to_win)
void plot_noise_blobs(ScrollView *to_win)
void DeleteUnownedNoise()
BLOBNBOX_LIST large_blobs
BLOBNBOX_LIST noise_blobs
void set_with_shrink(int x, int y)
Set from the given x,y, shrinking the vector to fit if needed.
TDimension y() const
access function
TDimension x() const
access function
void set_y(float yin)
rewrite function
void set_x(float xin)
rewrite function
TDimension height() const
const ICOORD & botleft() const
void rotate_large(const FCOORD &vec)
const ICOORD & topright() const
TDimension bottom() const
static bool WithinTestRegion(int detail_level, int x, int y)
TabVector * FindVerticalAlignment(AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
ScrollView * DisplayTabs(const char *window_name, ScrollView *tab_win)
BBC * NextSideSearch(bool right_to_left)
void StartSideSearch(int x, int ymin, int ymax)
const ICOORD & bleft() const
const ICOORD & tright() const
void DisplayBoxes(ScrollView *window)
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
ScrollView * MakeWindow(int x, int y, const char *window_name)
void Deskew(const FCOORD &deskew)
static void RotateBlobList(const FCOORD &rotation, BLOBNBOX_LIST *blobs)
static bool DifferentSizes(int size1, int size2)
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
void ResetForVerticalText(const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
bool CommonWidth(int width)
void DontFindTabVectors(BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
int resolution_
Of source image in pixels per inch.
bool FindTabVectors(TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
void GutterWidthAndNeighbourGap(int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
void TidyBlobs(TO_BLOCK *block)
static bool VeryDifferentSizes(int size1, int size2)
void SetBlockRuleEdges(TO_BLOCK *block)
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
TabVector_LIST * vectors()
TabFind(int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
ICOORD vertical_skew_
Estimate of true vertical in this image.
void SetBlobRuleEdges(BLOBNBOX_LIST *blobs)
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
TabVector * RightTabForBox(const TBOX &box, bool crossing, bool extended)
int GutterWidth(int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
TabVector * LeftTabForBox(const TBOX &box, bool crossing, bool extended)
ScrollView * DisplayTabVectors(ScrollView *tab_win)
static int SortKey(const ICOORD &vertical, int x, int y)
void Rotate(const FCOORD &rotation)
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
int ExtendedOverlap(int top_y, int bottom_y) const
void Display(ScrollView *tab_win)
int VOverlap(const TabVector &other) const
static int SortVectorsByKey(const void *v1, const void *v2)
void Rectangle(int x1, int y1, int x2, int y2)