20# include "config_auto.h"
56 "max fraction of mean blob width allowed for vertical gaps in "
60 "Fraction of box matches required to declare a line vertical");
65 auto *constraints =
new TabConstraint_LIST;
66 TabConstraint_IT it(constraints);
67 it.add_to_end(constraint);
80 int y_min = -INT32_MAX;
81 int y_max = INT32_MAX;
83 tprintf(
"Testing constraint compatibility\n");
85 GetConstraints(list1, &y_min, &y_max);
86 GetConstraints(list2, &y_min, &y_max);
88 tprintf(
"Resulting range = [%d,%d]\n", y_min, y_max);
90 return y_max >= y_min;
99 TabConstraint_IT it(list2);
101 tprintf(
"Merging constraints\n");
104 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
107 constraint->vector_->
Print(
"Merge");
109 if (constraint->is_top_) {
116 it.add_list_before(list2);
123 int y_min = -INT32_MAX;
124 int y_max = INT32_MAX;
125 GetConstraints(constraints, &y_min, &y_max);
126 int y = (y_min + y_max) / 2;
127 TabConstraint_IT it(constraints);
128 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
131 if (constraint->is_top_) {
144 y_min_ = vector->
endpt().
y();
153void TabConstraint::GetConstraints(TabConstraint_LIST *constraints,
int *y_min,
int *y_max) {
154 TabConstraint_IT it(constraints);
155 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
158 tprintf(
"Constraint is [%d,%d]", constraint->y_min_, constraint->y_max_);
159 constraint->vector_->Print(
" for");
161 *y_min = std::max(*y_min, constraint->y_min_);
162 *y_max = std::min(*y_max, constraint->y_max_);
177 int extended_end_y, BLOBNBOX_CLIST *good_points,
int *vertical_x,
179 auto *vector =
new TabVector(extended_start_y, extended_end_y, alignment, good_points);
180 if (!vector->
Fit(vertical,
false)) {
185 vertical = vector->endpt_ - vector->startpt_;
187 *vertical_x += vertical.
x() * weight;
188 *vertical_y += vertical.
y() * weight;
198 : extended_ymin_(src.extended_ymin_)
199 , extended_ymax_(src.extended_ymax_)
201 , needs_evaluation_(true)
202 , alignment_(alignment) {
203 BLOBNBOX_C_IT it(&boxes_);
214 SortKey(vertical_skew, (startpt_.
x() + endpt_.
x()) / 2, (startpt_.
y() + endpt_.
y()) / 2);
216 Print(
"Constructed a new tab vector:");
227 copy->startpt_ = startpt_;
228 copy->endpt_ = endpt_;
229 copy->alignment_ = alignment_;
230 copy->extended_ymax_ = extended_ymax_;
231 copy->extended_ymin_ = extended_ymin_;
232 copy->intersects_other_lines_ = intersects_other_lines_;
240 BLOBNBOX_C_IT it(&boxes_);
244 while (!it.at_last() && box.
top() <= new_box.
top()) {
245 if (blob == new_blob) {
252 if (box.
top() >= new_box.
top()) {
253 it.add_before_stay_put(new_blob);
259 it.add_after_stay_put(new_blob);
265 startpt_.
set_y(start_y);
275 startpt_.
rotate(rotation);
277 int dx = endpt_.
x() - startpt_.
x();
278 int dy = endpt_.
y() - startpt_.
y();
279 if ((dy < 0 && abs(dy) > abs(dx)) || (dx < 0 && abs(dx) > abs(dy))) {
299 TabVector_C_IT it(&partners_);
301 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
303 if (partner->top_constraints_ ==
nullptr || partner->bottom_constraints_ ==
nullptr) {
304 partner->
Print(
"Impossible: has no constraints");
305 Print(
"This vector has it as a partner");
308 if (prev_partner ==
nullptr) {
316 partner->bottom_constraints_)) {
318 partner->bottom_constraints_);
321 prev_partner = partner;
343 if (top_constraints_ !=
nullptr) {
346 if (bottom_constraints_ !=
nullptr) {
354 TabVector_IT it1(vectors);
355 for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
357 TabVector_IT it2(it1);
358 for (it2.forward(); !it2.at_first(); it2.forward()) {
360 if (v2->
SimilarTo(vertical, *v1, grid)) {
364 v2->
Print(
"Merging");
365 v1->
Print(
"by deleting");
369 v2->
Print(
"Producing");
372 merged_vector -= v2->
startpt();
374 v2->
Print(
"Garbage result of merge?");
391 int v_scale = abs(vertical.
y());
406 if (grid ==
nullptr) {
414 int top_y = mover->endpt_.
y();
415 int bottom_y = mover->startpt_.
y();
416 int left = std::min(mover->
XAtY(top_y), mover->
XAtY(bottom_y));
417 int right = std::max(mover->
XAtY(top_y), mover->
XAtY(bottom_y));
418 int shift = abs(sort_key_ - other.sort_key_) / v_scale;
430 if (box.
top() > bottom_y) {
433 if (box.
bottom() < top_y) {
437 int right_at_box = left_at_box;
439 right_at_box += shift;
441 left_at_box -= shift;
443 if (std::min(right_at_box,
static_cast<int>(box.
right())) >
444 std::max(left_at_box,
static_cast<int>(box.
left()))) {
455 extended_ymin_ = std::min(extended_ymin_, other->extended_ymin_);
456 extended_ymax_ = std::max(extended_ymax_, other->extended_ymax_);
458 alignment_ = other->alignment_;
461 BLOBNBOX_C_IT it1(&boxes_);
462 BLOBNBOX_C_IT it2(&other->boxes_);
463 while (!it2.empty()) {
469 while (box1.
bottom() < box2.
bottom() && !it1.at_last()) {
475 it1.add_to_end(bbox2);
476 }
else if (bbox1 != bbox2) {
477 it1.add_before_stay_put(bbox2);
492 TabVector_C_IT it(&partners_);
495 if (it.data() == partner) {
499 it.add_after_then_move(partner);
504 TabVector_C_IT it(&partners_);
505 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
506 if (it.data() == other) {
// Table of human-readable alignment names for diagnostic output.
// It is indexed by alignment_ to supply the second %s of the
// vector-description format string that follows this definition.
// NOTE(review): the entry order presumably mirrors the alignment enum, whose
// declaration is not visible here -- confirm before reordering or adding names.
514static const char *
const kAlignmentNames[] = {
"Left Aligned",
"Left Ragged",
"Center",
515 "Right Aligned",
"Right Ragged",
"Separator"};
520 "%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d,"
522 prefix, kAlignmentNames[alignment_], startpt_.
x(), startpt_.
y(), endpt_.
x(), endpt_.
y(),
523 mean_width_, percent_score_, sort_key_, boxes_.length(), partners_.length());
529 BLOBNBOX_C_IT it(&boxes_);
530 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
537#ifndef GRAPHICS_DISABLED
554 tab_win->
Line(startpt_.
x(), startpt_.
y(), endpt_.
x(), endpt_.
y());
556 tab_win->
Line(startpt_.
x(), startpt_.
y(), startpt_.
x(), extended_ymin_);
557 tab_win->
Line(endpt_.
x(), extended_ymax_, endpt_.
x(), endpt_.
y());
558 auto score_string = std::to_string(percent_score_);
560 tab_win->
Text(startpt_.
x(), startpt_.
y(), score_string.c_str());
570 if (needs_evaluation_) {
583 needs_evaluation_ =
false;
584 int length = endpt_.
y() - startpt_.
y();
585 if (length == 0 || boxes_.empty()) {
587 Print(
"Zero length in evaluate");
591 BLOBNBOX_C_IT it(&boxes_);
593 int height_count = 0;
594 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
597 int height = box.
height();
598 mean_height += height;
601 if (height_count > 0) {
602 mean_height /= height_count;
611 STATS gutters(0, max_gutter);
615 int num_deleted_boxes = 0;
616 bool text_on_image =
false;
618 const TBOX *prev_good_box =
nullptr;
619 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
622 int mid_y = (box.
top() + box.
bottom()) / 2;
625 tprintf(
"After already deleting %d boxes, ", num_deleted_boxes);
626 Print(
"Starting evaluation");
634 int tab_x =
XAtY(mid_y);
640 tprintf(
"Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n", box.
left(), box.
bottom(),
641 box.
right(), box.
top(), gutter_width, neighbour_gap);
647 gutters.
add(gutter_width, 1);
651 if (prev_good_box !=
nullptr) {
652 int vertical_gap = box.
bottom() - prev_good_box->
top();
653 double size1 = sqrt(
static_cast<double>(prev_good_box->
area()));
654 double size2 = sqrt(
static_cast<double>(box.
area()));
656 good_length += vertical_gap;
659 tprintf(
"Box and prev good, gap=%d, target %g, goodlength=%d\n", vertical_gap,
666 prev_good_box = &box;
668 text_on_image =
true;
673 tprintf(
"Bad Box (%d,%d)->(%d,%d) with gutter %d, ndist %d\n", box.
left(), box.
bottom(),
674 box.
right(), box.
top(), gutter_width, neighbour_gap);
681 Print(
"Evaluating:");
686 int search_top = endpt_.
y();
687 int search_bottom = startpt_.
y();
690 prev_good_box =
nullptr;
691 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
694 int mid_y = (box.
top() + box.
bottom()) / 2;
698 int tab_x =
XAtY(mid_y);
711 if (prev_good_box ==
nullptr) {
714 search_bottom = box.
top();
716 prev_good_box = &box;
717 search_top = box.
bottom();
721 tprintf(
"Bad Box (%d,%d)->(%d,%d) with gutter %d, mean gutter %d\n", box.
left(),
722 box.
bottom(), box.
right(), box.
top(), gutter_width, median_gutter);
730 if (prev_good_box !=
nullptr) {
733 int length = endpt_.
y() - startpt_.
y();
734 percent_score_ = 100 * good_length / length;
735 if (num_deleted_boxes > 0) {
738 if (boxes_.empty()) {
744 if (search_bottom > search_top) {
745 search_bottom = startpt_.
y();
746 search_top = endpt_.
y();
750 min_gutter_width *= mean_height;
752 if (median_gutter > max_gutter_width) {
753 max_gutter_width = median_gutter;
755 int gutter_width = finder->
GutterWidth(search_bottom, search_top, *
this, text_on_image,
756 max_gutter_width, &required_shift);
757 if (gutter_width < min_gutter_width) {
759 tprintf(
"Rejecting bad tab Vector with %d gutter vs %g min\n", gutter_width,
762 boxes_.shallow_clear();
765 tprintf(
"Final gutter %d, vs limit of %g, required shift = %d\n", gutter_width,
766 min_gutter_width, required_shift);
774 Print(
"Evaluation complete:");
784 needs_refit_ =
false;
785 if (boxes_.empty()) {
788 if (!force_parallel) {
795 sort_key_ =
SortKey(vertical, midpt.
x(), midpt.
y());
796 return startpt_.
y() != endpt_.
y();
798 if (!force_parallel && !
IsRagged()) {
801 BLOBNBOX_C_IT it(&boxes_);
803 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
808 linepoints.
Add(boxpt);
811 linepoints.
Add(top_pt);
814 linepoints.
Fit(&startpt_, &endpt_);
815 if (startpt_.
y() != endpt_.
y()) {
817 vertical -= startpt_;
820 int start_y = startpt_.
y();
821 int end_y = endpt_.
y();
822 sort_key_ =
IsLeftTab() ? INT32_MAX : -INT32_MAX;
823 BLOBNBOX_C_IT it(&boxes_);
828 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
831 mean_width_ += box.
width();
836 int bottom_y = box.
bottom();
837 int top_y = box.
top();
838 int key =
SortKey(vertical, x1, bottom_y);
841 startpt_ =
ICOORD(x1, bottom_y);
843 key =
SortKey(vertical, x1, top_y);
846 startpt_ =
ICOORD(x1, top_y);
855 if (width_count > 0) {
856 mean_width_ = (mean_width_ + width_count - 1) / width_count;
858 endpt_ = startpt_ + vertical;
859 needs_evaluation_ =
true;
860 if (start_y != end_y) {
862 startpt_.
set_x(
XAtY(vertical, sort_key_, start_y));
863 startpt_.
set_y(start_y);
864 endpt_.
set_x(
XAtY(vertical, sort_key_, end_y));
873 if (!partners_.singleton()) {
876 TabVector_C_IT partner_it(&partners_);
884 if (!partners_.singleton()) {
887 TabVector_C_IT partner_it(&partners_);
889 BLOBNBOX_C_IT box_it1(&boxes_);
890 BLOBNBOX_C_IT box_it2(&partner->boxes_);
894 Print(
"Testing for vertical text");
895 partner->
Print(
" partner");
898 int num_unmatched = 0;
899 int total_widths = 0;
904 STATS gaps(0, width * 2 - 1);
906 box_it2.mark_cycle_pt();
907 for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) {
910 if (prev_bbox !=
nullptr) {
913 while (!box_it2.cycled_list() && box_it2.data() != bbox &&
923 total_widths += box.
width();
926 if (num_unmatched + num_matched == 0) {
929 double avg_width = total_widths * 1.0 / (num_unmatched + num_matched);
934 (gaps.
get_total() > 0 && num_matched >= min_box_match && gaps.
median() <= max_gap);
937 "gaps=%d, matched=%d, unmatched=%d, min_match=%d "
938 "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n",
939 gaps.
get_total(), num_matched, num_unmatched, min_box_match, gaps.
median(), avg_width,
940 max_gap, is_vertical ?
"Yes" :
"No");
942 return (is_vertical) ? partner :
nullptr;
947 BLOBNBOX_CLIST *boxes)
948 : extended_ymin_(extended_ymin)
949 , extended_ymax_(extended_ymax)
954 , needs_evaluation_(true)
955 , alignment_(alignment)
956 , top_constraints_(nullptr)
957 , bottom_constraints_(nullptr) {
958 BLOBNBOX_C_IT it(&boxes_);
959 it.add_list_after(boxes);
965void TabVector::Delete(TabVector *replacement) {
966 TabVector_C_IT it(&partners_);
967 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
969 TabVector_C_IT p_it(&partner->partners_);
972 TabVector *partner_replacement = replacement;
973 for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
975 if (p_partner == partner_replacement) {
976 partner_replacement =
nullptr;
982 for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
984 if (p_partner ==
this) {
986 if (partner_replacement !=
nullptr) {
987 p_it.add_before_stay_put(partner_replacement);
991 if (partner_replacement !=
nullptr) {
992 partner_replacement->AddPartner(partner);
#define double_VAR(name, val, comment)
double textord_tabvector_vertical_gap_fraction
const double kMinAlignedGutter
void tprintf(const char *format,...)
int IntCastRounded(double x)
const int kGutterMultiple
bool textord_debug_printable
const int kSimilarVectorDist
const double kMinRaggedGutter
int textord_debug_tabfind
const double kLineCountReciprocal
const int kMaxFillinMultiple
const double kMinGutterFraction
const int kGutterToNeighbourRatio
const int kSimilarRaggedDist
double textord_tabvector_vertical_box_ratio
const TBOX & bounding_box() const
BlobRegionType region_type() const
BlobTextFlowType flow() const
void Add(const ICOORD &pt)
double Fit(ICOORD *pt1, ICOORD *pt2)
void rotate(const FCOORD &vec)
void set_x(TDimension xin)
rewrite function
TDimension y() const
access function
void set_y(TDimension yin)
rewrite function
TDimension x() const
access function
TDimension height() const
const ICOORD & botleft() const
const ICOORD & topright() const
TDimension bottom() const
void add(int32_t value, int32_t count)
int32_t get_total() const
static bool WithinTestRegion(int detail_level, int x, int y)
void StartVerticalSearch(int xmin, int xmax, int y)
BBC * NextVerticalSearch(bool top_to_bottom)
void GutterWidthAndNeighbourGap(int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
int GutterWidth(int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
static void CreateConstraint(TabVector *vector, bool is_top)
static void ApplyConstraints(TabConstraint_LIST *constraints)
static void MergeConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2)
static bool CompatibleConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2)
void set_bottom_constraints(TabConstraint_LIST *constraints)
static int SortKey(const ICOORD &vertical, int x, int y)
const ICOORD & endpt() const
void Rotate(const FCOORD &rotation)
void AddPartner(TabVector *partner)
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
int ExtendedOverlap(int top_y, int bottom_y) const
int extended_ymin() const
bool IsAPartner(const TabVector *other)
static TabVector * FitVector(TabAlignment alignment, ICOORD vertical, int extended_start_y, int extended_end_y, BLOBNBOX_CLIST *good_points, int *vertical_x, int *vertical_y)
TabVector * VerticalTextlinePartner()
void Evaluate(const ICOORD &vertical, TabFind *finder)
int extended_ymax() const
void SetupPartnerConstraints()
void set_top_constraints(TabConstraint_LIST *constraints)
bool Fit(ICOORD vertical, bool force_parallel)
void SetYStart(int start_y)
void Print(const char *prefix)
void FitAndEvaluateIfNeeded(const ICOORD &vertical, TabFind *finder)
const ICOORD & startpt() const
void Display(ScrollView *tab_win)
TabVector * ShallowCopy() const
void MergeWith(const ICOORD &vertical, TabVector *other)
void Debug(const char *prefix)
TabVector * GetSinglePartner()
void ExtendToBox(BLOBNBOX *blob)
bool SimilarTo(const ICOORD &vertical, const TabVector &other, BlobGrid *grid) const
void Line(int x1, int y1, int x2, int y2)
void TextAttributes(const char *font, int pixel_size, bool bold, bool italic, bool underlined)
void Text(int x, int y, const char *mystring)