tesseract v5.3.3.20231005
tesseract::ColPartitionGrid Class Reference

#include <colpartitiongrid.h>

Inheritance diagram for tesseract::ColPartitionGrid:
tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT > tesseract::GridBase

Public Member Functions

 ColPartitionGrid ()=default
 
 ColPartitionGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~ColPartitionGrid () override=default
 
void HandleClick (int x, int y) override
 
void Merges (const std::function< bool(ColPartition *, TBOX *)> &box_cb, const std::function< bool(const ColPartition *, const ColPartition *)> &confirm_cb)
 
bool MergePart (const std::function< bool(ColPartition *, TBOX *)> &box_cb, const std::function< bool(const ColPartition *, const ColPartition *)> &confirm_cb, ColPartition *part)
 
int ComputeTotalOverlap (ColPartitionGrid **overlap_grid)
 
void FindOverlappingPartitions (const TBOX &box, const ColPartition *not_this, ColPartition_CLIST *parts)
 
ColPartition * BestMergeCandidate (const ColPartition *part, ColPartition_CLIST *candidates, bool debug, const std::function< bool(const ColPartition *, const ColPartition *)> &confirm_cb, int *overlap_increase)
 
void SplitOverlappingPartitions (ColPartition_LIST *big_parts)
 
bool GridSmoothNeighbours (BlobTextFlowType source_type, Image nontext_map, const TBOX &im_box, const FCOORD &rerotation)
 
void ReflectInYAxis ()
 
void Deskew (const FCOORD &deskew)
 
void ExtractPartitionsAsBlocks (BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
void SetTabStops (TabFind *tabgrid)
 
bool MakeColPartSets (PartSetVector *part_sets)
 
ColPartitionSet * MakeSingleColumnSet (WidthCallback cb)
 
void ClaimBoxes ()
 
void ReTypeBlobs (BLOBNBOX_LIST *im_blobs)
 
void RecomputeBounds (int gridsize, const ICOORD &bleft, const ICOORD &tright, const ICOORD &vertical)
 
void GridFindMargins (ColPartitionSet **best_columns)
 
void ListFindMargins (ColPartitionSet **best_columns, ColPartition_LIST *parts)
 
void DeleteParts ()
 
void DeleteUnknownParts (TO_BLOCK *block)
 
void DeleteNonLeaderParts ()
 
void FindFigureCaptions ()
 
void FindPartitionPartners ()
 
void FindPartitionPartners (bool upper, ColPartition *part)
 
void FindVPartitionPartners (bool to_the_left, ColPartition *part)
 
void RefinePartitionPartners (bool get_desperate)
 
- Public Member Functions inherited from tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT >
 BBGrid ()
 
 BBGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~BBGrid () override
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
void Clear ()
 
void ClearGridData (void(*free_method)(ColPartition *))
 
void InsertBBox (bool h_spread, bool v_spread, ColPartition *bbox)
 
void InsertPixPtBBox (int left, int bottom, Image pix, ColPartition *bbox)
 
void RemoveBBox (ColPartition *bbox)
 
bool RectangleEmpty (const TBOX &rect)
 
IntGrid * CountCellElements ()
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void DisplayBoxes (ScrollView *window)
 
void AssertNoDuplicates ()
 
virtual void HandleClick (int x, int y)
 
- Public Member Functions inherited from tesseract::GridBase
 GridBase ()=default
 
 GridBase (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~GridBase ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
void GridCoords (int x, int y, int *grid_x, int *grid_y) const
 
void ClipGridCoords (int *x, int *y) const
 

Additional Inherited Members

- Protected Attributes inherited from tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT >
ColPartition_CLIST * grid_
 
- Protected Attributes inherited from tesseract::GridBase
int gridsize_
 
int gridwidth_
 
int gridheight_
 
int gridbuckets_
 
ICOORD bleft_
 
ICOORD tright_
 

Detailed Description

Definition at line 32 of file colpartitiongrid.h.

Constructor & Destructor Documentation

◆ ColPartitionGrid() [1/2]

tesseract::ColPartitionGrid::ColPartitionGrid ( )
default

◆ ColPartitionGrid() [2/2]

tesseract::ColPartitionGrid::ColPartitionGrid ( int  gridsize,
const ICOORD & bleft,
const ICOORD & tright 
)

Definition at line 67 of file colpartitiongrid.cpp.

69 : BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>(
70 gridsize, bleft, tright) {}
int gridsize() const
Definition: bbgrid.h:63
const ICOORD & bleft() const
Definition: bbgrid.h:72
const ICOORD & tright() const
Definition: bbgrid.h:75

◆ ~ColPartitionGrid()

tesseract::ColPartitionGrid::~ColPartitionGrid ( )
override default

Member Function Documentation

◆ BestMergeCandidate()

ColPartition * tesseract::ColPartitionGrid::BestMergeCandidate ( const ColPartition * part,
ColPartition_CLIST *  candidates,
bool  debug,
const std::function< bool(const ColPartition *, const ColPartition *)> &  confirm_cb,
int *  overlap_increase 
)

Definition at line 410 of file colpartitiongrid.cpp.

414 {
415 if (overlap_increase != nullptr) {
416 *overlap_increase = 0;
417 }
418 if (candidates->empty()) {
419 return nullptr;
420 }
421 int ok_overlap =
422 static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
423 // The best neighbour to merge with is the one that causes least
424 // total pairwise overlap among all the neighbours.
425 // If more than one offers the same total overlap, choose the one
426 // with the least total area.
427 const TBOX &part_box = part->bounding_box();
428 ColPartition_C_IT it(candidates);
429 ColPartition *best_candidate = nullptr;
430 // Find the total combined box of all candidates and the original.
431 TBOX full_box(part_box);
432 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
433 ColPartition *candidate = it.data();
434 full_box += candidate->bounding_box();
435 }
436 // Keep valid neighbours in a list.
437 ColPartition_CLIST neighbours;
438 // Now run a rect search of the merged box for overlapping neighbours, as
439 // we need anything that might be overlapped by the merged box.
440 FindOverlappingPartitions(full_box, part, &neighbours);
441 if (debug) {
442 tprintf("Finding best merge candidate from %d, %d neighbours for box:",
443 candidates->length(), neighbours.length());
444 part_box.print();
445 }
446 // If the best increase in overlap is positive, then we also check the
447 // worst non-candidate overlap. This catches the case of multiple good
448 // candidates that overlap each other when merged. If the worst
449 // non-candidate overlap is better than the best overlap, then return
450 // the worst non-candidate overlap instead.
451 ColPartition_CLIST non_candidate_neighbours;
452 non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>, true,
453 &neighbours, candidates);
454 int worst_nc_increase = 0;
455 int best_increase = INT32_MAX;
456 int best_area = 0;
457 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
458 ColPartition *candidate = it.data();
459 if (confirm_cb != nullptr && !confirm_cb(part, candidate)) {
460 if (debug) {
461 tprintf("Candidate not confirmed:");
462 candidate->bounding_box().print();
463 }
464 continue;
465 }
466 int increase = IncreaseInOverlap(part, candidate, ok_overlap, &neighbours);
467 const TBOX &cand_box = candidate->bounding_box();
468 if (best_candidate == nullptr || increase < best_increase) {
469 best_candidate = candidate;
470 best_increase = increase;
471 best_area = cand_box.bounding_union(part_box).area() - cand_box.area();
472 if (debug) {
473 tprintf("New best merge candidate has increase %d, area %d, over box:",
474 increase, best_area);
475 full_box.print();
476 candidate->Print();
477 }
478 } else if (increase == best_increase) {
479 int area = cand_box.bounding_union(part_box).area() - cand_box.area();
480 if (area < best_area) {
481 best_area = area;
482 best_candidate = candidate;
483 }
484 }
485 increase = IncreaseInOverlap(part, candidate, ok_overlap,
486 &non_candidate_neighbours);
487 if (increase > worst_nc_increase) {
488 worst_nc_increase = increase;
489 }
490 }
491 if (best_increase > 0) {
492 // If the worst non-candidate increase is less than the best increase
493 // including the candidates, then all the candidates can merge together
494 // and the increase in outside overlap would be less, so use that result,
495 // but only if each candidate is either a good diacritic merge with part,
496 // or an ok merge candidate with all the others.
497 // See TestCompatibleCandidates for more explanation and a picture.
498 if (worst_nc_increase < best_increase &&
499 TestCompatibleCandidates(*part, debug, candidates)) {
500 best_increase = worst_nc_increase;
501 }
502 }
503 if (overlap_increase != nullptr) {
504 *overlap_increase = best_increase;
505 }
506 return best_candidate;
507}
@ TBOX
const double kTinyEnoughTextlineOverlapFraction
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void FindOverlappingPartitions(const TBOX &box, const ColPartition *not_this, ColPartition_CLIST *parts)

◆ ClaimBoxes()

void tesseract::ColPartitionGrid::ClaimBoxes ( )

Definition at line 885 of file colpartitiongrid.cpp.

885 {
886 // Iterate the ColPartitions in the grid.
887 ColPartitionGridSearch gsearch(this);
888 gsearch.StartFullSearch();
889 ColPartition *part;
890 while ((part = gsearch.NextFullSearch()) != nullptr) {
891 part->ClaimBoxes();
892 }
893}
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:919

◆ ComputeTotalOverlap()

int tesseract::ColPartitionGrid::ComputeTotalOverlap ( ColPartitionGrid **  overlap_grid)

Definition at line 322 of file colpartitiongrid.cpp.

322 {
323 int total_overlap = 0;
324 // Iterate the ColPartitions in the grid.
325 ColPartitionGridSearch gsearch(this);
326 gsearch.StartFullSearch();
327 ColPartition *part;
328 while ((part = gsearch.NextFullSearch()) != nullptr) {
329 ColPartition_CLIST neighbors;
330 const TBOX &part_box = part->bounding_box();
331 FindOverlappingPartitions(part_box, part, &neighbors);
332 ColPartition_C_IT n_it(&neighbors);
333 bool any_part_overlap = false;
334 for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
335 const TBOX &n_box = n_it.data()->bounding_box();
336 int overlap = n_box.intersection(part_box).area();
337 if (overlap > 0 && overlap_grid != nullptr) {
338 if (*overlap_grid == nullptr) {
339 *overlap_grid = new ColPartitionGrid(gridsize(), bleft(), tright());
340 }
341 (*overlap_grid)->InsertBBox(true, true, n_it.data()->ShallowCopy());
342 if (!any_part_overlap) {
343 (*overlap_grid)->InsertBBox(true, true, part->ShallowCopy());
344 }
345 }
346 any_part_overlap = true;
347 total_overlap += overlap;
348 }
349 }
350 return total_overlap;
351}

◆ DeleteNonLeaderParts()

void tesseract::ColPartitionGrid::DeleteNonLeaderParts ( )

Definition at line 1063 of file colpartitiongrid.cpp.

1063 {
1064 ColPartitionGridSearch gsearch(this);
1065 gsearch.StartFullSearch();
1066 ColPartition *part;
1067 while ((part = gsearch.NextFullSearch()) != nullptr) {
1068 if (part->flow() != BTFT_LEADER) {
1069 gsearch.RemoveBBox();
1070 if (part->ReleaseNonLeaderBoxes()) {
1071 InsertBBox(true, true, part);
1072 gsearch.RepositionIterator();
1073 } else {
1074 delete part;
1075 }
1076 }
1077 }
1078}
@ BTFT_LEADER
Definition: blobbox.h:117
void InsertBBox(bool h_spread, bool v_spread, ColPartition *bbox)
Definition: bbgrid.h:529

◆ DeleteParts()

void tesseract::ColPartitionGrid::DeleteParts ( )

Definition at line 1029 of file colpartitiongrid.cpp.

1029 {
1030 ColPartition_LIST dead_parts;
1031 ColPartition_IT dead_it(&dead_parts);
1032 ColPartitionGridSearch gsearch(this);
1033 gsearch.StartFullSearch();
1034 ColPartition *part;
1035 while ((part = gsearch.NextFullSearch()) != nullptr) {
1036 part->DisownBoxes();
1037 dead_it.add_to_end(part); // Parts will be deleted on return.
1038 }
1039 Clear();
1040}

◆ DeleteUnknownParts()

void tesseract::ColPartitionGrid::DeleteUnknownParts ( TO_BLOCK * block)

Definition at line 1044 of file colpartitiongrid.cpp.

1044 {
1045 ColPartitionGridSearch gsearch(this);
1046 gsearch.StartFullSearch();
1047 ColPartition *part;
1048 while ((part = gsearch.NextFullSearch()) != nullptr) {
1049 if (part->blob_type() == BRT_UNKNOWN) {
1050 gsearch.RemoveBBox();
1051 // Once marked, the blobs will be swept up by DeleteUnownedNoise.
1052 part->set_flow(BTFT_NONTEXT);
1053 part->set_blob_type(BRT_NOISE);
1054 part->SetBlobTypes();
1055 part->DisownBoxes();
1056 delete part;
1057 }
1058 }
1059 block->DeleteUnownedNoise();
1060}
@ BRT_NOISE
Definition: blobbox.h:75
@ BRT_UNKNOWN
Definition: blobbox.h:80
@ BTFT_NONTEXT
Definition: blobbox.h:112

◆ Deskew()

void tesseract::ColPartitionGrid::Deskew ( const FCOORD & deskew)

Definition at line 751 of file colpartitiongrid.cpp.

751 {
752 ColPartition_LIST parts;
753 ColPartition_IT part_it(&parts);
754 // Iterate the ColPartitions in the grid to extract them.
755 ColPartitionGridSearch gsearch(this);
756 gsearch.StartFullSearch();
757 ColPartition *part;
758 while ((part = gsearch.NextFullSearch()) != nullptr) {
759 part_it.add_after_then_move(part);
760 }
761 // Rebuild the grid to the new size.
762 TBOX grid_box(bleft_, tright_);
763 grid_box.rotate_large(deskew);
764 Init(gridsize(), grid_box.botleft(), grid_box.topright());
765 // Reinitializing the grid with rotated coords also clears all the
766 // pointers, so parts will now own the ColPartitions. (Briefly).
767 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
768 part = part_it.extract();
769 part->ComputeLimits();
770 InsertBBox(true, true, part);
771 }
772}
ICOORD tright_
Definition: bbgrid.h:91
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:488

◆ ExtractPartitionsAsBlocks()

void tesseract::ColPartitionGrid::ExtractPartitionsAsBlocks ( BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 688 of file colpartitiongrid.cpp.

689 {
690 TO_BLOCK_IT to_block_it(to_blocks);
691 BLOCK_IT block_it(blocks);
692 // All partitions will be put on this list and deleted on return.
693 ColPartition_LIST parts;
694 ColPartition_IT part_it(&parts);
695 // Iterate the ColPartitions in the grid to extract them.
696 ColPartitionGridSearch gsearch(this);
697 gsearch.StartFullSearch();
698 ColPartition *part;
699 while ((part = gsearch.NextFullSearch()) != nullptr) {
700 part_it.add_after_then_move(part);
701 // The partition has to be at least vaguely like text.
702 BlobRegionType blob_type = part->blob_type();
703 if (BLOBNBOX::IsTextType(blob_type) ||
704 (blob_type == BRT_UNKNOWN && part->boxes_count() > 1)) {
705 PolyBlockType type =
706 blob_type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT : PT_FLOWING_TEXT;
707 // Get metrics from the row that will be used for the block.
708 TBOX box = part->bounding_box();
709 int median_width = part->median_width();
710 int median_height = part->median_height();
711 // Turn the partition into a TO_ROW.
712 TO_ROW *row = part->MakeToRow();
713 if (row == nullptr) {
714 // This partition is dead.
715 part->DeleteBoxes();
716 continue;
717 }
718 auto *block = new BLOCK("", true, 0, 0, box.left(), box.bottom(),
719 box.right(), box.top());
720 block->pdblk.set_poly_block(new POLY_BLOCK(box, type));
721 auto *to_block = new TO_BLOCK(block);
722 TO_ROW_IT row_it(to_block->get_rows());
723 row_it.add_after_then_move(row);
724 // We haven't differentially rotated vertical and horizontal text at
725 // this point, so use width or height as appropriate.
726 if (blob_type == BRT_VERT_TEXT) {
727 to_block->line_size = static_cast<float>(median_width);
728 to_block->line_spacing = static_cast<float>(box.width());
729 to_block->max_blob_size = static_cast<float>(box.width() + 1);
730 } else {
731 to_block->line_size = static_cast<float>(median_height);
732 to_block->line_spacing = static_cast<float>(box.height());
733 to_block->max_blob_size = static_cast<float>(box.height() + 1);
734 }
735 if (to_block->line_size == 0) {
736 to_block->line_size = 1;
737 }
738 block_it.add_to_end(block);
739 to_block_it.add_to_end(to_block);
740 } else {
741 // This partition is dead.
742 part->DeleteBoxes();
743 }
744 }
745 Clear();
746 // Now it is safe to delete the ColPartitions as parts goes out of scope.
747}
BlobRegionType
Definition: blobbox.h:74
@ BRT_VERT_TEXT
Definition: blobbox.h:81
@ PT_VERTICAL_TEXT
Definition: publictypes.h:59
@ PT_FLOWING_TEXT
Definition: publictypes.h:53
type
Definition: upload.py:458
static bool IsTextType(BlobRegionType type)
Definition: blobbox.h:435

◆ FindFigureCaptions()

void tesseract::ColPartitionGrid::FindFigureCaptions ( )

Definition at line 1081 of file colpartitiongrid.cpp.

1081 {
1082 // For each image region find its best candidate text caption region,
1083 // if any and mark it as such.
1084 ColPartitionGridSearch gsearch(this);
1085 gsearch.StartFullSearch();
1086 ColPartition *part;
1087 while ((part = gsearch.NextFullSearch()) != nullptr) {
1088 if (part->IsImageType()) {
1089 const TBOX &part_box = part->bounding_box();
1090 bool debug =
1091 AlignedBlob::WithinTestRegion(2, part_box.left(), part_box.bottom());
1092 ColPartition *best_caption = nullptr;
1093 int best_dist = 0; // Distance to best_caption.
1094 int best_upper = 0; // Direction of best_caption.
1095 // Handle both lower and upper directions.
1096 for (int upper = 0; upper < 2; ++upper) {
1097 ColPartition_C_IT partner_it(upper ? part->upper_partners()
1098 : part->lower_partners());
1099 // If there are no image partners, then this direction is ok.
1100 for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
1101 partner_it.forward()) {
1102 ColPartition *partner = partner_it.data();
1103 if (partner->IsImageType()) {
1104 break;
1105 }
1106 }
1107 if (!partner_it.cycled_list()) {
1108 continue;
1109 }
1110 // Find the nearest totally overlapping text partner.
1111 for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
1112 partner_it.forward()) {
1113 ColPartition *partner = partner_it.data();
1114 if (!partner->IsTextType() || partner->type() == PT_TABLE) {
1115 continue;
1116 }
1117 const TBOX &partner_box = partner->bounding_box();
1118 if (debug) {
1119 tprintf("Finding figure captions for image part:");
1120 part_box.print();
1121 tprintf("Considering partner:");
1122 partner_box.print();
1123 }
1124 if (partner_box.left() >= part_box.left() &&
1125 partner_box.right() <= part_box.right()) {
1126 int dist = partner_box.y_gap(part_box);
1127 if (best_caption == nullptr || dist < best_dist) {
1128 best_dist = dist;
1129 best_caption = partner;
1130 best_upper = upper;
1131 }
1132 }
1133 }
1134 }
1135 if (best_caption != nullptr) {
1136 if (debug) {
1137 tprintf("Best caption candidate:");
1138 best_caption->bounding_box().print();
1139 }
1140 // We have a candidate caption. Qualify it as being separable from
1141 // any body text. We are looking for either a small number of lines
1142 // or a big gap that indicates a separation from the body text.
1143 int line_count = 0;
1144 int biggest_gap = 0;
1145 int smallest_gap = INT16_MAX;
1146 int total_height = 0;
1147 int mean_height = 0;
1148 ColPartition *end_partner = nullptr;
1149 ColPartition *next_partner = nullptr;
1150 for (ColPartition *partner = best_caption;
1151 partner != nullptr && line_count <= kMaxCaptionLines;
1152 partner = next_partner) {
1153 if (!partner->IsTextType()) {
1154 end_partner = partner;
1155 break;
1156 }
1157 ++line_count;
1158 total_height += partner->bounding_box().height();
1159 next_partner = partner->SingletonPartner(best_upper);
1160 if (next_partner != nullptr) {
1161 int gap =
1162 partner->bounding_box().y_gap(next_partner->bounding_box());
1163 if (gap > biggest_gap) {
1164 biggest_gap = gap;
1165 end_partner = next_partner;
1166 mean_height = total_height / line_count;
1167 } else if (gap < smallest_gap) {
1168 smallest_gap = gap;
1169 }
1170 // If the gap looks big compared to the text size and the smallest
1171 // gap seen so far, then we can stop.
1172 if (biggest_gap > mean_height * kMinCaptionGapHeightRatio &&
1173 biggest_gap > smallest_gap * kMinCaptionGapRatio) {
1174 break;
1175 }
1176 }
1177 }
1178 if (debug) {
1179 tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n",
1180 line_count, biggest_gap, smallest_gap, mean_height);
1181 if (end_partner != nullptr) {
1182 tprintf("End partner:");
1183 end_partner->bounding_box().print();
1184 }
1185 }
1186 if (next_partner == nullptr && line_count <= kMaxCaptionLines) {
1187 end_partner = nullptr; // No gap, but line count is small.
1188 }
1189 if (line_count <= kMaxCaptionLines) {
1190 // This is a qualified caption. Mark the text as caption.
1191 for (ColPartition *partner = best_caption;
1192 partner != nullptr && partner != end_partner;
1193 partner = next_partner) {
1194 partner->set_type(PT_CAPTION_TEXT);
1195 partner->SetBlobTypes();
1196 if (debug) {
1197 tprintf("Set caption type for partition:");
1198 partner->bounding_box().print();
1199 }
1200 next_partner = partner->SingletonPartner(best_upper);
1201 }
1202 }
1203 }
1204 }
1205 }
1206}
const int kMaxCaptionLines
const double kMinCaptionGapHeightRatio
@ PT_CAPTION_TEXT
Definition: publictypes.h:60
const double kMinCaptionGapRatio
static bool WithinTestRegion(int detail_level, int x, int y)

◆ FindOverlappingPartitions()

void tesseract::ColPartitionGrid::FindOverlappingPartitions ( const TBOX & box,
const ColPartition * not_this,
ColPartition_CLIST *  parts 
)

Definition at line 356 of file colpartitiongrid.cpp.

358 {
359 ColPartitionGridSearch rsearch(this);
360 rsearch.StartRectSearch(box);
361 ColPartition *part;
362 while ((part = rsearch.NextRectSearch()) != nullptr) {
363 if (part != not_this) {
364 parts->add_sorted(SortByBoxLeft<ColPartition>, true, part);
365 }
366 }
367}

◆ FindPartitionPartners() [1/2]

void tesseract::ColPartitionGrid::FindPartitionPartners ( )

Definition at line 1212 of file colpartitiongrid.cpp.

1212 {
1213 ColPartitionGridSearch gsearch(this);
1214 gsearch.StartFullSearch();
1215 ColPartition *part;
1216 while ((part = gsearch.NextFullSearch()) != nullptr) {
1217 if (part->IsVerticalType()) {
1218 FindVPartitionPartners(true, part);
1219 FindVPartitionPartners(false, part);
1220 } else {
1221 FindPartitionPartners(true, part);
1222 FindPartitionPartners(false, part);
1223 }
1224 }
1225}
void FindVPartitionPartners(bool to_the_left, ColPartition *part)

◆ FindPartitionPartners() [2/2]

void tesseract::ColPartitionGrid::FindPartitionPartners ( bool  upper,
ColPartition * part 
)

Definition at line 1229 of file colpartitiongrid.cpp.

1229 {
1230 if (part->type() == PT_NOISE) {
1231 return; // Noise is not allowed to partner anything.
1232 }
1233 const TBOX &box = part->bounding_box();
1234 int top = part->median_top();
1235 int bottom = part->median_bottom();
1236 int height = top - bottom;
1237 int mid_y = (bottom + top) / 2;
1238 ColPartitionGridSearch vsearch(this);
1239 // Search down for neighbour below
1240 vsearch.StartVerticalSearch(box.left(), box.right(), part->MidY());
1241 ColPartition *neighbour;
1242 ColPartition *best_neighbour = nullptr;
1243 int best_dist = INT32_MAX;
1244 while ((neighbour = vsearch.NextVerticalSearch(!upper)) != nullptr) {
1245 if (neighbour == part || neighbour->type() == PT_NOISE) {
1246 continue; // Noise is not allowed to partner anything.
1247 }
1248 int neighbour_bottom = neighbour->median_bottom();
1249 int neighbour_top = neighbour->median_top();
1250 int neighbour_y = (neighbour_bottom + neighbour_top) / 2;
1251 if (upper != (neighbour_y > mid_y)) {
1252 continue;
1253 }
1254 if (!part->HOverlaps(*neighbour) && !part->WithinSameMargins(*neighbour)) {
1255 continue;
1256 }
1257 if (!part->TypesMatch(*neighbour)) {
1258 if (best_neighbour == nullptr) {
1259 best_neighbour = neighbour;
1260 }
1261 continue;
1262 }
1263 int dist = upper ? neighbour_bottom - top : bottom - neighbour_top;
1264 if (dist <= kMaxPartitionSpacing * height) {
1265 if (dist < best_dist) {
1266 best_dist = dist;
1267 best_neighbour = neighbour;
1268 }
1269 } else {
1270 break;
1271 }
1272 }
1273 if (best_neighbour != nullptr) {
1274 part->AddPartner(upper, best_neighbour);
1275 }
1276}
const double kMaxPartitionSpacing

◆ FindVPartitionPartners()

void tesseract::ColPartitionGrid::FindVPartitionPartners ( bool  to_the_left,
ColPartition * part 
)

Definition at line 1280 of file colpartitiongrid.cpp.

1281 {
1282 if (part->type() == PT_NOISE) {
1283 return; // Noise is not allowed to partner anything.
1284 }
1285 const TBOX &box = part->bounding_box();
1286 int left = part->median_left();
1287 int right = part->median_right();
1288 int width = right >= left ? right - left : -1;
1289 int mid_x = (left + right) / 2;
1290 ColPartitionGridSearch hsearch(this);
1291 // Search left for neighbour to_the_left
1292 hsearch.StartSideSearch(mid_x, box.bottom(), box.top());
1293 ColPartition *neighbour;
1294 ColPartition *best_neighbour = nullptr;
1295 int best_dist = INT32_MAX;
1296 while ((neighbour = hsearch.NextSideSearch(to_the_left)) != nullptr) {
1297 if (neighbour == part || neighbour->type() == PT_NOISE) {
1298 continue; // Noise is not allowed to partner anything.
1299 }
1300 int neighbour_left = neighbour->median_left();
1301 int neighbour_right = neighbour->median_right();
1302 int neighbour_x = (neighbour_left + neighbour_right) / 2;
1303 if (to_the_left != (neighbour_x < mid_x)) {
1304 continue;
1305 }
1306 if (!part->VOverlaps(*neighbour)) {
1307 continue;
1308 }
1309 if (!part->TypesMatch(*neighbour)) {
1310 continue; // Only match to other vertical text.
1311 }
1312 int dist = to_the_left ? left - neighbour_right : neighbour_left - right;
1313 if (dist <= kMaxPartitionSpacing * width) {
1314 if (dist < best_dist || best_neighbour == nullptr) {
1315 best_dist = dist;
1316 best_neighbour = neighbour;
1317 }
1318 } else {
1319 break;
1320 }
1321 }
1322 // For vertical partitions, the upper partner is to the left, and lower is
1323 // to the right.
1324 if (best_neighbour != nullptr) {
1325 part->AddPartner(to_the_left, best_neighbour);
1326 }
1327}

◆ GridFindMargins()

void tesseract::ColPartitionGrid::GridFindMargins ( ColPartitionSet **  best_columns)

Definition at line 988 of file colpartitiongrid.cpp.

988 {
989 // Iterate the ColPartitions in the grid.
990 ColPartitionGridSearch gsearch(this);
991 gsearch.StartFullSearch();
992 ColPartition *part;
993 while ((part = gsearch.NextFullSearch()) != nullptr) {
994 // Set up a rectangle search x-bounded by the column and y by the part.
995 ColPartitionSet *columns =
996 best_columns != nullptr ? best_columns[gsearch.GridY()] : nullptr;
997 FindPartitionMargins(columns, part);
998 const TBOX &box = part->bounding_box();
999 if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) {
1000 tprintf("Computed margins for part:");
1001 part->Print();
1002 }
1003 }
1004}

◆ GridSmoothNeighbours()

bool tesseract::ColPartitionGrid::GridSmoothNeighbours ( BlobTextFlowType  source_type,
Image  nontext_map,
const TBOX & im_box,
const FCOORD & rerotation 
)

Definition at line 635 of file colpartitiongrid.cpp.

638 {
639 // Iterate the ColPartitions in the grid.
640 ColPartitionGridSearch gsearch(this);
641 gsearch.StartFullSearch();
642 ColPartition *part;
643 bool any_changed = false;
644 while ((part = gsearch.NextFullSearch()) != nullptr) {
645 if (part->flow() != source_type ||
646 BLOBNBOX::IsLineType(part->blob_type())) {
647 continue;
648 }
649 const TBOX &box = part->bounding_box();
650 bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
651 if (SmoothRegionType(nontext_map, im_box, rotation, debug, part)) {
652 any_changed = true;
653 }
654 }
655 return any_changed;
656}
static bool IsLineType(BlobRegionType type)
Definition: blobbox.h:443

◆ HandleClick()

void tesseract::ColPartitionGrid::HandleClick ( int  x,
int  y 
)
override virtual

Reimplemented from tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT >.

Definition at line 73 of file colpartitiongrid.cpp.

73 {
74 BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>::HandleClick(x,
75 y);
76 // Run a radial search for partitions that overlap.
77 ColPartitionGridSearch radsearch(this);
78 radsearch.SetUniqueMode(true);
79 radsearch.StartRadSearch(x, y, 1);
80 ColPartition *neighbour;
81 FCOORD click(x, y);
82 while ((neighbour = radsearch.NextRadSearch()) != nullptr) {
83 const TBOX &nbox = neighbour->bounding_box();
84 if (nbox.contains(click)) {
85 tprintf("Block box:");
86 neighbour->bounding_box().print();
87 neighbour->Print();
88 }
89 }
90}
const double y
virtual void HandleClick(int x, int y)
Definition: bbgrid.h:691

◆ ListFindMargins()

void tesseract::ColPartitionGrid::ListFindMargins ( ColPartitionSet **  best_columns,
ColPartition_LIST *  parts 
)

Definition at line 1011 of file colpartitiongrid.cpp.

1012 {
1013 ColPartition_IT part_it(parts);
1014 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
1015 ColPartition *part = part_it.data();
1016 ColPartitionSet *columns = nullptr;
1017 if (best_columns != nullptr) {
1018 const TBOX &part_box = part->bounding_box();
1019 // Get the columns from the y grid coord.
1020 int grid_x, grid_y;
1021 GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
1022 columns = best_columns[grid_y];
1023 }
1024 FindPartitionMargins(columns, part);
1025 }
1026}
void GridCoords(int x, int y, int *grid_x, int *grid_y) const
Definition: bbgrid.cpp:53

◆ MakeColPartSets()

bool tesseract::ColPartitionGrid::MakeColPartSets ( PartSetVector * part_sets)

Definition at line 803 of file colpartitiongrid.cpp.

803 {
804 auto *part_lists = new ColPartition_LIST[gridheight()];
805 part_sets->reserve(gridheight());
806 // Iterate the ColPartitions in the grid to get parts onto lists for the
807 // y bottom of each.
808 ColPartitionGridSearch gsearch(this);
809 gsearch.StartFullSearch();
810 ColPartition *part;
811 bool any_parts_found = false;
812 while ((part = gsearch.NextFullSearch()) != nullptr) {
813 BlobRegionType blob_type = part->blob_type();
814 if (blob_type != BRT_NOISE &&
815 (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
816 int grid_x, grid_y;
817 const TBOX &part_box = part->bounding_box();
818 GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
819 ColPartition_IT part_it(&part_lists[grid_y]);
820 part_it.add_to_end(part);
821 any_parts_found = true;
822 }
823 }
824 if (any_parts_found) {
825 for (int grid_y = 0; grid_y < gridheight(); ++grid_y) {
826 ColPartitionSet *line_set = nullptr;
827 if (!part_lists[grid_y].empty()) {
828 line_set = new ColPartitionSet(&part_lists[grid_y]);
829 }
830 part_sets->push_back(line_set);
831 }
832 }
833 delete[] part_lists;
834 return any_parts_found;
835}
int gridheight() const
Definition: bbgrid.h:69

◆ MakeSingleColumnSet()

ColPartitionSet * tesseract::ColPartitionGrid::MakeSingleColumnSet ( WidthCallback  cb)

Definition at line 841 of file colpartitiongrid.cpp.

841 {
842 ColPartition *single_column_part = nullptr;
843 // Iterate the ColPartitions in the grid to get parts onto lists for the
844 // y bottom of each.
845 ColPartitionGridSearch gsearch(this);
846 gsearch.StartFullSearch();
847 ColPartition *part;
848 while ((part = gsearch.NextFullSearch()) != nullptr) {
849 BlobRegionType blob_type = part->blob_type();
850 if (blob_type != BRT_NOISE &&
851 (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
852 // Consider for single column.
853 BlobTextFlowType flow = part->flow();
854 if ((blob_type == BRT_TEXT &&
855 (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN ||
856 flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) ||
857 blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE) {
858 if (single_column_part == nullptr) {
859 single_column_part = part->ShallowCopy();
860 single_column_part->set_blob_type(BRT_TEXT);
861 // Copy the tabs from itself to properly setup the margins.
862 single_column_part->CopyLeftTab(*single_column_part, false);
863 single_column_part->CopyRightTab(*single_column_part, false);
864 } else {
865 if (part->left_key() < single_column_part->left_key()) {
866 single_column_part->CopyLeftTab(*part, false);
867 }
868 if (part->right_key() > single_column_part->right_key()) {
869 single_column_part->CopyRightTab(*part, false);
870 }
871 }
872 }
873 }
874 }
875 if (single_column_part != nullptr) {
876 // Make a ColPartitionSet out of the single_column_part as a candidate
877 // for the single column case.
878 single_column_part->SetColumnGoodness(cb);
879 return new ColPartitionSet(single_column_part);
880 }
881 return nullptr;
882}
@ BRT_TEXT
Definition: blobbox.h:82
@ BRT_POLYIMAGE
Definition: blobbox.h:79
@ BRT_RECTIMAGE
Definition: blobbox.h:78
BlobTextFlowType
Definition: blobbox.h:110
@ BTFT_STRONG_CHAIN
Definition: blobbox.h:115
@ BTFT_CHAIN
Definition: blobbox.h:114
@ BTFT_TEXT_ON_IMAGE
Definition: blobbox.h:116

◆ MergePart()

bool tesseract::ColPartitionGrid::MergePart ( const std::function< bool(ColPartition *, TBOX *)> &  box_cb,
const std::function< bool(const ColPartition *, const ColPartition *)> &  confirm_cb,
ColPartition *  part 
)

Definition at line 119 of file colpartitiongrid.cpp.

123 {
124 if (part->IsUnMergeableType()) {
125 return false;
126 }
127 bool any_done = false;
128 // Repeatedly merge part while we find a best merge candidate that works.
129 bool merge_done = false;
130 do {
131 merge_done = false;
132 TBOX box = part->bounding_box();
133 bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
134 if (debug) {
135 tprintf("Merge candidate:");
136 box.print();
137 }
138 // Set up a rectangle search bounded by the part.
139 if (!box_cb(part, &box)) {
140 continue;
141 }
142 // Create a list of merge candidates.
143 ColPartition_CLIST merge_candidates;
144 FindMergeCandidates(part, box, debug, &merge_candidates);
145 // Find the best merge candidate based on minimal overlap increase.
146 int overlap_increase;
147 ColPartition *neighbour = BestMergeCandidate(part, &merge_candidates, debug,
148 confirm_cb, &overlap_increase);
149 if (neighbour != nullptr && overlap_increase <= 0) {
150 if (debug) {
151 tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
152 part->HCoreOverlap(*neighbour), part->VCoreOverlap(*neighbour),
153 overlap_increase);
154 }
155 // Looks like a good candidate so merge it.
156 RemoveBBox(neighbour);
157 // We will modify the box of part, so remove it from the grid, merge
158 // it and then re-insert it into the grid.
159 RemoveBBox(part);
160 part->Absorb(neighbour, nullptr);
161 InsertBBox(true, true, part);
162 merge_done = true;
163 any_done = true;
164 } else if (neighbour != nullptr) {
165 if (debug) {
166 tprintf("Overlapped when merged with increase %d: ", overlap_increase);
167 neighbour->bounding_box().print();
168 }
169 } else if (debug) {
170 tprintf("No candidate neighbour returned\n");
171 }
172 } while (merge_done);
173 return any_done;
174}
ColPartition * BestMergeCandidate(const ColPartition *part, ColPartition_CLIST *candidates, bool debug, const std::function< bool(const ColPartition *, const ColPartition *)> &confirm_cb, int *overlap_increase)

◆ Merges()

void tesseract::ColPartitionGrid::Merges ( const std::function< bool(ColPartition *, TBOX *)> &  box_cb,
const std::function< bool(const ColPartition *, const ColPartition *)> &  confirm_cb 
)

Definition at line 99 of file colpartitiongrid.cpp.

102 {
103 // Iterate the ColPartitions in the grid.
104 ColPartitionGridSearch gsearch(this);
105 gsearch.StartFullSearch();
106 ColPartition *part;
107 while ((part = gsearch.NextFullSearch()) != nullptr) {
108 if (MergePart(box_cb, confirm_cb, part)) {
109 gsearch.RepositionIterator();
110 }
111 }
112}
bool MergePart(const std::function< bool(ColPartition *, TBOX *)> &box_cb, const std::function< bool(const ColPartition *, const ColPartition *)> &confirm_cb, ColPartition *part)

◆ RecomputeBounds()

void tesseract::ColPartitionGrid::RecomputeBounds ( int  gridsize,
const ICOORD &  bleft,
const ICOORD &  tright,
const ICOORD &  vertical 
)

Definition at line 960 of file colpartitiongrid.cpp.

962 {
963 ColPartition_LIST saved_parts;
964 ColPartition_IT part_it(&saved_parts);
965 // Iterate the ColPartitions in the grid to get parts onto a list.
966 ColPartitionGridSearch gsearch(this);
967 gsearch.StartFullSearch();
968 ColPartition *part;
969 while ((part = gsearch.NextFullSearch()) != nullptr) {
970 part_it.add_to_end(part);
971 }
972 // Reinitialize grid to the new size.
973 Init(gridsize, bleft, tright);
974 // Recompute the bounds of the parts and put them back in the new grid.
975 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
976 part = part_it.extract();
977 part->set_vertical(vertical);
978 part->ComputeLimits();
979 InsertBBox(true, true, part);
980 }
981}

◆ RefinePartitionPartners()

void tesseract::ColPartitionGrid::RefinePartitionPartners ( bool  get_desperate)

Definition at line 1332 of file colpartitiongrid.cpp.

1332 {
1333 ColPartitionGridSearch gsearch(this);
1334 // Refine in type order so that chasing multiple partners can be done
1335 // before eliminating type mis-matching partners.
1336 for (int type = PT_UNKNOWN + 1; type <= PT_COUNT; type++) {
1337 // Iterate the ColPartitions in the grid.
1338 gsearch.StartFullSearch();
1339 ColPartition *part;
1340 while ((part = gsearch.NextFullSearch()) != nullptr) {
1341 part->RefinePartners(static_cast<PolyBlockType>(type), get_desperate,
1342 this);
1343 // Iterator may have been messed up by a merge.
1344 gsearch.RepositionIterator();
1345 }
1346 }
1347}

◆ ReflectInYAxis()

void tesseract::ColPartitionGrid::ReflectInYAxis ( )

Definition at line 660 of file colpartitiongrid.cpp.

660 {
661 ColPartition_LIST parts;
662 ColPartition_IT part_it(&parts);
663 // Iterate the ColPartitions in the grid to extract them.
664 ColPartitionGridSearch gsearch(this);
665 gsearch.StartFullSearch();
666 ColPartition *part;
667 while ((part = gsearch.NextFullSearch()) != nullptr) {
668 part_it.add_after_then_move(part);
669 }
670 ICOORD bot_left(-tright().x(), bleft().y());
671 ICOORD top_right(-bleft().x(), tright().y());
672 // Reinitializing the grid with reflected coords also clears all the
673 // pointers, so parts will now own the ColPartitions. (Briefly).
674 Init(gridsize(), bot_left, top_right);
675 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
676 part = part_it.extract();
677 part->ReflectInYAxis();
678 InsertBBox(true, true, part);
679 }
680}

◆ ReTypeBlobs()

void tesseract::ColPartitionGrid::ReTypeBlobs ( BLOBNBOX_LIST *  im_blobs)

Definition at line 898 of file colpartitiongrid.cpp.

898 {
899 BLOBNBOX_IT im_blob_it(im_blobs);
900 ColPartition_LIST dead_parts;
901 ColPartition_IT dead_part_it(&dead_parts);
902 // Iterate the ColPartitions in the grid.
903 ColPartitionGridSearch gsearch(this);
904 gsearch.StartFullSearch();
905 ColPartition *part;
906 while ((part = gsearch.NextFullSearch()) != nullptr) {
907 BlobRegionType blob_type = part->blob_type();
908 BlobTextFlowType flow = part->flow();
909 bool any_blobs_moved = false;
910 if (blob_type == BRT_POLYIMAGE || blob_type == BRT_RECTIMAGE) {
911 BLOBNBOX_C_IT blob_it(part->boxes());
912 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
913 BLOBNBOX *blob = blob_it.data();
914 im_blob_it.add_after_then_move(blob);
915 }
916 } else if (blob_type != BRT_NOISE) {
917 // Make sure the blobs are marked with the correct type and flow.
918 BLOBNBOX_C_IT blob_it(part->boxes());
919 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
920 BLOBNBOX *blob = blob_it.data();
921 if (blob->region_type() == BRT_NOISE) {
922 // TODO(rays) Deprecated. Change this section to an assert to verify
923 // and then delete.
924 ASSERT_HOST(blob->cblob()->area() != 0);
925 blob->set_owner(nullptr);
926 blob_it.extract();
927 any_blobs_moved = true;
928 } else {
929 blob->set_region_type(blob_type);
930 if (blob->flow() != BTFT_LEADER) {
931 blob->set_flow(flow);
932 }
933 }
934 }
935 }
936 if (blob_type == BRT_NOISE || part->boxes()->empty()) {
937 BLOBNBOX_C_IT blob_it(part->boxes());
938 part->DisownBoxes();
939 dead_part_it.add_to_end(part);
940 gsearch.RemoveBBox();
941 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
942 BLOBNBOX *blob = blob_it.data();
943 if (blob->cblob()->area() == 0) {
944 // Any blob with zero area is a fake image blob and should be deleted.
945 delete blob->cblob();
946 delete blob;
947 }
948 }
949 } else if (any_blobs_moved) {
950 gsearch.RemoveBBox();
951 part->ComputeLimits();
952 InsertBBox(true, true, part);
953 gsearch.RepositionIterator();
954 }
955 }
956}
#define ASSERT_HOST(x)
Definition: errcode.h:54

◆ SetTabStops()

void tesseract::ColPartitionGrid::SetTabStops ( TabFind *  tabgrid)

Definition at line 775 of file colpartitiongrid.cpp.

775 {
776 // Iterate the ColPartitions in the grid.
777 ColPartitionGridSearch gsearch(this);
778 gsearch.StartFullSearch();
779 ColPartition *part;
780 while ((part = gsearch.NextFullSearch()) != nullptr) {
781 const TBOX &part_box = part->bounding_box();
782 TabVector *left_line = tabgrid->LeftTabForBox(part_box, true, false);
783 // If the overlapping line is not a left tab, try for non-overlapping.
784 if (left_line != nullptr && !left_line->IsLeftTab()) {
785 left_line = tabgrid->LeftTabForBox(part_box, false, false);
786 }
787 if (left_line != nullptr && left_line->IsLeftTab()) {
788 part->SetLeftTab(left_line);
789 }
790 TabVector *right_line = tabgrid->RightTabForBox(part_box, true, false);
791 if (right_line != nullptr && !right_line->IsRightTab()) {
792 right_line = tabgrid->RightTabForBox(part_box, false, false);
793 }
794 if (right_line != nullptr && right_line->IsRightTab()) {
795 part->SetRightTab(right_line);
796 }
797 part->SetColumnGoodness(tabgrid->WidthCB());
798 }
799}

◆ SplitOverlappingPartitions()

void tesseract::ColPartitionGrid::SplitOverlappingPartitions ( ColPartition_LIST *  big_parts)

Definition at line 523 of file colpartitiongrid.cpp.

524 {
525 int ok_overlap =
526 static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
527 // Iterate the ColPartitions in the grid.
528 ColPartitionGridSearch gsearch(this);
529 gsearch.StartFullSearch();
530 ColPartition *part;
531 while ((part = gsearch.NextFullSearch()) != nullptr) {
532 // Set up a rectangle search bounded by the part.
533 const TBOX &box = part->bounding_box();
534 ColPartitionGridSearch rsearch(this);
535 rsearch.SetUniqueMode(true);
536 rsearch.StartRectSearch(box);
537 int unresolved_overlaps = 0;
538
539 ColPartition *neighbour;
540 while ((neighbour = rsearch.NextRectSearch()) != nullptr) {
541 if (neighbour == part) {
542 continue;
543 }
544 const TBOX &neighbour_box = neighbour->bounding_box();
545 if (neighbour->OKMergeOverlap(*part, *part, ok_overlap, false) &&
546 part->OKMergeOverlap(*neighbour, *neighbour, ok_overlap, false)) {
547 continue; // The overlap is OK both ways.
548 }
549
550 // If removal of the biggest box from either partition eliminates the
551 // overlap, and it is much bigger than the box left behind, then
552 // it is either a drop-cap, an inter-line join, or some junk that
553 // we don't want anyway, so put it in the big_parts list.
554 if (!part->IsSingleton()) {
555 BLOBNBOX *excluded = part->BiggestBox();
556 TBOX shrunken = part->BoundsWithoutBox(excluded);
557 if (!shrunken.overlap(neighbour_box) &&
558 excluded->bounding_box().height() >
559 kBigPartSizeRatio * shrunken.height()) {
560 // Removing the biggest box fixes the overlap, so do it!
561 gsearch.RemoveBBox();
562 RemoveBadBox(excluded, part, big_parts);
563 InsertBBox(true, true, part);
564 gsearch.RepositionIterator();
565 break;
566 }
567 } else if (box.contains(neighbour_box)) {
568 ++unresolved_overlaps;
569 continue; // No amount of splitting will fix it.
570 }
571 if (!neighbour->IsSingleton()) {
572 BLOBNBOX *excluded = neighbour->BiggestBox();
573 TBOX shrunken = neighbour->BoundsWithoutBox(excluded);
574 if (!shrunken.overlap(box) &&
575 excluded->bounding_box().height() >
576 kBigPartSizeRatio * shrunken.height()) {
577 // Removing the biggest box fixes the overlap, so do it!
578 rsearch.RemoveBBox();
579 RemoveBadBox(excluded, neighbour, big_parts);
580 InsertBBox(true, true, neighbour);
581 gsearch.RepositionIterator();
582 break;
583 }
584 }
585 int part_overlap_count = part->CountOverlappingBoxes(neighbour_box);
586 int neighbour_overlap_count = neighbour->CountOverlappingBoxes(box);
587 ColPartition *right_part = nullptr;
588 if (neighbour_overlap_count <= part_overlap_count ||
589 part->IsSingleton()) {
590 // Try to split the neighbour to reduce overlap.
591 BLOBNBOX *split_blob = neighbour->OverlapSplitBlob(box);
592 if (split_blob != nullptr) {
593 rsearch.RemoveBBox();
594 right_part = neighbour->SplitAtBlob(split_blob);
595 InsertBBox(true, true, neighbour);
596 ASSERT_HOST(right_part != nullptr);
597 }
598 } else {
599 // Try to split part to reduce overlap.
600 BLOBNBOX *split_blob = part->OverlapSplitBlob(neighbour_box);
601 if (split_blob != nullptr) {
602 gsearch.RemoveBBox();
603 right_part = part->SplitAtBlob(split_blob);
604 InsertBBox(true, true, part);
605 ASSERT_HOST(right_part != nullptr);
606 }
607 }
608 if (right_part != nullptr) {
609 InsertBBox(true, true, right_part);
610 gsearch.RepositionIterator();
611 rsearch.RepositionIterator();
612 break;
613 }
614 }
615 if (unresolved_overlaps > 2 && part->IsSingleton()) {
616 // This part is no good so just add to big_parts.
617 RemoveBBox(part);
618 ColPartition_IT big_it(big_parts);
619 part->set_block_owned(true);
620 big_it.add_to_end(part);
621 gsearch.RepositionIterator();
622 }
623 }
624}
const double kBigPartSizeRatio

The documentation for this class was generated from the following files: