tesseract v5.3.3.20231005
tesseract::ColPartition Class Reference

#include <colpartition.h>

Inheritance diagram for tesseract::ColPartition:
tesseract::ELIST2_LINK tesseract::TestableColPartition

Public Member Functions

 ColPartition ()=default
 
 ColPartition (BlobRegionType blob_type, const ICOORD &vertical)
 
 ~ColPartition ()
 
const TBOX & bounding_box () const
 
int left_margin () const
 
void set_left_margin (int margin)
 
int right_margin () const
 
void set_right_margin (int margin)
 
int median_top () const
 
int median_bottom () const
 
int median_left () const
 
int median_right () const
 
int median_height () const
 
void set_median_height (int height)
 
int median_width () const
 
void set_median_width (int width)
 
BlobRegionType blob_type () const
 
void set_blob_type (BlobRegionType t)
 
BlobTextFlowType flow () const
 
void set_flow (BlobTextFlowType f)
 
int good_blob_score () const
 
bool good_width () const
 
bool good_column () const
 
bool left_key_tab () const
 
int left_key () const
 
bool right_key_tab () const
 
int right_key () const
 
PolyBlockType type () const
 
void set_type (PolyBlockType t)
 
BLOBNBOX_CLIST * boxes ()
 
int boxes_count () const
 
void set_vertical (const ICOORD &v)
 
ColPartition_CLIST * upper_partners ()
 
ColPartition_CLIST * lower_partners ()
 
void set_working_set (WorkingPartSet *working_set)
 
bool block_owned () const
 
void set_block_owned (bool owned)
 
bool desperately_merged () const
 
ColPartitionSet * column_set () const
 
void set_side_step (int step)
 
int bottom_spacing () const
 
void set_bottom_spacing (int spacing)
 
int top_spacing () const
 
void set_top_spacing (int spacing)
 
void set_table_type ()
 
void clear_table_type ()
 
bool inside_table_column ()
 
void set_inside_table_column (bool val)
 
ColPartition * nearest_neighbor_above () const
 
void set_nearest_neighbor_above (ColPartition *part)
 
ColPartition * nearest_neighbor_below () const
 
void set_nearest_neighbor_below (ColPartition *part)
 
int space_above () const
 
void set_space_above (int space)
 
int space_below () const
 
void set_space_below (int space)
 
int space_to_left () const
 
void set_space_to_left (int space)
 
int space_to_right () const
 
void set_space_to_right (int space)
 
uint8_t * color1 ()
 
uint8_t * color2 ()
 
bool owns_blobs () const
 
void set_owns_blobs (bool owns_blobs)
 
int MidY () const
 
int MedianY () const
 
int MidX () const
 
int SortKey (int x, int y) const
 
int XAtY (int sort_key, int y) const
 
int KeyWidth (int left_key, int right_key) const
 
int ColumnWidth () const
 
int BoxLeftKey () const
 
int BoxRightKey () const
 
int LeftAtY (int y) const
 
int RightAtY (int y) const
 
bool IsLeftOf (const ColPartition &other) const
 
bool ColumnContains (int x, int y) const
 
bool IsEmpty () const
 
bool IsSingleton () const
 
bool HOverlaps (const ColPartition &other) const
 
bool VOverlaps (const ColPartition &other) const
 
int VCoreOverlap (const ColPartition &other) const
 
int HCoreOverlap (const ColPartition &other) const
 
bool VSignificantCoreOverlap (const ColPartition &other) const
 
bool WithinSameMargins (const ColPartition &other) const
 
bool TypesMatch (const ColPartition &other) const
 
bool IsLineType () const
 
bool IsImageType () const
 
bool IsTextType () const
 
bool IsPulloutType () const
 
bool IsVerticalType () const
 
bool IsHorizontalType () const
 
bool IsUnMergeableType () const
 
bool IsVerticalLine () const
 
bool IsHorizontalLine () const
 
void AddBox (BLOBNBOX *box)
 
void RemoveBox (BLOBNBOX *box)
 
BLOBNBOX * BiggestBox ()
 
TBOX BoundsWithoutBox (BLOBNBOX *box)
 
void ClaimBoxes ()
 
void DisownBoxes ()
 
void DisownBoxesNoAssert ()
 
bool ReleaseNonLeaderBoxes ()
 
void DeleteBoxes ()
 
void ReflectInYAxis ()
 
bool IsLegal ()
 
bool MatchingColumns (const ColPartition &other) const
 
bool MatchingTextColor (const ColPartition &other) const
 
bool MatchingSizes (const ColPartition &other) const
 
bool ConfirmNoTabViolation (const ColPartition &other) const
 
bool MatchingStrokeWidth (const ColPartition &other, double fractional_tolerance, double constant_tolerance) const
 
bool OKDiacriticMerge (const ColPartition &candidate, bool debug) const
 
void SetLeftTab (const TabVector *tab_vector)
 
void SetRightTab (const TabVector *tab_vector)
 
void CopyLeftTab (const ColPartition &src, bool take_box)
 
void CopyRightTab (const ColPartition &src, bool take_box)
 
int LeftBlobRule () const
 
int RightBlobRule () const
 
float SpecialBlobsDensity (const BlobSpecialTextType type) const
 
int SpecialBlobsCount (const BlobSpecialTextType type)
 
void SetSpecialBlobsDensity (const BlobSpecialTextType type, const float density)
 
void ComputeSpecialBlobsDensity ()
 
void AddPartner (bool upper, ColPartition *partner)
 
void RemovePartner (bool upper, ColPartition *partner)
 
ColPartition * SingletonPartner (bool upper)
 
void Absorb (ColPartition *other, const WidthCallback &cb)
 
bool OKMergeOverlap (const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug)
 
BLOBNBOX * OverlapSplitBlob (const TBOX &box)
 
ColPartition * SplitAtBlob (BLOBNBOX *split_blob)
 
ColPartition * SplitAt (int split_x)
 
void ComputeLimits ()
 
int CountOverlappingBoxes (const TBOX &box)
 
void SetPartitionType (int resolution, ColPartitionSet *columns)
 
PolyBlockType PartitionType (ColumnSpanningType flow) const
 
void ColumnRange (int resolution, ColPartitionSet *columns, int *first_col, int *last_col)
 
void SetColumnGoodness (const WidthCallback &cb)
 
bool MarkAsLeaderIfMonospaced ()
 
void SetRegionAndFlowTypesFromProjectionValue (int value)
 
void SetBlobTypes ()
 
bool HasGoodBaseline ()
 
void AddToWorkingSet (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
 
TO_ROW * MakeToRow ()
 
ColPartition * ShallowCopy () const
 
ColPartition * CopyButDontOwnBlobs ()
 
ScrollView::Color BoxColor () const
 
void Print () const
 
void PrintColors ()
 
void SmoothPartnerRun (int working_set_count)
 
void RefinePartners (PolyBlockType type, bool get_desperate, ColPartitionGrid *grid)
 
bool IsInSameColumnAs (const ColPartition &part) const
 
void set_first_column (int column)
 
void set_last_column (int column)
 
- Public Member Functions inherited from tesseract::ELIST2_LINK
 ELIST2_LINK ()
 
 ELIST2_LINK (const ELIST2_LINK &)=delete
 
void operator= (const ELIST2_LINK &)
 

Static Public Member Functions

static ColPartition * MakeLinePartition (BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top)
 
static ColPartition * FakePartition (const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
 
static ColPartition * MakeBigPartition (BLOBNBOX *box, ColPartition_LIST *big_part_list)
 
static bool TypesMatch (BlobRegionType type1, BlobRegionType type2)
 
static bool TypesSimilar (PolyBlockType type1, PolyBlockType type2)
 
static void LineSpacingBlocks (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks)
 
static TO_BLOCK * MakeBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
 
static TO_BLOCK * MakeVerticalTextBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
 
static int SortByBBox (const void *p1, const void *p2)
 

Detailed Description

ColPartition is a partition of a horizontal slice of the page. It starts out as a collection of blobs at a particular y-coord in the grid, but ends up (after merging and uniquing) as an approximate text line. ColPartitions are also used to hold a partitioning of the page into columns, each representing one column. Although a ColPartition applies to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions emerges, which represents the columns over a wide y-coordinate range.

Definition at line 67 of file colpartition.h.

Constructor & Destructor Documentation

◆ ColPartition() [1/2]

tesseract::ColPartition::ColPartition ( )
default

◆ ColPartition() [2/2]

tesseract::ColPartition::ColPartition ( BlobRegionType  blob_type,
const ICOORD & vertical 
)
Parameters
blob_type is the blob_region_type_ of the blobs in this partition.
vertical is the direction of logical vertical on the possibly skewed image.

Definition at line 91 of file colpartition.cpp.

92 : left_margin_(-INT32_MAX),
93 right_margin_(INT32_MAX),
94 median_bottom_(INT32_MAX),
95 median_top_(-INT32_MAX),
96 median_left_(INT32_MAX),
97 median_right_(-INT32_MAX),
98 blob_type_(blob_type),
99 vertical_(vertical) {
100 memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
101}
BlobRegionType blob_type() const
Definition: colpartition.h:147

◆ ~ColPartition()

tesseract::ColPartition::~ColPartition ( )

Definition at line 146 of file colpartition.cpp.

146 {
147 // Remove this as a partner of all partners, as we don't want them
148 // referring to a deleted object.
149 ColPartition_C_IT it(&upper_partners_);
150 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
151 it.data()->RemovePartner(false, this);
152 }
153 it.set_to_list(&lower_partners_);
154 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
155 it.data()->RemovePartner(true, this);
156 }
157}

Member Function Documentation

◆ Absorb()

void tesseract::ColPartition::Absorb ( ColPartition * other,
const WidthCallback & cb 
)

Definition at line 674 of file colpartition.cpp.

674 {
675 // The result has to either own all of the blobs or none of them.
676 // Verify the flag is consistent.
677 ASSERT_HOST(owns_blobs() == other->owns_blobs());
678 // TODO(nbeato): check owns_blobs better. Right now owns_blobs
679 // should always be true when this is called. So there is no issues.
680 if (TabFind::WithinTestRegion(2, bounding_box_.left(),
681 bounding_box_.bottom()) ||
682 TabFind::WithinTestRegion(2, other->bounding_box_.left(),
683 other->bounding_box_.bottom())) {
684 tprintf("Merging:");
685 Print();
686 other->Print();
687 }
688
689 // Update the special_blobs_densities_.
690 memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
691 for (int type = 0; type < BSTT_COUNT; ++type) {
692 unsigned w1 = boxes_.length();
693 unsigned w2 = other->boxes_.length();
694 float new_val = special_blobs_densities_[type] * w1 +
695 other->special_blobs_densities_[type] * w2;
696 if (!w1 || !w2) {
697 ASSERT_HOST((w1 + w2) > 0);
698 special_blobs_densities_[type] = new_val / (w1 + w2);
699 }
700 }
701
702 // Merge the two sorted lists.
703 BLOBNBOX_C_IT it(&boxes_);
704 BLOBNBOX_C_IT it2(&other->boxes_);
705 for (; !it2.empty(); it2.forward()) {
706 BLOBNBOX *bbox2 = it2.extract();
707 ColPartition *prev_owner = bbox2->owner();
708 if (prev_owner != other && prev_owner != nullptr) {
709 // A blob on other's list is owned by someone else; let them have it.
710 continue;
711 }
712 ASSERT_HOST(prev_owner == other || prev_owner == nullptr);
713 if (prev_owner == other) {
714 bbox2->set_owner(this);
715 }
716 it.add_to_end(bbox2);
717 }
718 left_margin_ = std::min(left_margin_, other->left_margin_);
719 right_margin_ = std::max(right_margin_, other->right_margin_);
720 if (other->left_key_ < left_key_) {
721 left_key_ = other->left_key_;
722 left_key_tab_ = other->left_key_tab_;
723 }
724 if (other->right_key_ > right_key_) {
725 right_key_ = other->right_key_;
726 right_key_tab_ = other->right_key_tab_;
727 }
728 // Combine the flow and blob_type in a sensible way.
729 // Dominant flows stay.
730 if (!DominatesInMerge(flow_, other->flow_)) {
731 flow_ = other->flow_;
732 blob_type_ = other->blob_type_;
733 }
734 SetBlobTypes();
735 if (IsVerticalType()) {
736 boxes_.sort(SortByBoxBottom<BLOBNBOX>);
737 last_add_was_vertical_ = true;
738 } else {
739 boxes_.sort(SortByBoxLeft<BLOBNBOX>);
740 last_add_was_vertical_ = false;
741 }
742 ComputeLimits();
743 // Fix partner lists. other is going away, so remove it as a
744 // partner of all its partners and add this in its place.
745 for (int upper = 0; upper < 2; ++upper) {
746 ColPartition_CLIST partners;
747 ColPartition_C_IT part_it(&partners);
748 part_it.add_list_after(upper ? &other->upper_partners_
749 : &other->lower_partners_);
750 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
751 ColPartition *partner = part_it.extract();
752 partner->RemovePartner(!upper, other);
753 partner->RemovePartner(!upper, this);
754 partner->AddPartner(!upper, this);
755 }
756 }
757 delete other;
758 if (cb != nullptr) {
759 SetColumnGoodness(cb);
760 }
761}
#define ASSERT_HOST(x)
Definition: errcode.h:54
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2)
Definition: blobbox.h:125
@ BSTT_COUNT
Definition: blobbox.h:99
TDimension left() const
Definition: rect.h:82
TDimension bottom() const
Definition: rect.h:75
static bool WithinTestRegion(int detail_level, int x, int y)
PolyBlockType type() const
Definition: colpartition.h:180
void SetColumnGoodness(const WidthCallback &cb)
bool IsVerticalType() const
Definition: colpartition.h:441

◆ AddBox()

void tesseract::ColPartition::AddBox ( BLOBNBOX * box)

Definition at line 180 of file colpartition.cpp.

180 {
181 TBOX box = bbox->bounding_box();
182 // Update the partition limits.
183 if (boxes_.empty()) {
184 bounding_box_ = box;
185 } else {
186 bounding_box_ += box;
187 }
188
189 if (IsVerticalType()) {
190 if (!last_add_was_vertical_) {
191 boxes_.sort(SortByBoxBottom<BLOBNBOX>);
192 last_add_was_vertical_ = true;
193 }
194 boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>, true, bbox);
195 } else {
196 if (last_add_was_vertical_) {
197 boxes_.sort(SortByBoxLeft<BLOBNBOX>);
198 last_add_was_vertical_ = false;
199 }
200 boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>, true, bbox);
201 }
202 if (!left_key_tab_) {
203 left_key_ = BoxLeftKey();
204 }
205 if (!right_key_tab_) {
206 right_key_ = BoxRightKey();
207 }
208 if (TabFind::WithinTestRegion(2, box.left(), box.bottom())) {
209 tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
210 box.left(), box.bottom(), box.right(), box.top(),
211 bounding_box_.left(), bounding_box_.right());
212 }
213}
@ TBOX
TDimension right() const
Definition: rect.h:89

◆ AddPartner()

void tesseract::ColPartition::AddPartner ( bool  upper,
ColPartition * partner 
)

Definition at line 638 of file colpartition.cpp.

638 {
639 if (upper) {
640 partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, true,
641 this);
642 upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
643 } else {
644 partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, true,
645 this);
646 lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
647 }
648}

◆ AddToWorkingSet()

void tesseract::ColPartition::AddToWorkingSet ( const ICOORD & bleft,
const ICOORD & tright,
int  resolution,
ColPartition_LIST *  used_parts,
WorkingPartSet_LIST *  working_set 
)

Definition at line 1411 of file colpartition.cpp.

1414 {
1415 if (block_owned_) {
1416 return; // Done it already.
1417 }
1418 block_owned_ = true;
1419 WorkingPartSet_IT it(working_sets);
1420 // If there is an upper partner use its working_set_ directly.
1421 ColPartition *partner = SingletonPartner(true);
1422 if (partner != nullptr && partner->working_set_ != nullptr) {
1423 working_set_ = partner->working_set_;
1424 working_set_->AddPartition(this);
1425 return;
1426 }
1427 if (partner != nullptr && textord_debug_bugs) {
1428 tprintf("Partition with partner has no working set!:");
1429 Print();
1430 partner->Print();
1431 }
1432 // Search for the column that the left edge fits in.
1433 WorkingPartSet *work_set = nullptr;
1434 it.move_to_first();
1435 int col_index = 0;
1436 for (it.mark_cycle_pt(); !it.cycled_list() && col_index != first_column_;
1437 it.forward(), ++col_index) {
1438 ;
1439 }
1440 if (textord_debug_tabfind >= 2) {
1441 tprintf("Match is %s for:", (col_index & 1) ? "Real" : "Between");
1442 Print();
1443 }
1444 if (it.cycled_list() && textord_debug_bugs) {
1445 tprintf("Target column=%d, only had %d\n", first_column_, col_index);
1446 }
1447 ASSERT_HOST(!it.cycled_list());
1448 work_set = it.data();
1449 // If last_column_ != first_column, then we need to scoop up all blocks
1450 // between here and the last_column_ and put back in work_set.
1451 if (!it.cycled_list() && last_column_ != first_column_ && !IsPulloutType()) {
1452 // Find the column that the right edge falls in.
1453 BLOCK_LIST completed_blocks;
1454 TO_BLOCK_LIST to_blocks;
1455 for (; !it.cycled_list() && col_index <= last_column_;
1456 it.forward(), ++col_index) {
1457 WorkingPartSet *end_set = it.data();
1458 end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
1459 &completed_blocks, &to_blocks);
1460 }
1461 work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
1462 }
1463 working_set_ = work_set;
1464 work_set->AddPartition(this);
1465}
int textord_debug_tabfind
Definition: alignedblob.cpp:29
int textord_debug_bugs
Definition: alignedblob.cpp:30
bool IsPulloutType() const
Definition: colpartition.h:437
ColPartition * SingletonPartner(bool upper)
void AddPartition(ColPartition *part)

◆ BiggestBox()

BLOBNBOX * tesseract::ColPartition::BiggestBox ( )

Definition at line 229 of file colpartition.cpp.

229 {
230 BLOBNBOX *biggest = nullptr;
231 BLOBNBOX_C_IT bb_it(&boxes_);
232 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
233 BLOBNBOX *bbox = bb_it.data();
234 if (IsVerticalType()) {
235 if (biggest == nullptr ||
236 bbox->bounding_box().width() > biggest->bounding_box().width()) {
237 biggest = bbox;
238 }
239 } else {
240 if (biggest == nullptr ||
241 bbox->bounding_box().height() > biggest->bounding_box().height()) {
242 biggest = bbox;
243 }
244 }
245 }
246 return biggest;
247}

◆ blob_type()

BlobRegionType tesseract::ColPartition::blob_type ( ) const
inline

Definition at line 147 of file colpartition.h.

147 {
148 return blob_type_;
149 }

◆ block_owned()

bool tesseract::ColPartition::block_owned ( ) const
inline

Definition at line 204 of file colpartition.h.

204 {
205 return block_owned_;
206 }

◆ bottom_spacing()

int tesseract::ColPartition::bottom_spacing ( ) const
inline

Definition at line 219 of file colpartition.h.

219 {
220 return bottom_spacing_;
221 }

◆ bounding_box()

const TBOX & tesseract::ColPartition::bounding_box ( ) const
inline

Definition at line 108 of file colpartition.h.

108 {
109 return bounding_box_;
110 }

◆ BoundsWithoutBox()

TBOX tesseract::ColPartition::BoundsWithoutBox ( BLOBNBOX * box)

Definition at line 250 of file colpartition.cpp.

250 {
251 TBOX result;
252 BLOBNBOX_C_IT bb_it(&boxes_);
253 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
254 if (box != bb_it.data()) {
255 result += bb_it.data()->bounding_box();
256 }
257 }
258 return result;
259}

◆ BoxColor()

ScrollView::Color tesseract::ColPartition::BoxColor ( ) const

Definition at line 1850 of file colpartition.cpp.

1850 {
1851 if (type_ == PT_UNKNOWN) {
1852 return BLOBNBOX::TextlineColor(blob_type_, flow_);
1853 }
1854 return POLY_BLOCK::ColorForPolyBlockType(type_);
1855}
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
Definition: blobbox.cpp:442
static ScrollView::Color ColorForPolyBlockType(PolyBlockType type)
Returns a color to draw the given type.
Definition: polyblk.cpp:389

◆ boxes()

BLOBNBOX_CLIST * tesseract::ColPartition::boxes ( )
inline

Definition at line 186 of file colpartition.h.

186 {
187 return &boxes_;
188 }

◆ boxes_count()

int tesseract::ColPartition::boxes_count ( ) const
inline

Definition at line 189 of file colpartition.h.

189 {
190 return boxes_.length();
191 }

◆ BoxLeftKey()

int tesseract::ColPartition::BoxLeftKey ( ) const
inline

Definition at line 332 of file colpartition.h.

332 {
333 return SortKey(bounding_box_.left(), MidY());
334 }
int SortKey(int x, int y) const
Definition: colpartition.h:316

◆ BoxRightKey()

int tesseract::ColPartition::BoxRightKey ( ) const
inline

Definition at line 336 of file colpartition.h.

336 {
337 return SortKey(bounding_box_.right(), MidY());
338 }

◆ ClaimBoxes()

void tesseract::ColPartition::ClaimBoxes ( )

Definition at line 263 of file colpartition.cpp.

263 {
264 BLOBNBOX_C_IT bb_it(&boxes_);
265 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
266 BLOBNBOX *bblob = bb_it.data();
267 ColPartition *other = bblob->owner();
268 if (other == nullptr) {
269 // Normal case: ownership is available.
270 bblob->set_owner(this);
271 } else {
272 ASSERT_HOST(other == this);
273 }
274 }
275}

◆ clear_table_type()

void tesseract::ColPartition::clear_table_type ( )
inline

Definition at line 238 of file colpartition.h.

238 {
239 if (type_ == PT_TABLE) {
240 type_ = type_before_table_;
241 }
242 }

◆ color1()

uint8_t * tesseract::ColPartition::color1 ( )
inline

Definition at line 285 of file colpartition.h.

285 {
286 return color1_;
287 }

◆ color2()

uint8_t * tesseract::ColPartition::color2 ( )
inline

Definition at line 288 of file colpartition.h.

288 {
289 return color2_;
290 }

◆ column_set()

ColPartitionSet * tesseract::ColPartition::column_set ( ) const
inline

Definition at line 213 of file colpartition.h.

213 {
214 return column_set_;
215 }

◆ ColumnContains()

bool tesseract::ColPartition::ColumnContains ( int  x,
int  y 
) const
inline

Definition at line 353 of file colpartition.h.

353 {
354 return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1;
355 }
const double y
int LeftAtY(int y) const
Definition: colpartition.h:340
int RightAtY(int y) const
Definition: colpartition.h:344

◆ ColumnRange()

void tesseract::ColPartition::ColumnRange ( int  resolution,
ColPartitionSet * columns,
int *  first_col,
int *  last_col 
)

Definition at line 1107 of file colpartition.cpp.

1108 {
1109 int first_spanned_col = -1;
1110 ColumnSpanningType span_type = columns->SpanningType(
1111 resolution, bounding_box_.left(), bounding_box_.right(),
1112 std::min(bounding_box_.height(), bounding_box_.width()), MidY(),
1113 left_margin_, right_margin_, first_col, last_col, &first_spanned_col);
1114 type_ = PartitionType(span_type);
1115}
TDimension height() const
Definition: rect.h:118
TDimension width() const
Definition: rect.h:126
PolyBlockType PartitionType(ColumnSpanningType flow) const

◆ ColumnWidth()

int tesseract::ColPartition::ColumnWidth ( ) const
inline

Definition at line 328 of file colpartition.h.

328 {
329 return KeyWidth(left_key_, right_key_);
330 }
int KeyWidth(int left_key, int right_key) const
Definition: colpartition.h:324

◆ ComputeLimits()

void tesseract::ColPartition::ComputeLimits ( )

Definition at line 905 of file colpartition.cpp.

905 {
906 bounding_box_ = TBOX(); // Clear it
907 BLOBNBOX_C_IT it(&boxes_);
908 BLOBNBOX *bbox = nullptr;
909 int non_leader_count = 0;
910 if (it.empty()) {
911 bounding_box_.set_left(left_margin_);
912 bounding_box_.set_right(right_margin_);
913 bounding_box_.set_bottom(0);
914 bounding_box_.set_top(0);
915 } else {
916 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
917 bbox = it.data();
918 bounding_box_ += bbox->bounding_box();
919 if (bbox->flow() != BTFT_LEADER) {
920 ++non_leader_count;
921 }
922 }
923 }
924 if (!left_key_tab_) {
925 left_key_ = BoxLeftKey();
926 }
927 if (left_key_ > BoxLeftKey() && textord_debug_bugs) {
928 // TODO(rays) investigate the causes of these error messages, to find
929 // out if they are genuinely harmful, or just indicative of junk input.
930 tprintf("Computed left-illegal partition\n");
931 Print();
932 }
933 if (!right_key_tab_) {
934 right_key_ = BoxRightKey();
935 }
936 if (right_key_ < BoxRightKey() && textord_debug_bugs) {
937 tprintf("Computed right-illegal partition\n");
938 Print();
939 }
940 if (it.empty()) {
941 return;
942 }
943 if (IsImageType() || blob_type() == BRT_RECTIMAGE ||
944 blob_type() == BRT_POLYIMAGE) {
945 median_top_ = bounding_box_.top();
946 median_bottom_ = bounding_box_.bottom();
947 median_height_ = bounding_box_.height();
948 median_left_ = bounding_box_.left();
949 median_right_ = bounding_box_.right();
950 median_width_ = bounding_box_.width();
951 } else {
952 STATS top_stats(bounding_box_.bottom(), bounding_box_.top());
953 STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top());
954 STATS height_stats(0, bounding_box_.height());
955 STATS left_stats(bounding_box_.left(), bounding_box_.right());
956 STATS right_stats(bounding_box_.left(), bounding_box_.right());
957 STATS width_stats(0, bounding_box_.width());
958 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
959 bbox = it.data();
960 if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) {
961 const TBOX &box = bbox->bounding_box();
962 int area = box.area();
963 top_stats.add(box.top(), area);
964 bottom_stats.add(box.bottom(), area);
965 height_stats.add(box.height(), area);
966 left_stats.add(box.left(), area);
967 right_stats.add(box.right(), area);
968 width_stats.add(box.width(), area);
969 }
970 }
971 median_top_ = static_cast<int>(top_stats.median() + 0.5);
972 median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
973 median_height_ = static_cast<int>(height_stats.median() + 0.5);
974 median_left_ = static_cast<int>(left_stats.median() + 0.5);
975 median_right_ = static_cast<int>(right_stats.median() + 0.5);
976 median_width_ = static_cast<int>(width_stats.median() + 0.5);
977 }
978
979 if (right_margin_ < bounding_box_.right() && textord_debug_bugs) {
980 tprintf("Made partition with bad right coords, %d < %d\n", right_margin_,
981 bounding_box_.right());
982 Print();
983 }
984 if (left_margin_ > bounding_box_.left() && textord_debug_bugs) {
985 tprintf("Made partition with bad left coords, %d > %d\n", left_margin_,
986 bounding_box_.left());
987 Print();
988 }
989 // Fix partner lists. The bounding box has changed and partners are stored
990 // in bounding box order, so remove and reinsert this as a partner
991 // of all its partners.
992 for (int upper = 0; upper < 2; ++upper) {
993 ColPartition_CLIST partners;
994 ColPartition_C_IT part_it(&partners);
995 part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
996 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
997 ColPartition *partner = part_it.extract();
998 partner->RemovePartner(!upper, this);
999 partner->AddPartner(!upper, this);
1000 }
1001 }
1002 if (TabFind::WithinTestRegion(2, bounding_box_.left(),
1003 bounding_box_.bottom())) {
1004 tprintf("Recomputed box for partition %p\n", static_cast<void *>(this));
1005 Print();
1006 }
1007}
@ BRT_POLYIMAGE
Definition: blobbox.h:79
@ BRT_RECTIMAGE
Definition: blobbox.h:78
@ BTFT_LEADER
Definition: blobbox.h:117
void set_right(int x)
Definition: rect.h:92
void set_left(int x)
Definition: rect.h:85
TDimension top() const
Definition: rect.h:68
void set_bottom(int y)
Definition: rect.h:78
void set_top(int y)
Definition: rect.h:71
bool IsImageType() const
Definition: colpartition.h:429

◆ ComputeSpecialBlobsDensity()

void tesseract::ColPartition::ComputeSpecialBlobsDensity ( )

Definition at line 617 of file colpartition.cpp.

617 {
618 memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
619 if (boxes_.empty()) {
620 return;
621 }
622
623 BLOBNBOX_C_IT blob_it(&boxes_);
624 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
625 BLOBNBOX *blob = blob_it.data();
626 BlobSpecialTextType type = blob->special_text_type();
627 special_blobs_densities_[type]++;
628 }
629
630 for (float &special_blobs_density : special_blobs_densities_) {
631 special_blobs_density /= boxes_.length();
632 }
633}
BlobSpecialTextType
Definition: blobbox.h:92

◆ ConfirmNoTabViolation()

bool tesseract::ColPartition::ConfirmNoTabViolation ( const ColPartition & other) const

Definition at line 437 of file colpartition.cpp.

437 {
438 if (bounding_box_.right() < other.bounding_box_.left() &&
439 bounding_box_.right() < other.LeftBlobRule()) {
440 return false;
441 }
442 if (other.bounding_box_.right() < bounding_box_.left() &&
443 other.bounding_box_.right() < LeftBlobRule()) {
444 return false;
445 }
446 if (bounding_box_.left() > other.bounding_box_.right() &&
447 bounding_box_.left() > other.RightBlobRule()) {
448 return false;
449 }
450 if (other.bounding_box_.left() > bounding_box_.right() &&
451 other.bounding_box_.left() > RightBlobRule()) {
452 return false;
453 }
454 return true;
455}

◆ CopyButDontOwnBlobs()

ColPartition * tesseract::ColPartition::CopyButDontOwnBlobs ( )

Definition at line 1835 of file colpartition.cpp.

1835 {
1836 ColPartition *copy = ShallowCopy();
1837 copy->set_owns_blobs(false);
1838 BLOBNBOX_C_IT inserter(copy->boxes());
1839 BLOBNBOX_C_IT traverser(boxes());
1840 for (traverser.mark_cycle_pt(); !traverser.cycled_list();
1841 traverser.forward()) {
1842 inserter.add_after_then_move(traverser.data());
1843 }
1844 return copy;
1845}
BLOBNBOX_CLIST * boxes()
Definition: colpartition.h:186
ColPartition * ShallowCopy() const

◆ CopyLeftTab()

void tesseract::ColPartition::CopyLeftTab ( const ColPartition & src,
bool  take_box 
)

Definition at line 552 of file colpartition.cpp.

552 {
553 left_key_tab_ = take_box ? false : src.left_key_tab_;
554 if (left_key_tab_) {
555 left_key_ = src.left_key_;
556 } else {
557 bounding_box_.set_left(XAtY(src.BoxLeftKey(), MidY()));
558 left_key_ = BoxLeftKey();
559 }
560 if (left_margin_ > bounding_box_.left()) {
561 left_margin_ = src.left_margin_;
562 }
563}
int XAtY(int sort_key, int y) const
Definition: colpartition.h:320

◆ CopyRightTab()

void tesseract::ColPartition::CopyRightTab ( const ColPartition & src,
bool  take_box 
)

Definition at line 566 of file colpartition.cpp.

566 {
567 right_key_tab_ = take_box ? false : src.right_key_tab_;
568 if (right_key_tab_) {
569 right_key_ = src.right_key_;
570 } else {
571 bounding_box_.set_right(XAtY(src.BoxRightKey(), MidY()));
572 right_key_ = BoxRightKey();
573 }
574 if (right_margin_ < bounding_box_.right()) {
575 right_margin_ = src.right_margin_;
576 }
577}

◆ CountOverlappingBoxes()

int tesseract::ColPartition::CountOverlappingBoxes ( const TBOX & box)

Definition at line 1010 of file colpartition.cpp.

1010 {
1011 BLOBNBOX_C_IT it(&boxes_);
1012 int overlap_count = 0;
1013 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1014 BLOBNBOX *bbox = it.data();
1015 if (box.overlap(bbox->bounding_box())) {
1016 ++overlap_count;
1017 }
1018 }
1019 return overlap_count;
1020}

◆ DeleteBoxes()

void tesseract::ColPartition::DeleteBoxes ( )

Definition at line 326 of file colpartition.cpp.

326 {
327 // Although the boxes_ list is a C_LIST, in some cases it owns the
328 // BLOBNBOXes, as the ColPartition takes ownership from the grid,
329 // and the BLOBNBOXes own the underlying C_BLOBs.
330 for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) {
331 BLOBNBOX *bblob = bb_it.extract();
332 // TODO: remove next line, currently still needed for resultiterator_test.
333 delete bblob->remove_cblob();
334 delete bblob;
335 }
336}

◆ desperately_merged()

bool tesseract::ColPartition::desperately_merged ( ) const
inline

Definition at line 210 of file colpartition.h.

210 {
211 return desperately_merged_;
212 }

◆ DisownBoxes()

void tesseract::ColPartition::DisownBoxes ( )

Definition at line 279 of file colpartition.cpp.

279 {
280 BLOBNBOX_C_IT bb_it(&boxes_);
281 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
282 BLOBNBOX *bblob = bb_it.data();
283 ASSERT_HOST(bblob->owner() == this || bblob->owner() == nullptr);
284 bblob->set_owner(nullptr);
285 }
286}

◆ DisownBoxesNoAssert()

void tesseract::ColPartition::DisownBoxesNoAssert ( )

Definition at line 292 of file colpartition.cpp.

292 {
293 BLOBNBOX_C_IT bb_it(&boxes_);
294 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
295 BLOBNBOX *bblob = bb_it.data();
296 if (bblob->owner() == this) {
297 bblob->set_owner(nullptr);
298 }
299 }
300}

◆ FakePartition()

ColPartition * tesseract::ColPartition::FakePartition ( const TBOX & box,
PolyBlockType  block_type,
BlobRegionType  blob_type,
BlobTextFlowType  flow 
)
static

Definition at line 108 of file colpartition.cpp.

111 {
112 auto *part = new ColPartition(blob_type, ICOORD(0, 1));
113 part->set_type(block_type);
114 part->set_flow(flow);
115 part->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(box)));
116 part->set_left_margin(box.left());
117 part->set_right_margin(box.right());
118 part->SetBlobTypes();
119 part->ComputeLimits();
120 part->ClaimBoxes();
121 return part;
122}
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:238
BlobTextFlowType flow() const
Definition: colpartition.h:153

◆ flow()

BlobTextFlowType tesseract::ColPartition::flow ( ) const
inline

Definition at line 153 of file colpartition.h.

153 {
154 return flow_;
155 }

◆ good_blob_score()

int tesseract::ColPartition::good_blob_score ( ) const
inline

Definition at line 159 of file colpartition.h.

159 {
160 return good_blob_score_;
161 }

◆ good_column()

bool tesseract::ColPartition::good_column ( ) const
inline

Definition at line 165 of file colpartition.h.

165 {
166 return good_column_;
167 }

◆ good_width()

bool tesseract::ColPartition::good_width ( ) const
inline

Definition at line 162 of file colpartition.h.

162 {
163 return good_width_;
164 }

◆ HasGoodBaseline()

bool tesseract::ColPartition::HasGoodBaseline ( )

Definition at line 1343 of file colpartition.cpp.

1343 {
1344 // Approximation of the baseline.
1345 DetLineFit linepoints;
1346 // Calculation of the mean height on this line segment. Note that these
1347 // variable names apply to the context of a horizontal line, and work
1348 // analogously, rather than literally in the case of a vertical line.
1349 int total_height = 0;
1350 int coverage = 0;
1351 int height_count = 0;
1352 int width = 0;
1353 BLOBNBOX_C_IT it(&boxes_);
1354 TBOX box(it.data()->bounding_box());
1355 // Accumulate points representing the baseline at the middle of each blob,
1356 // but add an additional point for each end of the line. This makes it
1357 // harder to fit a severe skew angle, as it is most likely not right.
1358 if (IsVerticalType()) {
1359 // For a vertical line, use the right side as the baseline.
1360 ICOORD first_pt(box.right(), box.bottom());
1361 // Use the bottom-right of the first (bottom) box, the top-right of the
1362 // last, and the middle-right of all others.
1363 linepoints.Add(first_pt);
1364 for (it.forward(); !it.at_last(); it.forward()) {
1365 BLOBNBOX *blob = it.data();
1366 box = blob->bounding_box();
1367 ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2);
1368 linepoints.Add(box_pt);
1369 total_height += box.width();
1370 coverage += box.height();
1371 ++height_count;
1372 }
1373 box = it.data()->bounding_box();
1374 ICOORD last_pt(box.right(), box.top());
1375 linepoints.Add(last_pt);
1376 width = last_pt.y() - first_pt.y();
1377
1378 } else {
1379 // Horizontal lines use the bottom as the baseline.
1380 TBOX box(it.data()->bounding_box());
1381 // Use the bottom-left of the first box, the bottom-right of the last,
1382 // and the middle of all others.
1383 ICOORD first_pt(box.left(), box.bottom());
1384 linepoints.Add(first_pt);
1385 for (it.forward(); !it.at_last(); it.forward()) {
1386 BLOBNBOX *blob = it.data();
1387 box = blob->bounding_box();
1388 ICOORD box_pt((box.left() + box.right()) / 2, box.bottom());
1389 linepoints.Add(box_pt);
1390 total_height += box.height();
1391 coverage += box.width();
1392 ++height_count;
1393 }
1394 box = it.data()->bounding_box();
1395 ICOORD last_pt(box.right(), box.bottom());
1396 linepoints.Add(last_pt);
1397 width = last_pt.x() - first_pt.x();
1398 }
1399 // Maximum median error allowed to be a good text line.
1400 if (height_count == 0) {
1401 return false;
1402 }
1403 double max_error = kMaxBaselineError * total_height / height_count;
1404 ICOORD start_pt, end_pt;
1405 double error = linepoints.Fit(&start_pt, &end_pt);
1406 return error < max_error && coverage >= kMinBaselineCoverage * width;
1407}
const double kMinBaselineCoverage
const double kMaxBaselineError

◆ HCoreOverlap()

int tesseract::ColPartition::HCoreOverlap ( const ColPartition & other) const
inline

Definition at line 384 of file colpartition.h.

384 {
385 return std::min(median_right_, other.median_right_) -
386 std::max(median_left_, other.median_left_);
387 }

◆ HOverlaps()

bool tesseract::ColPartition::HOverlaps ( const ColPartition & other) const
inline

Definition at line 365 of file colpartition.h.

365 {
366 return bounding_box_.x_overlap(other.bounding_box_);
367 }
bool x_overlap(const TBOX &box) const
Definition: rect.h:409

◆ inside_table_column()

bool tesseract::ColPartition::inside_table_column ( )
inline

Definition at line 243 of file colpartition.h.

243 {
244 return inside_table_column_;
245 }

◆ IsEmpty()

bool tesseract::ColPartition::IsEmpty ( ) const
inline

Definition at line 357 of file colpartition.h.

357 {
358 return boxes_.empty();
359 }

◆ IsHorizontalLine()

bool tesseract::ColPartition::IsHorizontalLine ( ) const
inline

Definition at line 459 of file colpartition.h.

459 {
460 return IsHorizontalType() && IsLineType();
461 }
bool IsHorizontalType() const
Definition: colpartition.h:445

◆ IsHorizontalType()

bool tesseract::ColPartition::IsHorizontalType ( ) const
inline

Definition at line 445 of file colpartition.h.

445 {
446 return blob_type_ == BRT_TEXT || blob_type_ == BRT_HLINE;
447 }
@ BRT_TEXT
Definition: blobbox.h:82
@ BRT_HLINE
Definition: blobbox.h:76

◆ IsImageType()

bool tesseract::ColPartition::IsImageType ( ) const
inline

Definition at line 429 of file colpartition.h.

429 {
430 return PTIsImageType(type_);
431 }
bool PTIsImageType(PolyBlockType type)
Definition: publictypes.h:75

◆ IsInSameColumnAs()

bool tesseract::ColPartition::IsInSameColumnAs ( const ColPartition & part) const

Definition at line 2270 of file colpartition.cpp.

2270 {
2271 // Overlap does not occur when last < part.first or first > part.last.
2272 // In other words, one is completely to the side of the other.
2273 // This is just DeMorgan's law applied to that so the function returns true.
2274 return (last_column_ >= part.first_column_) &&
2275 (first_column_ <= part.last_column_);
2276}

◆ IsLeftOf()

bool tesseract::ColPartition::IsLeftOf ( const ColPartition & other) const
inline

Definition at line 349 of file colpartition.h.

349 {
350 return bounding_box_.right() < other.bounding_box_.right();
351 }

◆ IsLegal()

bool tesseract::ColPartition::IsLegal ( )

Definition at line 364 of file colpartition.cpp.

364 {
365 if (bounding_box_.left() > bounding_box_.right()) {
366 if (textord_debug_bugs) {
367 tprintf("Bounding box invalid\n");
368 Print();
369 }
370 return false; // Bounding box invalid.
371 }
372 if (left_margin_ > bounding_box_.left() ||
373 right_margin_ < bounding_box_.right()) {
374 if (textord_debug_bugs) {
375 tprintf("Margins invalid\n");
376 Print();
377 }
378 return false; // Margins invalid.
379 }
380 if (left_key_ > BoxLeftKey() || right_key_ < BoxRightKey()) {
381 if (textord_debug_bugs) {
382 tprintf("Key inside box: %d v %d or %d v %d\n", left_key_, BoxLeftKey(),
383 right_key_, BoxRightKey());
384 Print();
385 }
386 return false; // Keys inside the box.
387 }
388 return true;
389}

◆ IsLineType()

bool tesseract::ColPartition::IsLineType ( ) const
inline

Definition at line 425 of file colpartition.h.

425 {
426 return PTIsLineType(type_);
427 }
bool PTIsLineType(PolyBlockType type)
Definition: publictypes.h:71

◆ IsPulloutType()

bool tesseract::ColPartition::IsPulloutType ( ) const
inline

Definition at line 437 of file colpartition.h.

437 {
438 return PTIsPulloutType(type_);
439 }
bool PTIsPulloutType(PolyBlockType type)
Definition: publictypes.h:87

◆ IsSingleton()

bool tesseract::ColPartition::IsSingleton ( ) const
inline

Definition at line 361 of file colpartition.h.

361 {
362 return boxes_.singleton();
363 }

◆ IsTextType()

bool tesseract::ColPartition::IsTextType ( ) const
inline

Definition at line 433 of file colpartition.h.

433 {
434 return PTIsTextType(type_);
435 }
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:80

◆ IsUnMergeableType()

bool tesseract::ColPartition::IsUnMergeableType ( ) const
inline

Definition at line 449 of file colpartition.h.

449 {
450 return BLOBNBOX::UnMergeableType(blob_type_) || type_ == PT_NOISE;
451 }
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:447

◆ IsVerticalLine()

bool tesseract::ColPartition::IsVerticalLine ( ) const
inline

Definition at line 454 of file colpartition.h.

454 {
455 return IsVerticalType() && IsLineType();
456 }

◆ IsVerticalType()

bool tesseract::ColPartition::IsVerticalType ( ) const
inline

Definition at line 441 of file colpartition.h.

441 {
442 return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE;
443 }
@ BRT_VLINE
Definition: blobbox.h:77
@ BRT_VERT_TEXT
Definition: blobbox.h:81

◆ KeyWidth()

int tesseract::ColPartition::KeyWidth ( int  left_key,
int  right_key 
) const
inline

Definition at line 324 of file colpartition.h.

324 {
325 return (right_key - left_key) / vertical_.y();
326 }
TDimension y() const
access_function
Definition: points.h:62

◆ left_key()

int tesseract::ColPartition::left_key ( ) const
inline

Definition at line 171 of file colpartition.h.

171 {
172 return left_key_;
173 }

◆ left_key_tab()

bool tesseract::ColPartition::left_key_tab ( ) const
inline

Definition at line 168 of file colpartition.h.

168 {
169 return left_key_tab_;
170 }

◆ left_margin()

int tesseract::ColPartition::left_margin ( ) const
inline

Definition at line 111 of file colpartition.h.

111 {
112 return left_margin_;
113 }

◆ LeftAtY()

int tesseract::ColPartition::LeftAtY ( int  y) const
inline

Definition at line 340 of file colpartition.h.

340 {
341 return XAtY(left_key_, y);
342 }

◆ LeftBlobRule()

int tesseract::ColPartition::LeftBlobRule ( ) const

Definition at line 580 of file colpartition.cpp.

580 {
581 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST *>(&boxes_));
582 return it.data()->left_rule();
583}

◆ LineSpacingBlocks()

void tesseract::ColPartition::LineSpacingBlocks ( const ICOORD & bleft,
const ICOORD & tright,
int  resolution,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts,
BLOCK_LIST *  completed_blocks,
TO_BLOCK_LIST *  to_blocks 
)
static

Definition at line 1473 of file colpartition.cpp.

1478 {
1479 int page_height = tright.y() - bleft.y();
1480 // Compute the initial spacing stats.
1481 ColPartition_IT it(block_parts);
1482 int part_count = 0;
1483 int max_line_height = 0;
1484
1485 // TODO(joeliu): We should add some special logic for PT_INLINE_EQUATION type
1486 // because their line spacing with their neighbors maybe smaller and their
1487 // height may be slightly larger.
1488
1489 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1490 ColPartition *part = it.data();
1491 ASSERT_HOST(!part->boxes()->empty());
1492 STATS side_steps(0, part->bounding_box().height() - 1);
1493 if (part->bounding_box().height() > max_line_height) {
1494 max_line_height = part->bounding_box().height();
1495 }
1496 BLOBNBOX_C_IT blob_it(part->boxes());
1497 int prev_bottom = blob_it.data()->bounding_box().bottom();
1498 for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
1499 BLOBNBOX *blob = blob_it.data();
1500 int bottom = blob->bounding_box().bottom();
1501 int step = bottom - prev_bottom;
1502 if (step < 0) {
1503 step = -step;
1504 }
1505 side_steps.add(step, 1);
1506 prev_bottom = bottom;
1507 }
1508 part->set_side_step(static_cast<int>(side_steps.median() + 0.5));
1509 if (!it.at_last()) {
1510 ColPartition *next_part = it.data_relative(1);
1511 part->set_bottom_spacing(part->median_bottom() -
1512 next_part->median_bottom());
1513 part->set_top_spacing(part->median_top() - next_part->median_top());
1514 } else {
1515 part->set_bottom_spacing(page_height);
1516 part->set_top_spacing(page_height);
1517 }
1518 if (textord_debug_tabfind) {
1519 part->Print();
1520 tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n",
1521 side_steps.median(), part->top_spacing(), part->bottom_spacing());
1522 }
1523 ++part_count;
1524 }
1525 if (part_count == 0) {
1526 return;
1527 }
1528
1529 SmoothSpacings(resolution, page_height, block_parts);
1530
1531 // Move the partitions into individual block lists and make the blocks.
1532 BLOCK_IT block_it(completed_blocks);
1533 TO_BLOCK_IT to_block_it(to_blocks);
1534 ColPartition_LIST spacing_parts;
1535 ColPartition_IT sp_block_it(&spacing_parts);
1536 int same_block_threshold = max_line_height * kMaxSameBlockLineSpacing;
1537 for (it.mark_cycle_pt(); !it.empty();) {
1538 ColPartition *part = it.extract();
1539 sp_block_it.add_to_end(part);
1540 it.forward();
1541 if (it.empty() || part->bottom_spacing() > same_block_threshold ||
1542 !part->SpacingsEqual(*it.data(), resolution)) {
1543 // There is a spacing boundary. Check to see if it.data() belongs
1544 // better in the current block or the next one.
1545 if (!it.empty() && part->bottom_spacing() <= same_block_threshold) {
1546 ColPartition *next_part = it.data();
1547 // If there is a size match one-way, then the middle line goes with
1548 // its matched size, otherwise it goes with the smallest spacing.
1549 ColPartition *third_part = it.at_last() ? nullptr : it.data_relative(1);
1550 if (textord_debug_tabfind) {
1551 tprintf(
1552 "Spacings unequal: upper:%d/%d, lower:%d/%d,"
1553 " sizes %d %d %d\n",
1554 part->top_spacing(), part->bottom_spacing(),
1555 next_part->top_spacing(), next_part->bottom_spacing(),
1556 part->median_height(), next_part->median_height(),
1557 third_part != nullptr ? third_part->median_height() : 0);
1558 }
1559 // We can only consider adding the next line to the block if the sizes
1560 // match and the lines are close enough for their size.
1561 if (part->SizesSimilar(*next_part) &&
1562 next_part->median_height() * kMaxSameBlockLineSpacing >
1563 part->bottom_spacing() &&
1564 part->median_height() * kMaxSameBlockLineSpacing >
1565 part->top_spacing()) {
1566 // Even now, we can only add it as long as the third line doesn't
1567 // match in the same way and have a smaller bottom spacing.
1568 if (third_part == nullptr || !next_part->SizesSimilar(*third_part) ||
1569 third_part->median_height() * kMaxSameBlockLineSpacing <=
1570 next_part->bottom_spacing() ||
1571 next_part->median_height() * kMaxSameBlockLineSpacing <=
1572 next_part->top_spacing() ||
1573 next_part->bottom_spacing() > part->bottom_spacing()) {
1574 // Add to the current block.
1575 sp_block_it.add_to_end(it.extract());
1576 it.forward();
1577 if (textord_debug_tabfind) {
1578 tprintf("Added line to current block.\n");
1579 }
1580 }
1581 }
1582 }
1583 TO_BLOCK *to_block = MakeBlock(bleft, tright, &spacing_parts, used_parts);
1584 if (to_block != nullptr) {
1585 to_block_it.add_to_end(to_block);
1586 block_it.add_to_end(to_block->block);
1587 }
1588 sp_block_it.set_to_list(&spacing_parts);
1589 } else {
1590 if (textord_debug_tabfind && !it.empty()) {
1591 ColPartition *next_part = it.data();
1592 tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
1593 part->top_spacing(), part->bottom_spacing(),
1594 next_part->top_spacing(), next_part->bottom_spacing(),
1595 part->median_height(), next_part->median_height());
1596 }
1597 }
1598 }
1599}
const double kMaxSameBlockLineSpacing
static TO_BLOCK * MakeBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)

◆ lower_partners()

ColPartition_CLIST * tesseract::ColPartition::lower_partners ( )
inline

Definition at line 198 of file colpartition.h.

198 {
199 return &lower_partners_;
200 }

◆ MakeBigPartition()

ColPartition * tesseract::ColPartition::MakeBigPartition ( BLOBNBOX * box,
ColPartition_LIST *  big_part_list 
)
static

Definition at line 129 of file colpartition.cpp.

130 {
131 box->set_owner(nullptr);
132 auto *single = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
133 single->set_flow(BTFT_NONE);
134 single->AddBox(box);
135 single->ComputeLimits();
136 single->ClaimBoxes();
137 single->SetBlobTypes();
138 single->set_block_owned(true);
139 if (big_part_list != nullptr) {
140 ColPartition_IT part_it(big_part_list);
141 part_it.add_to_end(single);
142 }
143 return single;
144}
@ BRT_UNKNOWN
Definition: blobbox.h:80
@ BTFT_NONE
Definition: blobbox.h:111

◆ MakeBlock()

TO_BLOCK * tesseract::ColPartition::MakeBlock ( const ICOORD & bleft,
const ICOORD & tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
)
static

Definition at line 1697 of file colpartition.cpp.

1699 {
1700 if (block_parts->empty()) {
1701 return nullptr; // Nothing to do.
1702 }
1703 // If the block_parts are not in reading order, then it will make an invalid
1704 // block polygon and bounding_box, so sort by bounding box now just to make
1705 // sure.
1706 block_parts->sort(&ColPartition::SortByBBox);
1707 ColPartition_IT it(block_parts);
1708 ColPartition *part = it.data();
1709 PolyBlockType type = part->type();
1710 if (type == PT_VERTICAL_TEXT) {
1711 return MakeVerticalTextBlock(bleft, tright, block_parts, used_parts);
1712 }
1713 // LineSpacingBlocks has handed us a collection of evenly spaced lines and
1714 // put the average spacing in each partition, so we can just take the
1715 // linespacing from the first partition.
1716 int line_spacing = part->bottom_spacing();
1717 if (line_spacing < part->median_height()) {
1718 line_spacing = part->bounding_box().height();
1719 }
1720 ICOORDELT_LIST vertices;
1721 ICOORDELT_IT vert_it(&vertices);
1722 ICOORD start, end;
1723 int min_x = INT32_MAX;
1724 int max_x = -INT32_MAX;
1725 int min_y = INT32_MAX;
1726 int max_y = -INT32_MAX;
1727 int iteration = 0;
1728 do {
1729 if (iteration == 0) {
1730 ColPartition::LeftEdgeRun(&it, &start, &end);
1731 } else {
1732 ColPartition::RightEdgeRun(&it, &start, &end);
1733 }
1734 ClipCoord(bleft, tright, &start);
1735 ClipCoord(bleft, tright, &end);
1736 vert_it.add_after_then_move(new ICOORDELT(start));
1737 vert_it.add_after_then_move(new ICOORDELT(end));
1738 UpdateRange(start.x(), &min_x, &max_x);
1739 UpdateRange(end.x(), &min_x, &max_x);
1740 UpdateRange(start.y(), &min_y, &max_y);
1741 UpdateRange(end.y(), &min_y, &max_y);
1742 if ((iteration == 0 && it.at_first()) || (iteration == 1 && it.at_last())) {
1743 ++iteration;
1744 it.move_to_last();
1745 }
1746 } while (iteration < 2);
1747 if (textord_debug_tabfind) {
1748 tprintf("Making block at (%d,%d)->(%d,%d)\n", min_x, min_y, max_x, max_y);
1749 }
1750 auto *block = new BLOCK("", true, 0, 0, min_x, min_y, max_x, max_y);
1751 block->pdblk.set_poly_block(new POLY_BLOCK(&vertices, type));
1752 return MoveBlobsToBlock(false, line_spacing, block, block_parts, used_parts);
1753}
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:117
@ PT_VERTICAL_TEXT
Definition: publictypes.h:59
static TO_BLOCK * MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
static int SortByBBox(const void *p1, const void *p2)
Definition: colpartition.h:712

◆ MakeLinePartition()

ColPartition * tesseract::ColPartition::MakeLinePartition ( BlobRegionType  blob_type,
const ICOORD & vertical,
int  left,
int  bottom,
int  right,
int  top 
)
static

Constructs a fake ColPartition with no BLOBNBOXes to represent a horizontal or vertical line, given a type and a bounding box.

Definition at line 161 of file colpartition.cpp.

163 {
164 auto *part = new ColPartition(blob_type, vertical);
165 part->bounding_box_ = TBOX(left, bottom, right, top);
166 part->median_bottom_ = bottom;
167 part->median_top_ = top;
168 part->median_height_ = top - bottom;
169 part->median_left_ = left;
170 part->median_right_ = right;
171 part->median_width_ = right - left;
172 part->left_key_ = part->BoxLeftKey();
173 part->right_key_ = part->BoxRightKey();
174 return part;
175}

◆ MakeToRow()

TO_ROW * tesseract::ColPartition::MakeToRow ( )

Definition at line 1784 of file colpartition.cpp.

1784 {
1785 BLOBNBOX_C_IT blob_it(&boxes_);
1786 TO_ROW *row = nullptr;
1787 int line_size = IsVerticalType() ? median_width_ : median_height_;
1788 // Add all the blobs to a single TO_ROW.
1789 for (; !blob_it.empty(); blob_it.forward()) {
1790 BLOBNBOX *blob = blob_it.extract();
1791 // blob->compute_bounding_box();
1792 int top = blob->bounding_box().top();
1793 int bottom = blob->bounding_box().bottom();
1794 if (row == nullptr) {
1795 row =
1796 new TO_ROW(blob, static_cast<float>(top), static_cast<float>(bottom),
1797 static_cast<float>(line_size));
1798 } else {
1799 row->add_blob(blob, static_cast<float>(top), static_cast<float>(bottom),
1800 static_cast<float>(line_size));
1801 }
1802 }
1803 return row;
1804}

◆ MakeVerticalTextBlock()

TO_BLOCK * tesseract::ColPartition::MakeVerticalTextBlock ( const ICOORD & bleft,
const ICOORD & tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
)
static

Definition at line 1757 of file colpartition.cpp.

1760 {
1761 if (block_parts->empty()) {
1762 return nullptr; // Nothing to do.
1763 }
1764 ColPartition_IT it(block_parts);
1765 ColPartition *part = it.data();
1766 TBOX block_box = part->bounding_box();
1767 int line_spacing = block_box.width();
1768 PolyBlockType type = it.data()->type();
1769 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1770 block_box += it.data()->bounding_box();
1771 }
1772 if (textord_debug_tabfind) {
1773 tprintf("Making block at:");
1774 block_box.print();
1775 }
1776 auto *block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(),
1777 block_box.right(), block_box.top());
1778 block->pdblk.set_poly_block(new POLY_BLOCK(block_box, type));
1779 return MoveBlobsToBlock(true, line_spacing, block, block_parts, used_parts);
1780}

◆ MarkAsLeaderIfMonospaced()

bool tesseract::ColPartition::MarkAsLeaderIfMonospaced ( )

Definition at line 1131 of file colpartition.cpp.

1131 {
1132 bool result = false;
1133 // Gather statistics on the gaps between blobs and the widths of the blobs.
1134 int part_width = bounding_box_.width();
1135 STATS gap_stats(0, part_width - 1);
1136 STATS width_stats(0, part_width - 1);
1137 BLOBNBOX_C_IT it(&boxes_);
1138 BLOBNBOX *prev_blob = it.data();
1139 prev_blob->set_flow(BTFT_NEIGHBOURS);
1140 width_stats.add(prev_blob->bounding_box().width(), 1);
1141 int blob_count = 1;
1142 for (it.forward(); !it.at_first(); it.forward()) {
1143 BLOBNBOX *blob = it.data();
1144 int left = blob->bounding_box().left();
1145 int right = blob->bounding_box().right();
1146 gap_stats.add(left - prev_blob->bounding_box().right(), 1);
1147 width_stats.add(right - left, 1);
1148 blob->set_flow(BTFT_NEIGHBOURS);
1149 prev_blob = blob;
1150 ++blob_count;
1151 }
1152 double median_gap = gap_stats.median();
1153 double median_width = width_stats.median();
1154 double max_width = std::max(median_gap, median_width);
1155 double min_width = std::min(median_gap, median_width);
1156 double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f);
1157 if (textord_debug_tabfind >= 4) {
1158 tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n", gap_iqr, blob_count,
1159 max_width * kMaxLeaderGapFractionOfMax,
1160 min_width * kMaxLeaderGapFractionOfMin);
1161 }
1162 if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax &&
1163 gap_iqr < min_width * kMaxLeaderGapFractionOfMin &&
1164 blob_count >= kMinLeaderCount) {
1165 // This is stable enough to be called a leader, so check the widths.
1166 // Since leader dashes can join, run a dp cutting algorithm and go
1167 // on the cost.
1168 int offset = static_cast<int>(ceil(gap_iqr * 2));
1169 int min_step = static_cast<int>(median_gap + median_width + 0.5);
1170 int max_step = min_step + offset;
1171 min_step -= offset;
1172 // Pad the buffer with min_step/2 on each end.
1173 int part_left = bounding_box_.left() - min_step / 2;
1174 part_width += min_step;
1175 auto *projection = new DPPoint[part_width];
1176 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1177 BLOBNBOX *blob = it.data();
1178 int left = blob->bounding_box().left();
1179 int right = blob->bounding_box().right();
1180 int height = blob->bounding_box().height();
1181 for (int x = left; x < right; ++x) {
1182 projection[left - part_left].AddLocalCost(height);
1183 }
1184 }
1185 DPPoint *best_end =
1186 DPPoint::Solve(min_step, max_step, false, &DPPoint::CostWithVariance,
1187 part_width, projection);
1188 if (best_end != nullptr && best_end->total_cost() < blob_count) {
1189 // Good enough. Call it a leader.
1190 result = true;
1191 bool modified_blob_list = false;
1192 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1193 BLOBNBOX *blob = it.data();
1194 // If the first or last blob is spaced too much, don't mark it.
1195 if (it.at_first()) {
1196 int gap = it.data_relative(1)->bounding_box().left() -
1197 blob->bounding_box().right();
1198 if (blob->bounding_box().width() + gap > max_step) {
1199 it.extract();
1200 modified_blob_list = true;
1201 continue;
1202 }
1203 }
1204 if (it.at_last()) {
1205 int gap = blob->bounding_box().left() -
1206 it.data_relative(-1)->bounding_box().right();
1207 if (blob->bounding_box().width() + gap > max_step) {
1208 it.extract();
1209 modified_blob_list = true;
1210 break;
1211 }
1212 }
1213 blob->set_region_type(BRT_TEXT);
1214 blob->set_flow(BTFT_LEADER);
1215 }
1216 if (modified_blob_list) {
1217 ComputeLimits();
1218 }
1219 blob_type_ = BRT_TEXT;
1220 flow_ = BTFT_LEADER;
1221 } else if (textord_debug_tabfind) {
1222 if (best_end == nullptr) {
1223 tprintf("No path\n");
1224 } else {
1225 tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(),
1226 blob_count);
1227 }
1228 }
1229 delete[] projection;
1230 }
1231 return result;
1232}
const double kMaxLeaderGapFractionOfMin
const int kMinLeaderCount
@ BTFT_NEIGHBOURS
Definition: blobbox.h:113
const double kMaxLeaderGapFractionOfMax
static DPPoint * Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size, DPPoint *points)
Definition: dppoint.cpp:31
int64_t CostWithVariance(const DPPoint *prev)
Definition: dppoint.cpp:70

◆ MatchingColumns()

bool tesseract::ColPartition::MatchingColumns ( const ColPartition & other) const

Definition at line 392 of file colpartition.cpp.

392 {
393 int y = (MidY() + other.MidY()) / 2;
394 if (!NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor,
395 LeftAtY(y) / kColumnWidthFactor, 1)) {
396 return false;
397 }
398 if (!NearlyEqual(other.RightAtY(y) / kColumnWidthFactor,
399 RightAtY(y) / kColumnWidthFactor, 1)) {
400 return false;
401 }
402 return true;
403}
const int kColumnWidthFactor
Definition: tabfind.h:41
bool NearlyEqual(T x, T y, T tolerance)
Definition: host.h:51

◆ MatchingSizes()

bool tesseract::ColPartition::MatchingSizes ( const ColPartition & other) const

Definition at line 428 of file colpartition.cpp.

428 {
429 if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT) {
430 return !TabFind::DifferentSizes(median_width_, other.median_width_);
431 } else {
432 return !TabFind::DifferentSizes(median_height_, other.median_height_);
433 }
434}
static bool DifferentSizes(int size1, int size2)
Definition: tabfind.cpp:407

◆ MatchingStrokeWidth()

bool tesseract::ColPartition::MatchingStrokeWidth ( const ColPartition & other,
double  fractional_tolerance,
double  constant_tolerance 
) const

Definition at line 458 of file colpartition.cpp.

460 {
461 int match_count = 0;
462 int nonmatch_count = 0;
463 BLOBNBOX_C_IT box_it(const_cast<BLOBNBOX_CLIST *>(&boxes_));
464 BLOBNBOX_C_IT other_it(const_cast<BLOBNBOX_CLIST *>(&other.boxes_));
465 box_it.mark_cycle_pt();
466 other_it.mark_cycle_pt();
467 while (!box_it.cycled_list() && !other_it.cycled_list()) {
468 if (box_it.data()->MatchingStrokeWidth(
469 *other_it.data(), fractional_tolerance, constant_tolerance)) {
470 ++match_count;
471 } else {
472 ++nonmatch_count;
473 }
474 box_it.forward();
475 other_it.forward();
476 }
477 return match_count > nonmatch_count;
478}

◆ MatchingTextColor()

bool tesseract::ColPartition::MatchingTextColor ( const ColPartition & other) const

Definition at line 406 of file colpartition.cpp.

406 {
407 if (color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise &&
408 other.color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise) {
409 return false; // Too noisy.
410 }
411
412 // Colors must match for other to count.
413 double d_this1_o =
414 ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color1_);
415 double d_this2_o =
416 ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color2_);
417 double d_o1_this =
418 ImageFind::ColorDistanceFromLine(color1_, color2_, other.color1_);
419 double d_o2_this =
420 ImageFind::ColorDistanceFromLine(color1_, color2_, other.color2_);
421 // All 4 distances must be small enough.
422 return d_this1_o < kMaxColorDistance && d_this2_o < kMaxColorDistance &&
423 d_o1_this < kMaxColorDistance && d_o2_this < kMaxColorDistance;
424}
const int kMaxColorDistance
const int kMaxRMSColorNoise
static double ColorDistanceFromLine(const uint8_t *line1, const uint8_t *line2, const uint8_t *point)
Definition: imagefind.cpp:364

◆ median_bottom()

int tesseract::ColPartition::median_bottom ( ) const
inline

Definition at line 126 of file colpartition.h.

126 {
127 return median_bottom_;
128 }

◆ median_height()

int tesseract::ColPartition::median_height ( ) const
inline

Definition at line 135 of file colpartition.h.

135 {
136 return median_height_;
137 }

◆ median_left()

int tesseract::ColPartition::median_left ( ) const
inline

Definition at line 129 of file colpartition.h.

129 {
130 return median_left_;
131 }

◆ median_right()

int tesseract::ColPartition::median_right ( ) const
inline

Definition at line 132 of file colpartition.h.

132 {
133 return median_right_;
134 }

◆ median_top()

int tesseract::ColPartition::median_top ( ) const
inline

Definition at line 123 of file colpartition.h.

123 {
124 return median_top_;
125 }

◆ median_width()

int tesseract::ColPartition::median_width ( ) const
inline

Definition at line 141 of file colpartition.h.

141 {
142 return median_width_;
143 }

◆ MedianY()

int tesseract::ColPartition::MedianY ( ) const
inline

Definition at line 308 of file colpartition.h.

308 {
309 return (median_top_ + median_bottom_) / 2;
310 }

◆ MidX()

int tesseract::ColPartition::MidX ( ) const
inline

Definition at line 312 of file colpartition.h.

312 {
313 return (bounding_box_.left() + bounding_box_.right()) / 2;
314 }

◆ MidY()

int tesseract::ColPartition::MidY ( ) const
inline

Definition at line 304 of file colpartition.h.

304 {
305 return (bounding_box_.top() + bounding_box_.bottom()) / 2;
306 }

◆ nearest_neighbor_above()

ColPartition * tesseract::ColPartition::nearest_neighbor_above ( ) const
inline

Definition at line 249 of file colpartition.h.

249 {
250 return nearest_neighbor_above_;
251 }

◆ nearest_neighbor_below()

ColPartition * tesseract::ColPartition::nearest_neighbor_below ( ) const
inline

Definition at line 255 of file colpartition.h.

255 {
256 return nearest_neighbor_below_;
257 }

◆ OKDiacriticMerge()

bool tesseract::ColPartition::OKDiacriticMerge ( const ColPartition & candidate,
bool  debug 
) const

Definition at line 486 of file colpartition.cpp.

487 {
488 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST *>(&boxes_));
489 int min_top = INT32_MAX;
490 int max_bottom = -INT32_MAX;
491 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
492 BLOBNBOX *blob = it.data();
493 if (!blob->IsDiacritic()) {
494 if (debug) {
495 tprintf("Blob is not a diacritic:");
496 blob->bounding_box().print();
497 }
498 return false; // All blobs must have diacritic bases.
499 }
500 if (blob->base_char_top() < min_top) {
501 min_top = blob->base_char_top();
502 }
503 if (blob->base_char_bottom() > max_bottom) {
504 max_bottom = blob->base_char_bottom();
505 }
506 }
507 // If the intersection of all vertical ranges of all base characters
508 // overlaps the median range of this, then it is OK.
509 bool result =
510 min_top > candidate.median_bottom_ && max_bottom < candidate.median_top_;
511 if (debug) {
512 if (result) {
513 tprintf("OKDiacritic!\n");
514 } else {
515 tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n", max_bottom, min_top,
516 median_bottom_, median_top_);
517 }
518 }
519 return result;
520}

◆ OKMergeOverlap()

bool tesseract::ColPartition::OKMergeOverlap ( const ColPartition & merge1,
const ColPartition & merge2,
int  ok_box_overlap,
bool  debug 
)

Definition at line 773 of file colpartition.cpp.

775 {
776 // Vertical partitions are not allowed to be involved.
777 if (IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType()) {
778 if (debug) {
779 tprintf("Vertical partition\n");
780 }
781 return false;
782 }
783 // The merging partitions must strongly overlap each other.
784 if (!merge1.VSignificantCoreOverlap(merge2)) {
785 if (debug) {
786 tprintf("Voverlap %d (%d)\n", merge1.VCoreOverlap(merge2),
787 merge1.VSignificantCoreOverlap(merge2));
788 }
789 return false;
790 }
791 // The merged box must not overlap the median bounds of this.
792 TBOX merged_box(merge1.bounding_box());
793 merged_box += merge2.bounding_box();
794 if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ &&
795 merged_box.bottom() < bounding_box_.top() - ok_box_overlap &&
796 merged_box.top() > bounding_box_.bottom() + ok_box_overlap) {
797 if (debug) {
798 tprintf("Excessive box overlap\n");
799 }
800 return false;
801 }
802 // Looks OK!
803 return true;
804}

◆ OverlapSplitBlob()

BLOBNBOX * tesseract::ColPartition::OverlapSplitBlob ( const TBOX box)

Definition at line 808 of file colpartition.cpp.

808 {
809 if (boxes_.empty() || boxes_.singleton()) {
810 return nullptr;
811 }
812 BLOBNBOX_C_IT it(&boxes_);
813 TBOX left_box(it.data()->bounding_box());
814 for (it.forward(); !it.at_first(); it.forward()) {
815 BLOBNBOX *bbox = it.data();
816 left_box += bbox->bounding_box();
817 if (left_box.overlap(box)) {
818 return bbox;
819 }
820 }
821 return nullptr;
822}

◆ owns_blobs()

bool tesseract::ColPartition::owns_blobs ( ) const
inline

Definition at line 291 of file colpartition.h.

291 {
292 return owns_blobs_;
293 }

◆ PartitionType()

PolyBlockType tesseract::ColPartition::PartitionType ( ColumnSpanningType  flow) const

Definition at line 1056 of file colpartition.cpp.

1056 {
1057 if (flow == CST_NOISE) {
1058 if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE &&
1059 blob_type_ != BRT_RECTIMAGE && blob_type_ != BRT_VERT_TEXT) {
1060 return PT_NOISE;
1061 }
1062 flow = CST_FLOWING;
1063 }
1064
1065 switch (blob_type_) {
1066 case BRT_NOISE:
1067 return PT_NOISE;
1068 case BRT_HLINE:
1069 return PT_HORZ_LINE;
1070 case BRT_VLINE:
1071 return PT_VERT_LINE;
1072 case BRT_RECTIMAGE:
1073 case BRT_POLYIMAGE:
1074 switch (flow) {
1075 case CST_FLOWING:
1076 return PT_FLOWING_IMAGE;
1077 case CST_HEADING:
1078 return PT_HEADING_IMAGE;
1079 case CST_PULLOUT:
1080 return PT_PULLOUT_IMAGE;
1081 default:
1082 ASSERT_HOST(!"Undefined flow type for image!");
1083 }
1084 break;
1085 case BRT_VERT_TEXT:
1086 return PT_VERTICAL_TEXT;
1087 case BRT_TEXT:
1088 case BRT_UNKNOWN:
1089 default:
1090 switch (flow) {
1091 case CST_FLOWING:
1092 return PT_FLOWING_TEXT;
1093 case CST_HEADING:
1094 return PT_HEADING_TEXT;
1095 case CST_PULLOUT:
1096 return PT_PULLOUT_TEXT;
1097 default:
1098 ASSERT_HOST(!"Undefined flow type for text!");
1099 }
1100 }
1101 ASSERT_HOST(!"Should never get here!");
1102 return PT_NOISE;
1103}
@ BRT_NOISE
Definition: blobbox.h:75
@ PT_PULLOUT_IMAGE
Definition: publictypes.h:63
@ PT_HEADING_IMAGE
Definition: publictypes.h:62
@ PT_HORZ_LINE
Definition: publictypes.h:64
@ PT_FLOWING_IMAGE
Definition: publictypes.h:61
@ PT_VERT_LINE
Definition: publictypes.h:65
@ PT_PULLOUT_TEXT
Definition: publictypes.h:55
@ PT_HEADING_TEXT
Definition: publictypes.h:54
@ PT_FLOWING_TEXT
Definition: publictypes.h:53

◆ Print()

void tesseract::ColPartition::Print ( ) const

Definition at line 1862 of file colpartition.cpp.

1862 {
1863 int y = MidY();
1864 tprintf(
1865 "ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
1866 " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
1867 " ts=%d bs=%d ls=%d rs=%d\n",
1868 boxes_.empty() ? 'E' : ' ', left_margin_, left_key_tab_ ? 'T' : 'B',
1869 LeftAtY(y), bounding_box_.left(), median_left_, bounding_box_.bottom(),
1870 median_bottom_, bounding_box_.right(), RightAtY(y),
1871 right_key_tab_ ? 'T' : 'B', right_margin_, median_right_,
1872 bounding_box_.top(), median_top_, good_width_, good_column_, type_,
1873 kBlobTypes[blob_type_], flow_, first_column_, last_column_,
1874 boxes_.length(), space_above_, space_below_, space_to_left_,
1875 space_to_right_);
1876}

◆ PrintColors()

void tesseract::ColPartition::PrintColors ( )

Definition at line 1879 of file colpartition.cpp.

1879 {
1880 tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n", color1_[COLOR_RED],
1881 color1_[COLOR_GREEN], color1_[COLOR_BLUE], color1_[L_ALPHA_CHANNEL],
1882 color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
1883}

◆ RefinePartners()

void tesseract::ColPartition::RefinePartners ( PolyBlockType  type,
bool  get_desperate,
ColPartitionGrid grid 
)

Definition at line 1956 of file colpartition.cpp.

1957 {
1958 if (TypesSimilar(type_, type)) {
1959 RefinePartnersInternal(true, get_desperate, grid);
1960 RefinePartnersInternal(false, get_desperate, grid);
1961 } else if (type == PT_COUNT) {
1962 // This is the final pass. Make sure only the correctly typed
1963 // partners survive, however many there are.
1964 RefinePartnersByType(true, &upper_partners_);
1965 RefinePartnersByType(false, &lower_partners_);
1966 // It is possible for a merge to have given a partition multiple
1967 // partners again, so the last resort is to use overlap which is
1968 // guaranteed to leave at most one partner left.
1969 if (!upper_partners_.empty() && !upper_partners_.singleton()) {
1970 RefinePartnersByOverlap(true, &upper_partners_);
1971 }
1972 if (!lower_partners_.empty() && !lower_partners_.singleton()) {
1973 RefinePartnersByOverlap(false, &lower_partners_);
1974 }
1975 }
1976}
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2)
Definition: colpartition.h:418

◆ ReflectInYAxis()

void tesseract::ColPartition::ReflectInYAxis ( )

Definition at line 342 of file colpartition.cpp.

342 {
343 BLOBNBOX_CLIST reversed_boxes;
344 BLOBNBOX_C_IT reversed_it(&reversed_boxes);
345 // Reverse the order of the boxes_.
346 BLOBNBOX_C_IT bb_it(&boxes_);
347 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
348 reversed_it.add_before_then_move(bb_it.extract());
349 }
350 bb_it.add_list_after(&reversed_boxes);
351 ASSERT_HOST(!left_key_tab_ && !right_key_tab_);
352 int tmp = left_margin_;
353 left_margin_ = -right_margin_;
354 right_margin_ = -tmp;
355 ComputeLimits();
356}

◆ ReleaseNonLeaderBoxes()

bool tesseract::ColPartition::ReleaseNonLeaderBoxes ( )

Definition at line 306 of file colpartition.cpp.

306 {
307 BLOBNBOX_C_IT bb_it(&boxes_);
308 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
309 BLOBNBOX *bblob = bb_it.data();
310 if (bblob->flow() != BTFT_LEADER) {
311 if (bblob->owner() == this) {
312 bblob->set_owner(nullptr);
313 }
314 bb_it.extract();
315 }
316 }
317 if (bb_it.empty()) {
318 return false;
319 }
320 flow_ = BTFT_LEADER;
321 ComputeLimits();
322 return true;
323}

◆ RemoveBox()

void tesseract::ColPartition::RemoveBox ( BLOBNBOX * box)

Definition at line 216 of file colpartition.cpp.

216 {
217 BLOBNBOX_C_IT bb_it(&boxes_);
218 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
219 if (box == bb_it.data()) {
220 bb_it.extract();
221 ComputeLimits();
222 return;
223 }
224 }
225}

◆ RemovePartner()

void tesseract::ColPartition::RemovePartner ( bool  upper,
ColPartition * partner 
)

Definition at line 653 of file colpartition.cpp.

653 {
654 ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_);
655 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
656 if (it.data() == partner) {
657 it.extract();
658 break;
659 }
660 }
661}

◆ right_key()

int tesseract::ColPartition::right_key ( ) const
inline

Definition at line 177 of file colpartition.h.

177 {
178 return right_key_;
179 }

◆ right_key_tab()

bool tesseract::ColPartition::right_key_tab ( ) const
inline

Definition at line 174 of file colpartition.h.

174 {
175 return right_key_tab_;
176 }

◆ right_margin()

int tesseract::ColPartition::right_margin ( ) const
inline

Definition at line 117 of file colpartition.h.

117 {
118 return right_margin_;
119 }

◆ RightAtY()

int tesseract::ColPartition::RightAtY ( int  y) const
inline

Definition at line 344 of file colpartition.h.

344 {
345 return XAtY(right_key_, y);
346 }

◆ RightBlobRule()

int tesseract::ColPartition::RightBlobRule ( ) const

Definition at line 585 of file colpartition.cpp.

585 {
586 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST *>(&boxes_));
587 it.move_to_last();
588 return it.data()->right_rule();
589}

◆ set_blob_type()

void tesseract::ColPartition::set_blob_type ( BlobRegionType  t)
inline

Definition at line 150 of file colpartition.h.

150 {
151 blob_type_ = t;
152 }

◆ set_block_owned()

void tesseract::ColPartition::set_block_owned ( bool  owned)
inline

Definition at line 207 of file colpartition.h.

207 {
208 block_owned_ = owned;
209 }

◆ set_bottom_spacing()

void tesseract::ColPartition::set_bottom_spacing ( int  spacing)
inline

Definition at line 222 of file colpartition.h.

222 {
223 bottom_spacing_ = spacing;
224 }

◆ set_first_column()

void tesseract::ColPartition::set_first_column ( int  column)
inline

Definition at line 729 of file colpartition.h.

729 {
730 first_column_ = column;
731 }

◆ set_flow()

void tesseract::ColPartition::set_flow ( BlobTextFlowType  f)
inline

Definition at line 156 of file colpartition.h.

156 {
157 flow_ = f;
158 }

◆ set_inside_table_column()

void tesseract::ColPartition::set_inside_table_column ( bool  val)
inline

Definition at line 246 of file colpartition.h.

246 {
247 inside_table_column_ = val;
248 }

◆ set_last_column()

void tesseract::ColPartition::set_last_column ( int  column)
inline

Definition at line 732 of file colpartition.h.

732 {
733 last_column_ = column;
734 }

◆ set_left_margin()

void tesseract::ColPartition::set_left_margin ( int  margin)
inline

Definition at line 114 of file colpartition.h.

114 {
115 left_margin_ = margin;
116 }

◆ set_median_height()

void tesseract::ColPartition::set_median_height ( int  height)
inline

Definition at line 138 of file colpartition.h.

138 {
139 median_height_ = height;
140 }

◆ set_median_width()

void tesseract::ColPartition::set_median_width ( int  width)
inline

Definition at line 144 of file colpartition.h.

144 {
145 median_width_ = width;
146 }

◆ set_nearest_neighbor_above()

void tesseract::ColPartition::set_nearest_neighbor_above ( ColPartition * part)
inline

Definition at line 252 of file colpartition.h.

252 {
253 nearest_neighbor_above_ = part;
254 }

◆ set_nearest_neighbor_below()

void tesseract::ColPartition::set_nearest_neighbor_below ( ColPartition * part)
inline

Definition at line 258 of file colpartition.h.

258 {
259 nearest_neighbor_below_ = part;
260 }

◆ set_owns_blobs()

void tesseract::ColPartition::set_owns_blobs ( bool  owns_blobs)
inline

Definition at line 294 of file colpartition.h.

294 {
295 // Do NOT change ownership flag when there are blobs in the list.
296 // Immediately set the ownership flag when creating copies.
297 ASSERT_HOST(boxes_.empty());
298 owns_blobs_ = owns_blobs;
299 }

◆ set_right_margin()

void tesseract::ColPartition::set_right_margin ( int  margin)
inline

Definition at line 120 of file colpartition.h.

120 {
121 right_margin_ = margin;
122 }

◆ set_side_step()

void tesseract::ColPartition::set_side_step ( int  step)
inline

Definition at line 216 of file colpartition.h.

216 {
217 side_step_ = step;
218 }

◆ set_space_above()

void tesseract::ColPartition::set_space_above ( int  space)
inline

Definition at line 264 of file colpartition.h.

264 {
265 space_above_ = space;
266 }

◆ set_space_below()

void tesseract::ColPartition::set_space_below ( int  space)
inline

Definition at line 270 of file colpartition.h.

270 {
271 space_below_ = space;
272 }

◆ set_space_to_left()

void tesseract::ColPartition::set_space_to_left ( int  space)
inline

Definition at line 276 of file colpartition.h.

276 {
277 space_to_left_ = space;
278 }

◆ set_space_to_right()

void tesseract::ColPartition::set_space_to_right ( int  space)
inline

Definition at line 282 of file colpartition.h.

282 {
283 space_to_right_ = space;
284 }

◆ set_table_type()

void tesseract::ColPartition::set_table_type ( )
inline

Definition at line 232 of file colpartition.h.

232 {
233 if (type_ != PT_TABLE) {
234 type_before_table_ = type_;
235 type_ = PT_TABLE;
236 }
237 }

◆ set_top_spacing()

void tesseract::ColPartition::set_top_spacing ( int  spacing)
inline

Definition at line 228 of file colpartition.h.

228 {
229 top_spacing_ = spacing;
230 }

◆ set_type()

void tesseract::ColPartition::set_type ( PolyBlockType  t)
inline

Definition at line 183 of file colpartition.h.

183 {
184 type_ = t;
185 }

◆ set_vertical()

void tesseract::ColPartition::set_vertical ( const ICOORD & v)
inline

Definition at line 192 of file colpartition.h.

192 {
193 vertical_ = v;
194 }

◆ set_working_set()

void tesseract::ColPartition::set_working_set ( WorkingPartSet * working_set)
inline

Definition at line 201 of file colpartition.h.

201 {
202 working_set_ = working_set;
203 }

◆ SetBlobTypes()

void tesseract::ColPartition::SetBlobTypes ( )

Definition at line 1326 of file colpartition.cpp.

1326 {
1327 if (!owns_blobs()) {
1328 return;
1329 }
1330 BLOBNBOX_C_IT it(&boxes_);
1331 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1332 BLOBNBOX *blob = it.data();
1333 if (blob->flow() != BTFT_LEADER) {
1334 blob->set_flow(flow_);
1335 }
1336 blob->set_region_type(blob_type_);
1337 ASSERT_HOST(blob->owner() == nullptr || blob->owner() == this);
1338 }
1339}

◆ SetColumnGoodness()

void tesseract::ColPartition::SetColumnGoodness ( const WidthCallback cb)

Definition at line 1118 of file colpartition.cpp.

1118 {
1119 int y = MidY();
1120 int width = RightAtY(y) - LeftAtY(y);
1121 good_width_ = cb(width);
1122 good_column_ = blob_type_ == BRT_TEXT && left_key_tab_ && right_key_tab_;
1123}

◆ SetLeftTab()

void tesseract::ColPartition::SetLeftTab ( const TabVector * tab_vector)

Definition at line 525 of file colpartition.cpp.

525 {
526 if (tab_vector != nullptr) {
527 left_key_ = tab_vector->sort_key();
528 left_key_tab_ = left_key_ <= BoxLeftKey();
529 } else {
530 left_key_tab_ = false;
531 }
532 if (!left_key_tab_) {
533 left_key_ = BoxLeftKey();
534 }
535}

◆ SetPartitionType()

void tesseract::ColPartition::SetPartitionType ( int  resolution,
ColPartitionSet * columns 
)

Definition at line 1024 of file colpartition.cpp.

1024 {
1025 int first_spanned_col = -1;
1026 ColumnSpanningType span_type = columns->SpanningType(
1027 resolution, bounding_box_.left(), bounding_box_.right(),
1028 std::min(bounding_box_.height(), bounding_box_.width()), MidY(),
1029 left_margin_, right_margin_, &first_column_, &last_column_,
1030 &first_spanned_col);
1031 column_set_ = columns;
1032 if (first_column_ < last_column_ && span_type == CST_PULLOUT &&
1033 !IsLineType()) {
1034 // Unequal columns may indicate that the pullout spans one of the columns
1035 // it lies in, so force it to be allocated to just that column.
1036 if (first_spanned_col >= 0) {
1037 first_column_ = first_spanned_col;
1038 last_column_ = first_spanned_col;
1039 } else {
1040 if ((first_column_ & 1) == 0) {
1041 last_column_ = first_column_;
1042 } else if ((last_column_ & 1) == 0) {
1043 first_column_ = last_column_;
1044 } else {
1045 first_column_ = last_column_ = (first_column_ + last_column_) / 2;
1046 }
1047 }
1048 }
1049 type_ = PartitionType(span_type);
1050}

◆ SetRegionAndFlowTypesFromProjectionValue()

void tesseract::ColPartition::SetRegionAndFlowTypesFromProjectionValue ( int  value)

Definition at line 1241 of file colpartition.cpp.

1241 {
1242 int blob_count = 0; // Total # blobs.
1243 int good_blob_score_ = 0; // Total # good strokewidth neighbours.
1244 int noisy_count = 0; // Total # neighbours marked as noise.
1245 int hline_count = 0;
1246 int vline_count = 0;
1247 BLOBNBOX_C_IT it(&boxes_);
1248 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1249 BLOBNBOX *blob = it.data();
1250 ++blob_count;
1251 noisy_count += blob->NoisyNeighbours();
1252 good_blob_score_ += blob->GoodTextBlob();
1253 if (blob->region_type() == BRT_HLINE) {
1254 ++hline_count;
1255 }
1256 if (blob->region_type() == BRT_VLINE) {
1257 ++vline_count;
1258 }
1259 }
1260 flow_ = BTFT_NEIGHBOURS;
1261 blob_type_ = BRT_UNKNOWN;
1262 if (hline_count > vline_count) {
1263 flow_ = BTFT_NONE;
1264 blob_type_ = BRT_HLINE;
1265 } else if (vline_count > hline_count) {
1266 flow_ = BTFT_NONE;
1267 blob_type_ = BRT_VLINE;
1268 } else if (value < -1 || 1 < value) {
1269 int long_side;
1270 int short_side;
1271 if (value > 0) {
1272 long_side = bounding_box_.width();
1273 short_side = bounding_box_.height();
1274 blob_type_ = BRT_TEXT;
1275 } else {
1276 long_side = bounding_box_.height();
1277 short_side = bounding_box_.width();
1278 blob_type_ = BRT_VERT_TEXT;
1279 }
1280 // We will combine the old metrics using aspect ratio and blob counts
1281 // with the input value by allowing a strong indication to flip the
1282 // STRONG_CHAIN/CHAIN flow values.
1283 int strong_score = blob_count >= kHorzStrongTextlineCount ? 1 : 0;
1284 if (short_side > kHorzStrongTextlineHeight) {
1285 ++strong_score;
1286 }
1287 if (short_side * kHorzStrongTextlineAspect < long_side) {
1288 ++strong_score;
1289 }
1290 if (abs(value) >= kMinStrongTextValue) {
1291 flow_ = BTFT_STRONG_CHAIN;
1292 } else if (abs(value) >= kMinChainTextValue) {
1293 flow_ = BTFT_CHAIN;
1294 } else {
1295 flow_ = BTFT_NEIGHBOURS;
1296 }
1297 // Upgrade chain to strong chain if the other indicators are good
1298 if (flow_ == BTFT_CHAIN && strong_score == 3) {
1299 flow_ = BTFT_STRONG_CHAIN;
1300 }
1301 // Downgrade strong vertical text to chain if the indicators are bad.
1302 if (flow_ == BTFT_STRONG_CHAIN && value < 0 && strong_score < 2) {
1303 flow_ = BTFT_CHAIN;
1304 }
1305 }
1306 if (flow_ == BTFT_NEIGHBOURS) {
1307 // Check for noisy neighbours.
1308 if (noisy_count >= blob_count) {
1309 flow_ = BTFT_NONTEXT;
1310 blob_type_ = BRT_NOISE;
1311 }
1312 }
1313 if (TabFind::WithinTestRegion(2, bounding_box_.left(),
1314 bounding_box_.bottom())) {
1315 tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
1316 blob_count, noisy_count, good_blob_score_);
1317 tprintf(" Projection value=%d, flow=%d, blob_type=%d\n", value, flow_,
1318 blob_type_);
1319 Print();
1320 }
1321 SetBlobTypes();
1322}
int value
const int kMinChainTextValue
const int kHorzStrongTextlineCount
const int kHorzStrongTextlineHeight
const int kHorzStrongTextlineAspect
@ BTFT_STRONG_CHAIN
Definition: blobbox.h:115
@ BTFT_CHAIN
Definition: blobbox.h:114
@ BTFT_NONTEXT
Definition: blobbox.h:112
const int kMinStrongTextValue

◆ SetRightTab()

void tesseract::ColPartition::SetRightTab ( const TabVector * tab_vector)

Definition at line 538 of file colpartition.cpp.

538 {
539 if (tab_vector != nullptr) {
540 right_key_ = tab_vector->sort_key();
541 right_key_tab_ = right_key_ >= BoxRightKey();
542 } else {
543 right_key_tab_ = false;
544 }
545 if (!right_key_tab_) {
546 right_key_ = BoxRightKey();
547 }
548}

◆ SetSpecialBlobsDensity()

void tesseract::ColPartition::SetSpecialBlobsDensity ( const BlobSpecialTextType  type,
const float  density 
)

Definition at line 611 of file colpartition.cpp.

612 {
613 ASSERT_HOST(type < BSTT_COUNT);
614 special_blobs_densities_[type] = density;
615}

◆ ShallowCopy()

ColPartition * tesseract::ColPartition::ShallowCopy ( ) const

Definition at line 1808 of file colpartition.cpp.

1808 {
1809 auto *part = new ColPartition(blob_type_, vertical_);
1810 part->left_margin_ = left_margin_;
1811 part->right_margin_ = right_margin_;
1812 part->bounding_box_ = bounding_box_;
1813 memcpy(part->special_blobs_densities_, special_blobs_densities_,
1814 sizeof(special_blobs_densities_));
1815 part->median_bottom_ = median_bottom_;
1816 part->median_top_ = median_top_;
1817 part->median_height_ = median_height_;
1818 part->median_left_ = median_left_;
1819 part->median_right_ = median_right_;
1820 part->median_width_ = median_width_;
1821 part->good_width_ = good_width_;
1822 part->good_column_ = good_column_;
1823 part->left_key_tab_ = left_key_tab_;
1824 part->right_key_tab_ = right_key_tab_;
1825 part->type_ = type_;
1826 part->flow_ = flow_;
1827 part->left_key_ = left_key_;
1828 part->right_key_ = right_key_;
1829 part->first_column_ = first_column_;
1830 part->last_column_ = last_column_;
1831 part->owns_blobs_ = false;
1832 return part;
1833}

◆ SingletonPartner()

ColPartition * tesseract::ColPartition::SingletonPartner ( bool  upper)

Definition at line 664 of file colpartition.cpp.

664 {
665 ColPartition_CLIST *partners = upper ? &upper_partners_ : &lower_partners_;
666 if (!partners->singleton()) {
667 return nullptr;
668 }
669 ColPartition_C_IT it(partners);
670 return it.data();
671}

◆ SmoothPartnerRun()

void tesseract::ColPartition::SmoothPartnerRun ( int  working_set_count)

Definition at line 1886 of file colpartition.cpp.

1886 {
1887 STATS left_stats(0, working_set_count - 1);
1888 STATS right_stats(0, working_set_count - 1);
1889 PolyBlockType max_type = type_;
1890 ColPartition *partner;
1891 for (partner = SingletonPartner(false); partner != nullptr;
1892 partner = partner->SingletonPartner(false)) {
1893 if (partner->type_ > max_type) {
1894 max_type = partner->type_;
1895 }
1896 if (column_set_ == partner->column_set_) {
1897 left_stats.add(partner->first_column_, 1);
1898 right_stats.add(partner->last_column_, 1);
1899 }
1900 }
1901 type_ = max_type;
1902 // TODO(rays) Either establish that it isn't necessary to set the columns,
1903 // or find a way to do it that does not cause an assert failure in
1904 // AddToWorkingSet.
1905#if 0
1906 first_column_ = left_stats.mode();
1907 last_column_ = right_stats.mode();
1908 if (last_column_ < first_column_)
1909 last_column_ = first_column_;
1910#endif
1911
1912 for (partner = SingletonPartner(false); partner != nullptr;
1913 partner = partner->SingletonPartner(false)) {
1914 partner->type_ = max_type;
1915#if 0 // See TODO above
1916 if (column_set_ == partner->column_set_) {
1917 partner->first_column_ = first_column_;
1918 partner->last_column_ = last_column_;
1919 }
1920#endif
1921 }
1922}

◆ SortByBBox()

static int tesseract::ColPartition::SortByBBox ( const void *  p1,
const void *  p2 
)
inline static

Definition at line 712 of file colpartition.h.

712 {
713 const ColPartition *part1 = *static_cast<const ColPartition *const *>(p1);
714 const ColPartition *part2 = *static_cast<const ColPartition *const *>(p2);
715 int mid_y1 = part1->bounding_box_.y_middle();
716 int mid_y2 = part2->bounding_box_.y_middle();
717 if ((part2->bounding_box_.bottom() <= mid_y1 &&
718 mid_y1 <= part2->bounding_box_.top()) ||
719 (part1->bounding_box_.bottom() <= mid_y2 &&
720 mid_y2 <= part1->bounding_box_.top())) {
721 // Sort by increasing x.
722 return part1->bounding_box_.x_middle() - part2->bounding_box_.x_middle();
723 }
724 // Sort by decreasing y.
725 return mid_y2 - mid_y1;
726 }

◆ SortKey()

int tesseract::ColPartition::SortKey ( int  x,
int  y 
) const
inline

Definition at line 316 of file colpartition.h.

316 {
317 return TabVector::SortKey(vertical_, x, y);
318 }
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:274

◆ space_above()

int tesseract::ColPartition::space_above ( ) const
inline

Definition at line 261 of file colpartition.h.

261 {
262 return space_above_;
263 }

◆ space_below()

int tesseract::ColPartition::space_below ( ) const
inline

Definition at line 267 of file colpartition.h.

267 {
268 return space_below_;
269 }

◆ space_to_left()

int tesseract::ColPartition::space_to_left ( ) const
inline

Definition at line 273 of file colpartition.h.

273 {
274 return space_to_left_;
275 }

◆ space_to_right()

int tesseract::ColPartition::space_to_right ( ) const
inline

Definition at line 279 of file colpartition.h.

279 {
280 return space_to_right_;
281 }

◆ SpecialBlobsCount()

int tesseract::ColPartition::SpecialBlobsCount ( const BlobSpecialTextType  type)

Definition at line 596 of file colpartition.cpp.

596 {
597 ASSERT_HOST(type < BSTT_COUNT);
598 BLOBNBOX_C_IT blob_it(&boxes_);
599 int count = 0;
600 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
601 BLOBNBOX *blob = blob_it.data();
602 BlobSpecialTextType blob_type = blob->special_text_type();
603 if (blob_type == type) {
604 count++;
605 }
606 }
607
608 return count;
609}
int * count

◆ SpecialBlobsDensity()

float tesseract::ColPartition::SpecialBlobsDensity ( const BlobSpecialTextType  type) const

Definition at line 591 of file colpartition.cpp.

591 {
592 ASSERT_HOST(type < BSTT_COUNT);
593 return special_blobs_densities_[type];
594}

◆ SplitAt()

ColPartition * tesseract::ColPartition::SplitAt ( int  split_x)

Definition at line 865 of file colpartition.cpp.

865 {
866 if (split_x <= bounding_box_.left() || split_x >= bounding_box_.right()) {
867 return nullptr; // There will be no change.
868 }
869 ColPartition *split_part = ShallowCopy();
870 split_part->set_owns_blobs(owns_blobs());
871 BLOBNBOX_C_IT it(&boxes_);
872 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
873 BLOBNBOX *bbox = it.data();
874 ColPartition *prev_owner = bbox->owner();
875 ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr);
876 const TBOX &box = bbox->bounding_box();
877 if (box.left() >= split_x) {
878 split_part->AddBox(it.extract());
879 if (owns_blobs() && prev_owner != nullptr) {
880 bbox->set_owner(split_part);
881 }
882 }
883 }
884 if (it.empty()) {
885 // Possible if split-x passes through the first blob.
886 it.add_list_after(&split_part->boxes_);
887 }
888 ASSERT_HOST(!it.empty());
889 if (split_part->IsEmpty()) {
890 // Split part ended up with nothing. Possible if split_x passes
891 // through the last blob.
892 delete split_part;
893 return nullptr;
894 }
895 right_key_tab_ = false;
896 split_part->left_key_tab_ = false;
897 right_margin_ = split_x;
898 split_part->left_margin_ = split_x;
899 ComputeLimits();
900 split_part->ComputeLimits();
901 return split_part;
902}

◆ SplitAtBlob()

ColPartition * tesseract::ColPartition::SplitAtBlob ( BLOBNBOX * split_blob)

Definition at line 828 of file colpartition.cpp.

828 {
829 ColPartition *split_part = ShallowCopy();
830 split_part->set_owns_blobs(owns_blobs());
831 BLOBNBOX_C_IT it(&boxes_);
832 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
833 BLOBNBOX *bbox = it.data();
834 ColPartition *prev_owner = bbox->owner();
835 ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr);
836 if (bbox == split_blob || !split_part->boxes_.empty()) {
837 split_part->AddBox(it.extract());
838 if (owns_blobs() && prev_owner != nullptr) {
839 bbox->set_owner(split_part);
840 }
841 }
842 }
843 ASSERT_HOST(!it.empty());
844 if (split_part->IsEmpty()) {
845 // Split part ended up with nothing. Possible if split_blob is not
846 // in the list of blobs.
847 delete split_part;
848 return nullptr;
849 }
850 right_key_tab_ = false;
851 split_part->left_key_tab_ = false;
852 ComputeLimits();
853 // TODO(nbeato) Merge Ray's CL like this:
854 // if (owns_blobs())
855 // SetBlobTextlineGoodness();
856 split_part->ComputeLimits();
857 // TODO(nbeato) Merge Ray's CL like this:
858 // if (split_part->owns_blobs())
859 // split_part->SetBlobTextlineGoodness();
860 return split_part;
861}

◆ top_spacing()

int tesseract::ColPartition::top_spacing ( ) const
inline

Definition at line 225 of file colpartition.h.

225 {
226 return top_spacing_;
227 }

◆ type()

PolyBlockType tesseract::ColPartition::type ( ) const
inline

Definition at line 180 of file colpartition.h.

180 {
181 return type_;
182 }

◆ TypesMatch() [1/2]

static bool tesseract::ColPartition::TypesMatch ( BlobRegionType  type1,
BlobRegionType  type2 
)
inline static

Definition at line 412 of file colpartition.h.

412 {
413 return (type1 == type2 || type1 == BRT_UNKNOWN || type2 == BRT_UNKNOWN) &&
414 !BLOBNBOX::IsLineType(type1) && !BLOBNBOX::IsLineType(type2);
415 }
static bool IsLineType(BlobRegionType type)
Definition: blobbox.h:443

◆ TypesMatch() [2/2]

bool tesseract::ColPartition::TypesMatch ( const ColPartition & other) const
inline

Definition at line 409 of file colpartition.h.

409 {
410 return TypesMatch(blob_type_, other.blob_type_);
411 }
bool TypesMatch(const ColPartition &other) const
Definition: colpartition.h:409

◆ TypesSimilar()

static bool tesseract::ColPartition::TypesSimilar ( PolyBlockType  type1,
PolyBlockType  type2 
)
inline static

Definition at line 418 of file colpartition.h.

418 {
419 return (type1 == type2 ||
420 (type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) ||
421 (type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION));
422 }
@ PT_INLINE_EQUATION
Definition: publictypes.h:57

◆ upper_partners()

ColPartition_CLIST * tesseract::ColPartition::upper_partners ( )
inline

Definition at line 195 of file colpartition.h.

195 {
196 return &upper_partners_;
197 }

◆ VCoreOverlap()

int tesseract::ColPartition::VCoreOverlap ( const ColPartition & other) const
inline

Definition at line 375 of file colpartition.h.

375 {
376 if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
377 return 0;
378 }
379 return std::min(median_top_, other.median_top_) -
380 std::max(median_bottom_, other.median_bottom_);
381 }

◆ VOverlaps()

bool tesseract::ColPartition::VOverlaps ( const ColPartition & other) const
inline

Definition at line 370 of file colpartition.h.

370 {
371 return bounding_box_.y_gap(other.bounding_box_) < 0;
372 }
int y_gap(const TBOX &box) const
Definition: rect.h:245

◆ VSignificantCoreOverlap()

bool tesseract::ColPartition::VSignificantCoreOverlap ( const ColPartition & other) const
inline

Definition at line 390 of file colpartition.h.

390 {
391 if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
392 return false;
393 }
394 int overlap = VCoreOverlap(other);
395 int height = std::min(median_top_ - median_bottom_,
396 other.median_top_ - other.median_bottom_);
397 return overlap * 3 > height;
398 }
int VCoreOverlap(const ColPartition &other) const
Definition: colpartition.h:375

◆ WithinSameMargins()

bool tesseract::ColPartition::WithinSameMargins ( const ColPartition & other) const
inline

Definition at line 401 of file colpartition.h.

401 {
402 return left_margin_ <= other.bounding_box_.left() &&
403 bounding_box_.left() >= other.left_margin_ &&
404 bounding_box_.right() <= other.right_margin_ &&
405 right_margin_ >= other.bounding_box_.right();
406 }

◆ XAtY()

int tesseract::ColPartition::XAtY ( int  sort_key,
int  y 
) const
inline

Definition at line 320 of file colpartition.h.

320 {
321 return TabVector::XAtY(vertical_, sort_key, y);
322 }
int XAtY(int y) const
Definition: tabvector.h:181

The documentation for this class was generated from the following files: