All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tesseract::ColPartition Class Reference

#include <colpartition.h>

Inheritance diagram for tesseract::ColPartition:
ELIST2_LINK

Public Member Functions

 ColPartition ()
 
 ColPartition (BlobRegionType blob_type, const ICOORD &vertical)
 
 ~ColPartition ()
 
const TBOX & bounding_box () const
 
int left_margin () const
 
void set_left_margin (int margin)
 
int right_margin () const
 
void set_right_margin (int margin)
 
int median_top () const
 
int median_bottom () const
 
int median_left () const
 
int median_right () const
 
int median_size () const
 
void set_median_size (int size)
 
int median_width () const
 
void set_median_width (int width)
 
BlobRegionType blob_type () const
 
void set_blob_type (BlobRegionType t)
 
BlobTextFlowType flow () const
 
void set_flow (BlobTextFlowType f)
 
int good_blob_score () const
 
bool good_width () const
 
bool good_column () const
 
bool left_key_tab () const
 
int left_key () const
 
bool right_key_tab () const
 
int right_key () const
 
PolyBlockType type () const
 
void set_type (PolyBlockType t)
 
BLOBNBOX_CLIST * boxes ()
 
int boxes_count () const
 
void set_vertical (const ICOORD &v)
 
ColPartition_CLIST * upper_partners ()
 
ColPartition_CLIST * lower_partners ()
 
void set_working_set (WorkingPartSet *working_set)
 
bool block_owned () const
 
void set_block_owned (bool owned)
 
bool desperately_merged () const
 
ColPartitionSet * column_set () const
 
void set_side_step (int step)
 
int bottom_spacing () const
 
void set_bottom_spacing (int spacing)
 
int top_spacing () const
 
void set_top_spacing (int spacing)
 
void set_table_type ()
 
void clear_table_type ()
 
bool inside_table_column ()
 
void set_inside_table_column (bool val)
 
ColPartition * nearest_neighbor_above () const
 
void set_nearest_neighbor_above (ColPartition *part)
 
ColPartition * nearest_neighbor_below () const
 
void set_nearest_neighbor_below (ColPartition *part)
 
int space_above () const
 
void set_space_above (int space)
 
int space_below () const
 
void set_space_below (int space)
 
int space_to_left () const
 
void set_space_to_left (int space)
 
int space_to_right () const
 
void set_space_to_right (int space)
 
uinT8 * color1 ()
 
uinT8 * color2 ()
 
bool owns_blobs () const
 
void set_owns_blobs (bool owns_blobs)
 
int MidY () const
 
int MedianY () const
 
int MidX () const
 
int SortKey (int x, int y) const
 
int XAtY (int sort_key, int y) const
 
int KeyWidth (int left_key, int right_key) const
 
int ColumnWidth () const
 
int BoxLeftKey () const
 
int BoxRightKey () const
 
int LeftAtY (int y) const
 
int RightAtY (int y) const
 
bool IsLeftOf (const ColPartition &other) const
 
bool ColumnContains (int x, int y) const
 
bool IsEmpty () const
 
bool IsSingleton () const
 
bool HOverlaps (const ColPartition &other) const
 
bool VOverlaps (const ColPartition &other) const
 
int VCoreOverlap (const ColPartition &other) const
 
int HCoreOverlap (const ColPartition &other) const
 
bool VSignificantCoreOverlap (const ColPartition &other) const
 
bool WithinSameMargins (const ColPartition &other) const
 
bool TypesMatch (const ColPartition &other) const
 
bool IsLineType () const
 
bool IsImageType () const
 
bool IsTextType () const
 
bool IsPulloutType () const
 
bool IsVerticalType () const
 
bool IsHorizontalType () const
 
bool IsUnMergeableType () const
 
bool IsVerticalLine () const
 
bool IsHorizontalLine () const
 
void AddBox (BLOBNBOX *box)
 
void RemoveBox (BLOBNBOX *box)
 
BLOBNBOX * BiggestBox ()
 
TBOX BoundsWithoutBox (BLOBNBOX *box)
 
void ClaimBoxes ()
 
void DisownBoxes ()
 
void DisownBoxesNoAssert ()
 
bool ReleaseNonLeaderBoxes ()
 
void DeleteBoxes ()
 
void ReflectInYAxis ()
 
bool IsLegal ()
 
bool MatchingColumns (const ColPartition &other) const
 
bool MatchingTextColor (const ColPartition &other) const
 
bool MatchingSizes (const ColPartition &other) const
 
bool ConfirmNoTabViolation (const ColPartition &other) const
 
bool MatchingStrokeWidth (const ColPartition &other, double fractional_tolerance, double constant_tolerance) const
 
bool OKDiacriticMerge (const ColPartition &candidate, bool debug) const
 
void SetLeftTab (const TabVector *tab_vector)
 
void SetRightTab (const TabVector *tab_vector)
 
void CopyLeftTab (const ColPartition &src, bool take_box)
 
void CopyRightTab (const ColPartition &src, bool take_box)
 
int LeftBlobRule () const
 
int RightBlobRule () const
 
float SpecialBlobsDensity (const BlobSpecialTextType type) const
 
int SpecialBlobsCount (const BlobSpecialTextType type)
 
void SetSpecialBlobsDensity (const BlobSpecialTextType type, const float density)
 
void ComputeSpecialBlobsDensity ()
 
void AddPartner (bool upper, ColPartition *partner)
 
void RemovePartner (bool upper, ColPartition *partner)
 
ColPartition * SingletonPartner (bool upper)
 
void Absorb (ColPartition *other, WidthCallback *cb)
 
bool OKMergeOverlap (const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug)
 
BLOBNBOX * OverlapSplitBlob (const TBOX &box)
 
ColPartition * SplitAtBlob (BLOBNBOX *split_blob)
 
ColPartition * SplitAt (int split_x)
 
void ComputeLimits ()
 
int CountOverlappingBoxes (const TBOX &box)
 
void SetPartitionType (int resolution, ColPartitionSet *columns)
 
PolyBlockType PartitionType (ColumnSpanningType flow) const
 
void ColumnRange (int resolution, ColPartitionSet *columns, int *first_col, int *last_col)
 
void SetColumnGoodness (WidthCallback *cb)
 
bool MarkAsLeaderIfMonospaced ()
 
void SetRegionAndFlowTypesFromProjectionValue (int value)
 
void SetBlobTypes ()
 
bool HasGoodBaseline ()
 
void AddToWorkingSet (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
 
TO_ROW * MakeToRow ()
 
ColPartition * ShallowCopy () const
 
ColPartition * CopyButDontOwnBlobs ()
 
ScrollView::Color BoxColor () const
 
void Print () const
 
void PrintColors ()
 
void SmoothPartnerRun (int working_set_count)
 
void RefinePartners (PolyBlockType type, bool get_desparate, ColPartitionGrid *grid)
 
bool IsInSameColumnAs (const ColPartition &part) const
 
void set_first_column (int column)
 
void set_last_column (int column)
 
- Public Member Functions inherited from ELIST2_LINK
 ELIST2_LINK ()
 
 ELIST2_LINK (const ELIST2_LINK &)
 
void operator= (const ELIST2_LINK &)
 

Static Public Member Functions

static ColPartition * MakeLinePartition (BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top)
 
static ColPartition * FakePartition (const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
 
static ColPartition * MakeBigPartition (BLOBNBOX *box, ColPartition_LIST *big_part_list)
 
static bool TypesMatch (BlobRegionType type1, BlobRegionType type2)
 
static bool TypesSimilar (PolyBlockType type1, PolyBlockType type2)
 
static void LineSpacingBlocks (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks)
 
static TO_BLOCK * MakeBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
 
static TO_BLOCK * MakeVerticalTextBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
 

Detailed Description

ColPartition is a partition of a horizontal slice of the page. It starts out as a collection of blobs at a particular y-coord in the grid, but ends up (after merging and uniquing) as an approximate text line. ColPartitions are also used to hold a partitioning of the page into columns, each representing one column. Although a ColPartition applies to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions emerges, which represents the columns over a wide y-coordinate range.

Definition at line 67 of file colpartition.h.

Constructor & Destructor Documentation

tesseract::ColPartition::ColPartition ( )
inline

Definition at line 69 of file colpartition.h.

69  {
70  // This empty constructor is here only so that the class can be ELISTIZED.
71  // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
72  // and eliminate CLASSNAME##_copier.
73  }
tesseract::ColPartition::ColPartition ( BlobRegionType  blob_type,
const ICOORD &  vertical 
)
Parameters
blob_type is the blob_region_type_ of the blobs in this partition.
vertical is the direction of logical vertical on the possibly skewed image.

Definition at line 88 of file colpartition.cpp.

89  : left_margin_(-MAX_INT32), right_margin_(MAX_INT32),
90  median_bottom_(MAX_INT32), median_top_(-MAX_INT32), median_size_(0),
91  median_left_(MAX_INT32), median_right_(-MAX_INT32), median_width_(0),
92  blob_type_(blob_type), flow_(BTFT_NONE), good_blob_score_(0),
93  good_width_(false), good_column_(false),
94  left_key_tab_(false), right_key_tab_(false),
95  left_key_(0), right_key_(0), type_(PT_UNKNOWN), vertical_(vertical),
96  working_set_(NULL), last_add_was_vertical_(false), block_owned_(false),
97  desperately_merged_(false),
98  first_column_(-1), last_column_(-1), column_set_(NULL),
99  side_step_(0), top_spacing_(0), bottom_spacing_(0),
100  type_before_table_(PT_UNKNOWN), inside_table_column_(false),
101  nearest_neighbor_above_(NULL), nearest_neighbor_below_(NULL),
102  space_above_(0), space_below_(0), space_to_left_(0), space_to_right_(0),
103  owns_blobs_(true) {
104  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
105 }
BlobRegionType blob_type() const
Definition: colpartition.h:148
#define MAX_INT32
Definition: host.h:120
#define NULL
Definition: host.h:144
tesseract::ColPartition::~ColPartition ( )

Definition at line 150 of file colpartition.cpp.

150  {
151  // Remove this as a partner of all partners, as we don't want them
152  // referring to a deleted object.
153  ColPartition_C_IT it(&upper_partners_);
154  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
155  it.data()->RemovePartner(false, this);
156  }
157  it.set_to_list(&lower_partners_);
158  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
159  it.data()->RemovePartner(true, this);
160  }
161 }

Member Function Documentation

void tesseract::ColPartition::Absorb ( ColPartition *  other,
WidthCallback *  cb 
)

Definition at line 653 of file colpartition.cpp.

653  {
654  // The result has to either own all of the blobs or none of them.
655  // Verify the flag is consistent.
656  ASSERT_HOST(owns_blobs() == other->owns_blobs());
657  // TODO(nbeato): check owns_blobs better. Right now owns_blobs
658  // should always be true when this is called. So there are no issues.
659  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
660  bounding_box_.bottom()) ||
661  TabFind::WithinTestRegion(2, other->bounding_box_.left(),
662  other->bounding_box_.bottom())) {
663  tprintf("Merging:");
664  Print();
665  other->Print();
666  }
667 
668  // Update the special_blobs_densities_.
669  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
670  for (int type = 0; type < BSTT_COUNT; ++type) {
671  int w1 = boxes_.length(), w2 = other->boxes_.length();
672  float new_val = special_blobs_densities_[type] * w1 +
673  other->special_blobs_densities_[type] * w2;
674  if (!w1 || !w2) {
675  special_blobs_densities_[type] = new_val / (w1 + w2);
676  }
677  }
678 
679  // Merge the two sorted lists.
680  BLOBNBOX_C_IT it(&boxes_);
681  BLOBNBOX_C_IT it2(&other->boxes_);
682  for (; !it2.empty(); it2.forward()) {
683  BLOBNBOX* bbox2 = it2.extract();
684  ColPartition* prev_owner = bbox2->owner();
685  if (prev_owner != other && prev_owner != NULL) {
686  // A blob on other's list is owned by someone else; let them have it.
687  continue;
688  }
689  ASSERT_HOST(prev_owner == other || prev_owner == NULL);
690  if (prev_owner == other)
691  bbox2->set_owner(this);
692  it.add_to_end(bbox2);
693  }
694  left_margin_ = MIN(left_margin_, other->left_margin_);
695  right_margin_ = MAX(right_margin_, other->right_margin_);
696  if (other->left_key_ < left_key_) {
697  left_key_ = other->left_key_;
698  left_key_tab_ = other->left_key_tab_;
699  }
700  if (other->right_key_ > right_key_) {
701  right_key_ = other->right_key_;
702  right_key_tab_ = other->right_key_tab_;
703  }
704  // Combine the flow and blob_type in a sensible way.
705  // Dominant flows stay.
706  if (!DominatesInMerge(flow_, other->flow_)) {
707  flow_ = other->flow_;
708  blob_type_ = other->blob_type_;
709  }
710  SetBlobTypes();
711  if (IsVerticalType()) {
712  boxes_.sort(SortByBoxBottom<BLOBNBOX>);
713  last_add_was_vertical_ = true;
714  } else {
715  boxes_.sort(SortByBoxLeft<BLOBNBOX>);
716  last_add_was_vertical_ = false;
717  }
718  ComputeLimits();
719  // Fix partner lists. other is going away, so remove it as a
720  // partner of all its partners and add this in its place.
721  for (int upper = 0; upper < 2; ++upper) {
722  ColPartition_CLIST partners;
723  ColPartition_C_IT part_it(&partners);
724  part_it.add_list_after(upper ? &other->upper_partners_
725  : &other->lower_partners_);
726  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
727  ColPartition* partner = part_it.extract();
728  partner->RemovePartner(!upper, other);
729  partner->RemovePartner(!upper, this);
730  partner->AddPartner(!upper, this);
731  }
732  }
733  delete other;
734  if (cb != NULL) {
735  SetColumnGoodness(cb);
736  }
737 }
bool IsVerticalType() const
Definition: colpartition.h:435
static bool WithinTestRegion(int detail_level, int x, int y)
#define MAX(x, y)
Definition: ndminx.h:24
#define tprintf(...)
Definition: tprintf.h:31
#define MIN(x, y)
Definition: ndminx.h:28
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:340
#define ASSERT_HOST(x)
Definition: errcode.h:84
bool owns_blobs() const
Definition: colpartition.h:291
inT16 left() const
Definition: rect.h:68
inT16 bottom() const
Definition: rect.h:61
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
PolyBlockType type() const
Definition: colpartition.h:181
bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2)
Definition: blobbox.h:114
void SetColumnGoodness(WidthCallback *cb)
#define NULL
Definition: host.h:144
void tesseract::ColPartition::AddBox ( BLOBNBOX *  box)

Definition at line 184 of file colpartition.cpp.

184  {
185  TBOX box = bbox->bounding_box();
186  // Update the partition limits.
187  if (boxes_.length() == 0) {
188  bounding_box_ = box;
189  } else {
190  bounding_box_ += box;
191  }
192 
193  if (IsVerticalType()) {
194  if (!last_add_was_vertical_) {
195  boxes_.sort(SortByBoxBottom<BLOBNBOX>);
196  last_add_was_vertical_ = true;
197  }
198  boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>, true, bbox);
199  } else {
200  if (last_add_was_vertical_) {
201  boxes_.sort(SortByBoxLeft<BLOBNBOX>);
202  last_add_was_vertical_ = false;
203  }
204  boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>, true, bbox);
205  }
206  if (!left_key_tab_)
207  left_key_ = BoxLeftKey();
208  if (!right_key_tab_)
209  right_key_ = BoxRightKey();
210  if (TabFind::WithinTestRegion(2, box.left(), box.bottom()))
211  tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
212  box.left(), box.bottom(), box.right(), box.top(),
213  bounding_box_.left(), bounding_box_.right());
214 }
bool IsVerticalType() const
Definition: colpartition.h:435
static bool WithinTestRegion(int detail_level, int x, int y)
#define tprintf(...)
Definition: tprintf.h:31
inT16 right() const
Definition: rect.h:75
int BoxRightKey() const
Definition: colpartition.h:336
inT16 left() const
Definition: rect.h:68
inT16 bottom() const
Definition: rect.h:61
Definition: rect.h:30
const TBOX & bounding_box() const
Definition: blobbox.h:215
inT16 top() const
Definition: rect.h:54
void tesseract::ColPartition::AddPartner ( bool  upper,
ColPartition *  partner 
)

Definition at line 618 of file colpartition.cpp.

618  {
619  if (upper) {
620  partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
621  true, this);
622  upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
623  } else {
624  partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
625  true, this);
626  lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
627  }
628 }
void tesseract::ColPartition::AddToWorkingSet ( const ICOORD &  bleft,
const ICOORD &  tright,
int  resolution,
ColPartition_LIST *  used_parts,
WorkingPartSet_LIST *  working_set 
)

Definition at line 1359 of file colpartition.cpp.

1362  {
1363  if (block_owned_)
1364  return; // Done it already.
1365  block_owned_ = true;
1366  WorkingPartSet_IT it(working_sets);
1367  // If there is an upper partner use its working_set_ directly.
1368  ColPartition* partner = SingletonPartner(true);
1369  if (partner != NULL && partner->working_set_ != NULL) {
1370  working_set_ = partner->working_set_;
1371  working_set_->AddPartition(this);
1372  return;
1373  }
1374  if (partner != NULL && textord_debug_bugs) {
1375  tprintf("Partition with partner has no working set!:");
1376  Print();
1377  partner->Print();
1378  }
1379  // Search for the column that the left edge fits in.
1380  WorkingPartSet* work_set = NULL;
1381  it.move_to_first();
1382  int col_index = 0;
1383  for (it.mark_cycle_pt(); !it.cycled_list() &&
1384  col_index != first_column_;
1385  it.forward(), ++col_index);
1386  if (textord_debug_tabfind >= 2) {
1387  tprintf("Match is %s for:", (col_index & 1) ? "Real" : "Between");
1388  Print();
1389  }
1390  if (it.cycled_list() && textord_debug_bugs) {
1391  tprintf("Target column=%d, only had %d\n", first_column_, col_index);
1392  }
1393  ASSERT_HOST(!it.cycled_list());
1394  work_set = it.data();
1395  // If last_column_ != first_column, then we need to scoop up all blocks
1396  // between here and the last_column_ and put back in work_set.
1397  if (!it.cycled_list() && last_column_ != first_column_ && !IsPulloutType()) {
1398  // Find the column that the right edge falls in.
1399  BLOCK_LIST completed_blocks;
1400  TO_BLOCK_LIST to_blocks;
1401  for (; !it.cycled_list() && col_index <= last_column_;
1402  it.forward(), ++col_index) {
1403  WorkingPartSet* end_set = it.data();
1404  end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
1405  &completed_blocks, &to_blocks);
1406  }
1407  work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
1408  }
1409  working_set_ = work_set;
1410  work_set->AddPartition(this);
1411 }
#define tprintf(...)
Definition: tprintf.h:31
#define ASSERT_HOST(x)
Definition: errcode.h:84
int textord_debug_tabfind
Definition: alignedblob.cpp:27
ColPartition * SingletonPartner(bool upper)
bool IsPulloutType() const
Definition: colpartition.h:431
int textord_debug_bugs
Definition: alignedblob.cpp:28
#define NULL
Definition: host.h:144
void AddPartition(ColPartition *part)
BLOBNBOX * tesseract::ColPartition::BiggestBox ( )

Definition at line 230 of file colpartition.cpp.

230  {
231  BLOBNBOX* biggest = NULL;
232  BLOBNBOX_C_IT bb_it(&boxes_);
233  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
234  BLOBNBOX* bbox = bb_it.data();
235  if (IsVerticalType()) {
236  if (biggest == NULL ||
237  bbox->bounding_box().width() > biggest->bounding_box().width())
238  biggest = bbox;
239  } else {
240  if (biggest == NULL ||
241  bbox->bounding_box().height() > biggest->bounding_box().height())
242  biggest = bbox;
243  }
244  }
245  return biggest;
246 }
bool IsVerticalType() const
Definition: colpartition.h:435
inT16 height() const
Definition: rect.h:104
inT16 width() const
Definition: rect.h:111
#define NULL
Definition: host.h:144
const TBOX & bounding_box() const
Definition: blobbox.h:215
BlobRegionType tesseract::ColPartition::blob_type ( ) const
inline

Definition at line 148 of file colpartition.h.

148  {
149  return blob_type_;
150  }
bool tesseract::ColPartition::block_owned ( ) const
inline

Definition at line 205 of file colpartition.h.

205  {
206  return block_owned_;
207  }
int tesseract::ColPartition::bottom_spacing ( ) const
inline

Definition at line 220 of file colpartition.h.

220  {
221  return bottom_spacing_;
222  }
const TBOX& tesseract::ColPartition::bounding_box ( ) const
inline

Definition at line 109 of file colpartition.h.

109  {
110  return bounding_box_;
111  }
TBOX tesseract::ColPartition::BoundsWithoutBox ( BLOBNBOX *  box)

Definition at line 249 of file colpartition.cpp.

249  {
250  TBOX result;
251  BLOBNBOX_C_IT bb_it(&boxes_);
252  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
253  if (box != bb_it.data()) {
254  result += bb_it.data()->bounding_box();
255  }
256  }
257  return result;
258 }
Definition: rect.h:30
ScrollView::Color tesseract::ColPartition::BoxColor ( ) const

Definition at line 1779 of file colpartition.cpp.

1779  {
1780  if (type_ == PT_UNKNOWN)
1781  return BLOBNBOX::TextlineColor(blob_type_, flow_);
1782  return POLY_BLOCK::ColorForPolyBlockType(type_);
1783 }
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
Definition: blobbox.cpp:437
static ScrollView::Color ColorForPolyBlockType(PolyBlockType type)
Returns a color to draw the given type.
Definition: polyblk.cpp:395
BLOBNBOX_CLIST* tesseract::ColPartition::boxes ( )
inline

Definition at line 187 of file colpartition.h.

187  {
188  return &boxes_;
189  }
int tesseract::ColPartition::boxes_count ( ) const
inline

Definition at line 190 of file colpartition.h.

190  {
191  return boxes_.length();
192  }
int tesseract::ColPartition::BoxLeftKey ( ) const
inline

Definition at line 332 of file colpartition.h.

332  {
333  return SortKey(bounding_box_.left(), MidY());
334  }
int SortKey(int x, int y) const
Definition: colpartition.h:316
inT16 left() const
Definition: rect.h:68
int tesseract::ColPartition::BoxRightKey ( ) const
inline

Definition at line 336 of file colpartition.h.

336  {
337  return SortKey(bounding_box_.right(), MidY());
338  }
int SortKey(int x, int y) const
Definition: colpartition.h:316
inT16 right() const
Definition: rect.h:75
void tesseract::ColPartition::ClaimBoxes ( )

Definition at line 262 of file colpartition.cpp.

262  {
263  BLOBNBOX_C_IT bb_it(&boxes_);
264  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
265  BLOBNBOX* bblob = bb_it.data();
266  ColPartition* other = bblob->owner();
267  if (other == NULL) {
268  // Normal case: ownership is available.
269  bblob->set_owner(this);
270  } else {
271  ASSERT_HOST(other == this);
272  }
273  }
274 }
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:340
#define ASSERT_HOST(x)
Definition: errcode.h:84
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
#define NULL
Definition: host.h:144
void tesseract::ColPartition::clear_table_type ( )
inline

Definition at line 239 of file colpartition.h.

239  {
240  if (type_ == PT_TABLE)
241  type_ = type_before_table_;
242  }
Definition: capi.h:78
uinT8* tesseract::ColPartition::color1 ( )
inline

Definition at line 285 of file colpartition.h.

285  {
286  return color1_;
287  }
uinT8* tesseract::ColPartition::color2 ( )
inline

Definition at line 288 of file colpartition.h.

288  {
289  return color2_;
290  }
ColPartitionSet* tesseract::ColPartition::column_set ( ) const
inline

Definition at line 214 of file colpartition.h.

214  {
215  return column_set_;
216  }
bool tesseract::ColPartition::ColumnContains ( int  x,
int  y 
) const
inline

Definition at line 353 of file colpartition.h.

353  {
354  return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1;
355  }
int LeftAtY(int y) const
Definition: colpartition.h:340
int RightAtY(int y) const
Definition: colpartition.h:344
void tesseract::ColPartition::ColumnRange ( int  resolution,
ColPartitionSet *  columns,
int *  first_col,
int *  last_col 
)

Definition at line 1069 of file colpartition.cpp.

1070  {
1071  int first_spanned_col = -1;
1072  ColumnSpanningType span_type =
1073  columns->SpanningType(resolution,
1074  bounding_box_.left(), bounding_box_.right(),
1075  MIN(bounding_box_.height(), bounding_box_.width()),
1076  MidY(), left_margin_, right_margin_,
1077  first_col, last_col,
1078  &first_spanned_col);
1079  type_ = PartitionType(span_type);
1080 }
#define MIN(x, y)
Definition: ndminx.h:28
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
inT16 height() const
Definition: rect.h:104
inT16 width() const
Definition: rect.h:111
PolyBlockType PartitionType(ColumnSpanningType flow) const
int tesseract::ColPartition::ColumnWidth ( ) const
inline

Definition at line 328 of file colpartition.h.

328  {
329  return KeyWidth(left_key_, right_key_);
330  }
int KeyWidth(int left_key, int right_key) const
Definition: colpartition.h:324
void tesseract::ColPartition::ComputeLimits ( )

Definition at line 874 of file colpartition.cpp.

874  {
875  bounding_box_ = TBOX(); // Clear it
876  BLOBNBOX_C_IT it(&boxes_);
877  BLOBNBOX* bbox = NULL;
878  int non_leader_count = 0;
879  if (it.empty()) {
880  bounding_box_.set_left(left_margin_);
881  bounding_box_.set_right(right_margin_);
882  bounding_box_.set_bottom(0);
883  bounding_box_.set_top(0);
884  } else {
885  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
886  bbox = it.data();
887  bounding_box_ += bbox->bounding_box();
888  if (bbox->flow() != BTFT_LEADER)
889  ++non_leader_count;
890  }
891  }
892  if (!left_key_tab_)
893  left_key_ = BoxLeftKey();
894  if (left_key_ > BoxLeftKey() && textord_debug_bugs) {
895  // TODO(rays) investigate the causes of these error messages, to find
896  // out if they are genuinely harmful, or just indicative of junk input.
897  tprintf("Computed left-illegal partition\n");
898  Print();
899  }
900  if (!right_key_tab_)
901  right_key_ = BoxRightKey();
902  if (right_key_ < BoxRightKey() && textord_debug_bugs) {
903  tprintf("Computed right-illegal partition\n");
904  Print();
905  }
906  if (it.empty())
907  return;
908  if (IsImageType() || blob_type() == BRT_RECTIMAGE ||
909  blob_type() == BRT_POLYIMAGE) {
910  median_top_ = bounding_box_.top();
911  median_bottom_ = bounding_box_.bottom();
912  median_size_ = bounding_box_.height();
913  median_left_ = bounding_box_.left();
914  median_right_ = bounding_box_.right();
915  median_width_ = bounding_box_.width();
916  } else {
917  STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
918  STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
919  STATS size_stats(0, bounding_box_.height() + 1);
920  STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
921  STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
922  STATS width_stats(0, bounding_box_.width() + 1);
923  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
924  bbox = it.data();
925  if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) {
926  TBOX box = bbox->bounding_box();
927  int area = box.area();
928  top_stats.add(box.top(), area);
929  bottom_stats.add(box.bottom(), area);
930  size_stats.add(box.height(), area);
931  left_stats.add(box.left(), area);
932  right_stats.add(box.right(), area);
933  width_stats.add(box.width(), area);
934  }
935  }
936  median_top_ = static_cast<int>(top_stats.median() + 0.5);
937  median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
938  median_size_ = static_cast<int>(size_stats.median() + 0.5);
939  median_left_ = static_cast<int>(left_stats.median() + 0.5);
940  median_right_ = static_cast<int>(right_stats.median() + 0.5);
941  median_width_ = static_cast<int>(width_stats.median() + 0.5);
942  }
943 
944  if (right_margin_ < bounding_box_.right() && textord_debug_bugs) {
945  tprintf("Made partition with bad right coords");
946  Print();
947  }
948  if (left_margin_ > bounding_box_.left() && textord_debug_bugs) {
949  tprintf("Made partition with bad left coords");
950  Print();
951  }
952  // Fix partner lists. The bounding box has changed and partners are stored
953  // in bounding box order, so remove and reinsert this as a partner
954  // of all its partners.
955  for (int upper = 0; upper < 2; ++upper) {
956  ColPartition_CLIST partners;
957  ColPartition_C_IT part_it(&partners);
958  part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
959  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
960  ColPartition* partner = part_it.extract();
961  partner->RemovePartner(!upper, this);
962  partner->AddPartner(!upper, this);
963  }
964  }
965  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
966  bounding_box_.bottom())) {
967  tprintf("Recomputed box for partition %p\n", this);
968  Print();
969  }
970 }
static bool WithinTestRegion(int detail_level, int x, int y)
#define tprintf(...)
Definition: tprintf.h:31
Definition: statistc.h:33
void set_right(int x)
Definition: rect.h:78
BlobRegionType blob_type() const
Definition: colpartition.h:148
inT16 right() const
Definition: rect.h:75
void set_left(int x)
Definition: rect.h:71
inT32 area() const
Definition: rect.h:118
int BoxRightKey() const
Definition: colpartition.h:336
bool IsImageType() const
Definition: colpartition.h:423
void set_bottom(int y)
Definition: rect.h:64
inT16 left() const
Definition: rect.h:68
inT16 bottom() const
Definition: rect.h:61
inT16 height() const
Definition: rect.h:104
inT16 width() const
Definition: rect.h:111
int textord_debug_bugs
Definition: alignedblob.cpp:28
Definition: rect.h:30
#define NULL
Definition: host.h:144
const TBOX & bounding_box() const
Definition: blobbox.h:215
void set_top(int y)
Definition: rect.h:57
inT16 top() const
Definition: rect.h:54
BlobTextFlowType flow() const
Definition: blobbox.h:280
void tesseract::ColPartition::ComputeSpecialBlobsDensity ( )

Definition at line 597 of file colpartition.cpp.

597  {
598  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
599  if (boxes_.empty()) {
600  return;
601  }
602 
603  BLOBNBOX_C_IT blob_it(&boxes_);
604  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
605  BLOBNBOX* blob = blob_it.data();
606  BlobSpecialTextType type = blob->special_text_type();
607  special_blobs_densities_[type]++;
608  }
609 
610  for (int type = 0; type < BSTT_COUNT; ++type) {
611  special_blobs_densities_[type] /= boxes_.length();
612  }
613 }
BlobSpecialTextType
Definition: blobbox.h:81
BlobSpecialTextType special_text_type() const
Definition: blobbox.h:274
PolyBlockType type() const
Definition: colpartition.h:181
bool tesseract::ColPartition::ConfirmNoTabViolation ( const ColPartition &  other) const

Definition at line 428 of file colpartition.cpp.

428  {
429  if (bounding_box_.right() < other.bounding_box_.left() &&
430  bounding_box_.right() < other.LeftBlobRule())
431  return false;
432  if (other.bounding_box_.right() < bounding_box_.left() &&
433  other.bounding_box_.right() < LeftBlobRule())
434  return false;
435  if (bounding_box_.left() > other.bounding_box_.right() &&
436  bounding_box_.left() > other.RightBlobRule())
437  return false;
438  if (other.bounding_box_.left() > bounding_box_.right() &&
439  other.bounding_box_.left() > RightBlobRule())
440  return false;
441  return true;
442 }
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
ColPartition * tesseract::ColPartition::CopyButDontOwnBlobs ( )

Definition at line 1766 of file colpartition.cpp.

1766  {
1767  ColPartition* copy = ShallowCopy();
1768  copy->set_owns_blobs(false);
1769  BLOBNBOX_C_IT inserter(copy->boxes());
1770  BLOBNBOX_C_IT traverser(boxes());
1771  for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward())
1772  inserter.add_after_then_move(traverser.data());
1773  return copy;
1774 }
BLOBNBOX_CLIST * boxes()
Definition: colpartition.h:187
ColPartition * ShallowCopy() const
void tesseract::ColPartition::CopyLeftTab ( const ColPartition & src,
bool  take_box 
)

Definition at line 534 of file colpartition.cpp.

534  {
535  left_key_tab_ = take_box ? false : src.left_key_tab_;
536  if (left_key_tab_) {
537  left_key_ = src.left_key_;
538  } else {
539  bounding_box_.set_left(XAtY(src.BoxLeftKey(), MidY()));
540  left_key_ = BoxLeftKey();
541  }
542  if (left_margin_ > bounding_box_.left())
543  left_margin_ = src.left_margin_;
544 }
int XAtY(int sort_key, int y) const
Definition: colpartition.h:320
void set_left(int x)
Definition: rect.h:71
inT16 left() const
Definition: rect.h:68
void tesseract::ColPartition::CopyRightTab ( const ColPartition & src,
bool  take_box 
)

Definition at line 547 of file colpartition.cpp.

547  {
548  right_key_tab_ = take_box ? false : src.right_key_tab_;
549  if (right_key_tab_) {
550  right_key_ = src.right_key_;
551  } else {
552  bounding_box_.set_right(XAtY(src.BoxRightKey(), MidY()));
553  right_key_ = BoxRightKey();
554  }
555  if (right_margin_ < bounding_box_.right())
556  right_margin_ = src.right_margin_;
557 }
int XAtY(int sort_key, int y) const
Definition: colpartition.h:320
void set_right(int x)
Definition: rect.h:78
inT16 right() const
Definition: rect.h:75
int BoxRightKey() const
Definition: colpartition.h:336
int tesseract::ColPartition::CountOverlappingBoxes ( const TBOX & box)

Definition at line 973 of file colpartition.cpp.

973  {
974  BLOBNBOX_C_IT it(&boxes_);
975  int overlap_count = 0;
976  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
977  BLOBNBOX* bbox = it.data();
978  if (box.overlap(bbox->bounding_box()))
979  ++overlap_count;
980  }
981  return overlap_count;
982 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
bool overlap(const TBOX &box) const
Definition: rect.h:345
void tesseract::ColPartition::DeleteBoxes ( )

Definition at line 320 of file colpartition.cpp.

320  {
321  // Although the boxes_ list is a C_LIST, in some cases it owns the
322  // BLOBNBOXes, as the ColPartition takes ownership from the grid,
323  // and the BLOBNBOXes own the underlying C_BLOBs.
324  for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) {
325  BLOBNBOX* bblob = bb_it.extract();
326  delete bblob->cblob();
327  delete bblob;
328  }
329 }
C_BLOB * cblob() const
Definition: blobbox.h:253
bool tesseract::ColPartition::desperately_merged ( ) const
inline

Definition at line 211 of file colpartition.h.

211  {
212  return desperately_merged_;
213  }
void tesseract::ColPartition::DisownBoxes ( )

Definition at line 278 of file colpartition.cpp.

278  {
279  BLOBNBOX_C_IT bb_it(&boxes_);
280  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
281  BLOBNBOX* bblob = bb_it.data();
282  ASSERT_HOST(bblob->owner() == this || bblob->owner() == NULL);
283  bblob->set_owner(NULL);
284  }
285 }
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:340
#define ASSERT_HOST(x)
Definition: errcode.h:84
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
#define NULL
Definition: host.h:144
void tesseract::ColPartition::DisownBoxesNoAssert ( )

Definition at line 291 of file colpartition.cpp.

291  {
292  BLOBNBOX_C_IT bb_it(&boxes_);
293  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
294  BLOBNBOX* bblob = bb_it.data();
295  if (bblob->owner() == this)
296  bblob->set_owner(NULL);
297  }
298 }
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:340
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
#define NULL
Definition: host.h:144
ColPartition * tesseract::ColPartition::FakePartition ( const TBOX & box,
PolyBlockType  block_type,
BlobRegionType  blob_type,
BlobTextFlowType  flow 
)
static

Definition at line 112 of file colpartition.cpp.

115  {
116  ColPartition* part = new ColPartition(blob_type, ICOORD(0, 1));
117  part->set_type(block_type);
118  part->set_flow(flow);
119  part->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(box)));
120  part->set_left_margin(box.left());
121  part->set_right_margin(box.right());
122  part->SetBlobTypes();
123  part->ComputeLimits();
124  part->ClaimBoxes();
125  return part;
126 }
BlobRegionType blob_type() const
Definition: colpartition.h:148
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
BlobTextFlowType flow() const
Definition: colpartition.h:154
integer coordinate
Definition: points.h:30
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:238
BlobTextFlowType tesseract::ColPartition::flow ( ) const
inline

Definition at line 154 of file colpartition.h.

154  {
155  return flow_;
156  }
int tesseract::ColPartition::good_blob_score ( ) const
inline

Definition at line 160 of file colpartition.h.

160  {
161  return good_blob_score_;
162  }
bool tesseract::ColPartition::good_column ( ) const
inline

Definition at line 166 of file colpartition.h.

166  {
167  return good_column_;
168  }
bool tesseract::ColPartition::good_width ( ) const
inline

Definition at line 163 of file colpartition.h.

163  {
164  return good_width_;
165  }
bool tesseract::ColPartition::HasGoodBaseline ( )

Definition at line 1294 of file colpartition.cpp.

1294  {
1295  // Approximation of the baseline.
1296  DetLineFit linepoints;
1297  // Calculation of the mean height on this line segment. Note that these
1298  // variable names apply to the context of a horizontal line, and work
1299  // analogously, rather than literally in the case of a vertical line.
1300  int total_height = 0;
1301  int coverage = 0;
1302  int height_count = 0;
1303  int width = 0;
1304  BLOBNBOX_C_IT it(&boxes_);
1305  TBOX box(it.data()->bounding_box());
1306  // Accumulate points representing the baseline at the middle of each blob,
1307  // but add an additional point for each end of the line. This makes it
1308  // harder to fit a severe skew angle, as it is most likely not right.
1309  if (IsVerticalType()) {
1310  // For a vertical line, use the right side as the baseline.
1311  ICOORD first_pt(box.right(), box.bottom());
1312  // Use the bottom-right of the first (bottom) box, the top-right of the
1313  // last, and the middle-right of all others.
1314  linepoints.Add(first_pt);
1315  for (it.forward(); !it.at_last(); it.forward()) {
1316  BLOBNBOX* blob = it.data();
1317  box = blob->bounding_box();
1318  ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2);
1319  linepoints.Add(box_pt);
1320  total_height += box.width();
1321  coverage += box.height();
1322  ++height_count;
1323  }
1324  box = it.data()->bounding_box();
1325  ICOORD last_pt(box.right(), box.top());
1326  linepoints.Add(last_pt);
1327  width = last_pt.y() - first_pt.y();
1328 
1329  } else {
1330  // Horizontal lines use the bottom as the baseline.
1331  TBOX box(it.data()->bounding_box());
1332  // Use the bottom-left of the first box, the bottom-right of the last,
1333  // and the middle of all others.
1334  ICOORD first_pt(box.left(), box.bottom());
1335  linepoints.Add(first_pt);
1336  for (it.forward(); !it.at_last(); it.forward()) {
1337  BLOBNBOX* blob = it.data();
1338  box = blob->bounding_box();
1339  ICOORD box_pt((box.left() + box.right()) / 2, box.bottom());
1340  linepoints.Add(box_pt);
1341  total_height += box.height();
1342  coverage += box.width();
1343  ++height_count;
1344  }
1345  box = it.data()->bounding_box();
1346  ICOORD last_pt(box.right(), box.bottom());
1347  linepoints.Add(last_pt);
1348  width = last_pt.x() - first_pt.x();
1349  }
1350  // Maximum median error allowed to be a good text line.
1351  double max_error = kMaxBaselineError * total_height / height_count;
1352  ICOORD start_pt, end_pt;
1353  double error = linepoints.Fit(&start_pt, &end_pt);
1354  return error < max_error && coverage >= kMinBaselineCoverage * width;
1355 }
const double kMinBaselineCoverage
bool IsVerticalType() const
Definition: colpartition.h:435
inT16 y() const
access_function
Definition: points.h:56
integer coordinate
Definition: points.h:30
const double kMaxBaselineError
inT16 x() const
access function
Definition: points.h:52
Definition: rect.h:30
const TBOX & bounding_box() const
Definition: blobbox.h:215
int tesseract::ColPartition::HCoreOverlap ( const ColPartition & other) const
inline

Definition at line 381 of file colpartition.h.

381  {
382  return MIN(median_right_, other.median_right_) -
383  MAX(median_left_, other.median_left_);
384  }
#define MAX(x, y)
Definition: ndminx.h:24
#define MIN(x, y)
Definition: ndminx.h:28
bool tesseract::ColPartition::HOverlaps ( const ColPartition & other) const
inline

Definition at line 365 of file colpartition.h.

365  {
366  return bounding_box_.x_overlap(other.bounding_box_);
367  }
bool x_overlap(const TBOX &box) const
Definition: rect.h:391
bool tesseract::ColPartition::inside_table_column ( )
inline

Definition at line 243 of file colpartition.h.

243  {
244  return inside_table_column_;
245  }
bool tesseract::ColPartition::IsEmpty ( ) const
inline

Definition at line 357 of file colpartition.h.

357  {
358  return boxes_.empty();
359  }
bool tesseract::ColPartition::IsHorizontalLine ( ) const
inline

Definition at line 453 of file colpartition.h.

453  {
454  return IsHorizontalType() && IsLineType();
455  }
bool IsLineType() const
Definition: colpartition.h:419
bool IsHorizontalType() const
Definition: colpartition.h:439
bool tesseract::ColPartition::IsHorizontalType ( ) const
inline

Definition at line 439 of file colpartition.h.

439  {
440  return blob_type_ == BRT_TEXT || blob_type_ == BRT_HLINE;
441  }
bool tesseract::ColPartition::IsImageType ( ) const
inline

Definition at line 423 of file colpartition.h.

423  {
424  return PTIsImageType(type_);
425  }
bool PTIsImageType(PolyBlockType type)
Definition: publictypes.h:65
bool tesseract::ColPartition::IsInSameColumnAs ( const ColPartition & part) const

Definition at line 2183 of file colpartition.cpp.

2183  {
2184  // Overlap does not occur when last < part.first or first > part.last.
2185  // In other words, one is completely to the side of the other.
2186  // This is just DeMorgan's law applied to that so the function returns true.
2187  return (last_column_ >= part.first_column_) &&
2188  (first_column_ <= part.last_column_);
2189 }
bool tesseract::ColPartition::IsLeftOf ( const ColPartition & other) const
inline

Definition at line 349 of file colpartition.h.

349  {
350  return bounding_box_.right() < other.bounding_box_.right();
351  }
inT16 right() const
Definition: rect.h:75
bool tesseract::ColPartition::IsLegal ( )

Definition at line 357 of file colpartition.cpp.

357  {
358  if (bounding_box_.left() > bounding_box_.right()) {
359  if (textord_debug_bugs) {
360  tprintf("Bounding box invalid\n");
361  Print();
362  }
363  return false; // Bounding box invalid.
364  }
365  if (left_margin_ > bounding_box_.left() ||
366  right_margin_ < bounding_box_.right()) {
367  if (textord_debug_bugs) {
368  tprintf("Margins invalid\n");
369  Print();
370  }
371  return false; // Margins invalid.
372  }
373  if (left_key_ > BoxLeftKey() || right_key_ < BoxRightKey()) {
374  if (textord_debug_bugs) {
375  tprintf("Key inside box: %d v %d or %d v %d\n",
376  left_key_, BoxLeftKey(), right_key_, BoxRightKey());
377  Print();
378  }
379  return false; // Keys inside the box.
380  }
381  return true;
382 }
#define tprintf(...)
Definition: tprintf.h:31
inT16 right() const
Definition: rect.h:75
int BoxRightKey() const
Definition: colpartition.h:336
inT16 left() const
Definition: rect.h:68
int textord_debug_bugs
Definition: alignedblob.cpp:28
bool tesseract::ColPartition::IsLineType ( ) const
inline

Definition at line 419 of file colpartition.h.

419  {
420  return PTIsLineType(type_);
421  }
bool PTIsLineType(PolyBlockType type)
Definition: publictypes.h:61
bool tesseract::ColPartition::IsPulloutType ( ) const
inline

Definition at line 431 of file colpartition.h.

431  {
432  return PTIsPulloutType(type_);
433  }
bool PTIsPulloutType(PolyBlockType type)
Definition: publictypes.h:77
bool tesseract::ColPartition::IsSingleton ( ) const
inline

Definition at line 361 of file colpartition.h.

361  {
362  return boxes_.singleton();
363  }
bool tesseract::ColPartition::IsTextType ( ) const
inline

Definition at line 427 of file colpartition.h.

427  {
428  return PTIsTextType(type_);
429  }
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:70
bool tesseract::ColPartition::IsUnMergeableType ( ) const
inline

Definition at line 443 of file colpartition.h.

443  {
444  return BLOBNBOX::UnMergeableType(blob_type_) || type_ == PT_NOISE;
445  }
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:415
Definition: capi.h:79
bool tesseract::ColPartition::IsVerticalLine ( ) const
inline

Definition at line 448 of file colpartition.h.

448  {
449  return IsVerticalType() && IsLineType();
450  }
bool IsLineType() const
Definition: colpartition.h:419
bool IsVerticalType() const
Definition: colpartition.h:435
bool tesseract::ColPartition::IsVerticalType ( ) const
inline

Definition at line 435 of file colpartition.h.

435  {
436  return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE;
437  }
int tesseract::ColPartition::KeyWidth ( int  left_key,
int  right_key 
) const
inline

Definition at line 324 of file colpartition.h.

324  {
325  return (right_key - left_key) / vertical_.y();
326  }
inT16 y() const
access_function
Definition: points.h:56
int tesseract::ColPartition::left_key ( ) const
inline

Definition at line 172 of file colpartition.h.

172  {
173  return left_key_;
174  }
bool tesseract::ColPartition::left_key_tab ( ) const
inline

Definition at line 169 of file colpartition.h.

169  {
170  return left_key_tab_;
171  }
int tesseract::ColPartition::left_margin ( ) const
inline

Definition at line 112 of file colpartition.h.

112  {
113  return left_margin_;
114  }
int tesseract::ColPartition::LeftAtY ( int  y) const
inline

Definition at line 340 of file colpartition.h.

340  {
341  return XAtY(left_key_, y);
342  }
int XAtY(int sort_key, int y) const
Definition: colpartition.h:320
int tesseract::ColPartition::LeftBlobRule ( ) const

Definition at line 560 of file colpartition.cpp.

560  {
561  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
562  return it.data()->left_rule();
563 }
void tesseract::ColPartition::LineSpacingBlocks ( const ICOORD & bleft,
const ICOORD & tright,
int  resolution,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts,
BLOCK_LIST *  completed_blocks,
TO_BLOCK_LIST *  to_blocks 
)
static

Definition at line 1419 of file colpartition.cpp.

1424  {
1425  int page_height = tright.y() - bleft.y();
1426  // Compute the initial spacing stats.
1427  ColPartition_IT it(block_parts);
1428  int part_count = 0;
1429  int max_line_height = 0;
1430 
1431  // TODO(joeliu): We should add some special logic for PT_INLINE_EQUATION type
1432  // because their line spacing with their neighbors maybe smaller and their
1433  // height may be slightly larger.
1434 
1435  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1436  ColPartition* part = it.data();
1437  ASSERT_HOST(!part->boxes()->empty());
1438  STATS side_steps(0, part->bounding_box().height());
1439  if (part->bounding_box().height() > max_line_height)
1440  max_line_height = part->bounding_box().height();
1441  BLOBNBOX_C_IT blob_it(part->boxes());
1442  int prev_bottom = blob_it.data()->bounding_box().bottom();
1443  for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
1444  BLOBNBOX* blob = blob_it.data();
1445  int bottom = blob->bounding_box().bottom();
1446  int step = bottom - prev_bottom;
1447  if (step < 0)
1448  step = -step;
1449  side_steps.add(step, 1);
1450  prev_bottom = bottom;
1451  }
1452  part->set_side_step(static_cast<int>(side_steps.median() + 0.5));
1453  if (!it.at_last()) {
1454  ColPartition* next_part = it.data_relative(1);
1455  part->set_bottom_spacing(part->median_bottom() -
1456  next_part->median_bottom());
1457  part->set_top_spacing(part->median_top() - next_part->median_top());
1458  } else {
1459  part->set_bottom_spacing(page_height);
1460  part->set_top_spacing(page_height);
1461  }
1462  if (textord_debug_tabfind) {
1463  part->Print();
1464  tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n",
1465  side_steps.median(), part->top_spacing(), part->bottom_spacing());
1466  }
1467  ++part_count;
1468  }
1469  if (part_count == 0)
1470  return;
1471 
1472  SmoothSpacings(resolution, page_height, block_parts);
1473 
1474  // Move the partitions into individual block lists and make the blocks.
1475  BLOCK_IT block_it(completed_blocks);
1476  TO_BLOCK_IT to_block_it(to_blocks);
1477  ColPartition_LIST spacing_parts;
1478  ColPartition_IT sp_block_it(&spacing_parts);
1479  int same_block_threshold = max_line_height * kMaxSameBlockLineSpacing;
1480  for (it.mark_cycle_pt(); !it.empty();) {
1481  ColPartition* part = it.extract();
1482  sp_block_it.add_to_end(part);
1483  it.forward();
1484  if (it.empty() || part->bottom_spacing() > same_block_threshold ||
1485  !part->SpacingsEqual(*it.data(), resolution)) {
1486  // There is a spacing boundary. Check to see if it.data() belongs
1487  // better in the current block or the next one.
1488  if (!it.empty() && part->bottom_spacing() <= same_block_threshold) {
1489  ColPartition* next_part = it.data();
1490  // If there is a size match one-way, then the middle line goes with
1491  // its matched size, otherwise it goes with the smallest spacing.
1492  ColPartition* third_part = it.at_last() ? NULL : it.data_relative(1);
1493  if (textord_debug_tabfind) {
1494  tprintf("Spacings unequal: upper:%d/%d, lower:%d/%d,"
1495  " sizes %d %d %d\n",
1496  part->top_spacing(), part->bottom_spacing(),
1497  next_part->top_spacing(), next_part->bottom_spacing(),
1498  part->median_size(), next_part->median_size(),
1499  third_part != NULL ? third_part->median_size() : 0);
1500  }
1501  // We can only consider adding the next line to the block if the sizes
1502  // match and the lines are close enough for their size.
1503  if (part->SizesSimilar(*next_part) &&
1504  next_part->median_size() * kMaxSameBlockLineSpacing >
1505  part->bottom_spacing() &&
1506  part->median_size() * kMaxSameBlockLineSpacing >
1507  part->top_spacing()) {
1508  // Even now, we can only add it as long as the third line doesn't
1509  // match in the same way and have a smaller bottom spacing.
1510  if (third_part == NULL ||
1511  !next_part->SizesSimilar(*third_part) ||
1512  third_part->median_size() * kMaxSameBlockLineSpacing <=
1513  next_part->bottom_spacing() ||
1514  next_part->median_size() * kMaxSameBlockLineSpacing <=
1515  next_part->top_spacing() ||
1516  next_part->bottom_spacing() > part->bottom_spacing()) {
1517  // Add to the current block.
1518  sp_block_it.add_to_end(it.extract());
1519  it.forward();
1520  if (textord_debug_tabfind) {
1521  tprintf("Added line to current block.\n");
1522  }
1523  }
1524  }
1525  }
1526  TO_BLOCK* to_block = MakeBlock(bleft, tright, &spacing_parts, used_parts);
1527  if (to_block != NULL) {
1528  to_block_it.add_to_end(to_block);
1529  block_it.add_to_end(to_block->block);
1530  }
1531  sp_block_it.set_to_list(&spacing_parts);
1532  } else {
1533  if (textord_debug_tabfind && !it.empty()) {
1534  ColPartition* next_part = it.data();
1535  tprintf("Spacings equal: upper:%d/%d, lower:%d/%d\n",
1536  part->top_spacing(), part->bottom_spacing(),
1537  next_part->top_spacing(), next_part->bottom_spacing(),
1538  part->median_size(), next_part->median_size());
1539  }
1540  }
1541  }
1542 }
static TO_BLOCK * MakeBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
#define tprintf(...)
Definition: tprintf.h:31
Definition: statistc.h:33
#define ASSERT_HOST(x)
Definition: errcode.h:84
const double kMaxSameBlockLineSpacing
int textord_debug_tabfind
Definition: alignedblob.cpp:27
inT16 y() const
access_function
Definition: points.h:56
inT16 bottom() const
Definition: rect.h:61
#define NULL
Definition: host.h:144
const TBOX & bounding_box() const
Definition: blobbox.h:215
BLOCK * block
Definition: blobbox.h:773
ColPartition_CLIST* tesseract::ColPartition::lower_partners ( )
inline

Definition at line 199 of file colpartition.h.

199  {
200  return &lower_partners_;
201  }
ColPartition * tesseract::ColPartition::MakeBigPartition ( BLOBNBOX * box,
ColPartition_LIST *  big_part_list 
)
static

Definition at line 133 of file colpartition.cpp.

134  {
135  box->set_owner(NULL);
136  ColPartition* single = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
137  single->set_flow(BTFT_NONE);
138  single->AddBox(box);
139  single->ComputeLimits();
140  single->ClaimBoxes();
141  single->SetBlobTypes();
142  single->set_block_owned(true);
143  if (big_part_list != NULL) {
144  ColPartition_IT part_it(big_part_list);
145  part_it.add_to_end(single);
146  }
147  return single;
148 }
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:340
integer coordinate
Definition: points.h:30
#define NULL
Definition: host.h:144
TO_BLOCK * tesseract::ColPartition::MakeBlock ( const ICOORD & bleft,
const ICOORD & tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
)
static

Definition at line 1635 of file colpartition.cpp.

1637  {
1638  if (block_parts->empty())
1639  return NULL; // Nothing to do.
1640  ColPartition_IT it(block_parts);
1641  ColPartition* part = it.data();
1642  PolyBlockType type = part->type();
1643  if (type == PT_VERTICAL_TEXT)
1644  return MakeVerticalTextBlock(bleft, tright, block_parts, used_parts);
1645  // LineSpacingBlocks has handed us a collection of evenly spaced lines and
1646  // put the average spacing in each partition, so we can just take the
1647  // linespacing from the first partition.
1648  int line_spacing = part->bottom_spacing();
1649  if (line_spacing < part->median_size())
1650  line_spacing = part->bounding_box().height();
1651  ICOORDELT_LIST vertices;
1652  ICOORDELT_IT vert_it(&vertices);
1653  ICOORD start, end;
1654  int min_x = MAX_INT32;
1655  int max_x = -MAX_INT32;
1656  int min_y = MAX_INT32;
1657  int max_y = -MAX_INT32;
1658  int iteration = 0;
1659  do {
1660  if (iteration == 0)
1661  ColPartition::LeftEdgeRun(&it, &start, &end);
1662  else
1663  ColPartition::RightEdgeRun(&it, &start, &end);
1664  ClipCoord(bleft, tright, &start);
1665  ClipCoord(bleft, tright, &end);
1666  vert_it.add_after_then_move(new ICOORDELT(start));
1667  vert_it.add_after_then_move(new ICOORDELT(end));
1668  UpdateRange(start.x(), &min_x, &max_x);
1669  UpdateRange(end.x(), &min_x, &max_x);
1670  UpdateRange(start.y(), &min_y, &max_y);
1671  UpdateRange(end.y(), &min_y, &max_y);
1672  if ((iteration == 0 && it.at_first()) ||
1673  (iteration == 1 && it.at_last())) {
1674  ++iteration;
1675  it.move_to_last();
1676  }
1677  } while (iteration < 2);
1678  if (textord_debug_tabfind)
1679  tprintf("Making block at (%d,%d)->(%d,%d)\n",
1680  min_x, min_y, max_x, max_y);
1681  BLOCK* block = new BLOCK("", true, 0, 0, min_x, min_y, max_x, max_y);
1682  block->set_poly_block(new POLY_BLOCK(&vertices, type));
1683  return MoveBlobsToBlock(false, line_spacing, block, block_parts, used_parts);
1684 }
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:63
#define tprintf(...)
Definition: tprintf.h:31
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:125
int median_size() const
Definition: colpartition.h:136
int textord_debug_tabfind
Definition: alignedblob.cpp:27
inT16 y() const
access_function
Definition: points.h:56
Definition: ocrblock.h:30
static TO_BLOCK * MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
#define MAX_INT32
Definition: host.h:120
integer coordinate
Definition: points.h:30
PolyBlockType type() const
Definition: colpartition.h:181
inT16 x() const
access function
Definition: points.h:52
#define NULL
Definition: host.h:144
PolyBlockType
Definition: publictypes.h:41
ColPartition * tesseract::ColPartition::MakeLinePartition ( BlobRegionType  blob_type,
const ICOORD & vertical,
int  left,
int  bottom,
int  right,
int  top 
)
static

Constructs a fake ColPartition with no BLOBNBOXes to represent a horizontal or vertical line, given a type and a bounding box.

Definition at line 165 of file colpartition.cpp.

168  {
169  ColPartition* part = new ColPartition(blob_type, vertical);
170  part->bounding_box_ = TBOX(left, bottom, right, top);
171  part->median_bottom_ = bottom;
172  part->median_top_ = top;
173  part->median_size_ = top - bottom;
174  part->median_width_ = right - left;
175  part->left_key_ = part->BoxLeftKey();
176  part->right_key_ = part->BoxRightKey();
177  return part;
178 }
BlobRegionType blob_type() const
Definition: colpartition.h:148
Definition: rect.h:30
TO_ROW * tesseract::ColPartition::MakeToRow ( )

Definition at line 1714 of file colpartition.cpp.

1714  {
1715  BLOBNBOX_C_IT blob_it(&boxes_);
1716  TO_ROW* row = NULL;
1717  int line_size = IsVerticalType() ? median_width_ : median_size_;
1718  // Add all the blobs to a single TO_ROW.
1719  for (; !blob_it.empty(); blob_it.forward()) {
1720  BLOBNBOX* blob = blob_it.extract();
1721 // blob->compute_bounding_box();
1722  int top = blob->bounding_box().top();
1723  int bottom = blob->bounding_box().bottom();
1724  if (row == NULL) {
1725  row = new TO_ROW(blob, static_cast<float>(top),
1726  static_cast<float>(bottom),
1727  static_cast<float>(line_size));
1728  } else {
1729  row->add_blob(blob, static_cast<float>(top),
1730  static_cast<float>(bottom),
1731  static_cast<float>(line_size));
1732  }
1733  }
1734  return row;
1735 }
bool IsVerticalType() const
Definition: colpartition.h:435
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
Definition: blobbox.cpp:726
inT16 bottom() const
Definition: rect.h:61
#define NULL
Definition: host.h:144
const TBOX & bounding_box() const
Definition: blobbox.h:215
inT16 top() const
Definition: rect.h:54
TO_BLOCK * tesseract::ColPartition::MakeVerticalTextBlock ( const ICOORD & bleft,
const ICOORD & tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
)
static

Definition at line 1688 of file colpartition.cpp.

1691  {
1692  if (block_parts->empty())
1693  return NULL; // Nothing to do.
1694  ColPartition_IT it(block_parts);
1695  ColPartition* part = it.data();
1696  TBOX block_box = part->bounding_box();
1697  int line_spacing = block_box.width();
1698  PolyBlockType type = it.data()->type();
1699  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1700  block_box += it.data()->bounding_box();
1701  }
1702  if (textord_debug_tabfind) {
1703  tprintf("Making block at:");
1704  block_box.print();
1705  }
1706  BLOCK* block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(),
1707  block_box.right(), block_box.top());
1708  block->set_poly_block(new POLY_BLOCK(block_box, type));
1709  return MoveBlobsToBlock(true, line_spacing, block, block_parts, used_parts);
1710 }
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:63
#define tprintf(...)
Definition: tprintf.h:31
void print() const
Definition: rect.h:270
inT16 right() const
Definition: rect.h:75
int textord_debug_tabfind
Definition: alignedblob.cpp:27
inT16 left() const
Definition: rect.h:68
Definition: ocrblock.h:30
inT16 bottom() const
Definition: rect.h:61
PolyBlockType type() const
Definition: colpartition.h:181
inT16 width() const
Definition: rect.h:111
Definition: rect.h:30
#define NULL
Definition: host.h:144
PolyBlockType
Definition: publictypes.h:41
inT16 top() const
Definition: rect.h:54
bool tesseract::ColPartition::MarkAsLeaderIfMonospaced ( )

Definition at line 1096 of file colpartition.cpp.

1096  {
1097  bool result = false;
1098  // Gather statistics on the gaps between blobs and the widths of the blobs.
1099  int part_width = bounding_box_.width();
1100  STATS gap_stats(0, part_width);
1101  STATS width_stats(0, part_width);
1102  BLOBNBOX_C_IT it(&boxes_);
1103  BLOBNBOX* prev_blob = it.data();
1104  prev_blob->set_flow(BTFT_NEIGHBOURS);
1105  width_stats.add(prev_blob->bounding_box().width(), 1);
1106  int blob_count = 1;
1107  for (it.forward(); !it.at_first(); it.forward()) {
1108  BLOBNBOX* blob = it.data();
1109  int left = blob->bounding_box().left();
1110  int right = blob->bounding_box().right();
1111  gap_stats.add(left - prev_blob->bounding_box().right(), 1);
1112  width_stats.add(right - left, 1);
1113  blob->set_flow(BTFT_NEIGHBOURS);
1114  prev_blob = blob;
1115  ++blob_count;
1116  }
1117  double median_gap = gap_stats.median();
1118  double median_width = width_stats.median();
1119  double max_width = MAX(median_gap, median_width);
1120  double min_width = MIN(median_gap, median_width);
1121  double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f);
1122  if (textord_debug_tabfind >= 4) {
1123  tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n",
1124  gap_iqr, blob_count, max_width * kMaxLeaderGapFractionOfMax,
1125  min_width * kMaxLeaderGapFractionOfMin);
1126  }
1127  if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax &&
1128  gap_iqr < min_width * kMaxLeaderGapFractionOfMin &&
1129  blob_count >= kMinLeaderCount) {
1130  // This is stable enough to be called a leader, so check the widths.
1131  // Since leader dashes can join, run a dp cutting algorithm and go
1132  // on the cost.
1133  int offset = static_cast<int>(ceil(gap_iqr * 2));
1134  int min_step = static_cast<int>(median_gap + median_width + 0.5);
1135  int max_step = min_step + offset;
1136  min_step -= offset;
1137  // Pad the buffer with min_step/2 on each end.
1138  int part_left = bounding_box_.left() - min_step / 2;
1139  part_width += min_step;
1140  DPPoint* projection = new DPPoint[part_width];
1141  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1142  BLOBNBOX* blob = it.data();
1143  int left = blob->bounding_box().left();
1144  int right = blob->bounding_box().right();
1145  int height = blob->bounding_box().height();
1146  for (int x = left; x < right; ++x) {
1147  projection[left - part_left].AddLocalCost(height);
1148  }
1149  }
1150  DPPoint* best_end = DPPoint::Solve(min_step, max_step, false,
1151  &DPPoint::CostWithVariance,
1152  part_width, projection);
1153  if (best_end != NULL && best_end->total_cost() < blob_count) {
1154  // Good enough. Call it a leader.
1155  result = true;
1156  bool modified_blob_list = false;
1157  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1158  BLOBNBOX* blob = it.data();
1159  TBOX box = blob->bounding_box();
1160  // If the first or last blob is spaced too much, don't mark it.
1161  if (it.at_first()) {
1162  int gap = it.data_relative(1)->bounding_box().left() -
1163  blob->bounding_box().right();
1164  if (blob->bounding_box().width() + gap > max_step) {
1165  it.extract();
1166  modified_blob_list = true;
1167  continue;
1168  }
1169  }
1170  if (it.at_last()) {
1171  int gap = blob->bounding_box().left() -
1172  it.data_relative(-1)->bounding_box().right();
1173  if (blob->bounding_box().width() + gap > max_step) {
1174  it.extract();
1175  modified_blob_list = true;
1176  break;
1177  }
1178  }
1179  blob->set_region_type(BRT_TEXT);
1180  blob->set_flow(BTFT_LEADER);
1181  }
1182  if (modified_blob_list) ComputeLimits();
1183  blob_type_ = BRT_TEXT;
1184  flow_ = BTFT_LEADER;
1185  } else if (textord_debug_tabfind) {
1186  if (best_end == NULL) {
1187  tprintf("No path\n");
1188  } else {
1189  tprintf("Total cost = %d vs allowed %d\n",
1190  best_end->total_cost() < blob_count);
1191  }
1192  }
1193  delete [] projection;
1194  }
1195  return result;
1196 }
#define MAX(x, y)
Definition: ndminx.h:24
#define tprintf(...)
Definition: tprintf.h:31
#define MIN(x, y)
Definition: ndminx.h:28
Definition: statistc.h:33
static DPPoint * Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size, DPPoint *points)
Definition: dppoint.cpp:30
int median_width() const
Definition: colpartition.h:142
inT16 right() const
Definition: rect.h:75
void set_flow(BlobTextFlowType value)
Definition: blobbox.h:283
int textord_debug_tabfind
Definition: alignedblob.cpp:27
inT16 left() const
Definition: rect.h:68
inT64 CostWithVariance(const DPPoint *prev)
Definition: dppoint.cpp:68
const int kMinLeaderCount
inT16 height() const
Definition: rect.h:104
inT16 width() const
Definition: rect.h:111
const double kMaxLeaderGapFractionOfMin
Definition: rect.h:30
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:271
#define NULL
Definition: host.h:144
const TBOX & bounding_box() const
Definition: blobbox.h:215
const double kMaxLeaderGapFractionOfMax
bool tesseract::ColPartition::MatchingColumns ( const ColPartition & other) const

Definition at line 385 of file colpartition.cpp.

385  {
386  int y = (MidY() + other.MidY()) / 2;
387  if (!NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor,
388  LeftAtY(y) / kColumnWidthFactor, 1))
389  return false;
390  if (!NearlyEqual(other.RightAtY(y) / kColumnWidthFactor,
391  RightAtY(y) / kColumnWidthFactor, 1))
392  return false;
393  return true;
394 }
int LeftAtY(int y) const
Definition: colpartition.h:340
bool NearlyEqual(T x, T y, T tolerance)
Definition: host.h:148
int RightAtY(int y) const
Definition: colpartition.h:344
const int kColumnWidthFactor
Definition: tabfind.h:42
bool tesseract::ColPartition::MatchingSizes ( const ColPartition & other) const

Definition at line 420 of file colpartition.cpp.

420  {
421  if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT)
422  return !TabFind::DifferentSizes(median_width_, other.median_width_);
423  else
424  return !TabFind::DifferentSizes(median_size_, other.median_size_);
425 }
static bool DifferentSizes(int size1, int size2)
Definition: tabfind.cpp:429
bool tesseract::ColPartition::MatchingStrokeWidth ( const ColPartition & other,
double  fractional_tolerance,
double  constant_tolerance 
) const

Definition at line 445 of file colpartition.cpp.

447  {
448  int match_count = 0;
449  int nonmatch_count = 0;
450  BLOBNBOX_C_IT box_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
451  BLOBNBOX_C_IT other_it(const_cast<BLOBNBOX_CLIST*>(&other.boxes_));
452  box_it.mark_cycle_pt();
453  other_it.mark_cycle_pt();
454  while (!box_it.cycled_list() && !other_it.cycled_list()) {
455  if (box_it.data()->MatchingStrokeWidth(*other_it.data(),
456  fractional_tolerance,
457  constant_tolerance))
458  ++match_count;
459  else
460  ++nonmatch_count;
461  box_it.forward();
462  other_it.forward();
463  }
464  return match_count > nonmatch_count;
465 }
bool tesseract::ColPartition::MatchingTextColor ( const ColPartition & other) const

Definition at line 397 of file colpartition.cpp.

397  {
398  if (color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise &&
399  other.color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise)
400  return false; // Too noisy.
401 
402  // Colors must match for other to count.
403  double d_this1_o = ImageFind::ColorDistanceFromLine(other.color1_,
404  other.color2_,
405  color1_);
406  double d_this2_o = ImageFind::ColorDistanceFromLine(other.color1_,
407  other.color2_,
408  color2_);
409  double d_o1_this = ImageFind::ColorDistanceFromLine(color1_, color2_,
410  other.color1_);
411  double d_o2_this = ImageFind::ColorDistanceFromLine(color1_, color2_,
412  other.color2_);
413 // All 4 distances must be small enough.
414  return d_this1_o < kMaxColorDistance && d_this2_o < kMaxColorDistance &&
415  d_o1_this < kMaxColorDistance && d_o2_this < kMaxColorDistance;
416 }
static double ColorDistanceFromLine(const uinT8 *line1, const uinT8 *line2, const uinT8 *point)
Definition: imagefind.cpp:331
const int kMaxColorDistance
const int kMaxRMSColorNoise
int tesseract::ColPartition::median_bottom ( ) const
inline

Definition at line 127 of file colpartition.h.

127  {
128  return median_bottom_;
129  }
int tesseract::ColPartition::median_left ( ) const
inline

Definition at line 130 of file colpartition.h.

130  {
131  return median_left_;
132  }
int tesseract::ColPartition::median_right ( ) const
inline

Definition at line 133 of file colpartition.h.

133  {
134  return median_right_;
135  }
int tesseract::ColPartition::median_size ( ) const
inline

Definition at line 136 of file colpartition.h.

136  {
137  return median_size_;
138  }
int tesseract::ColPartition::median_top ( ) const
inline

Definition at line 124 of file colpartition.h.

124  {
125  return median_top_;
126  }
int tesseract::ColPartition::median_width ( ) const
inline

Definition at line 142 of file colpartition.h.

142  {
143  return median_width_;
144  }
int tesseract::ColPartition::MedianY ( ) const
inline

Definition at line 308 of file colpartition.h.

308  {
309  return (median_top_ + median_bottom_) / 2;
310  }
int tesseract::ColPartition::MidX ( ) const
inline

Definition at line 312 of file colpartition.h.

312  {
313  return (bounding_box_.left() + bounding_box_.right()) / 2;
314  }
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
int tesseract::ColPartition::MidY ( ) const
inline

Definition at line 304 of file colpartition.h.

304  {
305  return (bounding_box_.top() + bounding_box_.bottom()) / 2;
306  }
inT16 bottom() const
Definition: rect.h:61
inT16 top() const
Definition: rect.h:54
ColPartition* tesseract::ColPartition::nearest_neighbor_above ( ) const
inline

Definition at line 249 of file colpartition.h.

249  {
250  return nearest_neighbor_above_;
251  }
ColPartition* tesseract::ColPartition::nearest_neighbor_below ( ) const
inline

Definition at line 255 of file colpartition.h.

255  {
256  return nearest_neighbor_below_;
257  }
bool tesseract::ColPartition::OKDiacriticMerge ( const ColPartition & candidate,
bool  debug 
) const

Definition at line 473 of file colpartition.cpp.

474  {
475  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
476  int min_top = MAX_INT32;
477  int max_bottom = -MAX_INT32;
478  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
479  BLOBNBOX* blob = it.data();
480  if (!blob->IsDiacritic()) {
481  if (debug) {
482  tprintf("Blob is not a diacritic:");
483  blob->bounding_box().print();
484  }
485  return false; // All blobs must have diacritic bases.
486  }
487  if (blob->base_char_top() < min_top)
488  min_top = blob->base_char_top();
489  if (blob->base_char_bottom() > max_bottom)
490  max_bottom = blob->base_char_bottom();
491  }
492  // If the intersection of all vertical ranges of all base characters
493  // overlaps the median range of this, then it is OK.
494  bool result = min_top > candidate.median_bottom_ &&
495  max_bottom < candidate.median_top_;
496  if (debug) {
497  if (result)
498  tprintf("OKDiacritic!\n");
499  else
500  tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n",
501  max_bottom, min_top, median_bottom_, median_top_);
502  }
503  return result;
504 }
#define tprintf(...)
Definition: tprintf.h:31
void print() const
Definition: rect.h:270
#define MAX_INT32
Definition: host.h:120
bool IsDiacritic() const
Definition: blobbox.h:365
const TBOX & bounding_box() const
Definition: blobbox.h:215
int base_char_bottom() const
Definition: blobbox.h:371
int base_char_top() const
Definition: blobbox.h:368
bool tesseract::ColPartition::OKMergeOverlap ( const ColPartition & merge1,
const ColPartition & merge2,
int  ok_box_overlap,
bool  debug 
)

Definition at line 749 of file colpartition.cpp.

751  {
752  // Vertical partitions are not allowed to be involved.
753  if (IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType()) {
754  if (debug)
755  tprintf("Vertical partition\n");
756  return false;
757  }
758  // The merging partitions must strongly overlap each other.
759  if (!merge1.VSignificantCoreOverlap(merge2)) {
760  if (debug)
761  tprintf("Voverlap %d (%d)\n",
762  merge1.VCoreOverlap(merge2),
763  merge1.VSignificantCoreOverlap(merge2));
764  return false;
765  }
766  // The merged box must not overlap the median bounds of this.
767  TBOX merged_box(merge1.bounding_box());
768  merged_box += merge2.bounding_box();
769  if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ &&
770  merged_box.bottom() < bounding_box_.top() - ok_box_overlap &&
771  merged_box.top() > bounding_box_.bottom() + ok_box_overlap) {
772  if (debug)
773  tprintf("Excessive box overlap\n");
774  return false;
775  }
776  // Looks OK!
777  return true;
778 }
bool IsVerticalType() const
Definition: colpartition.h:435
#define tprintf(...)
Definition: tprintf.h:31
inT16 bottom() const
Definition: rect.h:61
Definition: rect.h:30
inT16 top() const
Definition: rect.h:54
BLOBNBOX * tesseract::ColPartition::OverlapSplitBlob ( const TBOX & box)

Definition at line 782 of file colpartition.cpp.

782  {
783  if (boxes_.empty() || boxes_.singleton())
784  return NULL;
785  BLOBNBOX_C_IT it(&boxes_);
786  TBOX left_box(it.data()->bounding_box());
787  for (it.forward(); !it.at_first(); it.forward()) {
788  BLOBNBOX* bbox = it.data();
789  left_box += bbox->bounding_box();
790  if (left_box.overlap(box))
791  return bbox;
792  }
793  return NULL;
794 }
Definition: rect.h:30
#define NULL
Definition: host.h:144
const TBOX & bounding_box() const
Definition: blobbox.h:215
bool tesseract::ColPartition::owns_blobs ( ) const
inline

Definition at line 291 of file colpartition.h.

291  {
292  return owns_blobs_;
293  }
PolyBlockType tesseract::ColPartition::PartitionType ( ColumnSpanningType  flow) const

Definition at line 1019 of file colpartition.cpp.

1019  {
1020  if (flow == CST_NOISE) {
1021  if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE &&
1022  blob_type_ != BRT_RECTIMAGE && blob_type_ != BRT_VERT_TEXT)
1023  return PT_NOISE;
1024  flow = CST_FLOWING;
1025  }
1026 
1027  switch (blob_type_) {
1028  case BRT_NOISE:
1029  return PT_NOISE;
1030  case BRT_HLINE:
1031  return PT_HORZ_LINE;
1032  case BRT_VLINE:
1033  return PT_VERT_LINE;
1034  case BRT_RECTIMAGE:
1035  case BRT_POLYIMAGE:
1036  switch (flow) {
1037  case CST_FLOWING:
1038  return PT_FLOWING_IMAGE;
1039  case CST_HEADING:
1040  return PT_HEADING_IMAGE;
1041  case CST_PULLOUT:
1042  return PT_PULLOUT_IMAGE;
1043  default:
1044  ASSERT_HOST(!"Undefined flow type for image!");
1045  }
1046  break;
1047  case BRT_VERT_TEXT:
1048  return PT_VERTICAL_TEXT;
1049  case BRT_TEXT:
1050  case BRT_UNKNOWN:
1051  default:
1052  switch (flow) {
1053  case CST_FLOWING:
1054  return PT_FLOWING_TEXT;
1055  case CST_HEADING:
1056  return PT_HEADING_TEXT;
1057  case CST_PULLOUT:
1058  return PT_PULLOUT_TEXT;
1059  default:
1060  ASSERT_HOST(!"Undefined flow type for text!");
1061  }
1062  }
1063  ASSERT_HOST(!"Should never get here!");
1064  return PT_NOISE;
1065 }
#define ASSERT_HOST(x)
Definition: errcode.h:84
BlobTextFlowType flow() const
Definition: colpartition.h:154
Definition: capi.h:79
void tesseract::ColPartition::Print ( ) const

Definition at line 1790 of file colpartition.cpp.

1790  {
1791  int y = MidY();
1792  tprintf("ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
1793  " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
1794  " ts=%d bs=%d ls=%d rs=%d\n",
1795  boxes_.empty() ? 'E' : ' ',
1796  left_margin_, left_key_tab_ ? 'T' : 'B', LeftAtY(y),
1797  bounding_box_.left(), median_left_,
1798  bounding_box_.bottom(), median_bottom_,
1799  bounding_box_.right(), RightAtY(y), right_key_tab_ ? 'T' : 'B',
1800  right_margin_, median_right_, bounding_box_.top(), median_top_,
1801  good_width_, good_column_, type_,
1802  kBlobTypes[blob_type_], flow_,
1803  first_column_, last_column_, boxes_.length(),
1804  space_above_, space_below_, space_to_left_, space_to_right_);
1805 }
#define tprintf(...)
Definition: tprintf.h:31
int LeftAtY(int y) const
Definition: colpartition.h:340
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
inT16 bottom() const
Definition: rect.h:61
int RightAtY(int y) const
Definition: colpartition.h:344
inT16 top() const
Definition: rect.h:54
void tesseract::ColPartition::PrintColors ( )

Definition at line 1808 of file colpartition.cpp.

1808  {
1809  tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n",
1810  color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE],
1811  color1_[L_ALPHA_CHANNEL],
1812  color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
1813 }
#define tprintf(...)
Definition: tprintf.h:31
void tesseract::ColPartition::RefinePartners ( PolyBlockType  type,
bool  get_desperate,
ColPartitionGrid grid 
)

Definition at line 1885 of file colpartition.cpp.

1886  {
1887  if (TypesSimilar(type_, type)) {
1888  RefinePartnersInternal(true, get_desperate, grid);
1889  RefinePartnersInternal(false, get_desperate, grid);
1890  } else if (type == PT_COUNT) {
1891  // This is the final pass. Make sure only the correctly typed
1892  // partners survive, however many there are.
1893  RefinePartnersByType(true, &upper_partners_);
1894  RefinePartnersByType(false, &lower_partners_);
1895  // It is possible for a merge to have given a partition multiple
1896  // partners again, so the last resort is to use overlap which is
1897  // guaranteed to leave at most one partner left.
1898  if (!upper_partners_.empty() && !upper_partners_.singleton())
1899  RefinePartnersByOverlap(true, &upper_partners_);
1900  if (!lower_partners_.empty() && !lower_partners_.singleton())
1901  RefinePartnersByOverlap(false, &lower_partners_);
1902  }
1903 }
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2)
Definition: colpartition.h:412
Definition: capi.h:79
PolyBlockType type() const
Definition: colpartition.h:181
void tesseract::ColPartition::ReflectInYAxis ( )

Definition at line 335 of file colpartition.cpp.

335  {
336  BLOBNBOX_CLIST reversed_boxes;
337  BLOBNBOX_C_IT reversed_it(&reversed_boxes);
338  // Reverse the order of the boxes_.
339  BLOBNBOX_C_IT bb_it(&boxes_);
340  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
341  reversed_it.add_before_then_move(bb_it.extract());
342  }
343  bb_it.add_list_after(&reversed_boxes);
344  ASSERT_HOST(!left_key_tab_ && !right_key_tab_);
345  int tmp = left_margin_;
346  left_margin_ = -right_margin_;
347  right_margin_ = -tmp;
348  ComputeLimits();
349 }
#define ASSERT_HOST(x)
Definition: errcode.h:84
bool tesseract::ColPartition::ReleaseNonLeaderBoxes ( )

Definition at line 304 of file colpartition.cpp.

304  {
305  BLOBNBOX_C_IT bb_it(&boxes_);
306  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
307  BLOBNBOX* bblob = bb_it.data();
308  if (bblob->flow() != BTFT_LEADER) {
309  if (bblob->owner() == this) bblob->set_owner(NULL);
310  bb_it.extract();
311  }
312  }
313  if (bb_it.empty()) return false;
314  flow_ = BTFT_LEADER;
315  ComputeLimits();
316  return true;
317 }
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:340
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
#define NULL
Definition: host.h:144
BlobTextFlowType flow() const
Definition: blobbox.h:280
void tesseract::ColPartition::RemoveBox ( BLOBNBOX * box)

Definition at line 217 of file colpartition.cpp.

217  {
218  BLOBNBOX_C_IT bb_it(&boxes_);
219  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
220  if (box == bb_it.data()) {
221  bb_it.extract();
222  ComputeLimits();
223  return;
224  }
225  }
226 }
void tesseract::ColPartition::RemovePartner ( bool  upper,
ColPartition * partner 
)

Definition at line 633 of file colpartition.cpp.

633  {
634  ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_);
635  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
636  if (it.data() == partner) {
637  it.extract();
638  break;
639  }
640  }
641 }
int tesseract::ColPartition::right_key ( ) const
inline

Definition at line 178 of file colpartition.h.

178  {
179  return right_key_;
180  }
bool tesseract::ColPartition::right_key_tab ( ) const
inline

Definition at line 175 of file colpartition.h.

175  {
176  return right_key_tab_;
177  }
int tesseract::ColPartition::right_margin ( ) const
inline

Definition at line 118 of file colpartition.h.

118  {
119  return right_margin_;
120  }
int tesseract::ColPartition::RightAtY ( int  y) const
inline

Definition at line 344 of file colpartition.h.

344  {
345  return XAtY(right_key_, y);
346  }
int XAtY(int sort_key, int y) const
Definition: colpartition.h:320
int tesseract::ColPartition::RightBlobRule ( ) const

Definition at line 565 of file colpartition.cpp.

565  {
566  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
567  it.move_to_last();
568  return it.data()->right_rule();
569 }
void tesseract::ColPartition::set_blob_type ( BlobRegionType  t)
inline

Definition at line 151 of file colpartition.h.

151  {
152  blob_type_ = t;
153  }
void tesseract::ColPartition::set_block_owned ( bool  owned)
inline

Definition at line 208 of file colpartition.h.

208  {
209  block_owned_ = owned;
210  }
void tesseract::ColPartition::set_bottom_spacing ( int  spacing)
inline

Definition at line 223 of file colpartition.h.

223  {
224  bottom_spacing_ = spacing;
225  }
void tesseract::ColPartition::set_first_column ( int  column)
inline

Definition at line 708 of file colpartition.h.

708  {
709  first_column_ = column;
710  }
void tesseract::ColPartition::set_flow ( BlobTextFlowType  f)
inline

Definition at line 157 of file colpartition.h.

157  {
158  flow_ = f;
159  }
void tesseract::ColPartition::set_inside_table_column ( bool  val)
inline

Definition at line 246 of file colpartition.h.

246  {
247  inside_table_column_ = val;
248  }
void tesseract::ColPartition::set_last_column ( int  column)
inline

Definition at line 711 of file colpartition.h.

711  {
712  last_column_ = column;
713  }
void tesseract::ColPartition::set_left_margin ( int  margin)
inline

Definition at line 115 of file colpartition.h.

115  {
116  left_margin_ = margin;
117  }
void tesseract::ColPartition::set_median_size ( int  size)
inline

Definition at line 139 of file colpartition.h.

139  {
140  median_size_ = size;
141  }
void tesseract::ColPartition::set_median_width ( int  width)
inline

Definition at line 145 of file colpartition.h.

145  {
146  median_width_ = width;
147  }
void tesseract::ColPartition::set_nearest_neighbor_above ( ColPartition * part)
inline

Definition at line 252 of file colpartition.h.

252  {
253  nearest_neighbor_above_ = part;
254  }
void tesseract::ColPartition::set_nearest_neighbor_below ( ColPartition * part)
inline

Definition at line 258 of file colpartition.h.

258  {
259  nearest_neighbor_below_ = part;
260  }
void tesseract::ColPartition::set_owns_blobs ( bool  owns_blobs)
inline

Definition at line 294 of file colpartition.h.

294  {
295  // Do NOT change ownership flag when there are blobs in the list.
296  // Immediately set the ownership flag when creating copies.
297  ASSERT_HOST(boxes_.empty());
298  owns_blobs_ = owns_blobs;
299  }
#define ASSERT_HOST(x)
Definition: errcode.h:84
bool owns_blobs() const
Definition: colpartition.h:291
void tesseract::ColPartition::set_right_margin ( int  margin)
inline

Definition at line 121 of file colpartition.h.

121  {
122  right_margin_ = margin;
123  }
void tesseract::ColPartition::set_side_step ( int  step)
inline

Definition at line 217 of file colpartition.h.

217  {
218  side_step_ = step;
219  }
void tesseract::ColPartition::set_space_above ( int  space)
inline

Definition at line 264 of file colpartition.h.

264  {
265  space_above_ = space;
266  }
void tesseract::ColPartition::set_space_below ( int  space)
inline

Definition at line 270 of file colpartition.h.

270  {
271  space_below_ = space;
272  }
void tesseract::ColPartition::set_space_to_left ( int  space)
inline

Definition at line 276 of file colpartition.h.

276  {
277  space_to_left_ = space;
278  }
void tesseract::ColPartition::set_space_to_right ( int  space)
inline

Definition at line 282 of file colpartition.h.

282  {
283  space_to_right_ = space;
284  }
void tesseract::ColPartition::set_table_type ( )
inline

Definition at line 233 of file colpartition.h.

233  {
234  if (type_ != PT_TABLE) {
235  type_before_table_ = type_;
236  type_ = PT_TABLE;
237  }
238  }
Definition: capi.h:78
void tesseract::ColPartition::set_top_spacing ( int  spacing)
inline

Definition at line 229 of file colpartition.h.

229  {
230  top_spacing_ = spacing;
231  }
void tesseract::ColPartition::set_type ( PolyBlockType  t)
inline

Definition at line 184 of file colpartition.h.

184  {
185  type_ = t;
186  }
void tesseract::ColPartition::set_vertical ( const ICOORD & v)
inline

Definition at line 193 of file colpartition.h.

193  {
194  vertical_ = v;
195  }
void tesseract::ColPartition::set_working_set ( WorkingPartSet * working_set)
inline

Definition at line 202 of file colpartition.h.

202  {
203  working_set_ = working_set;
204  }
void tesseract::ColPartition::SetBlobTypes ( )

Definition at line 1279 of file colpartition.cpp.

1279  {
1280  if (!owns_blobs())
1281  return;
1282  BLOBNBOX_C_IT it(&boxes_);
1283  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1284  BLOBNBOX* blob = it.data();
1285  if (blob->flow() != BTFT_LEADER)
1286  blob->set_flow(flow_);
1287  blob->set_region_type(blob_type_);
1288  ASSERT_HOST(blob->owner() == NULL || blob->owner() == this);
1289  }
1290 }
#define ASSERT_HOST(x)
Definition: errcode.h:84
void set_flow(BlobTextFlowType value)
Definition: blobbox.h:283
bool owns_blobs() const
Definition: colpartition.h:291
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:271
#define NULL
Definition: host.h:144
BlobTextFlowType flow() const
Definition: blobbox.h:280
void tesseract::ColPartition::SetColumnGoodness ( WidthCallback * cb)

Definition at line 1083 of file colpartition.cpp.

1083  {
1084  int y = MidY();
1085  int width = RightAtY(y) - LeftAtY(y);
1086  good_width_ = cb->Run(width);
1087  good_column_ = blob_type_ == BRT_TEXT && left_key_tab_ && right_key_tab_;
1088 }
int LeftAtY(int y) const
Definition: colpartition.h:340
int RightAtY(int y) const
Definition: colpartition.h:344
void tesseract::ColPartition::SetLeftTab ( const TabVector * tab_vector)

Definition at line 509 of file colpartition.cpp.

509  {
510  if (tab_vector != NULL) {
511  left_key_ = tab_vector->sort_key();
512  left_key_tab_ = left_key_ <= BoxLeftKey();
513  } else {
514  left_key_tab_ = false;
515  }
516  if (!left_key_tab_)
517  left_key_ = BoxLeftKey();
518 }
#define NULL
Definition: host.h:144
void tesseract::ColPartition::SetPartitionType ( int  resolution,
ColPartitionSet * columns 
)

Definition at line 986 of file colpartition.cpp.

986  {
987  int first_spanned_col = -1;
988  ColumnSpanningType span_type =
989  columns->SpanningType(resolution,
990  bounding_box_.left(), bounding_box_.right(),
991  MIN(bounding_box_.height(), bounding_box_.width()),
992  MidY(), left_margin_, right_margin_,
993  &first_column_, &last_column_,
994  &first_spanned_col);
995  column_set_ = columns;
996  if (first_column_ < last_column_ && span_type == CST_PULLOUT &&
997  !IsLineType()) {
998  // Unequal columns may indicate that the pullout spans one of the columns
999  // it lies in, so force it to be allocated to just that column.
1000  if (first_spanned_col >= 0) {
1001  first_column_ = first_spanned_col;
1002  last_column_ = first_spanned_col;
1003  } else {
1004  if ((first_column_ & 1) == 0)
1005  last_column_ = first_column_;
1006  else if ((last_column_ & 1) == 0)
1007  first_column_ = last_column_;
1008  else
1009  first_column_ = last_column_ = (first_column_ + last_column_) / 2;
1010  }
1011  }
1012  type_ = PartitionType(span_type);
1013 }
bool IsLineType() const
Definition: colpartition.h:419
#define MIN(x, y)
Definition: ndminx.h:28
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
inT16 height() const
Definition: rect.h:104
inT16 width() const
Definition: rect.h:111
PolyBlockType PartitionType(ColumnSpanningType flow) const
void tesseract::ColPartition::SetRegionAndFlowTypesFromProjectionValue ( int  value)

Definition at line 1205 of file colpartition.cpp.

1205  {
1206  int blob_count = 0; // Total # blobs.
1207  int good_blob_score_ = 0; // Total # good strokewidth neighbours.
1208  int noisy_count = 0; // Total # neighbours marked as noise.
1209  int hline_count = 0;
1210  int vline_count = 0;
1211  BLOBNBOX_C_IT it(&boxes_);
1212  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1213  BLOBNBOX* blob = it.data();
1214  ++blob_count;
1215  noisy_count += blob->NoisyNeighbours();
1216  good_blob_score_ += blob->GoodTextBlob();
1217  if (blob->region_type() == BRT_HLINE) ++hline_count;
1218  if (blob->region_type() == BRT_VLINE) ++vline_count;
1219  }
1220  flow_ = BTFT_NEIGHBOURS;
1221  blob_type_ = BRT_UNKNOWN;
1222  if (hline_count > vline_count) {
1223  flow_ = BTFT_NONE;
1224  blob_type_ = BRT_HLINE;
1225  } else if (vline_count > hline_count) {
1226  flow_ = BTFT_NONE;
1227  blob_type_ = BRT_VLINE;
1228  } else if (value < -1 || 1 < value) {
1229  int long_side;
1230  int short_side;
1231  if (value > 0) {
1232  long_side = bounding_box_.width();
1233  short_side = bounding_box_.height();
1234  blob_type_ = BRT_TEXT;
1235  } else {
1236  long_side = bounding_box_.height();
1237  short_side = bounding_box_.width();
1238  blob_type_ = BRT_VERT_TEXT;
1239  }
1240  // We will combine the old metrics using aspect ratio and blob counts
1241  // with the input value by allowing a strong indication to flip the
1242  // STRONG_CHAIN/CHAIN flow values.
1243  int strong_score = blob_count >= kHorzStrongTextlineCount ? 1 : 0;
1244  if (short_side > kHorzStrongTextlineHeight) ++strong_score;
1245  if (short_side * kHorzStrongTextlineAspect < long_side) ++strong_score;
1246  if (abs(value) >= kMinStrongTextValue)
1247  flow_ = BTFT_STRONG_CHAIN;
1248  else if (abs(value) >= kMinChainTextValue)
1249  flow_ = BTFT_CHAIN;
1250  else
1251  flow_ = BTFT_NEIGHBOURS;
1252  // Upgrade chain to strong chain if the other indicators are good
1253  if (flow_ == BTFT_CHAIN && strong_score == 3)
1254  flow_ = BTFT_STRONG_CHAIN;
1255  // Downgrade strong vertical text to chain if the indicators are bad.
1256  if (flow_ == BTFT_STRONG_CHAIN && value < 0 && strong_score < 2)
1257  flow_ = BTFT_CHAIN;
1258  }
1259  if (flow_ == BTFT_NEIGHBOURS) {
1260  // Check for noisy neighbours.
1261  if (noisy_count >= blob_count) {
1262  flow_ = BTFT_NONTEXT;
1263  blob_type_= BRT_NOISE;
1264  }
1265  }
1266  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
1267  bounding_box_.bottom())) {
1268  tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
1269  blob_count, noisy_count, good_blob_score_);
1270  tprintf(" Projection value=%d, flow=%d, blob_type=%d\n",
1271  value, flow_, blob_type_);
1272  Print();
1273  }
1274  SetBlobTypes();
1275 }
static bool WithinTestRegion(int detail_level, int x, int y)
const int kHorzStrongTextlineHeight
const int kMinStrongTextValue
int NoisyNeighbours() const
Definition: blobbox.cpp:230
#define tprintf(...)
Definition: tprintf.h:31
const int kHorzStrongTextlineAspect
inT16 left() const
Definition: rect.h:68
BlobRegionType region_type() const
Definition: blobbox.h:268
const int kMinChainTextValue
inT16 bottom() const
Definition: rect.h:61
inT16 height() const
Definition: rect.h:104
inT16 width() const
Definition: rect.h:111
const int kHorzStrongTextlineCount
int GoodTextBlob() const
Definition: blobbox.cpp:219
void tesseract::ColPartition::SetRightTab ( const TabVector * tab_vector)

Definition at line 521 of file colpartition.cpp.

521  {
522  if (tab_vector != NULL) {
523  right_key_ = tab_vector->sort_key();
524  right_key_tab_ = right_key_ >= BoxRightKey();
525  } else {
526  right_key_tab_ = false;
527  }
528  if (!right_key_tab_)
529  right_key_ = BoxRightKey();
530 }
int BoxRightKey() const
Definition: colpartition.h:336
#define NULL
Definition: host.h:144
void tesseract::ColPartition::SetSpecialBlobsDensity ( const BlobSpecialTextType  type,
const float  density 
)

Definition at line 591 of file colpartition.cpp.

592  {
594  special_blobs_densities_[type] = density;
595 }
#define ASSERT_HOST(x)
Definition: errcode.h:84
PolyBlockType type() const
Definition: colpartition.h:181
ColPartition * tesseract::ColPartition::ShallowCopy ( ) const

Definition at line 1739 of file colpartition.cpp.

1739  {
1740  ColPartition* part = new ColPartition(blob_type_, vertical_);
1741  part->left_margin_ = left_margin_;
1742  part->right_margin_ = right_margin_;
1743  part->bounding_box_ = bounding_box_;
1744  memcpy(part->special_blobs_densities_, special_blobs_densities_,
1745  sizeof(special_blobs_densities_));
1746  part->median_bottom_ = median_bottom_;
1747  part->median_top_ = median_top_;
1748  part->median_size_ = median_size_;
1749  part->median_left_ = median_left_;
1750  part->median_right_ = median_right_;
1751  part->median_width_ = median_width_;
1752  part->good_width_ = good_width_;
1753  part->good_column_ = good_column_;
1754  part->left_key_tab_ = left_key_tab_;
1755  part->right_key_tab_ = right_key_tab_;
1756  part->type_ = type_;
1757  part->flow_ = flow_;
1758  part->left_key_ = left_key_;
1759  part->right_key_ = right_key_;
1760  part->first_column_ = first_column_;
1761  part->last_column_ = last_column_;
1762  part->owns_blobs_ = false;
1763  return part;
1764 }
ColPartition * tesseract::ColPartition::SingletonPartner ( bool  upper)

Definition at line 644 of file colpartition.cpp.

644  {
645  ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
646  if (!partners->singleton())
647  return NULL;
648  ColPartition_C_IT it(partners);
649  return it.data();
650 }
#define NULL
Definition: host.h:144
void tesseract::ColPartition::SmoothPartnerRun ( int  working_set_count)

Definition at line 1816 of file colpartition.cpp.

1816  {
1817  STATS left_stats(0, working_set_count);
1818  STATS right_stats(0, working_set_count);
1819  PolyBlockType max_type = type_;
1820  ColPartition* partner;
1821  for (partner = SingletonPartner(false); partner != NULL;
1822  partner = partner->SingletonPartner(false)) {
1823  if (partner->type_ > max_type)
1824  max_type = partner->type_;
1825  if (column_set_ == partner->column_set_) {
1826  left_stats.add(partner->first_column_, 1);
1827  right_stats.add(partner->last_column_, 1);
1828  }
1829  }
1830  type_ = max_type;
1831  // TODO(rays) Either establish that it isn't necessary to set the columns,
1832  // or find a way to do it that does not cause an assert failure in
1833  // AddToWorkingSet.
1834 #if 0
1835  first_column_ = left_stats.mode();
1836  last_column_ = right_stats.mode();
1837  if (last_column_ < first_column_)
1838  last_column_ = first_column_;
1839 #endif
1840 
1841  for (partner = SingletonPartner(false); partner != NULL;
1842  partner = partner->SingletonPartner(false)) {
1843  partner->type_ = max_type;
1844 #if 0 // See TODO above
1845  if (column_set_ == partner->column_set_) {
1846  partner->first_column_ = first_column_;
1847  partner->last_column_ = last_column_;
1848  }
1849 #endif
1850  }
1851 }
Definition: statistc.h:33
ColPartition * SingletonPartner(bool upper)
#define NULL
Definition: host.h:144
PolyBlockType
Definition: publictypes.h:41
int tesseract::ColPartition::SortKey ( int  x,
int  y 
) const
inline

Definition at line 316 of file colpartition.h.

316  {
317  return TabVector::SortKey(vertical_, x, y);
318  }
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:280
int tesseract::ColPartition::space_above ( ) const
inline

Definition at line 261 of file colpartition.h.

261  {
262  return space_above_;
263  }
int tesseract::ColPartition::space_below ( ) const
inline

Definition at line 267 of file colpartition.h.

267  {
268  return space_below_;
269  }
int tesseract::ColPartition::space_to_left ( ) const
inline

Definition at line 273 of file colpartition.h.

273  {
274  return space_to_left_;
275  }
int tesseract::ColPartition::space_to_right ( ) const
inline

Definition at line 279 of file colpartition.h.

279  {
280  return space_to_right_;
281  }
int tesseract::ColPartition::SpecialBlobsCount ( const BlobSpecialTextType  type)

Definition at line 576 of file colpartition.cpp.

576  {
577  ASSERT_HOST(type < BSTT_COUNT);
578  BLOBNBOX_C_IT blob_it(&boxes_);
579  int count = 0;
580  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
581  BLOBNBOX* blob = blob_it.data();
582  BlobSpecialTextType blob_type = blob->special_text_type();
583  if (blob_type == type) {
584  count++;
585  }
586  }
587 
588  return count;
589 }
BlobSpecialTextType
Definition: blobbox.h:81
BlobRegionType blob_type() const
Definition: colpartition.h:148
#define ASSERT_HOST(x)
Definition: errcode.h:84
BlobSpecialTextType special_text_type() const
Definition: blobbox.h:274
PolyBlockType type() const
Definition: colpartition.h:181
int count(LIST var_list)
Definition: oldlist.cpp:108
float tesseract::ColPartition::SpecialBlobsDensity ( const BlobSpecialTextType  type) const

Definition at line 571 of file colpartition.cpp.

571  {
572  ASSERT_HOST(type < BSTT_COUNT);
573  return special_blobs_densities_[type];
574 }
#define ASSERT_HOST(x)
Definition: errcode.h:84
PolyBlockType type() const
Definition: colpartition.h:181
ColPartition * tesseract::ColPartition::SplitAt ( int  split_x)

Definition at line 836 of file colpartition.cpp.

836  {
837  if (split_x <= bounding_box_.left() || split_x >= bounding_box_.right())
838  return NULL; // There will be no change.
839  ColPartition* split_part = ShallowCopy();
840  split_part->set_owns_blobs(owns_blobs());
841  BLOBNBOX_C_IT it(&boxes_);
842  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
843  BLOBNBOX* bbox = it.data();
844  ColPartition* prev_owner = bbox->owner();
845  ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == NULL);
846  const TBOX& box = bbox->bounding_box();
847  if (box.left() >= split_x) {
848  split_part->AddBox(it.extract());
849  if (owns_blobs() && prev_owner != NULL)
850  bbox->set_owner(split_part);
851  }
852  }
853  if (it.empty()) {
854  // Possible if split-x passes through the first blob.
855  it.add_list_after(&split_part->boxes_);
856  }
857  ASSERT_HOST(!it.empty());
858  if (split_part->IsEmpty()) {
859  // Split part ended up with nothing. Possible if split_x passes
860  // through the last blob.
861  delete split_part;
862  return NULL;
863  }
864  right_key_tab_ = false;
865  split_part->left_key_tab_ = false;
866  right_margin_ = split_x;
867  split_part->left_margin_ = split_x;
868  ComputeLimits();
869  split_part->ComputeLimits();
870  return split_part;
871 }
ColPartition * ShallowCopy() const
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:340
inT16 right() const
Definition: rect.h:75
#define ASSERT_HOST(x)
Definition: errcode.h:84
bool owns_blobs() const
Definition: colpartition.h:291
inT16 left() const
Definition: rect.h:68
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
Definition: rect.h:30
#define NULL
Definition: host.h:144
const TBOX & bounding_box() const
Definition: blobbox.h:215
ColPartition * tesseract::ColPartition::SplitAtBlob ( BLOBNBOX * split_blob)

Definition at line 800 of file colpartition.cpp.

800  {
801  ColPartition* split_part = ShallowCopy();
802  split_part->set_owns_blobs(owns_blobs());
803  BLOBNBOX_C_IT it(&boxes_);
804  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
805  BLOBNBOX* bbox = it.data();
806  ColPartition* prev_owner = bbox->owner();
807  ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == NULL);
808  if (bbox == split_blob || !split_part->boxes_.empty()) {
809  split_part->AddBox(it.extract());
810  if (owns_blobs() && prev_owner != NULL)
811  bbox->set_owner(split_part);
812  }
813  }
814  ASSERT_HOST(!it.empty());
815  if (split_part->IsEmpty()) {
816  // Split part ended up with nothing. Possible if split_blob is not
817  // in the list of blobs.
818  delete split_part;
819  return NULL;
820  }
821  right_key_tab_ = false;
822  split_part->left_key_tab_ = false;
823  ComputeLimits();
824  // TODO(nbeato) Merge Ray's CL like this:
825  // if (owns_blobs())
826  // SetBlobTextlineGoodness();
827  split_part->ComputeLimits();
828  // TODO(nbeato) Merge Ray's CL like this:
829  // if (split_part->owns_blobs())
830  // split_part->SetBlobTextlineGoodness();
831  return split_part;
832 }
ColPartition * ShallowCopy() const
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:340
#define ASSERT_HOST(x)
Definition: errcode.h:84
bool owns_blobs() const
Definition: colpartition.h:291
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
#define NULL
Definition: host.h:144
int tesseract::ColPartition::top_spacing ( ) const
inline

Definition at line 226 of file colpartition.h.

226  {
227  return top_spacing_;
228  }
PolyBlockType tesseract::ColPartition::type ( ) const
inline

Definition at line 181 of file colpartition.h.

181  {
182  return type_;
183  }
bool tesseract::ColPartition::TypesMatch ( const ColPartition & other) const
inline

Definition at line 403 of file colpartition.h.

403  {
404  return TypesMatch(blob_type_, other.blob_type_);
405  }
bool TypesMatch(const ColPartition &other) const
Definition: colpartition.h:403
static bool tesseract::ColPartition::TypesMatch ( BlobRegionType  type1,
BlobRegionType  type2 
)
inline static

Definition at line 406 of file colpartition.h.

406  {
407  return (type1 == type2 || type1 == BRT_UNKNOWN || type2 == BRT_UNKNOWN) &&
408  !BLOBNBOX::IsLineType(type1) && !BLOBNBOX::IsLineType(type2);
409  }
static bool IsLineType(BlobRegionType type)
Definition: blobbox.h:411
static bool tesseract::ColPartition::TypesSimilar ( PolyBlockType  type1,
PolyBlockType  type2 
)
inline static

Definition at line 412 of file colpartition.h.

412  {
413  return (type1 == type2 ||
414  (type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) ||
415  (type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION));
416  }
ColPartition_CLIST* tesseract::ColPartition::upper_partners ( )
inline

Definition at line 196 of file colpartition.h.

196  {
197  return &upper_partners_;
198  }
int tesseract::ColPartition::VCoreOverlap ( const ColPartition & other) const
inline

Definition at line 375 of file colpartition.h.

375  {
376  return MIN(median_top_, other.median_top_) -
377  MAX(median_bottom_, other.median_bottom_);
378  }
#define MAX(x, y)
Definition: ndminx.h:24
#define MIN(x, y)
Definition: ndminx.h:28
bool tesseract::ColPartition::VOverlaps ( const ColPartition & other) const
inline

Definition at line 370 of file colpartition.h.

370  {
371  return bounding_box_.y_gap(other.bounding_box_) < 0;
372  }
int y_gap(const TBOX &box) const
Definition: rect.h:225
bool tesseract::ColPartition::VSignificantCoreOverlap ( const ColPartition & other) const
inline

Definition at line 387 of file colpartition.h.

387  {
388  int overlap = VCoreOverlap(other);
389  int height = MIN(median_top_ - median_bottom_,
390  other.median_top_ - other.median_bottom_);
391  return overlap * 3 > height;
392  }
#define MIN(x, y)
Definition: ndminx.h:28
int VCoreOverlap(const ColPartition &other) const
Definition: colpartition.h:375
bool tesseract::ColPartition::WithinSameMargins ( const ColPartition & other) const
inline

Definition at line 395 of file colpartition.h.

395  {
396  return left_margin_ <= other.bounding_box_.left() &&
397  bounding_box_.left() >= other.left_margin_ &&
398  bounding_box_.right() <= other.right_margin_ &&
399  right_margin_ >= other.bounding_box_.right();
400  }
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
int tesseract::ColPartition::XAtY ( int  sort_key,
int  y 
) const
inline

Definition at line 320 of file colpartition.h.

320  {
321  return TabVector::XAtY(vertical_, sort_key, y);
322  }
int XAtY(int y) const
Definition: tabvector.h:189

The documentation for this class was generated from the following files: