tesseract v5.3.3.20231005
tesseract::StructuredTable Class Reference

#include <tablerecog.h>

Inheritance diagram for tesseract::StructuredTable:
tesseract::TestableStructuredTable

Public Member Functions

 StructuredTable ()
 
 ~StructuredTable ()=default
 
void Init ()
 
void set_text_grid (ColPartitionGrid *text)
 
void set_line_grid (ColPartitionGrid *lines)
 
void set_max_text_height (int height)
 
bool is_lined () const
 
unsigned row_count () const
 
unsigned column_count () const
 
unsigned cell_count () const
 
void set_bounding_box (const TBOX &box)
 
const TBOX & bounding_box () const
 
int median_cell_height ()
 
int median_cell_width ()
 
int row_height (unsigned row) const
 
int column_width (unsigned column) const
 
int space_above () const
 
int space_below () const
 
bool FindLinedStructure ()
 
bool FindWhitespacedStructure ()
 
bool DoesPartitionFit (const ColPartition &part) const
 
int CountFilledCells ()
 
int CountFilledCellsInRow (int row)
 
int CountFilledCellsInColumn (int column)
 
int CountFilledCells (unsigned row_start, unsigned row_end, unsigned column_start, unsigned column_end)
 
bool VerifyRowFilled (int row)
 
double CalculateCellFilledPercentage (unsigned row, unsigned column)
 
void Display (ScrollView *window, ScrollView::Color color)
 

Protected Member Functions

void ClearStructure ()
 
bool VerifyLinedTableCells ()
 
bool VerifyWhitespacedTable ()
 
void FindWhitespacedColumns ()
 
void FindWhitespacedRows ()
 
void CalculateMargins ()
 
void UpdateMargins (ColPartitionGrid *grid)
 
int FindVerticalMargin (ColPartitionGrid *grid, int start_x, bool decrease) const
 
int FindHorizontalMargin (ColPartitionGrid *grid, int start_y, bool decrease) const
 
void CalculateStats ()
 
void AbsorbNearbyLines ()
 
int CountVerticalIntersections (int x)
 
int CountHorizontalIntersections (int y)
 
int CountPartitions (const TBOX &box)
 

Static Protected Member Functions

static void FindCellSplitLocations (const std::vector< int > &min_list, const std::vector< int > &max_list, int max_merged, std::vector< int > *locations)
 

Protected Attributes

ColPartitionGrid * text_grid_
 
ColPartitionGrid * line_grid_
 
TBOX bounding_box_
 
std::vector< int > cell_x_
 
std::vector< int > cell_y_
 
bool is_lined_
 
int space_above_
 
int space_below_
 
int space_left_
 
int space_right_
 
int median_cell_height_
 
int median_cell_width_
 
int max_text_height_
 

Detailed Description

Definition at line 70 of file tablerecog.h.

Constructor & Destructor Documentation

◆ StructuredTable()

tesseract::StructuredTable::StructuredTable ( )

Definition at line 88 of file tablerecog.cpp.

◆ ~StructuredTable()

tesseract::StructuredTable::~StructuredTable ( )
default

Member Function Documentation

◆ AbsorbNearbyLines()

void tesseract::StructuredTable::AbsorbNearbyLines ( )
protected

Definition at line 573 of file tablerecog.cpp.

573 {
575 gsearch.SetUniqueMode(true);
576
577 // Is the closest line above good? Loop multiple times for tables with
578 // multi-line (sometimes 2) borders. Limit the number of lines by
579 // making sure they stay within a table cell or so.
580 ColPartition *line = nullptr;
581 gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(), bounding_box_.top());
582 while ((line = gsearch.NextVerticalSearch(false)) != nullptr) {
583 if (!line->IsHorizontalLine()) {
584 break;
585 }
586 TBOX text_search(bounding_box_.left(), bounding_box_.top() + 1, bounding_box_.right(),
587 line->MidY());
588 if (text_search.height() > median_cell_height_ * 2) {
589 break;
590 }
591 if (CountPartitions(text_search) > 0) {
592 break;
593 }
594 bounding_box_.set_top(line->MidY());
595 }
596 // As above, is the closest line below good?
597 line = nullptr;
598 gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(), bounding_box_.bottom());
599 while ((line = gsearch.NextVerticalSearch(true)) != nullptr) {
600 if (!line->IsHorizontalLine()) {
601 break;
602 }
603 TBOX text_search(bounding_box_.left(), line->MidY(), bounding_box_.right(),
604 bounding_box_.bottom() - 1);
605 if (text_search.height() > median_cell_height_ * 2) {
606 break;
607 }
608 if (CountPartitions(text_search) > 0) {
609 break;
610 }
611 bounding_box_.set_bottom(line->MidY());
612 }
613 // TODO(nbeato): vertical lines
614}
@ TBOX
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:919
TDimension left() const
Definition: rect.h:82
TDimension top() const
Definition: rect.h:68
void set_bottom(int y)
Definition: rect.h:78
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
void set_top(int y)
Definition: rect.h:71
int CountPartitions(const TBOX &box)
Definition: tablerecog.cpp:727

◆ bounding_box()

const TBOX & tesseract::StructuredTable::bounding_box ( ) const

Definition at line 126 of file tablerecog.cpp.

126 {
127 return bounding_box_;
128}

◆ CalculateCellFilledPercentage()

double tesseract::StructuredTable::CalculateCellFilledPercentage ( unsigned  row,
unsigned  column 
)

Definition at line 294 of file tablerecog.cpp.

294 {
295 ASSERT_HOST(row <= row_count());
296 ASSERT_HOST(column <= column_count());
297 const TBOX kCellBox(cell_x_[column], cell_y_[row], cell_x_[column + 1], cell_y_[row + 1]);
298 ASSERT_HOST(!kCellBox.null_box());
299
301 gsearch.SetUniqueMode(true);
302 gsearch.StartRectSearch(kCellBox);
303 double area_covered = 0;
304 ColPartition *text = nullptr;
305 while ((text = gsearch.NextRectSearch()) != nullptr) {
306 if (text->IsTextType()) {
307 area_covered += text->bounding_box().intersection(kCellBox).area();
308 }
309 }
310 const int32_t current_area = kCellBox.area();
311 if (current_area == 0) {
312 return 1.0;
313 }
314 return std::min(1.0, area_covered / current_area);
315}
#define ASSERT_HOST(x)
Definition: errcode.h:54
std::vector< int > cell_y_
Definition: tablerecog.h:238
unsigned column_count() const
Definition: tablerecog.cpp:117
std::vector< int > cell_x_
Definition: tablerecog.h:237
unsigned row_count() const
Definition: tablerecog.cpp:114

◆ CalculateMargins()

void tesseract::StructuredTable::CalculateMargins ( )
protected

Definition at line 496 of file tablerecog.cpp.

496 {
497 space_above_ = INT32_MAX;
498 space_below_ = INT32_MAX;
499 space_right_ = INT32_MAX;
500 space_left_ = INT32_MAX;
503}
void UpdateMargins(ColPartitionGrid *grid)
Definition: tablerecog.cpp:506

◆ CalculateStats()

void tesseract::StructuredTable::CalculateStats ( )
protected

Definition at line 551 of file tablerecog.cpp.

551 {
552 const int kMaxCellHeight = 1000;
553 const int kMaxCellWidth = 1000;
554 STATS height_stats(0, kMaxCellHeight);
555 STATS width_stats(0, kMaxCellWidth);
556
557 for (unsigned i = 0; i < row_count(); ++i) {
558 height_stats.add(row_height(i), column_count());
559 }
560 for (unsigned i = 0; i < column_count(); ++i) {
561 width_stats.add(column_width(i), row_count());
562 }
563
564 median_cell_height_ = static_cast<int>(height_stats.median() + 0.5);
565 median_cell_width_ = static_cast<int>(width_stats.median() + 0.5);
566}
int row_height(unsigned row) const
Definition: tablerecog.cpp:135
int column_width(unsigned column) const
Definition: tablerecog.cpp:139

◆ cell_count()

unsigned tesseract::StructuredTable::cell_count ( ) const

Definition at line 120 of file tablerecog.cpp.

120 {
121 return row_count() * column_count();
122}

◆ ClearStructure()

void tesseract::StructuredTable::ClearStructure ( )
protected

Definition at line 336 of file tablerecog.cpp.

336 {
337 cell_x_.clear();
338 cell_y_.clear();
339 is_lined_ = false;
340 space_above_ = 0;
341 space_below_ = 0;
342 space_left_ = 0;
343 space_right_ = 0;
346}

◆ column_count()

unsigned tesseract::StructuredTable::column_count ( ) const

Definition at line 117 of file tablerecog.cpp.

117 {
118 return cell_x_.empty() ? 0 : cell_x_.size() - 1;
119}

◆ column_width()

int tesseract::StructuredTable::column_width ( unsigned  column) const

Definition at line 139 of file tablerecog.cpp.

139 {
140 ASSERT_HOST(column < column_count());
141 return cell_x_[column + 1] - cell_x_[column];
142}

◆ CountFilledCells() [1/2]

int tesseract::StructuredTable::CountFilledCells ( )

Definition at line 250 of file tablerecog.cpp.

250 {
251 return CountFilledCells(0, row_count() - 1, 0, column_count() - 1);
252}

◆ CountFilledCells() [2/2]

int tesseract::StructuredTable::CountFilledCells ( unsigned  row_start,
unsigned  row_end,
unsigned  column_start,
unsigned  column_end 
)

Definition at line 259 of file tablerecog.cpp.

260 {
261 ASSERT_HOST(row_start <= row_end && row_end < row_count());
262 ASSERT_HOST(column_start <= column_end && column_end < column_count());
263 int cell_count = 0;
264 TBOX cell_box;
265 for (unsigned row = row_start; row <= row_end; ++row) {
266 cell_box.set_bottom(cell_y_[row]);
267 cell_box.set_top(cell_y_[row + 1]);
268 for (unsigned col = column_start; col <= column_end; ++col) {
269 cell_box.set_left(cell_x_[col]);
270 cell_box.set_right(cell_x_[col + 1]);
271 if (CountPartitions(cell_box) > 0) {
272 ++cell_count;
273 }
274 }
275 }
276 return cell_count;
277}
unsigned cell_count() const
Definition: tablerecog.cpp:120

◆ CountFilledCellsInColumn()

int tesseract::StructuredTable::CountFilledCellsInColumn ( int  column)

Definition at line 256 of file tablerecog.cpp.

256 {
257 return CountFilledCells(0, row_count() - 1, column, column);
258}

◆ CountFilledCellsInRow()

int tesseract::StructuredTable::CountFilledCellsInRow ( int  row)

Definition at line 253 of file tablerecog.cpp.

253 {
254 return CountFilledCells(row, row, 0, column_count() - 1);
255}

◆ CountHorizontalIntersections()

int tesseract::StructuredTable::CountHorizontalIntersections ( int  y)
protected

Definition at line 699 of file tablerecog.cpp.

699 {
700 int count = 0;
701 // Make a small box to keep the search time down.
702 const int kGridSize = text_grid_->gridsize();
703 TBOX horizontal_box = bounding_box_;
704 horizontal_box.set_bottom(y - kGridSize);
705 horizontal_box.set_top(y + kGridSize);
706
708 gsearch.SetUniqueMode(true);
709 gsearch.StartRectSearch(horizontal_box);
710 ColPartition *text = nullptr;
711 while ((text = gsearch.NextRectSearch()) != nullptr) {
712 if (!text->IsTextType()) {
713 continue;
714 }
715
716 const TBOX &box = text->bounding_box();
717 if (box.bottom() < y && y < box.top()) {
718 ++count;
719 }
720 }
721 return count;
722}
const double y
int * count
int gridsize() const
Definition: bbgrid.h:63

◆ CountPartitions()

int tesseract::StructuredTable::CountPartitions ( const TBOX & box)
protected

Definition at line 727 of file tablerecog.cpp.

727 {
729 gsearch.SetUniqueMode(true);
730 gsearch.StartRectSearch(box);
731 int count = 0;
732 ColPartition *text = nullptr;
733 while ((text = gsearch.NextRectSearch()) != nullptr) {
734 if (text->IsTextType()) {
735 ++count;
736 }
737 }
738 return count;
739}

◆ CountVerticalIntersections()

int tesseract::StructuredTable::CountVerticalIntersections ( int  x)
protected

Definition at line 673 of file tablerecog.cpp.

673 {
674 int count = 0;
675 // Make a small box to keep the search time down.
676 const int kGridSize = text_grid_->gridsize();
677 TBOX vertical_box = bounding_box_;
678 vertical_box.set_left(x - kGridSize);
679 vertical_box.set_right(x + kGridSize);
680
682 gsearch.SetUniqueMode(true);
683 gsearch.StartRectSearch(vertical_box);
684 ColPartition *text = nullptr;
685 while ((text = gsearch.NextRectSearch()) != nullptr) {
686 if (!text->IsTextType()) {
687 continue;
688 }
689 const TBOX &box = text->bounding_box();
690 if (box.left() < x && x < box.right()) {
691 ++count;
692 }
693 }
694 return count;
695}
void set_left(int x)
Definition: rect.h:85

◆ Display()

void tesseract::StructuredTable::Display ( ScrollView * window,
ScrollView::Color  color 
)

Definition at line 319 of file tablerecog.cpp.

319 {
320 window->Brush(ScrollView::NONE);
321 window->Pen(color);
322 window->Rectangle(bounding_box_.left(), bounding_box_.bottom(), bounding_box_.right(),
323 bounding_box_.top());
324 for (int i : cell_x_) {
325 window->Line(i, bounding_box_.bottom(), i, bounding_box_.top());
326 }
327 for (int i : cell_y_) {
328 window->Line(bounding_box_.left(), i, bounding_box_.right(), i);
329 }
330 window->UpdateWindow();
331}

◆ DoesPartitionFit()

bool tesseract::StructuredTable::DoesPartitionFit ( const ColPartition & part) const

Definition at line 234 of file tablerecog.cpp.

234 {
235 const TBOX &box = part.bounding_box();
236 for (int i : cell_x_) {
237 if (box.left() < i && i < box.right()) {
238 return false;
239 }
240 }
241 for (int i : cell_y_) {
242 if (box.bottom() < i && i < box.top()) {
243 return false;
244 }
245 }
246 return true;
247}

◆ FindCellSplitLocations()

void tesseract::StructuredTable::FindCellSplitLocations ( const std::vector< int > &  min_list,
const std::vector< int > &  max_list,
int  max_merged,
std::vector< int > *  locations 
)
static protected

Definition at line 630 of file tablerecog.cpp.

632 {
633 locations->clear();
634 ASSERT_HOST(min_list.size() == max_list.size());
635 if (min_list.empty()) {
636 return;
637 }
638 ASSERT_HOST(min_list.at(0) < max_list.at(0));
639 ASSERT_HOST(min_list.at(min_list.size() - 1) < max_list.at(max_list.size() - 1));
640
641 locations->push_back(min_list.at(0));
642 unsigned min_index = 0;
643 unsigned max_index = 0;
644 int stacked_partitions = 0;
645 int last_cross_position = INT32_MAX;
646 // max_index will expire after min_index.
647 // However, we can't "increase" the hill size if min_index expired.
648 // So finish processing when min_index expires.
649 while (min_index < min_list.size()) {
650 // Increase the hill count.
651 if (min_list[min_index] < max_list[max_index]) {
652 ++stacked_partitions;
653 if (last_cross_position != INT32_MAX && stacked_partitions > max_merged) {
654 int mid = (last_cross_position + min_list[min_index]) / 2;
655 locations->push_back(mid);
656 last_cross_position = INT32_MAX;
657 }
658 ++min_index;
659 } else {
660 // Decrease the hill count.
661 --stacked_partitions;
662 if (last_cross_position == INT32_MAX && stacked_partitions <= max_merged) {
663 last_cross_position = max_list[max_index];
664 }
665 ++max_index;
666 }
667 }
668 locations->push_back(max_list.at(max_list.size() - 1));
669}

◆ FindHorizontalMargin()

int tesseract::StructuredTable::FindHorizontalMargin ( ColPartitionGrid * grid,
int  start_y,
bool  decrease 
) const
protected

Definition at line 533 of file tablerecog.cpp.

533 {
534 ColPartitionGridSearch gsearch(grid);
535 gsearch.SetUniqueMode(true);
536 gsearch.StartSideSearch(border, bounding_box_.bottom(), bounding_box_.top());
537 ColPartition *part = nullptr;
538 while ((part = gsearch.NextSideSearch(decrease)) != nullptr) {
539 if (!part->IsTextType() && !part->IsVerticalLine()) {
540 continue;
541 }
542 int distance =
543 decrease ? border - part->bounding_box().right() : part->bounding_box().left() - border;
544 if (distance >= 0) {
545 return distance;
546 }
547 }
548 return INT32_MAX;
549}
UnicodeText::const_iterator::difference_type distance(const UnicodeText::const_iterator &first, const UnicodeText::const_iterator &last)
Definition: unicodetext.cc:44

◆ FindLinedStructure()

bool tesseract::StructuredTable::FindLinedStructure ( )

Definition at line 157 of file tablerecog.cpp.

157 {
159
160 // Search for all of the lines in the current box.
161 // Update the cellular structure with the exact lines.
163 box_search.SetUniqueMode(true);
164 box_search.StartRectSearch(bounding_box_);
165 ColPartition *line = nullptr;
166
167 while ((line = box_search.NextRectSearch()) != nullptr) {
168 if (line->IsHorizontalLine()) {
169 cell_y_.push_back(line->MidY());
170 }
171 if (line->IsVerticalLine()) {
172 cell_x_.push_back(line->MidX());
173 }
174 }
175
176 // HasSignificantLines should guarantee cells.
177 // Because that code is a different class, just gracefully
178 // return false. This could be an assert.
179 if (cell_x_.size() < 3 || cell_y_.size() < 3) {
180 return false;
181 }
182
183 // Sort and remove duplicates that may have occurred due to split lines.
184 std::sort(cell_x_.begin(), cell_x_.end());
185 auto last_x = std::unique(cell_x_.begin(), cell_x_.end());
186 cell_x_.erase(last_x, cell_x_.end());
187 std::sort(cell_y_.begin(), cell_y_.end());
188 auto last_y = std::unique(cell_y_.begin(), cell_y_.end());
189 cell_y_.erase(last_y, cell_y_.end());
190
191 // The border should be the extents of line boxes, not middle.
193 cell_x_[cell_x_.size() - 1] = bounding_box_.right();
195 cell_y_[cell_y_.size() - 1] = bounding_box_.top();
196
197 // Remove duplicates that may have occurred due to moving the borders.
198 last_x = std::unique(cell_x_.begin(), cell_x_.end());
199 cell_x_.erase(last_x, cell_x_.end());
200 last_y = std::unique(cell_y_.begin(), cell_y_.end());
201 cell_y_.erase(last_y, cell_y_.end());
202
206 return is_lined_;
207}

◆ FindVerticalMargin()

int tesseract::StructuredTable::FindVerticalMargin ( ColPartitionGrid * grid,
int  start_x,
bool  decrease 
) const
protected

Definition at line 516 of file tablerecog.cpp.

516 {
517 ColPartitionGridSearch gsearch(grid);
518 gsearch.SetUniqueMode(true);
519 gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(), border);
520 ColPartition *part = nullptr;
521 while ((part = gsearch.NextVerticalSearch(decrease)) != nullptr) {
522 if (!part->IsTextType() && !part->IsHorizontalLine()) {
523 continue;
524 }
525 int distance =
526 decrease ? border - part->bounding_box().top() : part->bounding_box().bottom() - border;
527 if (distance >= 0) {
528 return distance;
529 }
530 }
531 return INT32_MAX;
532}

◆ FindWhitespacedColumns()

void tesseract::StructuredTable::FindWhitespacedColumns ( )
protected

Definition at line 384 of file tablerecog.cpp.

384 {
385 // Set of the extents of all partitions on the page.
386 std::vector<int> left_sides;
387 std::vector<int> right_sides;
388
389 // Look at each text partition. We want to find the partitions
390 // that have extremal left/right sides. These will give us a basis
391 // for the table columns.
393 gsearch.SetUniqueMode(true);
394 gsearch.StartRectSearch(bounding_box_);
395 ColPartition *text = nullptr;
396 while ((text = gsearch.NextRectSearch()) != nullptr) {
397 if (!text->IsTextType()) {
398 continue;
399 }
400
401 ASSERT_HOST(text->bounding_box().left() < text->bounding_box().right());
402 int spacing = static_cast<int>(text->median_width() * kHorizontalSpacing / 2.0 + 0.5);
403 left_sides.push_back(text->bounding_box().left() - spacing);
404 right_sides.push_back(text->bounding_box().right() + spacing);
405 }
406 // It causes disaster below, so avoid it!
407 if (left_sides.empty() || right_sides.empty()) {
408 return;
409 }
410
411 // Since data may be inserted in grid order, we sort the left/right sides.
412 std::sort(left_sides.begin(), left_sides.end());
413 std::sort(right_sides.begin(), right_sides.end());
414
415 // At this point, in the "merged list", we expect to have a left side,
416 // followed by either more left sides or a right side. The last number
417 // should be a right side. We find places where the splits occur by looking
418 // for "valleys". If we want to force gap sizes or allow overlap, change
419 // the spacing above. If you want to let lines "slice" partitions as long
420 // as it is infrequent, change the following function.
421 FindCellSplitLocations(left_sides, right_sides, kCellSplitColumnThreshold, &cell_x_);
422}
const double kHorizontalSpacing
Definition: tablerecog.cpp:34
const int kCellSplitColumnThreshold
Definition: tablerecog.cpp:41
static void FindCellSplitLocations(const std::vector< int > &min_list, const std::vector< int > &max_list, int max_merged, std::vector< int > *locations)
Definition: tablerecog.cpp:630

◆ FindWhitespacedRows()

void tesseract::StructuredTable::FindWhitespacedRows ( )
protected

Definition at line 429 of file tablerecog.cpp.

429 {
430 // Set of the extents of all partitions on the page.
431 std::vector<int> bottom_sides;
432 std::vector<int> top_sides;
433 // We will be "shrinking" partitions, so keep the min/max around to
434 // make sure the bottom/top lines do not intersect text.
435 int min_bottom = INT32_MAX;
436 int max_top = INT32_MIN;
437
438 // Look at each text partition. We want to find the partitions
439 // that have extremal bottom/top sides. These will give us a basis
440 // for the table rows. Because the textlines can be skewed and close due
441 // to warping, the height of the partitions is toned down a little bit.
443 gsearch.SetUniqueMode(true);
444 gsearch.StartRectSearch(bounding_box_);
445 ColPartition *text = nullptr;
446 while ((text = gsearch.NextRectSearch()) != nullptr) {
447 if (!text->IsTextType()) {
448 continue;
449 }
450
451 ASSERT_HOST(text->bounding_box().bottom() < text->bounding_box().top());
452 min_bottom = std::min(min_bottom, static_cast<int>(text->bounding_box().bottom()));
453 max_top = std::max(max_top, static_cast<int>(text->bounding_box().top()));
454
455 // Ignore "tall" text partitions, as these are usually false positive
456 // vertical text or multiple lines pulled together.
457 if (text->bounding_box().height() > max_text_height_) {
458 continue;
459 }
460
461 int spacing = static_cast<int>(text->bounding_box().height() * kVerticalSpacing / 2.0 + 0.5);
462 int bottom = text->bounding_box().bottom() - spacing;
463 int top = text->bounding_box().top() + spacing;
464 // For horizontal text, the factor can be negative. This should
465 // probably cause a warning or failure. I haven't actually checked if
466 // it happens.
467 if (bottom >= top) {
468 continue;
469 }
470
471 bottom_sides.push_back(bottom);
472 top_sides.push_back(top);
473 }
474 // It causes disaster below, so avoid it!
475 if (bottom_sides.empty() || top_sides.empty()) {
476 return;
477 }
478
479 // Since data may be inserted in grid order, we sort the bottom/top sides.
480 std::sort(bottom_sides.begin(), bottom_sides.end());
481 std::sort(top_sides.begin(), top_sides.end());
482
483 // At this point, in the "merged list", we expect to have a bottom side,
484 // followed by either more bottom sides or a top side. The last number
485 // should be a top side. We find places where the splits occur by looking
486 // for "valleys". If we want to force gap sizes or allow overlap, change
487 // the spacing above. If you want to let lines "slice" partitions as long
488 // as it is infrequent, change the following function.
489 FindCellSplitLocations(bottom_sides, top_sides, kCellSplitRowThreshold, &cell_y_);
490
491 // Recover the min/max correctly since it was shifted.
492 cell_y_[0] = min_bottom;
493 cell_y_[cell_y_.size() - 1] = max_top;
494}
const double kVerticalSpacing
Definition: tablerecog.cpp:37
const int kCellSplitRowThreshold
Definition: tablerecog.cpp:40

◆ FindWhitespacedStructure()

bool tesseract::StructuredTable::FindWhitespacedStructure ( )

Definition at line 210 of file tablerecog.cpp.

210 {
214
215 if (!VerifyWhitespacedTable()) {
216 return false;
217 } else {
225 return true;
226 }
227}
void set_right(int x)
Definition: rect.h:92

◆ Init()

void tesseract::StructuredTable::Init ( )

Definition at line 100 of file tablerecog.cpp.

100{}

◆ is_lined()

bool tesseract::StructuredTable::is_lined ( ) const

Definition at line 111 of file tablerecog.cpp.

111 {
112 return is_lined_;
113}

◆ median_cell_height()

int tesseract::StructuredTable::median_cell_height ( )

Definition at line 129 of file tablerecog.cpp.

129 {
130 return median_cell_height_;
131}

◆ median_cell_width()

int tesseract::StructuredTable::median_cell_width ( )

Definition at line 132 of file tablerecog.cpp.

132 {
133 return median_cell_width_;
134}

◆ row_count()

unsigned tesseract::StructuredTable::row_count ( ) const

Definition at line 114 of file tablerecog.cpp.

114 {
115 return cell_y_.empty() ? 0 : cell_y_.size() - 1;
116}

◆ row_height()

int tesseract::StructuredTable::row_height ( unsigned  row) const

Definition at line 135 of file tablerecog.cpp.

135 {
136 ASSERT_HOST(row < row_count());
137 return cell_y_[row + 1] - cell_y_[row];
138}

◆ set_bounding_box()

void tesseract::StructuredTable::set_bounding_box ( const TBOX & box)

Definition at line 123 of file tablerecog.cpp.

123 {
124 bounding_box_ = box;
125}

◆ set_line_grid()

void tesseract::StructuredTable::set_line_grid ( ColPartitionGrid * lines)

Definition at line 105 of file tablerecog.cpp.

105 {
106 line_grid_ = line_grid;
107}

◆ set_max_text_height()

void tesseract::StructuredTable::set_max_text_height ( int  height)

Definition at line 108 of file tablerecog.cpp.

108 {
109 max_text_height_ = height;
110}

◆ set_text_grid()

void tesseract::StructuredTable::set_text_grid ( ColPartitionGrid * text)

Definition at line 102 of file tablerecog.cpp.

102 {
103 text_grid_ = text_grid;
104}

◆ space_above()

int tesseract::StructuredTable::space_above ( ) const

Definition at line 143 of file tablerecog.cpp.

143 {
144 return space_above_;
145}

◆ space_below()

int tesseract::StructuredTable::space_below ( ) const

Definition at line 146 of file tablerecog.cpp.

146 {
147 return space_below_;
148}

◆ UpdateMargins()

void tesseract::StructuredTable::UpdateMargins ( ColPartitionGrid * grid)
protected

Definition at line 506 of file tablerecog.cpp.

506 {
507 int below = FindVerticalMargin(grid, bounding_box_.bottom(), true);
508 space_below_ = std::min(space_below_, below);
509 int above = FindVerticalMargin(grid, bounding_box_.top(), false);
510 space_above_ = std::min(space_above_, above);
511 int left = FindHorizontalMargin(grid, bounding_box_.left(), true);
512 space_left_ = std::min(space_left_, left);
513 int right = FindHorizontalMargin(grid, bounding_box_.right(), false);
514 space_right_ = std::min(space_right_, right);
515}
int FindVerticalMargin(ColPartitionGrid *grid, int start_x, bool decrease) const
Definition: tablerecog.cpp:516
int FindHorizontalMargin(ColPartitionGrid *grid, int start_y, bool decrease) const
Definition: tablerecog.cpp:533

◆ VerifyLinedTableCells()

bool tesseract::StructuredTable::VerifyLinedTableCells ( )
protected

Definition at line 350 of file tablerecog.cpp.

350 {
351 // Function only called when lines exist.
352 ASSERT_HOST(cell_y_.size() >= 2 && cell_x_.size() >= 2);
353 for (int i : cell_y_) {
354 if (CountHorizontalIntersections(i) > 0) {
355 return false;
356 }
357 }
358 for (int i : cell_x_) {
359 if (CountVerticalIntersections(i) > 0) {
360 return false;
361 }
362 }
363 return true;
364}
int CountHorizontalIntersections(int y)
Definition: tablerecog.cpp:699
int CountVerticalIntersections(int x)
Definition: tablerecog.cpp:673

◆ VerifyRowFilled()

bool tesseract::StructuredTable::VerifyRowFilled ( int  row)

Definition at line 282 of file tablerecog.cpp.

282 {
283 for (unsigned i = 0; i < column_count(); ++i) {
284 auto area_filled = CalculateCellFilledPercentage(row, i);
285 if (area_filled >= kMinFilledArea) {
286 return true;
287 }
288 }
289 return false;
290}
const double kMinFilledArea
Definition: tablerecog.cpp:60
double CalculateCellFilledPercentage(unsigned row, unsigned column)
Definition: tablerecog.cpp:294

◆ VerifyWhitespacedTable()

bool tesseract::StructuredTable::VerifyWhitespacedTable ( )
protected

Definition at line 374 of file tablerecog.cpp.

374 {
375 // criteria for a table, must be at least 2x3 or 3x2
376 return row_count() >= 2 && column_count() >= 2 && cell_count() >= 6;
377}

Member Data Documentation

◆ bounding_box_

TBOX tesseract::StructuredTable::bounding_box_
protected

Definition at line 236 of file tablerecog.h.

◆ cell_x_

std::vector<int> tesseract::StructuredTable::cell_x_
protected

Definition at line 237 of file tablerecog.h.

◆ cell_y_

std::vector<int> tesseract::StructuredTable::cell_y_
protected

Definition at line 238 of file tablerecog.h.

◆ is_lined_

bool tesseract::StructuredTable::is_lined_
protected

Definition at line 239 of file tablerecog.h.

◆ line_grid_

ColPartitionGrid* tesseract::StructuredTable::line_grid_
protected

Definition at line 232 of file tablerecog.h.

◆ max_text_height_

int tesseract::StructuredTable::max_text_height_
protected

Definition at line 248 of file tablerecog.h.

◆ median_cell_height_

int tesseract::StructuredTable::median_cell_height_
protected

Definition at line 245 of file tablerecog.h.

◆ median_cell_width_

int tesseract::StructuredTable::median_cell_width_
protected

Definition at line 246 of file tablerecog.h.

◆ space_above_

int tesseract::StructuredTable::space_above_
protected

Definition at line 241 of file tablerecog.h.

◆ space_below_

int tesseract::StructuredTable::space_below_
protected

Definition at line 242 of file tablerecog.h.

◆ space_left_

int tesseract::StructuredTable::space_left_
protected

Definition at line 243 of file tablerecog.h.

◆ space_right_

int tesseract::StructuredTable::space_right_
protected

Definition at line 244 of file tablerecog.h.

◆ text_grid_

ColPartitionGrid* tesseract::StructuredTable::text_grid_
protected

Definition at line 231 of file tablerecog.h.


The documentation for this class was generated from the following files: