tesseract v5.3.3.20231005
tesseract::TestableTableFinder Class Reference
Inheritance diagram for tesseract::TestableTableFinder:
tesseract::TableFinder

Public Member Functions

void ExpectPartition (const TBOX &box)
 
void ExpectPartitionCount (int expected_count)
 
bool GapInXProjection (int *xprojection, int length)
 
bool HasLeaderAdjacent (const ColPartition &part)
 
void InsertLeaderPartition (ColPartition *part)
 
void InsertTextPartition (ColPartition *part)
 
void set_global_median_blob_width (int width)
 
void set_global_median_ledding (int ledding)
 
void set_global_median_xheight (int xheight)
 
void SplitAndInsertFragmentedTextPartition (ColPartition *part)
 
- Public Member Functions inherited from tesseract::TableFinder
 TableFinder ()
 
 ~TableFinder ()
 
void set_resolution (int resolution)
 
void set_left_to_right_language (bool order)
 
void Init (int grid_size, const ICOORD &bottom_left, const ICOORD &top_right)
 
void InsertCleanPartitions (ColPartitionGrid *grid, TO_BLOCK *block)
 
void LocateTables (ColPartitionGrid *grid, ColPartitionSet **columns, WidthCallback width_cb, const FCOORD &reskew)
 

Additional Inherited Members

- Protected Member Functions inherited from tesseract::TableFinder
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void InsertTextPartition (ColPartition *part)
 
void InsertFragmentedTextPartition (ColPartition *part)
 
void InsertLeaderPartition (ColPartition *part)
 
void InsertRulingPartition (ColPartition *part)
 
void InsertImagePartition (ColPartition *part)
 
void SplitAndInsertFragmentedTextPartition (ColPartition *part)
 
bool AllowTextPartition (const ColPartition &part) const
 
bool AllowBlob (const BLOBNBOX &blob) const
 
void MoveColSegmentsToGrid (ColSegment_LIST *segments, ColSegmentGrid *col_seg_grid)
 
void InitializePartitions (ColPartitionSet **all_columns)
 
void SetVerticalSpacing (ColPartition *part)
 
void SetGlobalSpacings (ColPartitionGrid *grid)
 
void set_global_median_xheight (int xheight)
 
void set_global_median_blob_width (int width)
 
void set_global_median_ledding (int ledding)
 
void FindNeighbors ()
 
void MarkTablePartitions ()
 
void MarkPartitionsUsingLocalInformation ()
 
bool HasWideOrNoInterWordGap (ColPartition *part) const
 
bool HasLeaderAdjacent (const ColPartition &part)
 
void FilterFalseAlarms ()
 
void FilterParagraphEndings ()
 
void FilterHeaderAndFooter ()
 
void SmoothTablePartitionRuns ()
 
void GetColumnBlocks (ColPartitionSet **columns, ColSegment_LIST *col_segments)
 
void GroupColumnBlocks (ColSegment_LIST *current_segments, ColSegment_LIST *col_segments)
 
bool ConsecutiveBoxes (const TBOX &b1, const TBOX &b2)
 
void SetColumnsType (ColSegment_LIST *col_segments)
 
void GridMergeColumnBlocks ()
 
void GetTableColumns (ColSegment_LIST *table_columns)
 
void GetTableRegions (ColSegment_LIST *table_columns, ColSegment_LIST *table_regions)
 
void GridMergeTableRegions ()
 
bool BelongToOneTable (const TBOX &box1, const TBOX &box2)
 
void AdjustTableBoundaries ()
 
void GrowTableBox (const TBOX &table_box, TBOX *result_box)
 
void GrowTableToIncludePartials (const TBOX &table_box, const TBOX &search_range, TBOX *result_box)
 
void GrowTableToIncludeLines (const TBOX &table_box, const TBOX &search_range, TBOX *result_box)
 
bool HLineBelongsToTable (const ColPartition &part, const TBOX &table_box)
 
void IncludeLeftOutColumnHeaders (TBOX *table_box)
 
void DeleteSingleColumnTables ()
 
bool GapInXProjection (int *xprojection, int length)
 
void RecognizeTables ()
 
void DisplayColSegments (ScrollView *win, ColSegment_LIST *cols, ScrollView::Color color)
 
void DisplayColPartitions (ScrollView *win, ColPartitionGrid *grid, ScrollView::Color text_color, ScrollView::Color table_color)
 
void DisplayColPartitions (ScrollView *win, ColPartitionGrid *grid, ScrollView::Color default_color)
 
void DisplayColPartitionConnections (ScrollView *win, ColPartitionGrid *grid, ScrollView::Color default_color)
 
void MakeTableBlocks (ColPartitionGrid *grid, ColPartitionSet **columns, const WidthCallback &width_cb)
 
- Static Protected Member Functions inherited from tesseract::TableFinder
static void SetPartitionSpacings (ColPartitionGrid *grid, ColPartitionSet **all_columns)
 
- Protected Attributes inherited from tesseract::TableFinder
int resolution_
 
int global_median_xheight_
 
int global_median_blob_width_
 
int global_median_ledding_
 
ColPartitionGrid clean_part_grid_
 
ColPartitionGrid leader_and_ruling_grid_
 
ColPartitionGrid fragmented_text_grid_
 
ColSegmentGrid col_seg_grid_
 
ColSegmentGrid table_grid_
 
bool left_to_right_language_
 

Detailed Description

Definition at line 22 of file tablefind_test.cc.

Member Function Documentation

◆ ExpectPartition()

void tesseract::TestableTableFinder::ExpectPartition ( const TBOX & box)
inline

Definition at line 33 of file tablefind_test.cc.

33 {
34 ColPartitionGridSearch gsearch(&fragmented_text_grid_);
35 gsearch.SetUniqueMode(true);
36 gsearch.StartFullSearch();
37 ColPartition *part = nullptr;
38 bool found = false;
39 while ((part = gsearch.NextFullSearch()) != nullptr) {
40 if (part->bounding_box().left() == box.left() &&
41 part->bounding_box().bottom() == box.bottom() &&
42 part->bounding_box().right() == box.right() && part->bounding_box().top() == box.top()) {
43 found = true;
44 }
45 }
46 EXPECT_TRUE(found);
47 }
#define EXPECT_TRUE(condition)
Definition: gtest.h:1982
ColPartitionGrid fragmented_text_grid_
Definition: tablefind.h:401

◆ ExpectPartitionCount()

void tesseract::TestableTableFinder::ExpectPartitionCount ( int  expected_count)
inline

Definition at line 48 of file tablefind_test.cc.

48 {
49 ColPartitionGridSearch gsearch(&fragmented_text_grid_);
50 gsearch.SetUniqueMode(true);
51 gsearch.StartFullSearch();
52 ColPartition *part = nullptr;
53 int count = 0;
54 while ((part = gsearch.NextFullSearch()) != nullptr) {
55 ++count;
56 }
57 EXPECT_EQ(expected_count, count);
58 }
int * count
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:2043

◆ GapInXProjection()

bool tesseract::TableFinder::GapInXProjection ( int *  xprojection,
int  length 
)

Definition at line 345 of file tablefind.cpp.

1838 {
1839 // Find peak value of the histogram
1840 int peak_value = 0;
1841 for (int i = 0; i < length; i++) {
1842 if (xprojection[i] > peak_value) {
1843 peak_value = xprojection[i];
1844 }
1845 }
1846 // Peak value represents the maximum number of horizontally
1847 // overlapping colpartitions, so this can be considered as the
1848 // number of rows in the table
1849 if (peak_value < kMinRowsInTable) {
1850 return false;
1851 }
1852 double projection_threshold = kSmallTableProjectionThreshold * peak_value;
1853 if (peak_value >= kLargeTableRowCount) {
1854 projection_threshold = kLargeTableProjectionThreshold * peak_value;
1855 }
1856 // Threshold the histogram
1857 for (int i = 0; i < length; i++) {
1858 xprojection[i] = (xprojection[i] >= projection_threshold) ? 1 : 0;
1859 }
1860 // Find the largest run of zeros between two ones
1861 int largest_gap = 0;
1862 int run_start = -1;
1863 for (int i = 1; i < length; i++) {
1864 // detect start of a run of zeros
1865 if (xprojection[i - 1] && !xprojection[i]) {
1866 run_start = i;
1867 }
1868 // detect end of a run of zeros and update the value of largest gap
1869 if (run_start != -1 && !xprojection[i - 1] && xprojection[i]) {
1870 int gap = i - run_start;
1871 if (gap > largest_gap) {
1872 largest_gap = gap;
1873 }
1874 run_start = -1;
1875 }
1876 }
1877 return largest_gap > kMaxXProjectionGapFactor * global_median_xheight_;
1878}
const double kLargeTableProjectionThreshold
Definition: tablefind.cpp:107
const int kMinRowsInTable
Definition: tablefind.cpp:112
const int kLargeTableRowCount
Definition: tablefind.cpp:109
const double kSmallTableProjectionThreshold
Definition: tablefind.cpp:106
const double kMaxXProjectionGapFactor
Definition: tablefind.cpp:136

◆ HasLeaderAdjacent()

bool tesseract::TableFinder::HasLeaderAdjacent ( const ColPartition & part)

Definition at line 244 of file tablefind.cpp.

969 {
970 if (part.flow() == BTFT_LEADER) {
971 return true;
972 }
973 // Search range is left and right bounded by an offset of the
974 // median xheight. This offset is to allow some tolerance to the
975 // the leaders on the page in the event that the alignment is still
976 // a bit off.
977 const TBOX &box = part.bounding_box();
978 const int search_size = kAdjacentLeaderSearchPadding * global_median_xheight_;
979 const int top = box.top() + search_size;
980 const int bottom = box.bottom() - search_size;
981 ColPartitionGridSearch hsearch(&leader_and_ruling_grid_);
982 for (int direction = 0; direction < 2; ++direction) {
983 bool right_to_left = (direction == 0);
984 int x = right_to_left ? box.right() : box.left();
985 hsearch.StartSideSearch(x, bottom, top);
986 ColPartition *leader = nullptr;
987 while ((leader = hsearch.NextSideSearch(right_to_left)) != nullptr) {
988 // The leader could be a horizontal ruling in the grid.
989 // Make sure it is actually a leader.
990 if (leader->flow() != BTFT_LEADER) {
991 continue;
992 }
993 // This should not happen, they are in different grids.
994 ASSERT_HOST(&part != leader);
995 // Make sure the leader shares a page column with the partition,
996 // otherwise we are spreading across columns.
997 if (!part.IsInSameColumnAs(*leader)) {
998 break;
999 }
1000 // There should be a significant vertical overlap
1001 if (!leader->VSignificantCoreOverlap(part)) {
1002 continue;
1003 }
1004 // Leader passed all tests, so it is adjacent.
1005 return true;
1006 }
1007 }
1008 // No leaders are adjacent to the given partition.
1009 return false;
1010}
#define ASSERT_HOST(x)
Definition: errcode.h:54
@ TBOX
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:919
const int kAdjacentLeaderSearchPadding
Definition: tablefind.cpp:117
@ BTFT_LEADER
Definition: blobbox.h:117
ColPartitionGrid leader_and_ruling_grid_
Definition: tablefind.h:397

◆ InsertLeaderPartition()

void tesseract::TableFinder::InsertLeaderPartition ( ColPartition * part)

Definition at line 169 of file tablefind.cpp.

411 {
412 ASSERT_HOST(part != nullptr);
413 if (!part->IsEmpty() && part->bounding_box().area() > 0) {
414 leader_and_ruling_grid_.InsertBBox(true, true, part);
415 } else {
416 delete part;
417 }
418}
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
Definition: bbgrid.h:529

◆ InsertTextPartition()

void tesseract::TableFinder::InsertTextPartition ( ColPartition * part)

Definition at line 167 of file tablefind.cpp.

395 {
396 ASSERT_HOST(part != nullptr);
397 if (AllowTextPartition(*part)) {
398 clean_part_grid_.InsertBBox(true, true, part);
399 } else {
400 delete part;
401 }
402}
ColPartitionGrid clean_part_grid_
Definition: tablefind.h:395
bool AllowTextPartition(const ColPartition &part) const
Definition: tablefind.cpp:490

◆ set_global_median_blob_width()

void tesseract::TableFinder::set_global_median_blob_width ( int  width)

Definition at line 214 of file tablefind.cpp.

766 {
767 global_median_blob_width_ = width;
768}

◆ set_global_median_ledding()

void tesseract::TableFinder::set_global_median_ledding ( int  ledding)

Definition at line 219 of file tablefind.cpp.

769 {
770 global_median_ledding_ = ledding;
771}

◆ set_global_median_xheight()

void tesseract::TableFinder::set_global_median_xheight ( int  xheight)

Definition at line 211 of file tablefind.cpp.

763 {
764 global_median_xheight_ = xheight;
765}

◆ SplitAndInsertFragmentedTextPartition()

void tesseract::TableFinder::SplitAndInsertFragmentedTextPartition ( ColPartition * part)

Definition at line 172 of file tablefind.cpp.

437 {
438 ASSERT_HOST(part != nullptr);
439 // Bye bye empty partitions!
440 if (part->boxes()->empty()) {
441 delete part;
442 return;
443 }
444
445 // The AllowBlob function prevents this.
446 ASSERT_HOST(part->median_width() > 0);
447 const double kThreshold = part->median_width() * kSplitPartitionSize;
448
449 ColPartition *right_part = part;
450 bool found_split = true;
451 while (found_split) {
452 found_split = false;
453 BLOBNBOX_C_IT box_it(right_part->boxes());
454 // Blobs are sorted left side first. If blobs overlap,
455 // the previous blob may have a "more right" right side.
456 // Account for this by always keeping the largest "right"
457 // so far.
458 int previous_right = INT32_MIN;
459
460 // Look for the next split in the partition.
461 for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
462 const TBOX &box = box_it.data()->bounding_box();
463 if (previous_right != INT32_MIN &&
464 box.left() - previous_right > kThreshold) {
465 // We have a split position. Split the partition in two pieces.
466 // Insert the left piece in the grid and keep processing the right.
467 int mid_x = (box.left() + previous_right) / 2;
468 ColPartition *left_part = right_part;
469 right_part = left_part->SplitAt(mid_x);
470
471 InsertFragmentedTextPartition(left_part);
472 found_split = true;
473 break;
474 }
475
476 // The right side of the previous blobs.
477 previous_right = std::max(previous_right, static_cast<int>(box.right()));
478 }
479 }
480 // When a split is not found, the right part is minimized
481 // as much as possible, so process it.
482 InsertFragmentedTextPartition(right_part);
483}
const double kSplitPartitionSize
Definition: tablefind.cpp:44
void InsertFragmentedTextPartition(ColPartition *part)
Definition: tablefind.cpp:403

The documentation for this class was generated from the following file: