tesseract v5.3.3.20231005
tesseract::TestableStructuredTable Class Reference
Inheritance diagram for tesseract::TestableStructuredTable:
tesseract::StructuredTable

Public Member Functions

void InjectCellY (int y)
 
void InjectCellX (int x)
 
void ExpectCellX (int x_min, int second, int add, int almost_done, int x_max)
 
void ExpectSortedX ()
 
int CountHorizontalIntersections (int y)
 
int CountVerticalIntersections (int x)
 
bool FindLinedStructure ()
 
void FindWhitespacedColumns ()
 
bool FindWhitespacedStructure ()
 
bool VerifyLinedTableCells ()
 
- Public Member Functions inherited from tesseract::StructuredTable
 StructuredTable ()
 
 ~StructuredTable ()=default
 
void Init ()
 
void set_text_grid (ColPartitionGrid *text)
 
void set_line_grid (ColPartitionGrid *lines)
 
void set_max_text_height (int height)
 
bool is_lined () const
 
unsigned row_count () const
 
unsigned column_count () const
 
unsigned cell_count () const
 
void set_bounding_box (const TBOX &box)
 
const TBOX & bounding_box () const
 
int median_cell_height ()
 
int median_cell_width ()
 
int row_height (unsigned row) const
 
int column_width (unsigned column) const
 
int space_above () const
 
int space_below () const
 
bool FindLinedStructure ()
 
bool FindWhitespacedStructure ()
 
bool DoesPartitionFit (const ColPartition &part) const
 
int CountFilledCells ()
 
int CountFilledCellsInRow (int row)
 
int CountFilledCellsInColumn (int column)
 
int CountFilledCells (unsigned row_start, unsigned row_end, unsigned column_start, unsigned column_end)
 
bool VerifyRowFilled (int row)
 
double CalculateCellFilledPercentage (unsigned row, unsigned column)
 
void Display (ScrollView *window, ScrollView::Color color)
 

Additional Inherited Members

- Protected Member Functions inherited from tesseract::StructuredTable
void ClearStructure ()
 
bool VerifyLinedTableCells ()
 
bool VerifyWhitespacedTable ()
 
void FindWhitespacedColumns ()
 
void FindWhitespacedRows ()
 
void CalculateMargins ()
 
void UpdateMargins (ColPartitionGrid *grid)
 
int FindVerticalMargin (ColPartitionGrid *grid, int start_x, bool decrease) const
 
int FindHorizontalMargin (ColPartitionGrid *grid, int start_y, bool decrease) const
 
void CalculateStats ()
 
void AbsorbNearbyLines ()
 
int CountVerticalIntersections (int x)
 
int CountHorizontalIntersections (int y)
 
int CountPartitions (const TBOX &box)
 
- Static Protected Member Functions inherited from tesseract::StructuredTable
static void FindCellSplitLocations (const std::vector< int > &min_list, const std::vector< int > &max_list, int max_merged, std::vector< int > *locations)
 
- Protected Attributes inherited from tesseract::StructuredTable
ColPartitionGrid * text_grid_
 
ColPartitionGrid * line_grid_
 
TBOX bounding_box_
 
std::vector< int > cell_x_
 
std::vector< int > cell_y_
 
bool is_lined_
 
int space_above_
 
int space_below_
 
int space_left_
 
int space_right_
 
int median_cell_height_
 
int median_cell_width_
 
int max_text_height_
 

Detailed Description

Definition at line 31 of file tablerecog_test.cc.

Member Function Documentation

◆ CountHorizontalIntersections()

int tesseract::StructuredTable::CountHorizontalIntersections ( int  y)

Definition at line 221 of file tablerecog.cpp.

699 {
700 int count = 0;
701 // Make a small box to keep the search time down.
702 const int kGridSize = text_grid_->gridsize();
703 TBOX horizontal_box = bounding_box_;
704 horizontal_box.set_bottom(y - kGridSize);
705 horizontal_box.set_top(y + kGridSize);
706
707 ColPartitionGridSearch gsearch(text_grid_);
708 gsearch.SetUniqueMode(true);
709 gsearch.StartRectSearch(horizontal_box);
710 ColPartition *text = nullptr;
711 while ((text = gsearch.NextRectSearch()) != nullptr) {
712 if (!text->IsTextType()) {
713 continue;
714 }
715
716 const TBOX &box = text->bounding_box();
717 if (box.bottom() < y && y < box.top()) {
718 ++count;
719 }
720 }
721 return count;
722}
@ TBOX
const double y
int * count
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:919
void set_bottom(int y)
Definition: rect.h:78
int gridsize() const
Definition: bbgrid.h:63
ColPartitionGrid * text_grid_
Definition: tablerecog.h:231

◆ CountVerticalIntersections()

int tesseract::StructuredTable::CountVerticalIntersections ( int  x)

Definition at line 220 of file tablerecog.cpp.

673 {
674 int count = 0;
675 // Make a small box to keep the search time down.
676 const int kGridSize = text_grid_->gridsize();
677 TBOX vertical_box = bounding_box_;
678 vertical_box.set_left(x - kGridSize);
679 vertical_box.set_right(x + kGridSize);
680
681 ColPartitionGridSearch gsearch(text_grid_);
682 gsearch.SetUniqueMode(true);
683 gsearch.StartRectSearch(vertical_box);
684 ColPartition *text = nullptr;
685 while ((text = gsearch.NextRectSearch()) != nullptr) {
686 if (!text->IsTextType()) {
687 continue;
688 }
689 const TBOX &box = text->bounding_box();
690 if (box.left() < x && x < box.right()) {
691 ++count;
692 }
693 }
694 return count;
695}
void set_left(int x)
Definition: rect.h:85

◆ ExpectCellX()

void tesseract::TestableStructuredTable::ExpectCellX ( int  x_min,
int  second,
int  add,
int  almost_done,
int  x_max 
)
inline

Definition at line 49 of file tablerecog_test.cc.

49 {
50 ASSERT_EQ(0, (almost_done - second) % add);
51 EXPECT_EQ(3 + (almost_done - second) / add, cell_x_.size());
52 EXPECT_EQ(x_min, cell_x_.at(0));
53 EXPECT_EQ(x_max, cell_x_.at(cell_x_.size() - 1));
54 for (unsigned i = 1; i < cell_x_.size() - 1; ++i) {
55 EXPECT_EQ(second + add * (i - 1), cell_x_.at(i));
56 }
57 }
#define ASSERT_EQ(val1, val2)
Definition: gtest.h:2073
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:2043
std::vector< int > cell_x_
Definition: tablerecog.h:237

◆ ExpectSortedX()

void tesseract::TestableStructuredTable::ExpectSortedX ( )
inline

Definition at line 59 of file tablerecog_test.cc.

59 {
60 EXPECT_GT(cell_x_.size(), 0);
61 for (unsigned i = 1; i < cell_x_.size(); ++i) {
62 EXPECT_LT(cell_x_.at(i - 1), cell_x_.at(i));
63 }
64 }
#define EXPECT_GT(val1, val2)
Definition: gtest.h:2053
#define EXPECT_LT(val1, val2)
Definition: gtest.h:2049

◆ FindLinedStructure()

bool tesseract::StructuredTable::FindLinedStructure ( )

Definition at line 105 of file tablerecog.cpp.

157 {
158 ClearStructure();
159
160 // Search for all of the lines in the current box.
161 // Update the cellular structure with the exact lines.
162 ColPartitionGridSearch box_search(line_grid_);
163 box_search.SetUniqueMode(true);
164 box_search.StartRectSearch(bounding_box_);
165 ColPartition *line = nullptr;
166
167 while ((line = box_search.NextRectSearch()) != nullptr) {
168 if (line->IsHorizontalLine()) {
169 cell_y_.push_back(line->MidY());
170 }
171 if (line->IsVerticalLine()) {
172 cell_x_.push_back(line->MidX());
173 }
174 }
175
176 // HasSignificantLines should guarantee cells.
177 // Because that code is a different class, just gracefully
178 // return false. This could be an assert.
179 if (cell_x_.size() < 3 || cell_y_.size() < 3) {
180 return false;
181 }
182
183 // Sort and remove duplicates that may have occurred due to split lines.
184 std::sort(cell_x_.begin(), cell_x_.end());
185 auto last_x = std::unique(cell_x_.begin(), cell_x_.end());
186 cell_x_.erase(last_x, cell_x_.end());
187 std::sort(cell_y_.begin(), cell_y_.end());
188 auto last_y = std::unique(cell_y_.begin(), cell_y_.end());
189 cell_y_.erase(last_y, cell_y_.end());
190
191 // The border should be the extents of line boxes, not middle.
192 cell_x_[0] = bounding_box_.left();
193 cell_x_[cell_x_.size() - 1] = bounding_box_.right();
194 cell_y_[0] = bounding_box_.bottom();
195 cell_y_[cell_y_.size() - 1] = bounding_box_.top();
196
197 // Remove duplicates that may have occurred due to moving the borders.
198 last_x = std::unique(cell_x_.begin(), cell_x_.end());
199 cell_x_.erase(last_x, cell_x_.end());
200 last_y = std::unique(cell_y_.begin(), cell_y_.end());
201 cell_y_.erase(last_y, cell_y_.end());
202
203 CalculateMargins();
204 CalculateStats();
205 is_lined_ = VerifyLinedTableCells();
206 return is_lined_;
207}
TDimension left() const
Definition: rect.h:82
TDimension top() const
Definition: rect.h:68
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
std::vector< int > cell_y_
Definition: tablerecog.h:238
ColPartitionGrid * line_grid_
Definition: tablerecog.h:232

◆ FindWhitespacedColumns()

void tesseract::StructuredTable::FindWhitespacedColumns ( )

Definition at line 160 of file tablerecog.cpp.

384 {
385 // Set of the extents of all partitions on the page.
386 std::vector<int> left_sides;
387 std::vector<int> right_sides;
388
389 // Look at each text partition. We want to find the partitions
390 // that have extremal left/right sides. These will give us a basis
391 // for the table columns.
392 ColPartitionGridSearch gsearch(text_grid_);
393 gsearch.SetUniqueMode(true);
394 gsearch.StartRectSearch(bounding_box_);
395 ColPartition *text = nullptr;
396 while ((text = gsearch.NextRectSearch()) != nullptr) {
397 if (!text->IsTextType()) {
398 continue;
399 }
400
401 ASSERT_HOST(text->bounding_box().left() < text->bounding_box().right());
402 int spacing = static_cast<int>(text->median_width() * kHorizontalSpacing / 2.0 + 0.5);
403 left_sides.push_back(text->bounding_box().left() - spacing);
404 right_sides.push_back(text->bounding_box().right() + spacing);
405 }
406 // It causes disaster below, so avoid it!
407 if (left_sides.empty() || right_sides.empty()) {
408 return;
409 }
410
411 // Since data may be inserted in grid order, we sort the left/right sides.
412 std::sort(left_sides.begin(), left_sides.end());
413 std::sort(right_sides.begin(), right_sides.end());
414
415 // At this point, in the "merged list", we expect to have a left side,
416 // followed by either more left sides or a right side. The last number
417 // should be a right side. We find places where the splits occur by looking
418 // for "valleys". If we want to force gap sizes or allow overlap, change
419 // the spacing above. If you want to let lines "slice" partitions as long
420 // as it is infrequent, change the following function.
421 FindCellSplitLocations(left_sides, right_sides, kCellSplitColumnThreshold, &cell_x_);
422}
#define ASSERT_HOST(x)
Definition: errcode.h:54
const double kHorizontalSpacing
Definition: tablerecog.cpp:34
const int kCellSplitColumnThreshold
Definition: tablerecog.cpp:41
static void FindCellSplitLocations(const std::vector< int > &min_list, const std::vector< int > &max_list, int max_merged, std::vector< int > *locations)
Definition: tablerecog.cpp:630

◆ FindWhitespacedStructure()

bool tesseract::StructuredTable::FindWhitespacedStructure ( )

Definition at line 110 of file tablerecog.cpp.

210 {
211 ClearStructure();
212 FindWhitespacedColumns();
213 FindWhitespacedRows();
214
215 if (!VerifyWhitespacedTable()) {
216 return false;
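217 } else {
218 bounding_box_.set_left(cell_x_[0]);
219 bounding_box_.set_right(cell_x_[cell_x_.size() - 1]);
220 bounding_box_.set_bottom(cell_y_[0]);
221 bounding_box_.set_top(cell_y_[cell_y_.size() - 1]);
222 AbsorbNearbyLines();
223 CalculateMargins();
224 CalculateStats();
225 return true;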
226 }
227}
void set_right(int x)
Definition: rect.h:92
void set_top(int y)
Definition: rect.h:71

◆ InjectCellX()

void tesseract::TestableStructuredTable::InjectCellX ( int  x)
inline

Definition at line 44 of file tablerecog_test.cc.

44 {
45 cell_x_.push_back(x);
46 std::sort(cell_x_.begin(), cell_x_.end());
47 }

◆ InjectCellY()

void tesseract::TestableStructuredTable::InjectCellY ( int  y)
inline

Definition at line 40 of file tablerecog_test.cc.

40 {
41 cell_y_.push_back(y);
42 std::sort(cell_y_.begin(), cell_y_.end());
43 }

◆ VerifyLinedTableCells()

bool tesseract::StructuredTable::VerifyLinedTableCells ( )

Definition at line 148 of file tablerecog.cpp.

350 {
351 // Function only called when lines exist.
352 ASSERT_HOST(cell_y_.size() >= 2 && cell_x_.size() >= 2);
353 for (int i : cell_y_) {
354 if (CountHorizontalIntersections(i) > 0) {
355 return false;
356 }
357 }
358 for (int i : cell_x_) {
359 if (CountVerticalIntersections(i) > 0) {
360 return false;
361 }
362 }
363 return true;
364}

The documentation for this class was generated from the following file: