tesseract v5.3.3.20231005
tablerecog_test.cc
Go to the documentation of this file.
1// (C) Copyright 2017, Google Inc.
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5// http://www.apache.org/licenses/LICENSE-2.0
6// Unless required by applicable law or agreed to in writing, software
7// distributed under the License is distributed on an "AS IS" BASIS,
8// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9// See the License for the specific language governing permissions and
10// limitations under the License.
11
12#include <memory>
13
14#include "colpartition.h"
15#include "colpartitiongrid.h"
16#include "tablerecog.h"
17
18#include "include_gunit.h"
19
20namespace tesseract {
21
23public:
29};
30
32public:
39
40 void InjectCellY(int y) {
41 cell_y_.push_back(y);
42 std::sort(cell_y_.begin(), cell_y_.end());
43 }
44 void InjectCellX(int x) {
45 cell_x_.push_back(x);
46 std::sort(cell_x_.begin(), cell_x_.end());
47 }
48
49 void ExpectCellX(int x_min, int second, int add, int almost_done, int x_max) {
50 ASSERT_EQ(0, (almost_done - second) % add);
51 EXPECT_EQ(3 + (almost_done - second) / add, cell_x_.size());
52 EXPECT_EQ(x_min, cell_x_.at(0));
53 EXPECT_EQ(x_max, cell_x_.at(cell_x_.size() - 1));
54 for (unsigned i = 1; i < cell_x_.size() - 1; ++i) {
55 EXPECT_EQ(second + add * (i - 1), cell_x_.at(i));
56 }
57 }
58
60 EXPECT_GT(cell_x_.size(), 0);
61 for (unsigned i = 1; i < cell_x_.size(); ++i) {
62 EXPECT_LT(cell_x_.at(i - 1), cell_x_.at(i));
63 }
64 }
65};
66
67class SharedTest : public testing::Test {
68protected:
69 void SetUp() override {
70 std::locale::global(std::locale(""));
71 ICOORD bleft(0, 0);
72 ICOORD tright(1000, 1000);
73 text_grid_ = std::make_unique<ColPartitionGrid>(5, bleft, tright);
74 line_grid_ = std::make_unique<ColPartitionGrid>(5, bleft, tright);
75 }
76
77 void TearDown() override {
78 tesseract::ColPartition_IT memory(&allocated_parts_);
79 for (memory.mark_cycle_pt(); !memory.cycled_list(); memory.forward()) {
80 memory.data()->DeleteBoxes();
81 }
82 }
83
85 for (int row = 0; row < 800; row += 20) {
86 for (int col = 0; col < 500; col += 25) {
87 InsertPartition(col + 1, row + 1, col + 24, row + 19);
88 }
89 }
90 }
91
92 void InsertPartition(int left, int bottom, int right, int top) {
93 TBOX box(left, bottom, right, top);
95 part->set_median_width(3);
96 part->set_median_height(3);
97 text_grid_->InsertBBox(true, true, part);
98
99 tesseract::ColPartition_IT add_it(&allocated_parts_);
100 add_it.add_after_stay_put(part);
101 }
102
103 void InsertLines() {
104 line_box_.set_to_given_coords(100 - line_grid_->gridsize(), 10 - line_grid_->gridsize(),
105 450 + line_grid_->gridsize(), 50 + line_grid_->gridsize());
106 for (int i = 10; i <= 50; i += 10) {
107 InsertHorizontalLine(100, 450, i);
108 }
109 for (int i = 100; i <= 450; i += 50) {
110 InsertVerticalLine(i, 10, 50);
111 }
112
113 for (int i = 100; i <= 200; i += 20) {
114 InsertHorizontalLine(0, 100, i);
115 }
116 }
117
118 void InsertHorizontalLine(int left, int right, int y) {
119 TBOX box(left, y - line_grid_->gridsize(), right, y + line_grid_->gridsize());
121 line_grid_->InsertBBox(true, true, part);
122
123 tesseract::ColPartition_IT add_it(&allocated_parts_);
124 add_it.add_after_stay_put(part);
125 }
126 void InsertVerticalLine(int x, int bottom, int top) {
127 TBOX box(x - line_grid_->gridsize(), bottom, x + line_grid_->gridsize(), top);
129 line_grid_->InsertBBox(true, true, part);
130
131 tesseract::ColPartition_IT add_it(&allocated_parts_);
132 add_it.add_after_stay_put(part);
133 }
134
136 for (int y = 10; y <= 50; y += 10) {
137 for (int x = 100; x <= 450; x += 50) {
138 InsertPartition(x + 1, y + 1, x + 49, y + 9);
139 }
140 }
141 }
142
144 std::unique_ptr<ColPartitionGrid> text_grid_;
145 std::unique_ptr<ColPartitionGrid> line_grid_;
146 ColPartition_LIST allocated_parts_;
147};
148
150protected:
151 void SetUp() override {
153 recognizer_ = std::make_unique<TestableTableRecognizer>();
154 recognizer_->Init();
155 recognizer_->set_text_grid(text_grid_.get());
156 recognizer_->set_line_grid(line_grid_.get());
157 }
158
159 std::unique_ptr<TestableTableRecognizer> recognizer_;
160};
161
163protected:
164 void SetUp() override {
166 table_ = std::make_unique<TestableStructuredTable>();
167 table_->Init();
168 table_->set_text_grid(text_grid_.get());
169 table_->set_line_grid(line_grid_.get());
170 }
171
172 std::unique_ptr<TestableStructuredTable> table_;
173};
174
175TEST_F(TableRecognizerTest, HasSignificantLinesBasicPass) {
176 InsertLines();
177 TBOX smaller_guess(120, 15, 370, 45);
178 TBOX larger_guess(90, 5, 490, 70);
179 EXPECT_TRUE(recognizer_->HasSignificantLines(line_box_));
180 EXPECT_TRUE(recognizer_->HasSignificantLines(larger_guess));
181 EXPECT_TRUE(recognizer_->HasSignificantLines(smaller_guess));
182}
183
184TEST_F(TableRecognizerTest, HasSignificantLinesBasicFail) {
185 InsertLines();
186 TBOX box(370, 35, 500, 45);
187 EXPECT_FALSE(recognizer_->HasSignificantLines(box));
188}
189
190TEST_F(TableRecognizerTest, HasSignificantLinesHorizontalOnlyFails) {
191 InsertLines();
192 TBOX box(0, 100, 200, 200);
193 EXPECT_FALSE(recognizer_->HasSignificantLines(box));
194}
195
196TEST_F(TableRecognizerTest, FindLinesBoundingBoxBasic) {
197 InsertLines();
198 TBOX box(0, 0, 200, 50);
199 bool result = recognizer_->FindLinesBoundingBox(&box);
200 EXPECT_TRUE(result);
201 EXPECT_EQ(line_box_.left(), box.left());
202 EXPECT_EQ(line_box_.right(), box.right());
203 EXPECT_EQ(line_box_.bottom(), box.bottom());
204 EXPECT_EQ(line_box_.top(), box.top());
205}
206
207TEST_F(TableRecognizerTest, RecognizeLinedTableBasic) {
208 InsertLines();
209 TBOX guess(120, 15, 370, 45);
211 table.set_text_grid(text_grid_.get());
212 table.set_line_grid(line_grid_.get());
213
214 EXPECT_TRUE(recognizer_->RecognizeLinedTable(guess, &table));
215 EXPECT_EQ(line_box_.bottom(), table.bounding_box().bottom());
216 EXPECT_EQ(line_box_.top(), table.bounding_box().top());
217 EXPECT_EQ(line_box_.left(), table.bounding_box().left());
218 EXPECT_EQ(line_box_.right(), table.bounding_box().right());
219 EXPECT_EQ(line_box_.area(), table.bounding_box().area());
220 EXPECT_EQ(7, table.column_count());
221 EXPECT_EQ(4, table.row_count());
222 EXPECT_EQ(28, table.cell_count());
223 EXPECT_TRUE(table.is_lined());
224}
225
226TEST_F(TableRecognizerTest, RecognizeWhitespacedTableBasic) {
227 InsertPartitions();
228 TBOX guess(0, 0, 500, 800);
229
231 table.set_text_grid(text_grid_.get());
232 table.set_line_grid(line_grid_.get());
233 EXPECT_TRUE(recognizer_->RecognizeWhitespacedTable(guess, &table));
234 EXPECT_EQ(1, table.bounding_box().bottom());
235 EXPECT_EQ(799, table.bounding_box().top());
236 EXPECT_EQ(1, table.bounding_box().left());
237 EXPECT_EQ(499, table.bounding_box().right());
238 EXPECT_EQ(798 * 498, table.bounding_box().area());
239 EXPECT_EQ(500 / 25, table.column_count());
240 EXPECT_EQ(800 / 20, table.row_count());
241 EXPECT_EQ(500 * 800 / 20 / 25, table.cell_count());
242 EXPECT_FALSE(table.is_lined());
243}
244
245TEST_F(StructuredTableTest, CountVerticalIntersectionsAll) {
246 table_->set_bounding_box(TBOX(0, 0, 1000, 1000));
247 InsertPartition(0, 0, 100, 10);
248 InsertPartition(1, 12, 43, 21);
249 EXPECT_EQ(2, table_->CountVerticalIntersections(4));
250 EXPECT_EQ(2, table_->CountVerticalIntersections(20));
251 EXPECT_EQ(2, table_->CountVerticalIntersections(40));
252 EXPECT_EQ(1, table_->CountVerticalIntersections(50));
253 EXPECT_EQ(1, table_->CountVerticalIntersections(60));
254 EXPECT_EQ(1, table_->CountVerticalIntersections(80));
255 EXPECT_EQ(1, table_->CountVerticalIntersections(95));
256 EXPECT_EQ(0, table_->CountVerticalIntersections(104));
257 EXPECT_EQ(0, table_->CountVerticalIntersections(150));
258}
259
260TEST_F(StructuredTableTest, CountHorizontalIntersectionsAll) {
261 table_->set_bounding_box(TBOX(0, 0, 1000, 1000));
262 InsertPartition(0, 3, 100, 10);
263 InsertPartition(110, 5, 200, 16);
264
265 EXPECT_EQ(0, table_->CountHorizontalIntersections(0));
266 EXPECT_EQ(1, table_->CountHorizontalIntersections(4));
267 EXPECT_EQ(2, table_->CountHorizontalIntersections(8));
268 EXPECT_EQ(1, table_->CountHorizontalIntersections(12));
269 EXPECT_EQ(0, table_->CountHorizontalIntersections(20));
270}
271
272TEST_F(StructuredTableTest, VerifyLinedTableBasicPass) {
273 for (int y = 10; y <= 50; y += 10) {
274 table_->InjectCellY(y);
275 }
276 for (int x = 100; x <= 450; x += 50) {
277 table_->InjectCellX(x);
278 }
279 InsertLines();
280 InsertCellsInLines();
281 table_->set_bounding_box(line_box_);
282 EXPECT_TRUE(table_->VerifyLinedTableCells());
283}
284
285TEST_F(StructuredTableTest, VerifyLinedTableHorizontalFail) {
286 for (int y = 10; y <= 50; y += 10) {
287 table_->InjectCellY(y);
288 }
289 for (int x = 100; x <= 450; x += 50) {
290 table_->InjectCellX(x);
291 }
292 InsertLines();
293 InsertCellsInLines();
294 InsertPartition(101, 11, 299, 19);
295 table_->set_bounding_box(line_box_);
296 EXPECT_FALSE(table_->VerifyLinedTableCells());
297}
298
299TEST_F(StructuredTableTest, VerifyLinedTableVerticalFail) {
300 for (int y = 10; y <= 50; y += 10) {
301 table_->InjectCellY(y);
302 }
303 for (int x = 100; x <= 450; x += 50) {
304 table_->InjectCellX(x);
305 }
306 InsertLines();
307 InsertCellsInLines();
308 InsertPartition(151, 21, 199, 39);
309 table_->set_bounding_box(line_box_);
310 EXPECT_FALSE(table_->VerifyLinedTableCells());
311}
312
313TEST_F(StructuredTableTest, FindWhitespacedColumnsBasic) {
314 InsertPartitions();
315 TBOX guess(0, 0, 500, 800);
316 table_->set_bounding_box(guess);
317 table_->FindWhitespacedColumns();
318 table_->ExpectCellX(1, 25, 25, 475, 499);
319}
320
321TEST_F(StructuredTableTest, FindWhitespacedColumnsSorted) {
322 InsertPartitions();
323 TBOX guess(0, 0, 500, 800);
324 table_->set_bounding_box(guess);
325 table_->FindWhitespacedColumns();
326 table_->ExpectSortedX();
327}
328
329// TODO(nbeato): check failure cases
330// TODO(nbeato): check Recognize processes correctly on trivial real examples.
331
332} // namespace tesseract
@ TBOX
const double y
#define ASSERT_EQ(val1, val2)
Definition: gtest.h:2073
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:2043
#define EXPECT_GT(val1, val2)
Definition: gtest.h:2053
#define EXPECT_TRUE(condition)
Definition: gtest.h:1982
#define EXPECT_FALSE(condition)
Definition: gtest.h:1986
#define EXPECT_LT(val1, val2)
Definition: gtest.h:2049
@ BRT_TEXT
Definition: blobbox.h:82
@ BRT_HLINE
Definition: blobbox.h:76
@ BRT_VLINE
Definition: blobbox.h:77
@ BTFT_NONE
Definition: blobbox.h:111
TEST_F(EuroText, FastLatinOCR)
@ PT_HORZ_LINE
Definition: publictypes.h:64
@ PT_VERT_LINE
Definition: publictypes.h:65
@ PT_FLOWING_TEXT
Definition: publictypes.h:53
integer coordinate
Definition: points.h:36
TDimension left() const
Definition: rect.h:82
void set_to_given_coords(int x_min, int y_min, int x_max, int y_max)
Definition: rect.h:282
TDimension top() const
Definition: rect.h:68
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
int32_t area() const
Definition: rect.h:134
static ColPartition * FakePartition(const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
void set_median_width(int width)
Definition: colpartition.h:144
void set_median_height(int height)
Definition: colpartition.h:138
std::vector< int > cell_y_
Definition: tablerecog.h:238
unsigned column_count() const
Definition: tablerecog.cpp:117
const TBOX & bounding_box() const
Definition: tablerecog.cpp:126
std::vector< int > cell_x_
Definition: tablerecog.h:237
unsigned cell_count() const
Definition: tablerecog.cpp:120
void set_line_grid(ColPartitionGrid *lines)
Definition: tablerecog.cpp:105
int CountHorizontalIntersections(int y)
Definition: tablerecog.cpp:699
int CountVerticalIntersections(int x)
Definition: tablerecog.cpp:673
void set_text_grid(ColPartitionGrid *text)
Definition: tablerecog.cpp:102
unsigned row_count() const
Definition: tablerecog.cpp:114
bool RecognizeLinedTable(const TBOX &guess_box, StructuredTable *table)
Definition: tablerecog.cpp:788
bool FindLinesBoundingBox(TBOX *bounding_box)
Definition: tablerecog.cpp:847
bool HasSignificantLines(const TBOX &guess)
Definition: tablerecog.cpp:806
bool RecognizeWhitespacedTable(const TBOX &guess_box, StructuredTable *table)
Definition: tablerecog.cpp:908
StructuredTable * RecognizeTable(const TBOX &guess_box)
Definition: tablerecog.cpp:763
void ExpectCellX(int x_min, int second, int add, int almost_done, int x_max)
void SetUp() override
ColPartition_LIST allocated_parts_
std::unique_ptr< ColPartitionGrid > text_grid_
std::unique_ptr< ColPartitionGrid > line_grid_
void TearDown() override
void InsertVerticalLine(int x, int bottom, int top)
void InsertHorizontalLine(int left, int right, int y)
void InsertPartition(int left, int bottom, int right, int top)
std::unique_ptr< TestableTableRecognizer > recognizer_
std::unique_ptr< TestableStructuredTable > table_