tesseract v5.3.3.20231005
tablefind_test.cc
Go to the documentation of this file.
1// (C) Copyright 2017, Google Inc.
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5// http://www.apache.org/licenses/LICENSE-2.0
6// Unless required by applicable law or agreed to in writing, software
7// distributed under the License is distributed on an "AS IS" BASIS,
8// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9// See the License for the specific language governing permissions and
10// limitations under the License.
11
12#include <memory>
13
14#include "colpartition.h"
15#include "colpartitiongrid.h"
16#include "tablefind.h"
17
18#include "include_gunit.h"
19
20namespace tesseract {
21
23public:
32
// Expects that at least one partition returned by the full search has a
// bounding box whose left, bottom, right and top all equal those of `box`.
// NOTE(review): listing line 34, which should declare `gsearch`, is missing
// from this extract -- presumably a grid search over fragmented_text_grid_;
// confirm against the upstream file before compiling.
33 void ExpectPartition(const TBOX &box) {
35 gsearch.SetUniqueMode(true);
36 gsearch.StartFullSearch();
37 ColPartition *part = nullptr;
38 bool found = false;
// The scan does not stop at the first match; only the final flag is checked.
39 while ((part = gsearch.NextFullSearch()) != nullptr) {
40 if (part->bounding_box().left() == box.left() &&
41 part->bounding_box().bottom() == box.bottom() &&
42 part->bounding_box().right() == box.right() && part->bounding_box().top() == box.top()) {
43 found = true;
44 }
45 }
46 EXPECT_TRUE(found);
47 }
// Expects that the full search yields exactly `expected_count` partitions.
// NOTE(review): listing line 49, which should declare `gsearch`, is missing
// from this extract -- presumably the same kind of search object used by
// ExpectPartition; confirm against the upstream file.
48 void ExpectPartitionCount(int expected_count) {
50 gsearch.SetUniqueMode(true);
51 gsearch.StartFullSearch();
52 ColPartition *part = nullptr;
53 int count = 0;
// `part` is only used to detect the end of the search; each hit is counted.
54 while ((part = gsearch.NextFullSearch()) != nullptr) {
55 ++count;
56 }
57 EXPECT_EQ(expected_count, count);
58 }
59};
60
62protected:
63 void SetUp() override {
64 std::locale::global(std::locale(""));
65 free_boxes_it_.set_to_list(&free_boxes_);
66 finder_ = std::make_unique<TestableTableFinder>();
67 finder_->Init(1, ICOORD(0, 0), ICOORD(500, 500));
68 // gap finding
69 finder_->set_global_median_xheight(5);
70 finder_->set_global_median_blob_width(5);
71 }
72
// Per-test cleanup: deletes the boxes of partition_ if one was created, then
// destroys the finder.
// NOTE(review): listing line 77 (between the if-block and the reset) is
// missing from this extract -- check the upstream file for the omitted
// statement.
73 void TearDown() override {
74 if (partition_.get() != nullptr) {
75 partition_->DeleteBoxes();
76 }
78 finder_.reset(nullptr);
79 }
80
81 void MakePartition(int x_min, int y_min, int x_max, int y_max) {
82 MakePartition(x_min, y_min, x_max, y_max, 0, 0);
83 }
84
// Prepares partition_ for a test: deletes the boxes of any existing
// partition, builds a TBOX from the given coordinates, and sets the first
// and last column on partition_.
// NOTE(review): listing line 92 is missing from this extract. `box` is not
// used by any visible line, so the missing statement presumably (re)creates
// partition_ from `box` -- confirm against the upstream file.
85 void MakePartition(int x_min, int y_min, int x_max, int y_max, int first_column,
86 int last_column) {
87 if (partition_.get() != nullptr) {
88 partition_->DeleteBoxes();
89 }
90 TBOX box;
91 box.set_to_given_coords(x_min, y_min, x_max, y_max);
93 partition_->set_first_column(first_column);
94 partition_->set_last_column(last_column);
95 }
96
// NOTE(review): the signature (listing line 97) is missing from this extract;
// the cross-reference list at the end of the listing names a fixture method
// `void InsertTextPartition(ColPartition *part)`, which presumably owns this
// body -- confirm against the upstream file.
// Forwards `part` to the finder as a text partition and appends it to the
// list behind free_boxes_it_.
98 finder_->InsertTextPartition(part);
99 free_boxes_it_.add_after_then_move(part);
100 }
101
102 void InsertLeaderPartition(int x_min, int y_min, int x_max, int y_max) {
103 InsertLeaderPartition(x_min, y_min, x_max, y_max, 0, 0);
104 }
105
// Builds a TBOX from the given coordinates, sets the column range on a
// partition, hands it to the finder as a leader partition, and appends it to
// the list behind free_boxes_it_.
106 void InsertLeaderPartition(int x_min, int y_min, int x_max, int y_max, int first_column,
107 int last_column) {
108 TBOX box;
109 box.set_to_given_coords(x_min, y_min, x_max, y_max);
110 ColPartition *part =
// NOTE(review): listing line 111 (the initializer of `part`) is missing from
// this extract -- presumably a partition created from `box`; confirm against
// the upstream file.
112 part->set_first_column(first_column);
113 part->set_last_column(last_column);
114 finder_->InsertLeaderPartition(part);
115 free_boxes_it_.add_after_then_move(part);
116 }
117
// NOTE(review): the signature of this method (listing line 118) is missing
// from this extract -- restore it from the upstream file.
// Walks the whole list behind free_boxes_it_ once and calls DeleteBoxes() on
// every partition in it.
119 for (free_boxes_it_.mark_cycle_pt(); !free_boxes_it_.cycled_list(); free_boxes_it_.forward()) {
120 ColPartition *part = free_boxes_it_.data();
121 part->DeleteBoxes();
122 }
123 }
124
// Finder under test; created in SetUp and reset in TearDown.
125 std::unique_ptr<TestableTableFinder> finder_;
// Partition most recently prepared by MakePartition; the tests pass it to the
// finder by reference.
126 std::unique_ptr<ColPartition> partition_;
127
128private:
// List of partitions that were handed to the finder by the Insert* helpers.
129 tesseract::ColPartition_CLIST free_boxes_;
// Iterator over free_boxes_; attached to the list in SetUp.
130 tesseract::ColPartition_C_IT free_boxes_it_;
131};
132
133TEST_F(TableFinderTest, GapInXProjectionNoGap) {
134 int data[100];
135 for (int &i : data) {
136 i = 10;
137 }
138 EXPECT_FALSE(finder_->GapInXProjection(data, 100));
139}
140
141TEST_F(TableFinderTest, GapInXProjectionEdgeGap) {
142 int data[100];
143 for (int i = 0; i < 10; ++i) {
144 data[i] = 2;
145 }
146 for (int i = 10; i < 90; ++i) {
147 data[i] = 10;
148 }
149 for (int i = 90; i < 100; ++i) {
150 data[i] = 2;
151 }
152 EXPECT_FALSE(finder_->GapInXProjection(data, 100));
153}
154
155TEST_F(TableFinderTest, GapInXProjectionExists) {
156 int data[100];
157 for (int i = 0; i < 10; ++i) {
158 data[i] = 10;
159 }
160 for (int i = 10; i < 90; ++i) {
161 data[i] = 2;
162 }
163 for (int i = 90; i < 100; ++i) {
164 data[i] = 10;
165 }
166 EXPECT_TRUE(finder_->GapInXProjection(data, 100));
167}
168
169TEST_F(TableFinderTest, HasLeaderAdjacentOverlapping) {
170 InsertLeaderPartition(90, 0, 150, 5);
171 MakePartition(0, 0, 100, 10);
172 EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
173 MakePartition(0, 25, 100, 40);
174 EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
175 MakePartition(145, 0, 200, 20);
176 EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
177 MakePartition(40, 0, 50, 4);
178 EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
179}
180
181TEST_F(TableFinderTest, HasLeaderAdjacentNoOverlap) {
182 InsertLeaderPartition(90, 10, 150, 15);
183 MakePartition(0, 10, 85, 20);
184 EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
185 MakePartition(0, 25, 100, 40);
186 EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
187 MakePartition(0, 0, 100, 10);
188 EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
189 // TODO(nbeato): is this a useful metric? case fails
190 // MakePartition(160, 0, 200, 15); // leader is primarily above it
191 // EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
192}
193
194TEST_F(TableFinderTest, HasLeaderAdjacentPreservesColumns) {
195 InsertLeaderPartition(90, 0, 150, 5, 1, 2);
196 MakePartition(0, 0, 85, 10, 0, 0);
197 EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
198 MakePartition(0, 0, 100, 10, 0, 1);
199 EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
200 MakePartition(0, 0, 200, 10, 0, 5);
201 EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
202 MakePartition(155, 0, 200, 10, 5, 5);
203 EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
204}
205
206// TODO(nbeato): Only testing a splitting case. Add more...
207// Also test non-split cases.
208TEST_F(TableFinderTest, SplitAndInsertFragmentedPartitionsBasicPass) {
209 finder_->set_global_median_blob_width(3);
210 finder_->set_global_median_xheight(10);
211
212 TBOX part_box(10, 5, 100, 15);
213 auto *all = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
214 all->set_type(PT_FLOWING_TEXT);
215 all->set_blob_type(BRT_TEXT);
216 all->set_flow(BTFT_CHAIN);
217 all->set_left_margin(10);
218 all->set_right_margin(100);
219 TBOX blob_box = part_box;
220 for (int i = 10; i <= 20; i += 5) {
221 blob_box.set_left(i + 1);
222 blob_box.set_right(i + 4);
223 all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
224 }
225 for (int i = 35; i <= 55; i += 5) {
226 blob_box.set_left(i + 1);
227 blob_box.set_right(i + 4);
228 all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
229 }
230 for (int i = 80; i <= 95; i += 5) {
231 blob_box.set_left(i + 1);
232 blob_box.set_right(i + 4);
233 all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
234 }
235 // TODO(nbeato): Ray's newer code...
236 // all->ClaimBoxes();
237 all->ComputeLimits(); // This is to make sure median iinfo is set.
238 InsertTextPartition(all); // This is to delete blobs
239 ColPartition *fragment_me = all->CopyButDontOwnBlobs();
240
241 finder_->SplitAndInsertFragmentedTextPartition(fragment_me);
242 finder_->ExpectPartition(TBOX(11, 5, 24, 15));
243 finder_->ExpectPartition(TBOX(36, 5, 59, 15));
244 finder_->ExpectPartition(TBOX(81, 5, 99, 15));
245 finder_->ExpectPartitionCount(3);
246}
247
248TEST_F(TableFinderTest, SplitAndInsertFragmentedPartitionsBasicFail) {
249 finder_->set_global_median_blob_width(3);
250 finder_->set_global_median_xheight(10);
251
252 TBOX part_box(10, 5, 100, 15);
253 auto *all = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
254 all->set_type(PT_FLOWING_TEXT);
255 all->set_blob_type(BRT_TEXT);
256 all->set_flow(BTFT_CHAIN);
257 all->set_left_margin(10);
258 all->set_right_margin(100);
259 TBOX blob_box = part_box;
260 for (int i = 10; i <= 95; i += 5) {
261 blob_box.set_left(i + 1);
262 blob_box.set_right(i + 4);
263 all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
264 }
265 // TODO(nbeato): Ray's newer code...
266 // all->ClaimBoxes();
267 all->ComputeLimits(); // This is to make sure median iinfo is set.
268 InsertTextPartition(all); // This is to delete blobs
269 ColPartition *fragment_me = all->CopyButDontOwnBlobs();
270
271 finder_->SplitAndInsertFragmentedTextPartition(fragment_me);
272 finder_->ExpectPartition(TBOX(11, 5, 99, 15));
273 finder_->ExpectPartitionCount(1);
274}
275
276} // namespace tesseract
@ TBOX
int * count
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:2043
#define EXPECT_TRUE(condition)
Definition: gtest.h:1982
#define EXPECT_FALSE(condition)
Definition: gtest.h:1986
@ BRT_TEXT
Definition: blobbox.h:82
@ BRT_UNKNOWN
Definition: blobbox.h:80
@ BTFT_NONE
Definition: blobbox.h:111
@ BTFT_CHAIN
Definition: blobbox.h:114
@ BTFT_LEADER
Definition: blobbox.h:117
TEST_F(EuroText, FastLatinOCR)
@ PT_FLOWING_TEXT
Definition: publictypes.h:53
integer coordinate
Definition: points.h:36
TDimension left() const
Definition: rect.h:82
void set_right(int x)
Definition: rect.h:92
void set_to_given_coords(int x_min, int y_min, int x_max, int y_max)
Definition: rect.h:282
void set_left(int x)
Definition: rect.h:85
TDimension top() const
Definition: rect.h:68
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:238
void SetUniqueMode(bool mode)
Definition: bbgrid.h:249
void StartFullSearch()
Definition: bbgrid.h:701
BBC * NextFullSearch()
Definition: bbgrid.h:711
static ColPartition * FakePartition(const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
ColPartition * CopyButDontOwnBlobs()
const TBOX & bounding_box() const
Definition: colpartition.h:108
void set_last_column(int column)
Definition: colpartition.h:732
void set_first_column(int column)
Definition: colpartition.h:729
void SplitAndInsertFragmentedTextPartition(ColPartition *part)
Definition: tablefind.cpp:437
bool HasLeaderAdjacent(const ColPartition &part)
Definition: tablefind.cpp:969
void set_global_median_blob_width(int width)
Definition: tablefind.cpp:766
void InsertLeaderPartition(ColPartition *part)
Definition: tablefind.cpp:411
bool GapInXProjection(int *xprojection, int length)
Definition: tablefind.cpp:1838
void set_global_median_xheight(int xheight)
Definition: tablefind.cpp:763
void set_global_median_ledding(int ledding)
Definition: tablefind.cpp:769
ColPartitionGrid fragmented_text_grid_
Definition: tablefind.h:401
void InsertTextPartition(ColPartition *part)
Definition: tablefind.cpp:395
void ExpectPartitionCount(int expected_count)
void ExpectPartition(const TBOX &box)
void InsertLeaderPartition(int x_min, int y_min, int x_max, int y_max)
void MakePartition(int x_min, int y_min, int x_max, int y_max, int first_column, int last_column)
void InsertLeaderPartition(int x_min, int y_min, int x_max, int y_max, int first_column, int last_column)
std::unique_ptr< ColPartition > partition_
void InsertTextPartition(ColPartition *part)
std::unique_ptr< TestableTableFinder > finder_
void MakePartition(int x_min, int y_min, int x_max, int y_max)