tesseract v5.3.3.20231005
textlineprojection_test.cc
Go to the documentation of this file.
1// (C) Copyright 2017, Google Inc.
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5// http://www.apache.org/licenses/LICENSE-2.0
6// Unless required by applicable law or agreed to in writing, software
7// distributed under the License is distributed on an "AS IS" BASIS,
8// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9// See the License for the specific language governing permissions and
10// limitations under the License.
11
12#include <allheaders.h>
13#include <string> // for std::string
14
15#include "include_gunit.h"
16
17#include <tesseract/baseapi.h>
18#include <tesseract/osdetect.h>
19#include "colfind.h"
20#include "log.h" // for LOG
21#include "mutableiterator.h"
22#include "pageres.h"
23#include "tesseractclass.h"
24#include "textlineprojection.h"
25
26namespace tesseract {
27
// Minimum score for a STRONG_CHAIN textline. The tests below use it as the
// threshold that a real word box must reach and that displaced boxes must
// stay under.
// NOTE: Keep in sync with textlineprojection.cc.
constexpr int kMinStrongTextValue = 6;
31
32// The fixture for testing Tesseract.
34protected:
35 std::string OutputNameToPath(const std::string &name) {
37 return file::JoinPath(FLAGS_test_tmpdir, name);
38 }
39
41 src_pix_ = nullptr;
42 bin_pix_ = nullptr;
43 finder_ = nullptr;
44 denorm_ = nullptr;
45 projection_ = nullptr;
46 }
50 delete finder_;
51 }
52
53 void SetImage(const char *filename) {
55 src_pix_ = pixRead(file::JoinPath(TESTING_DIR, filename).c_str());
56 api_.Init(TESSDATA_DIR, "eng", tesseract::OEM_TESSERACT_ONLY);
59 }
60
61 // Ugly hacked-together function sets up projection_ and denorm_ by setting
62 // up for auto pagelayout, setting up a ColumnFinder, running it, and
63 // using accessors to get at the internal denorm and projection.
64 // If the coordinates have been rotated, the denorm should match
65 // correctly and transform coordinates back to the projection.
66 // We throw away all the blocks, blobs etc, and test the projection with
67 // the resultiterator from a separate BaseAPI run.
70 auto osd_tess = std::make_unique<Tesseract>();
71 OSResults osr;
72 EXPECT_EQ(osd_tess->init_tesseract(TESSDATA_DIR, "", "osd", tesseract::OEM_TESSERACT_ONLY,
73 nullptr, 0, nullptr, nullptr, false, &mgr),
74 0);
75 tesseract_ = std::make_unique<Tesseract>();
76 EXPECT_EQ(tesseract_->init_tesseract(TESSDATA_DIR, "", "eng", tesseract::OEM_TESSERACT_ONLY,
77 nullptr, 0, nullptr, nullptr, false, &mgr),
78 0);
80 *tesseract_->mutable_pix_binary() = bin_pix_.clone();
81 osd_tess->set_source_resolution(api_.tesseract()->source_resolution());
82 tesseract_->set_source_resolution(api_.tesseract()->source_resolution());
83 int width = pixGetWidth(bin_pix_);
84 int height = pixGetHeight(bin_pix_);
85 // First make a single block covering the whole image.
86 auto *block = new BLOCK("", true, 0, 0, 0, 0, width, height);
87 block->set_right_to_left(false);
88 BLOCK_LIST src_blocks;
89 BLOCK_IT block_it(&src_blocks);
90 block_it.add_to_end(block);
91 Image photomask_pix = nullptr;
92 // The blocks made by the ColumnFinder. Moved to blocks before return.
93 BLOCK_LIST found_blocks;
94 TO_BLOCK_LIST temp_blocks;
95 finder_ =
96 tesseract_->SetupPageSegAndDetectOrientation(tesseract::PSM_AUTO_OSD, &src_blocks, osd_tess.get(),
97 &osr, &temp_blocks, &photomask_pix, nullptr);
98 TO_BLOCK_IT to_block_it(&temp_blocks);
99 TO_BLOCK *to_block = to_block_it.data();
101 TO_BLOCK_LIST to_blocks;
102 BLOBNBOX_LIST diacritic_blobs;
103 EXPECT_GE(finder_->FindBlocks(tesseract::PSM_AUTO, nullptr, 1, to_block, photomask_pix, nullptr,
104 nullptr, nullptr, &found_blocks, &diacritic_blobs, &to_blocks),
105 0);
107 photomask_pix.destroy();
108 }
109
110 // Helper evaluates the given box, expects the result to be greater_than
111 // or !greater_than the target_value and provides diagnostics if not.
112 void EvaluateBox(const TBOX &box, bool greater_or_equal, int target_value, const char *text,
113 const char *message) {
114 int value = projection_->EvaluateBox(box, denorm_, false);
115 if (greater_or_equal != (value > target_value)) {
116 LOG(INFO) << "EvaluateBox too " << (greater_or_equal ? "low" : "high")
117 << ":" << value << " vs " << target_value << " for " << message << " word '" << text << "' at:";
118 box.print();
119 value = projection_->EvaluateBox(box, denorm_, true);
120 } else {
121 LOG(INFO) << "EvaluateBox OK(" << value << ") for " << message << " word '" << text << "'";
122 }
123 if (greater_or_equal) {
124 EXPECT_GE(value, target_value);
125 } else {
126 EXPECT_LT(value, target_value);
127 }
128 }
129
130 // Helper evaluates the DistanceOfBoxFromBox function by expecting that
131 // box should be nearer to true_box than false_box.
132 void EvaluateDistance(const TBOX &box, const TBOX &true_box, const TBOX &false_box,
133 const char *text, const char *message) {
134 int true_dist = projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, false);
135 int false_dist = projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, false);
136 if (false_dist <= true_dist) {
137 LOG(INFO) << "Distance wrong:" << false_dist << " vs " << true_dist
138 << " for " << message << " word '" << text << "' at:";
139 true_box.print();
140 projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, true);
141 projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, true);
142 } else {
143 LOG(INFO) << "Distance OK(" << false_dist << " vs " << true_dist
144 << ") for " << message << " word '" << text << "'";
145 }
146 }
147
148 // Tests the projection on the word boxes of the given image.
149 // line_height is the cap + descender size of the text.
// For every word visited by the iterator, three groups of checks are made:
//  1. the word box itself must score >= kMinStrongTextValue, while boxes
//     stacked directly above and below it must score below that threshold
//     but at least -1;
//  2. at least one of the upper/lower boxes must satisfy BoxOutOfHTextline;
//  3. DistanceOfBoxFromBox is exercised with a fake "challenger" box placed
//     beyond each pad box (results are logged by EvaluateDistance).
150 void VerifyBoxes(const char *imagefile, int line_height) {
151 SetImage(imagefile);
152 api_.Recognize(nullptr);
// NOTE(review): `it` is used below, but its declaration is not among the
// lines visible in this listing -- confirm against the full source file.
155 do {
156 char *text = it->GetUTF8Text(tesseract::RIL_WORD);
157 const PAGE_RES_IT *pr_it = it->PageResIt();
158 WERD_RES *word = pr_it->word();
159 // The word_box refers to the internal, possibly rotated, coords.
160 TBOX word_box = word->word->bounding_box();
// A word is "small" if 1.5x its height is still under line_height, and
// "tall" if 1.125x its height exceeds line_height.
161 bool small_word = word_box.height() * 1.5 < line_height;
162 bool tall_word = word_box.height() * 1.125 > line_height;
163 // We pad small and tall words differently because ascenders and
164 // descenders affect the position and size of the upper/lower boxes.
165 int padding;
166 if (small_word) {
167 padding = word_box.height();
168 } else if (tall_word) {
169 padding = word_box.height() / 3;
170 } else {
171 padding = word_box.height() / 2;
172 }
173 // Test that the word box gets a good score.
174 EvaluateBox(word_box, true, kMinStrongTextValue, text, "Real Word");
175
176 // Now test a displaced box, both above and below the word.
// upper_box sits on top of the word: from word top up to top + padding.
177 TBOX upper_box(word_box);
178 upper_box.set_bottom(word_box.top());
179 upper_box.set_top(word_box.top() + padding);
180 EvaluateBox(upper_box, false, kMinStrongTextValue, text, "Upper Word");
181 EvaluateBox(upper_box, true, -1, text, "Upper Word not vertical");
// lower_box hangs under the word: from word bottom down to bottom - padding.
182 TBOX lower_box = word_box;
183 lower_box.set_top(word_box.bottom());
184 lower_box.set_bottom(word_box.bottom() - padding);
// For tall words the lower box is shifted up by half the padding.
185 if (tall_word) {
186 lower_box.move(ICOORD(0, padding / 2));
187 }
188 EvaluateBox(lower_box, false, kMinStrongTextValue, text, "Lower Word");
189 EvaluateBox(lower_box, true, -1, text, "Lower Word not vertical");
190
191 // Since some words have no text below and some words have no text above
192 // check that at least one of the boxes satisfies BoxOutOfTextline.
193 bool upper_or_lower_out_of_textline =
194 projection_->BoxOutOfHTextline(upper_box, denorm_, false) ||
195 projection_->BoxOutOfHTextline(lower_box, denorm_, false);
// On failure, repeat both queries with debug=true before the expectation.
196 if (!upper_or_lower_out_of_textline) {
197 projection_->BoxOutOfHTextline(upper_box, denorm_, true);
198 projection_->BoxOutOfHTextline(lower_box, denorm_, true);
199 }
200 EXPECT_TRUE(upper_or_lower_out_of_textline);
201
202 // Now test DistanceOfBoxFromBox by faking a challenger word, and asking
203 // that each pad box be nearer to its true textline than the
204 // challenger. Due to the tight spacing of latin text, getting
205 // the right position and size of these test boxes is quite fiddly.
// From here on padding is redefined as a quarter of the line height and the
// upper/lower boxes are resized in place; statement order matters.
206 padding = line_height / 4;
207 upper_box.set_top(upper_box.bottom() + padding);
208 TBOX target_box(word_box);
209 if (!small_word) {
210 upper_box.move(ICOORD(0, -padding * 3 / 2));
211 }
212 target_box.set_top(upper_box.bottom());
// The upper challenger is a word-height box starting at the top of upper_box.
213 TBOX upper_challenger(upper_box);
214 upper_challenger.set_bottom(upper_box.top());
215 upper_challenger.set_top(upper_box.top() + word_box.height());
216 EvaluateDistance(upper_box, target_box, upper_challenger, text, "Upper Word");
217 if (tall_word) {
218 lower_box.move(ICOORD(0, padding / 2));
219 }
220 lower_box.set_bottom(lower_box.top() - padding);
221 target_box = word_box;
222 target_box.set_bottom(lower_box.top());
// The lower challenger is a word-height box ending at the bottom of lower_box.
223 TBOX lower_challenger(lower_box);
224 lower_challenger.set_top(lower_box.bottom());
225 lower_challenger.set_bottom(lower_box.bottom() - word_box.height());
226 EvaluateDistance(lower_box, target_box, lower_challenger, text, "Lower Word");
227
// Release the string obtained from GetUTF8Text at the top of the loop body.
228 delete[] text;
229 } while (it->Next(tesseract::RIL_WORD));
// All words visited; the iterator is freed by this function.
230 delete it;
231 }
232
235 BLOCK_LIST blocks_;
236 std::string ocr_text_;
238 std::unique_ptr<Tesseract> tesseract_;
242};
243
244// Tests all word boxes on an unrotated image.
246 VerifyBoxes("phototest.tif", 31);
247}
248
249// Tests all word boxes on a rotated image.
251 VerifyBoxes("phototestrot.tif", 31);
252}
253
254} // namespace tesseract
@ LOG
@ INFO
Definition: log.h:28
int value
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:2043
#define EXPECT_GE(val1, val2)
Definition: gtest.h:2051
#define EXPECT_TRUE(condition)
Definition: gtest.h:1982
#define EXPECT_LT(val1, val2)
Definition: gtest.h:2049
@ OEM_TESSERACT_ONLY
Definition: publictypes.h:264
@ PSM_AUTO
Fully automatic page segmentation, but no OSD.
Definition: publictypes.h:162
const int kMinStrongTextValue
TEST_F(EuroText, FastLatinOCR)
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:834
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:511
MutableIterator * GetMutableIterator()
Definition: baseapi.cpp:1354
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:368
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:576
Tesseract * tesseract() const
Definition: baseapi.h:711
Pix * GetThresholdedImage()
Definition: baseapi.cpp:631
virtual char * GetUTF8Text(PageIteratorLevel level) const
bool Next(PageIteratorLevel level) override
const PAGE_RES_IT * PageResIt() const
int source_resolution() const
Image clone() const
Definition: image.cpp:24
void destroy()
Definition: image.cpp:32
WERD_RES * word() const
Definition: pageres.h:763
integer coordinate
Definition: points.h:36
TDimension height() const
Definition: rect.h:118
void move(const ICOORD vec)
Definition: rect.h:170
TDimension top() const
Definition: rect.h:68
void set_bottom(int y)
Definition: rect.h:78
void print() const
Definition: rect.h:289
TDimension bottom() const
Definition: rect.h:75
void set_top(int y)
Definition: rect.h:71
TBOX bounding_box() const
Definition: werd.cpp:155
const TextlineProjection * projection() const
Definition: colfind.h:70
const DENORM * denorm() const
Definition: colfind.h:67
int FindBlocks(PageSegMode pageseg_mode, Image scaled_color, int scaled_factor, TO_BLOCK *block, Image photo_mask_pix, Image thresholds_pix, Image grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks)
Definition: colfind.cpp:286
bool BoxOutOfHTextline(const TBOX &box, const DENORM *denorm, bool debug) const
int EvaluateBox(const TBOX &box, const DENORM *denorm, bool debug) const
int DistanceOfBoxFromBox(const TBOX &from_box, const TBOX &to_box, bool horizontal_textline, const DENORM *denorm, bool debug) const
static void MakeTmpdir()
Definition: include_gunit.h:38
static std::string JoinPath(const std::string &s1, const std::string &s2)
Definition: include_gunit.h:65
void EvaluateDistance(const TBOX &box, const TBOX &true_box, const TBOX &false_box, const char *text, const char *message)
void EvaluateBox(const TBOX &box, bool greater_or_equal, int target_value, const char *text, const char *message)
std::string OutputNameToPath(const std::string &name)
void VerifyBoxes(const char *imagefile, int line_height)
std::unique_ptr< Tesseract > tesseract_