tesseract v5.3.3.20231005
baseapi_test.cc
Go to the documentation of this file.
1// (C) Copyright 2017, Google Inc.
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5// http://www.apache.org/licenses/LICENSE-2.0
6// Unless required by applicable law or agreed to in writing, software
7// distributed under the License is distributed on an "AS IS" BASIS,
8// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9// See the License for the specific language governing permissions and
10// limitations under the License.
11
12#include "include_gunit.h"
13
14#include "cycletimer.h" // for CycleTimer
15#include "log.h" // for LOG
16#include "ocrblock.h" // for class BLOCK
17#include "pageres.h"
18
19#include <tesseract/baseapi.h>
20
21#include <allheaders.h>
23
24#include <memory>
25#include <regex>
26#include <string>
27#include <vector>
28
29namespace tesseract {
30
31using ::testing::ContainsRegex;
32using ::testing::HasSubstr;
33
// Parallel, nullptr-terminated tables used by TestMultipleTessInstances:
// entry i of langs, image_files and gt_text belong together (language code,
// test image file name, expected OCR text). The expected texts for the
// non-English languages are spelled as escaped UTF-8 byte sequences.
34static const char *langs[] = {"eng", "vie", "hin", "ara", nullptr};
35static const char *image_files[] = {"HelloGoogle.tif", "viet.tif", "raaj.tif", "arabic.tif",
36 nullptr};
37static const char *gt_text[] = {"Hello Google", "\x74\x69\xe1\xba\xbf\x6e\x67",
38 "\xe0\xa4\xb0\xe0\xa4\xbe\xe0\xa4\x9c",
39 "\xd8\xa7\xd9\x84\xd8\xb9\xd8\xb1\xd8\xa8\xd9\x8a", nullptr};
40
// NOTE(review): the class header for this body is not among the lines visible
// here. FRIEND_TEST names the LSTMGeometryTest test of the TesseractTest
// fixture, presumably so that test can reach non-public members - confirm.
42 FRIEND_TEST(TesseractTest, LSTMGeometryTest);
43};
44
// Sets pix as the image of the given engine, fetches the recognized text and
// returns it as a trimmed std::string.
// NOTE(review): the function header is not among the lines visible here.
46 tess->SetImage(pix);
47 char *result = tess->GetUTF8Text();
// NOTE(review): result is copied into a std::string without a null check -
// confirm GetUTF8Text() cannot return nullptr on this path.
48 std::string ocr_result = result;
// The raw buffer is released here once its contents have been copied.
49 delete[] result;
50 trim(ocr_result);
51 return ocr_result;
52}
53
54// The fixture for testing Tesseract.
56protected:
// Returns the result of joining TESTING_DIR and `name` with file::JoinPath,
// i.e. the location of the test data file called `name`.
57 static std::string TestDataNameToPath(const std::string &name) {
58 return file::JoinPath(TESTING_DIR, name);
59 }
// Returns TESSDATA_DIR as a std::string; the tests pass it to Init() as the
// data path.
60 static std::string TessdataPath() {
61 return TESSDATA_DIR;
62 }
63};
64
65// Test static TessBaseAPI (like it is used by tesserocr).
// Declares a function-local static TessBaseAPI and calls End() on it without
// calling Init() first. The test body contains no assertions.
66TEST_F(TesseractTest, StaticTessBaseAPI) {
67 static tesseract::TessBaseAPI api;
68 api.End();
69}
70
71// Tests that Tesseract gets exactly the right answer on phototest.
// Recognizes phototest.tif with the "eng" model in OEM_TESSERACT_ONLY mode and
// expects the trimmed OCR text to equal the trimmed contents of
// phototest.gold.txt. The test is skipped when Init() returns -1.
// NOTE(review): `api` is not declared in the lines visible here and the
// embedded numbering skips 73 and 80, so some statements appear to be missing
// from this listing - confirm against the original file.
72TEST_F(TesseractTest, BasicTesseractTest) {
74 std::string truth_text;
75 std::string ocr_text;
76 if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) != -1) {
77 Image src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str());
78 CHECK(src_pix);
79 ocr_text = GetCleanedTextResult(&api, src_pix);
81 file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
82 trim(truth_text);
83 EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
  // The image is released explicitly once the comparison is done.
84 src_pix.destroy();
85 } else {
86 // eng.traineddata not found.
87 GTEST_SKIP();
88 }
89}
90
91// Test that api.GetComponentImages() will return a set of images for
92// paragraphs even if text recognition was not run.
// As written, everything that exercises GetComponentImages() is compiled out
// by the `#if 0` below (the b622.png test image is missing), so the visible
// active code only initializes the engine and sets paragraph_debug_level.
// NOTE(review): `api` is not declared in the lines visible here and the
// embedded numbering skips 94 and 96 - confirm against the original file.
93TEST_F(TesseractTest, IteratesParagraphsEvenIfNotDetected) {
95 if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) != -1) {
97 api.SetVariable("paragraph_debug_level", "3");
98#if 0 // TODO: b622.png is missing
  // NOTE(review): in this disabled code src_pix is declared as `Pix*` but is
  // released with `src_pix.destroy()`; this needs reconciling before the code
  // is re-enabled.
99 Pix* src_pix = pixRead(TestDataNameToPath("b622.png").c_str());
100 CHECK(src_pix);
101 api.SetImage(src_pix);
102 Boxa* para_boxes =
103 api.GetComponentImages(tesseract::RIL_PARA, true, nullptr, nullptr);
104 EXPECT_TRUE(para_boxes != nullptr);
105 Boxa* block_boxes =
106 api.GetComponentImages(tesseract::RIL_BLOCK, true, nullptr, nullptr);
107 EXPECT_TRUE(block_boxes != nullptr);
108 // TODO(eger): Get paragraphs out of this page pre-text.
109 EXPECT_GE(boxaGetCount(para_boxes), boxaGetCount(block_boxes));
110 boxaDestroy(&block_boxes);
111 boxaDestroy(&para_boxes);
112 src_pix.destroy();
113#endif
114 } else {
115 // eng.traineddata not found.
116 GTEST_SKIP();
117 }
118}
119
120// We should get hOCR output and not seg fault, even if the api caller doesn't
121// call SetInputName().
// Runs GetHOCRText(0) on HelloGoogle.tif right after SetImage() and checks that
// the output contains "Hello" and an ocr_page div. Skipped when Init() returns -1.
// NOTE(review): `api` is not declared in the lines visible here (embedded line
// 123 is absent from this listing) - confirm against the original file.
122TEST_F(TesseractTest, HOCRWorksWithoutSetInputName) {
124 if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
125 // eng.traineddata not found.
126 GTEST_SKIP();
127 return;
128 }
129 Image src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
130 CHECK(src_pix);
131 api.SetImage(src_pix);
132 char *result = api.GetHOCRText(0);
  // NOTE(review): EXPECT_TRUE is non-fatal, so a null result would still be
  // handed to the HasSubstr matchers below - ASSERT may be what is wanted; confirm.
133 EXPECT_TRUE(result != nullptr);
134 EXPECT_THAT(result, HasSubstr("Hello"));
135 EXPECT_THAT(result, HasSubstr("<div class='ocr_page'"));
  // The hOCR buffer and the image are both released explicitly by the test.
136 delete[] result;
137 src_pix.destroy();
138}
139
140// hOCR output should contain baseline info for upright textlines.
// Same setup as the test without SetInputName(), but the input name is set
// first, and the hOCR text is searched for an ocr_line span that carries a
// "baseline" entry followed by two numeric fields.
// NOTE(review): `api` is not declared in the lines visible here (embedded line
// 142 is absent from this listing) - confirm against the original file.
141TEST_F(TesseractTest, HOCRContainsBaseline) {
143 if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
144 // eng.traineddata not found.
145 GTEST_SKIP();
146 return;
147 }
148 Image src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
149 CHECK(src_pix);
150 api.SetInputName("HelloGoogle.tif");
151 api.SetImage(src_pix);
152 char *result = api.GetHOCRText(0);
  // NOTE(review): EXPECT_TRUE is non-fatal, so a null result would still reach
  // HasSubstr and std::regex_search below - ASSERT may be what is wanted; confirm.
153 EXPECT_TRUE(result != nullptr);
154 EXPECT_THAT(result, HasSubstr("Hello"));
155 EXPECT_TRUE(std::regex_search(
156 result, std::regex{"<span class='ocr_line'[^>]* baseline [-.0-9]+ [-.0-9]+"}));
157
158 delete[] result;
159 src_pix.destroy();
160}
161
162// Tests that Tesseract gets exactly the right answer on some page numbers.
// Two phases: a loop over the nullptr-terminated kTrainingPages / kTrainingText
// tables (entry i of each go together), then a loop over kTestPages whose OCR
// text must equal kTestText[i]. The whole body is replaced by GTEST_SKIP() when
// DISABLED_LEGACY_ENGINE is defined.
// NOTE(review): `api` is not declared in the lines visible here and the
// embedded numbering skips 176, 192 and 198 (the statement that the streamed
// "Failed to adapt" message belongs to is one of them) - confirm against the
// original file.
163TEST_F(TesseractTest, AdaptToWordStrTest) {
164#ifdef DISABLED_LEGACY_ENGINE
165 // Skip test because TessBaseAPI::AdaptToWordStr is missing.
166 GTEST_SKIP();
167#else
168 static const char *kTrainingPages[] = {"136.tif", "256.tif", "410.tif", "432.tif", "540.tif",
169 "692.tif", "779.tif", "793.tif", "808.tif", "815.tif",
170 "12.tif", "12.tif", nullptr};
171 static const char *kTrainingText[] = {"1 3 6", "2 5 6", "4 1 0", "4 3 2", "5 4 0",
172 "6 9 2", "7 7 9", "7 9 3", "8 0 8", "8 1 5",
173 "1 2", "1 2", nullptr};
174 static const char *kTestPages[] = {"324.tif", "433.tif", "12.tif", nullptr};
175 static const char *kTestText[] = {"324", "433", "12", nullptr};
177 std::string truth_text;
178 std::string ocr_text;
179 if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
180 // eng.traineddata not found.
181 GTEST_SKIP();
182 return;
183 }
184 api.SetVariable("matcher_sufficient_examples_for_prototyping", "1");
185 api.SetVariable("classify_class_pruner_threshold", "220");
186 // Train on the training text.
187 for (int i = 0; kTrainingPages[i] != nullptr; ++i) {
188 std::string image_file = TestDataNameToPath(kTrainingPages[i]);
189 Image src_pix = pixRead(image_file.c_str());
190 CHECK(src_pix);
191 api.SetImage(src_pix);
193 << "Failed to adapt to text \"" << kTrainingText[i] << "\" on image " << image_file;
194 src_pix.destroy();
195 }
196 // Test the test text.
197 api.SetVariable("tess_bn_matching", "1");
199 for (int i = 0; kTestPages[i] != nullptr; ++i) {
200 Image src_pix = pixRead(TestDataNameToPath(kTestPages[i]).c_str());
201 CHECK(src_pix);
202 ocr_text = GetCleanedTextResult(&api, src_pix);
  // NOTE(review): truth_text is never assigned in the visible lines of this
  // test, so this trim looks like a leftover no-op; the comparison below uses
  // kTestText[i] instead - confirm and consider removing.
203 trim(truth_text);
204 EXPECT_STREQ(kTestText[i], ocr_text.c_str());
205 src_pix.destroy();
206 }
207#endif
208}
209
210// Tests that LSTM gets exactly the right answer on phototest.
// Recognizes phototest_2.tif with the "eng" model in OEM_LSTM_ONLY mode and
// expects the trimmed OCR text to equal the trimmed contents of
// phototest.gold.txt. Skipped when Init() returns -1.
// NOTE(review): `api` is not declared in the lines visible here (embedded line
// 212 is absent from this listing) - confirm against the original file.
211TEST_F(TesseractTest, BasicLSTMTest) {
213 std::string truth_text;
214 std::string ocr_text;
215 if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY) == -1) {
216 // eng.traineddata not found.
217 GTEST_SKIP();
218 return;
219 }
220 Image src_pix = pixRead(TestDataNameToPath("phototest_2.tif").c_str());
221 CHECK(src_pix);
222 ocr_text = GetCleanedTextResult(&api, src_pix);
223 CHECK_OK(
224 file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
225 trim(truth_text);
226 EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
227 src_pix.destroy();
228}
229
230// Test that LSTM's character bounding boxes are properly converted to
231// Tesseract structures. Note that we can't guarantee that LSTM's
232// character boxes fall completely within Tesseract's word box because
233// the baseline denormalization/normalization transforms may introduce
234// errors due to float/int conversions (e.g., see OUTLINE::move() in
235// ccstruct/poutline.h) Instead, we do a loose check.
// Concretely: after Recognize() on deslant.tif, every character box of every
// word may stick out of the (re-rotated) word box by less than 5 pixels on
// each side.
// NOTE(review): `api` is not declared in the lines visible here (embedded line
// 238 is absent from this listing) - confirm against the original file.
236TEST_F(TesseractTest, LSTMGeometryTest) {
  // NOTE(review): the image is read before Init() and is not CHECKed; on the
  // skip path below the function returns before src_pix.destroy() is reached -
  // confirm whether Image releases itself or whether this leaks.
237 Image src_pix = pixRead(TestDataNameToPath("deslant.tif").c_str());
239 if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY) == -1) {
240 // eng.traineddata not found.
241 GTEST_SKIP();
242 return;
243 }
244 api.SetImage(src_pix);
245 ASSERT_EQ(api.Recognize(nullptr), 0);
246
247 const PAGE_RES *page_res = api.GetPageRes();
  // PAGE_RES_IT is constructed from a non-const pointer, hence the const_cast.
248 PAGE_RES_IT page_res_it(const_cast<PAGE_RES *>(page_res));
249 page_res_it.restart_page();
  // NOTE(review): block() is dereferenced before any null check; only the
  // inner BLOCK pointer is CHECKed.
250 BLOCK *block = page_res_it.block()->block;
251 CHECK(block);
252
253 // extract word and character boxes for each word
254 for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) {
255 WERD_RES *word = page_res_it.word();
256 CHECK(word);
257 CHECK(word->best_choice);
258 CHECK_GT(word->best_choice->length(), 0);
259 CHECK(word->word);
260 CHECK(word->box_word);
261 // tesseract's word box
262 TBOX tess_blob_box;
263 tess_blob_box = word->word->bounding_box();
  // The rotation is taken from the block fetched before the loop, not from
  // the block of the current word.
264 tess_blob_box.rotate(block->re_rotation());
265 // verify that each of LSTM's character boxes lies close to within
266 // tesseract's word box
267 for (int i = 0; i < word->box_word->length(); ++i) {
268 TBOX lstm_blob_box = word->box_word->BlobBox(i);
269 // LSTM character box should not spill out of tesseract word box
270 // by more than a few pixels in any direction
271 EXPECT_LT(tess_blob_box.left() - lstm_blob_box.left(), 5);
272 EXPECT_LT(lstm_blob_box.right() - tess_blob_box.right(), 5);
273 EXPECT_LT(tess_blob_box.bottom() - lstm_blob_box.bottom(), 5);
274 EXPECT_LT(lstm_blob_box.top() - tess_blob_box.top(), 5);
275 }
276 }
277 src_pix.destroy();
278}
279
280TEST_F(TesseractTest, InitConfigOnlyTest) {
281 // Languages for testing initialization.
282 const char *langs[] = {"eng", "chi_tra", "jpn", "vie"};
283 std::unique_ptr<tesseract::TessBaseAPI> api;
284 CycleTimer timer;
285 for (auto &lang : langs) {
286 api = std::make_unique<tesseract::TessBaseAPI>();
287 timer.Restart();
288 EXPECT_EQ(0, api->Init(TessdataPath().c_str(), lang, tesseract::OEM_TESSERACT_ONLY));
289 timer.Stop();
290 LOG(INFO) << "Lang " << lang << " took " << timer.GetInMs() << "ms in regular init";
291 }
292 // Init variables to set for config-only initialization.
293 std::vector<std::string> vars_vec, vars_values;
294 vars_vec.emplace_back("tessedit_init_config_only");
295 vars_values.emplace_back("1");
296 LOG(INFO) << "Switching to config only initialization:";
297 for (auto &lang : langs) {
298 api = std::make_unique<tesseract::TessBaseAPI>();
299 timer.Restart();
300 EXPECT_EQ(0, api->Init(TessdataPath().c_str(), lang, tesseract::OEM_TESSERACT_ONLY, nullptr, 0,
301 &vars_vec, &vars_values, false));
302 timer.Stop();
303 LOG(INFO) << "Lang " << lang << " took " << timer.GetInMs() << "ms in config-only init";
304 }
305}
306
307// Tests if two instances of Tesseract/LSTM can co-exist in the same thread.
308// NOTE: This is not an exhaustive test and current support for multiple
309// instances in Tesseract is fragile. This test is intended largely as a means
310// of detecting and guarding against the existing support being possibly broken
311// by future CLs. TessBaseAPI instances are initialized using the default
312// OEM_DEFAULT mode.
// Uses the file-level langs / image_files / gt_text tables: first each image
// is recognized on its own, then every pair of languages is run with two
// engines alive at the same time.
// NOTE(review): `tess` is not declared in the lines visible here (embedded line
// 331 is absent from this listing) - confirm against the original file.
313TEST(TesseractInstanceTest, TestMultipleTessInstances) {
  // Count the entries of the nullptr-terminated langs table.
314 int num_langs = 0;
315 while (langs[num_langs] != nullptr) {
316 ++num_langs;
317 }
318
319 const std::string kTessdataPath = TESSDATA_DIR;
320
321 // Preload images and verify that OCR is correct on them individually.
322 std::vector<Image > pix(num_langs);
323 for (int i = 0; i < num_langs; ++i) {
324 std::string tracestring = "Single instance test with lang = ";
325 tracestring += langs[i];
326 SCOPED_TRACE(tracestring);
327 std::string path = file::JoinPath(TESTING_DIR, image_files[i]);
328 pix[i] = pixRead(path.c_str());
329 QCHECK(pix[i] != nullptr) << "Could not read " << path;
330
332 EXPECT_EQ(0, tess.Init(kTessdataPath.c_str(), langs[i]));
333 std::string ocr_result = GetCleanedTextResult(&tess, pix[i]);
334 EXPECT_STREQ(gt_text[i], ocr_result.c_str());
335 }
336
337 // Process the images in all pairwise combinations of associated languages.
338 std::string ocr_result[2];
339 for (int i = 0; i < num_langs; ++i) {
340 for (int j = i + 1; j < num_langs; ++j) {
341 tesseract::TessBaseAPI tess1, tess2;
  // NOTE(review): unlike the single-instance loop above, the Init() results
  // are not checked here.
342 tess1.Init(kTessdataPath.c_str(), langs[i]);
343 tess2.Init(kTessdataPath.c_str(), langs[j]);
344
345 ocr_result[0] = GetCleanedTextResult(&tess1, pix[i]);
346 ocr_result[1] = GetCleanedTextResult(&tess2, pix[j]);
347
  // strcmp() returns non-zero on a mismatch, so the expression is false only
  // when both results match their ground truth.
348 EXPECT_FALSE(strcmp(gt_text[i], ocr_result[0].c_str()) ||
349 strcmp(gt_text[j], ocr_result[1].c_str()))
350 << "OCR failed on language pair " << langs[i] << "-" << langs[j];
351 }
352 }
353
  // The preloaded images are released explicitly at the end.
354 for (int i = 0; i < num_langs; ++i) {
355 pix[i].destroy();
356 }
357}
358
359// Tests whether Tesseract parameters are correctly set for the two instances.
360TEST(TesseractInstanceTest, TestMultipleTessInstanceVariables) {
361 std::string illegal_name = "an_illegal_name";
362 std::string langs[2] = {"eng", "hin"};
363 std::string int_param_name = "tessedit_pageseg_mode";
364 int int_param[2] = {1, 2};
365 std::string int_param_str[2] = {"1", "2"};
366 std::string bool_param_name = "tessedit_ambigs_training";
367 bool bool_param[2] = {false, true};
368 std::string bool_param_str[2] = {"F", "T"};
369 std::string str_param_name = "tessedit_char_blacklist";
370 std::string str_param[2] = {"abc", "def"};
371 std::string double_param_name = "segment_penalty_dict_frequent_word";
372 std::string double_param_str[2] = {"0.01", "2"};
373 double double_param[2] = {0.01, 2};
374
375 const std::string kTessdataPath = TESSDATA_DIR;
376
377 tesseract::TessBaseAPI tess1, tess2;
378 for (int i = 0; i < 2; ++i) {
379 tesseract::TessBaseAPI *api = (i == 0) ? &tess1 : &tess2;
380 api->Init(kTessdataPath.c_str(), langs[i].c_str());
381 api->SetVariable(illegal_name.c_str(), "none");
382 api->SetVariable(int_param_name.c_str(), int_param_str[i].c_str());
383 api->SetVariable(bool_param_name.c_str(), bool_param_str[i].c_str());
384 api->SetVariable(str_param_name.c_str(), str_param[i].c_str());
385 api->SetVariable(double_param_name.c_str(), double_param_str[i].c_str());
386 }
387 for (int i = 0; i < 2; ++i) {
388 tesseract::TessBaseAPI *api = (i == 0) ? &tess1 : &tess2;
389 EXPECT_FALSE(api->GetStringVariable(illegal_name.c_str()));
390 int intvar;
391 EXPECT_TRUE(api->GetIntVariable(int_param_name.c_str(), &intvar));
392 EXPECT_EQ(int_param[i], intvar);
393 bool boolvar;
394 EXPECT_TRUE(api->GetBoolVariable(bool_param_name.c_str(), &boolvar));
395 EXPECT_EQ(bool_param[i], boolvar);
396 EXPECT_STREQ(str_param[i].c_str(), api->GetStringVariable(str_param_name.c_str()));
397 double doublevar;
398 EXPECT_TRUE(api->GetDoubleVariable(double_param_name.c_str(), &doublevar));
399 EXPECT_EQ(double_param[i], doublevar);
400 }
401}
402
403} // namespace tesseract
@ LOG
@ INFO
Definition: log.h:28
#define EXPECT_THAT(value, matcher)
#define ASSERT_EQ(val1, val2)
Definition: gtest.h:2073
#define GTEST_SKIP()
Definition: gtest.h:1889
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:2043
#define SCOPED_TRACE(message)
Definition: gtest.h:2281
#define EXPECT_GE(val1, val2)
Definition: gtest.h:2051
#define EXPECT_TRUE(condition)
Definition: gtest.h:1982
#define EXPECT_STREQ(s1, s2)
Definition: gtest.h:2112
#define EXPECT_FALSE(condition)
Definition: gtest.h:1986
#define EXPECT_LT(val1, val2)
Definition: gtest.h:2049
#define CHECK(condition)
Definition: include_gunit.h:76
#define CHECK_GT(test, value)
Definition: include_gunit.h:81
#define CHECK_OK(test)
Definition: include_gunit.h:84
@ OEM_TESSERACT_ONLY
Definition: publictypes.h:264
@ PSM_SINGLE_WORD
Treat the image as a single word.
Definition: publictypes.h:168
@ PSM_SINGLE_BLOCK
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:166
std::string TestDataNameToPath(const std::string &name)
TEST_F(EuroText, FastLatinOCR)
std::string GetCleanedTextResult(tesseract::TessBaseAPI *tess, Image pix)
Definition: baseapi_test.cc:45
TEST(TesseractInstanceTest, TestMultipleTessInstances)
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:834
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:511
bool GetIntVariable(const char *name, int *value) const
Definition: baseapi.cpp:294
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:279
const PAGE_RES * GetPageRes() const
Definition: baseapi.h:760
char * GetHOCRText(ETEXT_DESC *monitor, int page_number)
void SetInputName(const char *name)
Definition: baseapi.cpp:270
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:368
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:576
Boxa * GetComponentImages(PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:702
const char * GetStringVariable(const char *name) const
Definition: baseapi.cpp:314
bool AdaptToWordStr(PageSegMode mode, const char *wordstr)
Definition: baseapi.cpp:1835
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:304
bool GetDoubleVariable(const char *name, double *value) const
Definition: baseapi.cpp:320
unsigned length() const
Definition: boxword.h:81
const TBOX & BlobBox(unsigned index) const
Definition: boxword.h:84
void destroy()
Definition: image.cpp:32
FCOORD re_rotation() const
Definition: ocrblock.h:129
WERD_CHOICE * best_choice
Definition: pageres.h:239
tesseract::BoxWord * box_word
Definition: pageres.h:270
BLOCK_RES * block() const
Definition: pageres.h:769
WERD_RES * forward()
Definition: pageres.h:743
WERD_RES * word() const
Definition: pageres.h:763
WERD_RES * restart_page()
Definition: pageres.h:710
unsigned length() const
Definition: ratngs.h:287
TDimension left() const
Definition: rect.h:82
void rotate(const FCOORD &vec)
Definition: rect.h:210
TDimension top() const
Definition: rect.h:68
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
TBOX bounding_box() const
Definition: werd.cpp:155
static std::string TessdataPath()
Definition: baseapi_test.cc:60
static std::string TestDataNameToPath(const std::string &name)
Definition: baseapi_test.cc:57
void Stop()
Definition: cycletimer.h:48
void Restart()
Definition: cycletimer.h:43
int64_t GetInMs() const
Definition: cycletimer.h:54
static int Defaults()
Definition: include_gunit.h:61
static std::string JoinPath(const std::string &s1, const std::string &s2)
Definition: include_gunit.h:65
static bool GetContents(const std::string &filename, std::string *out, int)
Definition: include_gunit.h:52