tesseract v5.3.3.20231005
apiexample_test.cc
Go to the documentation of this file.
1
2// File: apiexample_test.cc
3// Description: Api Test for Tesseract using text fixtures and parameters.
4// Tests for Devanagari, Latin and Arabic scripts are disabled by default.
5// Disabled tests can be run when required by using the
6// --gtest_also_run_disabled_tests argument.
7// ./unittest/apiexample_test --gtest_also_run_disabled_tests
8//
9// Author: ShreeDevi Kumar
10//
11// Licensed under the Apache License, Version 2.0 (the "License");
12// you may not use this file except in compliance with the License.
13// You may obtain a copy of the License at
14// http://www.apache.org/licenses/LICENSE-2.0
15// Unless required by applicable law or agreed to in writing, software
16// distributed under the License is distributed on an "AS IS" BASIS,
17// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18// See the License for the specific language governing permissions and
19// limitations under the License.
21
22// expects clone of tessdata_fast repo in ../../tessdata_fast
23
24//#include "log.h"
25#include <allheaders.h>
26#include <tesseract/baseapi.h>
27#include <time.h>
28#include <fstream>
29#include <iostream>
30#include <locale>
31#include <memory> // std::unique_ptr
32#include <string>
33#include "include_gunit.h"
34#include "image.h"
35
36namespace tesseract {
37
38class QuickTest : public testing::Test {
39protected:
40 void SetUp() override {
41 start_time_ = time(nullptr);
42 }
43 void TearDown() override {
44#ifndef NDEBUG
45 // Debug builds can be very slow, so allow 4 min for OCR of a test image.
46 // apitest_example including disabled tests takes about 18 min on ARMv7.
47 const time_t MAX_SECONDS_FOR_TEST = 240;
48#else
49 // Release builds typically need less than 10 s for OCR of a test image,
50 // apitest_example including disabled tests takes about 90 s on ARMv7.
51 const time_t MAX_SECONDS_FOR_TEST = 55;
52#endif
53 const time_t end_time = time(nullptr);
54 EXPECT_TRUE(end_time - start_time_ <= MAX_SECONDS_FOR_TEST)
55 << "The test took too long - " << ::testing::PrintToString(end_time - start_time_);
56 }
58};
59
60void OCRTester(const char *imgname, const char *groundtruth, const char *tessdatadir,
61 const char *lang) {
62 // log.info() << tessdatadir << " for language: " << lang << std::endl;
63 char *outText;
64 std::locale loc("C"); // You can also use "" for the default system locale
65 std::ifstream file(groundtruth);
66 file.imbue(loc); // Use it for file input
67 std::string gtText((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
68 auto api = std::make_unique<tesseract::TessBaseAPI>();
69 ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
70 Image image = pixRead(imgname);
71 ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
72 api->SetImage(image);
73 outText = api->GetUTF8Text();
74 EXPECT_EQ(gtText, outText) << "Phototest.tif OCR does not match ground truth for "
76 api->End();
77 api->ClearPersistentCache();
78 delete[] outText;
79 image.destroy();
80}
81
82class MatchGroundTruth : public QuickTest, public ::testing::WithParamInterface<const char *> {};
83
84TEST_P(MatchGroundTruth, FastPhototestOCR) {
85 OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt", TESSDATA_DIR "_fast",
86 GetParam());
87}
88
89TEST_P(MatchGroundTruth, BestPhototestOCR) {
90 OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt", TESSDATA_DIR "_best",
91 GetParam());
92}
93
94TEST_P(MatchGroundTruth, TessPhototestOCR) {
95 OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt", TESSDATA_DIR, GetParam());
96}
97
100INSTANTIATE_TEST_SUITE_P(DISABLED_Deva, MatchGroundTruth, ::testing::Values("script/Devanagari"));
102
103class EuroText : public QuickTest {};
104
105TEST_F(EuroText, FastLatinOCR) {
106 OCRTester(TESTING_DIR "/eurotext.tif", TESTING_DIR "/eurotext.txt", TESSDATA_DIR "_fast",
107 "script/Latin");
108}
109
110// script/Latin for eurotext.tif does not match the ground truth
111// for tessdata and tessdata_best,
112// so those two models are not tested here.
113
114} // namespace tesseract
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:2043
#define ASSERT_FALSE(condition)
Definition: gtest.h:1994
#define EXPECT_TRUE(condition)
Definition: gtest.h:1982
#define ASSERT_TRUE(condition)
Definition: gtest.h:1990
void OCRTester(const char *imgname, const char *groundtruth, const char *tessdatadir, const char *lang)
INSTANTIATE_TEST_SUITE_P(Eng, MatchGroundTruth, ::testing::Values("eng"))
TEST_P(MatchGroundTruth, FastPhototestOCR)
TEST_F(EuroText, FastLatinOCR)
::std::string PrintToString(const T &value)
internal::ValueArray< T... > Values(T... v)
void destroy()
Definition: image.cpp:32
void TearDown() override
void SetUp() override