54 Image pix = pixCreate(width, height, depth);
60 int left, top, right, bottom;
63 if (!it->
BoundingBox(level, &left, &top, &right, &bottom)) {
67 LOG(
INFO) <<
"BBox: [L:" << left <<
", T:" << top <<
", R:" << right <<
", B:" << bottom
73 pixRasterop(pix, left, top, right - left, bottom - top, PIX_SRC ^ PIX_DST, block_pix, 0, 0);
76 pixRasterop(pix, left, top, pixGetWidth(block_pix), pixGetHeight(block_pix),
77 PIX_SRC & PIX_DST, block_pix, 0, 0);
79 CHECK(block_pix !=
nullptr);
81 }
while (it->
Next(level));
84 pixRasterop(pix, 0, 0, width, height, PIX_SRC ^ PIX_DST,
src_pix_, 0, 0);
86 Image binary_pix = pixThresholdToBinary(pix, 128);
88 pixInvert(binary_pix, binary_pix);
94 pixCountPixels(pix, &pixcount,
nullptr);
95 if (pixcount > max_diff) {
97 LOG(
INFO) <<
"outfile = " << outfile <<
"\n";
98 pixWrite(outfile.c_str(), pix, IFF_PNG);
101 LOG(
INFO) <<
"At level " << level <<
": pix diff = " << pixcount <<
"\n";
109 LOG(
INFO) <<
"Text Test Level " << level <<
"\n";
128 }
while (it->
Next(level));
129 EXPECT_STREQ(truth.c_str(), result.c_str()) <<
"Rebuild failed at Text Level " << level;
132 void VerifyRebuilds(
int block_limit,
int para_limit,
int line_limit,
int word_limit,
157 int num_words,
int *expected_reading_order,
158 int num_reading_order_entries)
const {
159 std::vector<StrongScriptDirection> gv_word_dirs;
160 for (
int i = 0;
i < num_words;
i++) {
161 gv_word_dirs.push_back(word_dirs[
i]);
164 std::vector<int> calculated_order;
167 std::vector<int> correct_order(expected_reading_order,
168 expected_reading_order + num_reading_order_entries);
169 EXPECT_EQ(correct_order, calculated_order);
177 int num_words)
const {
178 std::vector<StrongScriptDirection> gv_word_dirs;
179 for (
int i = 0;
i < num_words;
i++) {
180 gv_word_dirs.push_back(word_dirs[
i]);
186 std::vector<int> output_copy(
output);
187 std::sort(output_copy.begin(), output_copy.end());
190 while (j < output_copy.size() && output_copy[j] < 0) {
193 for (
int i = 0;
i < num_words;
i++, j++) {
194 if (output_copy[j] !=
i) {
199 if (j != output_copy.size()) {
203 std::vector<int> empty;
204 EXPECT_EQ(
output, empty) <<
" permutation of 0.." << num_words - 1 <<
" not found in "
205 << (in_ltr_context ?
"ltr" :
"rtl") <<
" context.";
259 SetImage(
"phototest.tif");
270 LOG(
INFO) <<
"Verifying image rebuilds 1 (pageiterator)"
272 VerifyRebuilds(10, 10, 0, 0, 0, p_it);
275 char *result = api_.GetUTF8Text();
280 LOG(
INFO) <<
"Verifying image rebuilds 2a (resultiterator)"
284 LOG(
INFO) <<
"Verifying text rebuilds 1 (resultiterator)"
286 VerifyAllText(ocr_text_, r_it);
289 LOG(
INFO) <<
"Verifying image rebuilds 2b (resultiterator)"
298 << x1 <<
',' << y1 <<
")->(" << x2 <<
',' << y2 <<
")\n";
308 int product = x2 * y3 - x3 * y2;
314#ifndef DISABLED_LEGACY_ENGINE
315 int pointsize, font_id;
316 bool bold, italic, underlined, monospace, serif, smallcaps;
317 const char *font = r_it->
WordFontAttributes(&bold, &italic, &underlined, &monospace, &serif,
318 &smallcaps, &pointsize, &font_id);
323#ifdef DISABLED_LEGACY_ENGINE
324 LOG(
INFO) <<
"Word " << word_str <<
", conf " << confidence <<
"\n";
326 LOG(
INFO) <<
"Word " << word_str <<
" in font " << font
327 <<
", id " << font_id <<
", size " << pointsize
328 <<
", conf " << confidence <<
"\n";
331#ifndef DISABLED_LEGACY_ENGINE
349 SetImage(
"8087_054.3B.tif");
354 VerifyRebuilds(2073, 2073, 2080, 2081, 2090, it);
360 SetImage(
"8087_054.3G.tif");
365 VerifyRebuilds(600, 600, 600, 600, 600, it);
371#ifdef DISABLED_LEGACY_ENGINE
375 SetImage(
"8071_093.3B.tif");
376 char *result = api_.GetUTF8Text();
380 int found_dropcaps = 0;
381 int found_smallcaps = 0;
382 int false_positives = 0;
384 bool bold, italic, underlined, monospace, serif, smallcaps;
385 int pointsize, font_id;
387 &pointsize, &font_id);
389 if (word_str !=
nullptr) {
390 LOG(
INFO) <<
"Word " << word_str
391 <<
" is " << (smallcaps ?
"SMALLCAPS" :
"Normal") <<
"\n";
395 if (strcmp(word_str,
"SHE") == 0 || strcmp(word_str,
"MOPED") == 0 ||
396 strcmp(word_str,
"RALPH") == 0 || strcmp(word_str,
"KINNEY") == 0 ||
397 strcmp(word_str,
"BENNETT") == 0) {
410 LOG(
ERROR) <<
"Symbol " << sym_str <<
" of word " << word_str <<
" is dropcap";
433TEST_F(ResultIteratorTest, SubSuperTest) {
434 SetImage(
"0146_281.3B.tif");
435 char* result = api_.GetUTF8Text();
437 ResultIterator* r_it = api_.GetIterator();
441 const char kAllowedSupers[] =
"O0123456789-";
443 int found_supers = 0;
444 int found_normal = 0;
446 if (r_it->SymbolIsSubscript()) {
448 }
else if (r_it->SymbolIsSuperscript()) {
450 if (strchr(kAllowedSupers, result[0]) ==
nullptr) {
452 LOG(
ERROR) <<
"Char " << result <<
" in word " << word <<
" is unexpected super!";
454 EXPECT_TRUE(strchr(kAllowedSupers, result[0]) !=
nullptr);
463 LOG(
INFO) <<
"Subs = " << found_subs <<
", supers= " << found_supers
464 <<
", normal = " << found_normal <<
"\n";
482 int reading_order_ltr_context[] = {
485 ExpectTextlineReadingOrder(
true, word_dirs,
countof(word_dirs), reading_order_ltr_context,
486 countof(reading_order_ltr_context));
487 ExpectTextlineReadingOrder(
false, word_dirs,
countof(word_dirs), reading_order_rtl_context,
488 countof(reading_order_rtl_context));
496 int reading_order_ltr_context[] = {0, 1, 2, 3, 4, 5, 6, 7};
502 ExpectTextlineReadingOrder(
true, word_dirs,
countof(word_dirs), reading_order_ltr_context,
503 countof(reading_order_ltr_context));
504 ExpectTextlineReadingOrder(
false, word_dirs,
countof(word_dirs), reading_order_rtl_context,
505 countof(reading_order_rtl_context));
513 int reading_order_rtl_context[] = {7, 6, 5, 4, 3, 2, 1, 0};
514 ExpectTextlineReadingOrder(
false, word_dirs,
countof(word_dirs), reading_order_rtl_context,
515 countof(reading_order_rtl_context));
521 const int kNumWords(7);
522 const int kNumCombos = 1 << (2 * kNumWords);
524 for (
int i = 0;
i < kNumCombos;
i++) {
527 for (
auto &word_dir : word_dirs) {
531 VerifySaneTextlineOrder(
true, word_dirs, kNumWords);
532 VerifySaneTextlineOrder(
false, word_dirs, kNumWords);
538 SetImage(
"5318c4b679264.jpg");
539 char *result = api_.GetUTF8Text();
545 if (word_str !=
nullptr) {
546 LOG(
INFO) <<
"Word " << word_str <<
":\n";
552 if (char_str ==
nullptr) {
553 LOG(
INFO) <<
"Null char choice"
556 LOG(
INFO) <<
"Char choice " << char_str <<
"\n";
558 CHECK(char_str !=
nullptr);
559 }
while (c_it.
Next());
571 SetImage(
"trainingitalline.tif");
575 char *result = api_.GetUTF8Text();
581 if (word_str !=
nullptr) {
587 CHECK(char_str !=
nullptr);
589 LOG(
INFO) <<
"Char " << char_str <<
" has confidence " << confidence <<
"\n";
595 LOG(
INFO) <<
"Empty word found"
#define EXPECT_EQ(val1, val2)
#define EXPECT_GE(val1, val2)
#define EXPECT_TRUE(condition)
#define EXPECT_STREQ(s1, s2)
#define EXPECT_LE(val1, val2)
#define EXPECT_FALSE(condition)
#define ASSERT_GE(val1, val2)
@ PSM_AUTO
Fully automatic page segmentation, but no OSD.
@ PSM_SINGLE_BLOCK
Assume a single uniform block of text. (Default.)
constexpr size_t countof(T const (&)[N]) noexcept
TEST_F(EuroText, FastLatinOCR)
void SetPageSegMode(PageSegMode mode)
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params)
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
const char * WordFontAttributes(bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const
bool SymbolIsDropcap() const
float Confidence(PageIteratorLevel level) const
const char * GetUTF8Text() const
virtual bool Next(PageIteratorLevel level)
virtual bool IsAtBeginningOf(PageIteratorLevel level) const
Pix * GetImage(PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const
bool Empty(PageIteratorLevel level) const
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const
Pix * GetBinaryImage(PageIteratorLevel level) const
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
static void CalculateTextlineOrder(bool paragraph_is_ltr, const std::vector< StrongScriptDirection > &word_dirs, std::vector< int > *reading_order)
bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const override
static const int kMinorRunEnd
static const int kMinorRunStart
virtual char * GetUTF8Text(PageIteratorLevel level) const
bool IsAtBeginningOf(PageIteratorLevel level) const override
bool Next(PageIteratorLevel level) override
static std::string JoinPath(const std::string &s1, const std::string &s2)
tesseract::TessBaseAPI api_
void VerifyAllText(const std::string &truth, ResultIterator *it)
std::string OutputNameToPath(const std::string &name)
std::string TessdataPath()
void VerifyIteratorText(const std::string &truth, PageIteratorLevel level, ResultIterator *it)
std::string TestDataNameToPath(const std::string &name)
void VerifyRebuild(int max_diff, PageIteratorLevel level, PageIterator *it)
void ExpectTextlineReadingOrder(bool in_ltr_context, const StrongScriptDirection *word_dirs, int num_words, int *expected_reading_order, int num_reading_order_entries) const
void VerifyRebuilds(int block_limit, int para_limit, int line_limit, int word_limit, int symbol_limit, PageIterator *it, PageIteratorLevel maxlevel=tesseract::RIL_SYMBOL)
void VerifySaneTextlineOrder(bool in_ltr_context, const StrongScriptDirection *word_dirs, int num_words) const
~ResultIteratorTest() override=default
void SetImage(const char *filename)