19#include <allheaders.h>
// ASCII lowercase sample sentence used as rendering input; later in this file
// it is passed, with a running offset, to RenderAllFontsToImage.
28const char kEngText[] =
"the quick brown fox jumps over the lazy dog";
// Devanagari-script sample text (the literal also contains an ASCII '|').
// NOTE(review): presumably the input for the "Lohit Hindi" renderer test --
// the call site is not visible in this listing; confirm.
29const char kHinText[] =
"पिताने विवाह की | हो गई उद्विग्न वह सोचा";
// Hangul sample text with an embedded run of ASCII digits ("1234").
// NOTE(review): presumably the input for the "UnBatang" renderer tests --
// the call sites are not visible in this listing; confirm.
31const char kKorText[] =
"이는 것으로 다시 넣을 1234 수는 있지만 선택의 의미는";
33 "والفكر والصراع ، بالتأمل والفهم والتحليل ، "
34 "بالعلم والفن ، وأخيرا بالضحك أوبالبكاء ، ";
// File-local Pango font map. It is assigned from
// pango_cairo_font_map_new_for_font_type(CAIRO_FONT_TYPE_FT) and then
// installed as the default via pango_cairo_font_map_set_default.
41static PangoFontMap *font_map;
47 font_map = pango_cairo_font_map_new_for_font_type(CAIRO_FONT_TYPE_FT);
49 pango_cairo_font_map_set_default(PANGO_CAIRO_FONT_MAP(font_map));
53 static std::locale system_locale(
"");
54 std::locale::global(system_locale);
56 l_chooseDisplayProg(L_DISPLAY_WITH_XZGV);
57 FLAGS_fonts_dir = TESTING_DIR;
58 FLAGS_fontconfig_tmpdir = FLAGS_test_tmpdir;
67 const std::vector<BoxChar *> &boxchars =
renderer_->GetBoxes();
68 Boxa *boxes = boxaCreate(0);
69 for (
const auto &boxchar : boxchars) {
71 boxaAddBox(boxes,
const_cast<Box *
>(boxchar->box()), L_CLONE);
74 Image box_pix = pixDrawBoxaRandom(pix, boxes, 1);
76 pixDisplay(box_pix, 0, 0);
83 renderer_ = std::make_unique<StringRenderer>(
"Verdana 10", 600, 600);
87 EXPECT_GT(renderer_->GetBoxes().size(), 0);
88 DisplayClusterBoxes(pix);
91 renderer_ = std::make_unique<StringRenderer>(
"UnBatang 10", 600, 600);
93 EXPECT_GT(renderer_->GetBoxes().size(), 0);
94 DisplayClusterBoxes(pix);
97 renderer_ = std::make_unique<StringRenderer>(
"Lohit Hindi 10", 600, 600);
99 EXPECT_GT(renderer_->GetBoxes().size(), 0);
100 DisplayClusterBoxes(pix);
104 renderer_ = std::make_unique<StringRenderer>(
"Arab 10", 600, 600);
107 EXPECT_GT(renderer_->GetBoxes().size(), 0);
108 DisplayClusterBoxes(pix);
112 renderer_ = std::make_unique<StringRenderer>(
"Arab 10", 600, 600);
115 EXPECT_GT(renderer_->GetBoxes().size(), 0);
116 DisplayClusterBoxes(pix);
121 renderer_ = std::make_unique<StringRenderer>(
"Verdana 10", 600, 600);
123 renderer_->set_underline_start_prob(1.0);
124 renderer_->set_underline_continuation_prob(0);
128 EXPECT_GT(renderer_->GetBoxes().size(), 0);
129 DisplayClusterBoxes(pix);
131 renderer_->ClearBoxes();
134 renderer_->set_underline_start_prob(1.0);
135 renderer_->set_underline_continuation_prob(1.0);
138 EXPECT_GT(renderer_->GetBoxes().size(), 0);
139 DisplayClusterBoxes(pix);
141 renderer_->ClearBoxes();
144 renderer_->set_underline_start_prob(0.5);
145 renderer_->set_underline_continuation_prob(0.5);
148 EXPECT_GT(renderer_->GetBoxes().size(), 0);
149 DisplayClusterBoxes(pix);
154 const char kRawText[] =
"\n\n\n A \nB \nC \n\n\n";
155 const char kStrippedText[] =
" A B C ";
156 renderer_ = std::make_unique<StringRenderer>(
"Verdana 10", 600, 600);
158 EXPECT_EQ(strlen(kRawText), renderer_->RenderToImage(kRawText, strlen(kRawText), &pix));
160 const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
163 if (boxchars.size() == 7) {
165 for (
size_t i = 0;
i < boxchars.size(); ++
i) {
166 EXPECT_EQ(std::string(1, kStrippedText[
i]), boxchars[
i]->
ch());
169 DisplayClusterBoxes(pix);
174 renderer_ = std::make_unique<StringRenderer>(
"Arab 12", 600, 250);
175 const char kArabicLigature[] =
"لا";
179 renderer_->RenderToImage(kArabicLigature, strlen(kArabicLigature), &pix));
181 EXPECT_GT(renderer_->GetBoxes().size(), 0);
182 const std::vector<BoxChar *> &boxes = renderer_->GetBoxes();
186 DisplayClusterBoxes(pix);
189 renderer_ = std::make_unique<StringRenderer>(
"Arab 12", 600, 250);
190 const char kArabicMixedText[] =
"والفكر والصراع 1234,\nوالفكر لا والصراع";
191 renderer_->RenderToImage(kArabicMixedText, strlen(kArabicMixedText), &pix);
192 DisplayClusterBoxes(pix);
196static int FindBoxCharXCoord(
const std::vector<BoxChar *> &boxchars,
const std::string &
ch) {
197 for (
const auto &boxchar : boxchars) {
198 if (boxchar->ch() ==
ch) {
199 return boxchar->box()->x;
206 renderer_ = std::make_unique<StringRenderer>(
"Arab 10", 600, 600);
209 const char kArabicWord[] =
"\u0644\u0627\u0641\u0643\u0631";
210 const std::string kRevWord =
"\u0631\u0643\u0641\u0627\u0644";
211 renderer_->RenderToImage(kArabicWord, strlen(kArabicWord), &pix);
212 std::string boxes_str = renderer_->GetBoxesStr();
215 std::vector<std::string> texts;
218 for (
auto &text : texts) {
219 ltr_str += text.c_str();
231 renderer_ = std::make_unique<StringRenderer>(
"Arab 10", 600, 600);
234 const char kArabicWord[] =
"والفكر";
235 renderer_->RenderToImage(kArabicWord, strlen(kArabicWord), &pix);
236 EXPECT_GT(renderer_->GetBoxes().size(), 0);
237 const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
238 for (
size_t i = 1;
i < boxchars.size(); ++
i) {
239 EXPECT_GT(boxchars[
i - 1]->box()->
x, boxchars[
i]->box()->
x) << boxchars[
i - 1]->ch();
244 const char kEnglishWord[] =
"Google";
245 renderer_->ClearBoxes();
246 renderer_->RenderToImage(kEnglishWord, strlen(kEnglishWord), &pix);
247 EXPECT_EQ(boxchars.size(), strlen(kEnglishWord));
248 for (
size_t i = 1;
i < boxchars.size(); ++
i) {
249 EXPECT_LT(boxchars[
i - 1]->box()->
x, boxchars[
i]->box()->
x) << boxchars[
i - 1]->ch();
254 renderer_->ClearBoxes();
256 EXPECT_LT(FindBoxCharXCoord(boxchars,
"a"), FindBoxCharXCoord(boxchars,
"b"));
257 EXPECT_LT(FindBoxCharXCoord(boxchars,
"1"), FindBoxCharXCoord(boxchars,
"2"));
258 EXPECT_GT(FindBoxCharXCoord(boxchars,
"و"), FindBoxCharXCoord(boxchars,
"ر"));
264 renderer_ = std::make_unique<StringRenderer>(
"UnBatang 10", 600, 600);
265 renderer_->set_vertical_text(
true);
267 EXPECT_GT(renderer_->GetBoxes().size(), 0);
268 DisplayClusterBoxes(pix);
275 renderer_ = std::make_unique<StringRenderer>(
"Verdana 10", 600, 600);
277 int num_boxes_per_page = 0;
278 const int kNumTrials = 2;
279 for (
int i = 0;
i < kNumTrials; ++
i) {
283 EXPECT_GT(renderer_->GetBoxes().size(), 0);
284 if (!num_boxes_per_page) {
285 num_boxes_per_page = renderer_->GetBoxes().size();
287 EXPECT_EQ((
i + 1) * num_boxes_per_page, renderer_->GetBoxes().size());
289 for (
int j =
i * num_boxes_per_page; j < (
i + 1) * num_boxes_per_page; ++j) {
290 EXPECT_EQ(
i, renderer_->GetBoxes()[j]->page());
296 renderer_ = std::make_unique<StringRenderer>(
"Verdana 10", 600, 600);
300 EXPECT_GT(renderer_->GetBoxes().size(), 0);
301 const int num_boxes_per_page = renderer_->GetBoxes().size();
303 renderer_->ClearBoxes();
306 EXPECT_EQ(num_boxes_per_page, renderer_->GetBoxes().size());
310 renderer_ = std::make_unique<StringRenderer>(
"Verdana 10", 600, 600);
311 renderer_->set_add_ligatures(
true);
320 EXPECT_STREQ(
"fi", renderer_->GetBoxes()[0]->ch().c_str());
325 renderer_ = std::make_unique<StringRenderer>(
"Verdana 10", 600, 600);
333 EXPECT_STREQ(
"\uFB01", renderer_->GetBoxes()[0]->ch().c_str());
339 renderer_ = std::make_unique<StringRenderer>(
"Verdana 10", 600, 600);
341 EXPECT_GT(renderer_->StripUnrenderableWords(&text), 0);
346 renderer_ = std::make_unique<StringRenderer>(
"Verdana 10", 600, 600);
347 renderer_->set_output_word_boxes(
true);
353 const int kNumSpaces = words.size() - 1;
354 const int kExpectedNumBoxes = words.size() + kNumSpaces;
355 const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
356 EXPECT_EQ(kExpectedNumBoxes, boxchars.size());
358 for (
size_t i = 0;
i < boxchars.size();
i += 2) {
360 if (
i < boxchars.size() - 1) {
368 renderer_ = std::make_unique<StringRenderer>(
"Verdana 10", 600, 600);
369 renderer_->set_output_word_boxes(
true);
// Two-line input: the same words as the single-line English sample, with one
// space replaced by '\n'.
371 const char kMultlineText[] =
"the quick brown fox\njumps over the lazy dog";
// The byte count must come from the buffer actually being rendered. It was
// previously strlen(kEngText), which only worked because both strings happen
// to have the same length; editing either one would have rendered the wrong
// number of bytes while the expected value still tracked kMultlineText.
372 EXPECT_EQ(strlen(kMultlineText), renderer_->RenderToImage(kMultlineText, strlen(kMultlineText), &pix));
375 std::vector<std::string> words;
376 for (
auto &line :
split(kMultlineText,
'\n')) {
377 for (
auto &word :
split(line,
' ')) {
378 words.push_back(word);
381 const int kNumSeparators = words.size() - 1;
382 const int kExpectedNumBoxes = words.size() + kNumSeparators;
383 const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
384 EXPECT_EQ(kExpectedNumBoxes, boxchars.size());
386 for (
size_t i = 0;
i < boxchars.size();
i += 2) {
388 if (
i + 1 < boxchars.size()) {
396 renderer_ = std::make_unique<StringRenderer>(
"Verdana 10", 1200, 1200);
398 std::string font_used;
402 offset += renderer_->RenderAllFontsToImage(1.0,
kEngText + offset, strlen(
kEngText + offset),
409 pixDisplay(pix, 0, 0);
412 }
while (offset < strlen(
kEngText));
416 renderer_ = std::make_unique<StringRenderer>(
"Verdana 10", 500, 200);
417 const std::string word =
"A- -B C-D A BC";
420 renderer_->RenderToImage(joined_word.c_str(), joined_word.length(), &pix);
422 const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
423 const std::string kWordJoinerUTF8 =
"\u2060";
424 ASSERT_EQ(word.length(), boxchars.size());
425 for (
size_t i = 0;
i < boxchars.size(); ++
i) {
432 renderer_ = std::make_unique<StringRenderer>(
"Verdana 10", 500, 200);
433 renderer_->set_drop_uncovered_chars(
true);
434 const std::string kWord =
"office";
435 const std::string kCleanWord =
"oice";
437 EXPECT_FALSE(renderer_->font().CanRenderString(kWord.c_str(), kWord.length()));
438 EXPECT_FALSE(renderer_->font().CoversUTF8Text(kWord.c_str(), kWord.length()));
439 int offset = renderer_->RenderToImage(kWord.c_str(), kWord.length(), &pix);
441 const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
443 ASSERT_EQ(kCleanWord.length(), boxchars.size());
444 for (
size_t i = 0;
i < boxchars.size(); ++
i) {
445 EXPECT_EQ(kCleanWord.substr(
i, 1), boxchars[
i]->ch());
451TEST(ConvertBasicLatinToFullwidthLatinTest, DoesConvertBasicLatin) {
452 const std::string kHalfAlpha =
"ABCD";
453 const std::string kFullAlpha =
"ABCD";
456 const std::string kHalfDigit =
"0123";
457 const std::string kFullDigit =
"0123";
460 const std::string kHalfSym =
"()[]:;!?";
461 const std::string kFullSym =
"()[]:;!?";
465TEST(ConvertBasicLatinToFullwidthLatinTest, DoesNotConvertFullwidthLatin) {
466 const std::string kFullAlpha =
"ABCD";
469 const std::string kFullDigit =
"0123";
472 const std::string kFullSym =
"()[]:;!?";
476TEST(ConvertBasicLatinToFullwidthLatinTest, DoesNotConvertNonLatin) {
477 const std::string kHalfKana =
"アイウエオ";
478 const std::string kFullKana =
"アイウエオ";
483TEST(ConvertBasicLatinToFullwidthLatinTest, DoesNotConvertSpace) {
484 const std::string kHalfSpace =
" ";
485 const std::string kFullSpace =
" ";
492TEST(ConvertFullwidthLatinToBasicLatinTest, DoesConvertFullwidthLatin) {
493 const std::string kHalfAlpha =
"ABCD";
494 const std::string kFullAlpha =
"ABCD";
497 const std::string kHalfDigit =
"0123";
498 const std::string kFullDigit =
"0123";
501 const std::string kHalfSym =
"()[]:;!?";
502 const std::string kFullSym =
"()[]:;!?";
506TEST(ConvertFullwidthLatinToBasicLatinTest, DoesNotConvertBasicLatin) {
507 const std::string kHalfAlpha =
"ABCD";
510 const std::string kHalfDigit =
"0123";
513 const std::string kHalfSym =
"()[]:;!?";
517TEST(ConvertFullwidthLatinToBasicLatinTest, DoesNotConvertNonLatin) {
518 const std::string kHalfKana =
"アイウエオ";
519 const std::string kFullKana =
"アイウエオ";
524TEST(ConvertFullwidthLatinToBasicLatinTest, DoesNotConvertSpace) {
525 const std::string kHalfSpace =
" ";
526 const std::string kFullSpace =
" ";
#define ASSERT_EQ(val1, val2)
#define EXPECT_EQ(val1, val2)
#define EXPECT_NE(val1, val2)
#define EXPECT_GT(val1, val2)
#define EXPECT_TRUE(condition)
#define EXPECT_STREQ(s1, s2)
#define EXPECT_FALSE(condition)
#define EXPECT_STRNE(s1, s2)
#define EXPECT_LT(val1, val2)
BOOL_PARAM_FLAG(display, false, "Display image for inspection")
bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool continue_on_failure, std::vector< TBOX > *boxes, std::vector< std::string > *texts, std::vector< std::string > *box_texts, std::vector< int > *pages)
const char kEngNonLigatureText[]
const char kEngLigatureText[]
const std::vector< std::string > split(const std::string &s, char c)
TEST_F(EuroText, FastLatinOCR)
TEST(TesseractInstanceTest, TestMultipleTessInstances)
static void SoftInitFontConfig()
static std::string InsertWordJoiners(const std::string &text)
static std::string ConvertBasicLatinToFullwidthLatin(const std::string &text)
static std::string ConvertFullwidthLatinToBasicLatin(const std::string &text)
void DisplayClusterBoxes(Image pix)
std::unique_ptr< StringRenderer > renderer_
static void SetUpTestCase()