// Source listing of ligature_table_test.cc, taken from the Doxygen
// documentation of tesseract v5.3.3.20231005.
1// (C) Copyright 2017, Google Inc.
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5// http://www.apache.org/licenses/LICENSE-2.0
6// Unless required by applicable law or agreed to in writing, software
7// distributed under the License is distributed on an "AS IS" BASIS,
8// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9// See the License for the specific language governing permissions and
10// limitations under the License.
11
12#include "ligature_table.h"
13#include "commandlineflags.h"
14#include "fileio.h"
15#include "include_gunit.h"
16#include "pango_font_info.h"
17
18namespace tesseract {
19
20#if 0 // not with NFC normalization
// Plain reference text containing no ligature code points. ("ſ" is U+017F
// LATIN SMALL LETTER LONG S, an ordinary letter.)
const char kEngNonLigatureText[] = "fidelity effigy ſteep";
// Same as above text, but with "fi" in the first word, "ffi" in the second
// word and "ſt" in the third word replaced with their respective ligatures
// (U+FB01, U+FB03 and U+FB05).
// NOTE(review): the ligature code points were reconstructed from the comments
// in this block (all three literals were identical in this copy) — confirm
// against the upstream file before re-enabling the tests that use them.
const char kEngLigatureText[] = "\uFB01delity e\uFB03gy \uFB05eep";
// Same as kEngLigatureText but with only "fi" (U+FB01) replaced, in both
// words. The test Verdana font does not support the "ffi" or "ſt" ligature.
const char kRenderableEngLigatureText[] = "\uFB01delity ef\uFB01gy ſteep";
28#endif
29
30static PangoFontMap *font_map;
31
33protected:
// Per-test setup: creates the file-level font map on first use (requested with
// CAIRO_FONT_TYPE_FT) and installs it as the default Pango/Cairo font map.
// NOTE(review): the embedded listing numbers jump from 34 to 36, so one line of
// this method is missing from this copy — confirm against the upstream file.
34 void SetUp() override {
// Create the map only once; later tests reuse the same instance.
36 if (!font_map) {
37 font_map = pango_cairo_font_map_new_for_font_type(CAIRO_FONT_TYPE_FT);
38 }
// Runs for every test, whether or not the map was just created.
39 pango_cairo_font_map_set_default(PANGO_CAIRO_FONT_MAP(font_map));
40 }
40 }
41
// Suite-level setup: installs the environment's locale as the global C++
// locale and points the font-related flags at the test directories.
// NOTE(review): the embedded listing numbers jump from 47 to 50, so two lines
// of this method are missing from this copy — confirm against the upstream
// file.
42 static void SetUpTestCase() {
// std::locale("") constructs the user-preferred locale from the environment.
43 static std::locale system_locale("");
44 std::locale::global(system_locale);
45
// Fonts are read from TESTING_DIR; the fontconfig temporary directory is set
// to the test temporary directory.
46 FLAGS_fonts_dir = TESTING_DIR;
47 FLAGS_fontconfig_tmpdir = FLAGS_test_tmpdir;
50 }
52};
53
54TEST_F(LigatureTableTest, DoesFillLigatureTables) {
55 EXPECT_GT(lig_table_->norm_to_lig_table().size(), 0);
56 EXPECT_GT(lig_table_->lig_to_norm_table().size(), 0);
57}
58
59#if 0 // not with NFC normalization
60TEST_F(LigatureTableTest, DoesAddLigatures) {
61 EXPECT_STREQ(kEngLigatureText, lig_table_->AddLigatures(kEngNonLigatureText, nullptr).c_str());
62}
63
64TEST_F(LigatureTableTest, DoesAddLigaturesWithSupportedFont) {
65 PangoFontInfo font;
66 EXPECT_TRUE(font.ParseFontDescriptionName("Verdana"));
67 printf("1:%s\n", kRenderableEngLigatureText);
68 printf("2:%s\n", lig_table_->AddLigatures(kEngNonLigatureText, &font).c_str());
69 EXPECT_STREQ(kRenderableEngLigatureText,
70 lig_table_->AddLigatures(kEngNonLigatureText, &font).c_str());
71}
72
73TEST_F(LigatureTableTest, DoesNotAddLigaturesWithUnsupportedFont) {
74 PangoFontInfo font;
75 EXPECT_TRUE(font.ParseFontDescriptionName("Lohit Hindi"));
76 EXPECT_STREQ(kEngNonLigatureText, lig_table_->AddLigatures(kEngNonLigatureText, &font).c_str());
77}
78
79TEST_F(LigatureTableTest, DoesRemoveLigatures) {
80 EXPECT_STREQ(kEngNonLigatureText, lig_table_->RemoveLigatures(kEngLigatureText).c_str());
81}
82#endif
83
84TEST_F(LigatureTableTest, TestCustomLigatures) {
85 const char *kTestCases[] = {
86 "act", "a\uE003", "publiſh", "publi\uE006", "ſince",
87 "\uE007nce", "aſleep", "a\uE008eep", "neceſſary", "nece\uE009ary",
88 };
89 for (size_t i = 0; i < countof(kTestCases); i += 2) {
90 EXPECT_STREQ(kTestCases[i + 1], lig_table_->AddLigatures(kTestCases[i], nullptr).c_str());
91 EXPECT_STREQ(kTestCases[i], lig_table_->RemoveLigatures(kTestCases[i + 1]).c_str());
92 EXPECT_STREQ(kTestCases[i], lig_table_->RemoveCustomLigatures(kTestCases[i + 1]).c_str());
93 }
94}
95
96#if 0 // not with NFC normalization
97TEST_F(LigatureTableTest, TestRemovesCustomLigatures) {
98 const char *kTestCases[] = {
99 "fiction",
100 "fi\uE003ion",
101 "fiction",
102 };
103 for (size_t i = 0; i < countof(kTestCases); i += 3) {
104 EXPECT_STREQ(kTestCases[i + 1], lig_table_->AddLigatures(kTestCases[i], nullptr).c_str());
105 EXPECT_STREQ(kTestCases[i + 2], lig_table_->RemoveCustomLigatures(kTestCases[i + 1]).c_str());
106 }
107}
108#endif
109
110} // namespace tesseract
// Cross-references from the Doxygen listing (symbols used above and where
// they are defined):
//   #define EXPECT_GT(val1, val2)                      gtest.h:2053
//   #define EXPECT_TRUE(condition)                     gtest.h:1982
//   #define EXPECT_STREQ(s1, s2)                       gtest.h:2112
//   const char kEngNonLigatureText[]
//   constexpr size_t countof(T const (&)[N]) noexcept  serialis.h:34
//   const char kEngLigatureText[]
//   TEST_F(EuroText, FastLatinOCR)
//   static LigatureTable * Get()
//   static void MakeTmpdir()                           include_gunit.h:38