28TEST(ValidateIndicTest, AddsJoinerToTerminalVirama) {
29 std::string str =
"\u0c15\u0c4d";
30 std::string target_str =
"\u0c15\u0c4d\u200c";
37TEST(ValidateIndicTest, OnlyOneDependentVowel) {
38 std::string str =
"\u0d15\u0d3e\u0d42";
53TEST(ValidateIndicTest, OnlyOneVowelModifier) {
54 std::string str =
"\u0c26\u0c4d\u0c01";
57 str.c_str(), &result));
59 EXPECT_EQ(std::string(
"\u0c26\u0c4d\u200c\u0c01"), result);
61 str =
"\u0995\u0983\u0981";
63 str.c_str(), &result));
66 str =
"\u0d15\u0d02\u0d02";
68 str.c_str(), &result));
80TEST(ValidateIndicTest, VowelModifierMustBeLast) {
81 std::string str =
"\u0c28\u0c02\u0c3f";
87 str =
"\u0c28\u0c3f\u0c02";
102TEST(ValidateIndicTest, MatrasFollowConsonantsNotVowels) {
103 std::string str =
"\u0c05\u0c47";
108 str =
"\u0c1e\u0c3e";
116TEST(ValidateIndicTest, SubGraphemes) {
117 std::string str =
"\u0d3e";
128TEST(ValidateIndicTest, Nukta) {
129 std::string str =
"\u0c95\u0cbc\u0ccd\u0cb9";
130 std::vector<std::string> glyphs;
135 EXPECT_EQ(glyphs[2], std::string(
"\u0ccd\u0cb9"));
137 std::string str2 =
"\u0c95\u0ccd\u0cbc\u0cb9";
142TEST(ValidateIndicTest, SinhalaRakaransaya) {
143 std::string str =
"\u0d9a\u0dca\u200d\u0dbb";
149 std::vector<std::string> glyphs;
154 EXPECT_EQ(glyphs[1], std::string(
"\u0dca\u200d\u0dbb"));
163TEST(ValidateIndicTest, SinhalaYansaya) {
164 std::string str =
"\u0d9a\u0dca\u200d\u0dba";
176 std::vector<std::string> glyphs;
181 EXPECT_EQ(glyphs[1], std::string(
"\u0dca\u200d\u0dba"));
184TEST(ValidateIndicTest, SinhalaRepaya) {
185 std::string str =
"\u0d9a\u0dbb\u0dca\u200d\u0db8";
186 std::vector<std::string> glyphs;
191 EXPECT_EQ(glyphs[1], std::string(
"\u0dbb\u0dca\u200d\u0db8"));
196 EXPECT_EQ(glyphs[1], std::string(
"\u0dbb\u0dca\u200d"));
199TEST(ValidateIndicTest, SinhalaSpecials) {
201 std::string str =
"\u0dc0\u0d9c\u0dca\u200d\u0dbb\u0dca\u200d\u0dbb\u0dca\u200d";
202 std::vector<std::string> glyphs;
207 EXPECT_EQ(glyphs[0], std::string(
"\u0dc0"));
208 EXPECT_EQ(glyphs[1], std::string(
"\u0d9c"));
209 EXPECT_EQ(glyphs[2], std::string(
"\u0dca\u200d\u0dbb"));
210 EXPECT_EQ(glyphs[3], std::string(
"\u0dca\u200d"));
211 EXPECT_EQ(glyphs[4], std::string(
"\u0dbb\u0dca\u200d"));
212 str =
"\u0dc3\u0dbb\u0dca\u200d\u0dbb\u0dca\u200d\u0dcf";
217 EXPECT_EQ(glyphs[0], std::string(
"\u0dc3"));
218 EXPECT_EQ(glyphs[1], std::string(
"\u0dbb\u0dca\u200d"));
219 EXPECT_EQ(glyphs[2], std::string(
"\u0dbb\u0dca\u200d"));
220 EXPECT_EQ(glyphs[3], std::string(
"\u0dcf"));
#define EXPECT_EQ(val1, val2)
#define EXPECT_TRUE(condition)
#define EXPECT_FALSE(condition)
std::string PrintString32WithUnicodes(const std::string &str)
void ExpectGraphemeModeResults(const std::string &str, UnicodeNormMode u_mode, int unicode_count, int glyph_count, int grapheme_count, const std::string &target_str)
std::string PrintStringVectorWithUnicodes(const std::vector< std::string > &glyphs)
bool NormalizeCleanAndSegmentUTF8(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)
bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
TEST(TesseractInstanceTest, TestMultipleTessInstances)