// Input is four Telugu code points: U+0C15 (letter KA), U+0C3F (vowel sign I),
// U+0C15 (letter KA), U+0C0E (letter E).
// The visible expectations split it into three entries: KA + vowel sign I,
// then a bare KA, then the letter E.
// NOTE(review): the statements between embedded source lines 20 and 26, and
// the closing brace, are not visible in this chunk; presumably they run the
// segmentation that fills `glyphs` -- confirm against the full file.
18TEST(ValidateGraphemeTest, MultipleSyllablesAreNotASingleGrapheme) {
19 std::string str =
"\u0c15\u0c3f\u0c15\u0c0e";
20 std::vector<std::string> glyphs;
26 EXPECT_EQ(glyphs[0], std::string(
"\u0c15\u0c3f"));
27 EXPECT_EQ(glyphs[1], std::string(
"\u0c15"));
28 EXPECT_EQ(glyphs[2], std::string(
"\u0c0e"));
// Input is a single Kannada code point: U+0CB9 (letter HA).
// NOTE(review): no assertions are visible in this chunk (embedded source
// lines after 33 are missing), so the expected segmentation cannot be
// stated from here.
31TEST(ValidateGraphemeTest, SingleConsonantOK) {
32 std::string str =
"\u0cb9";
33 std::vector<std::string> glyphs;
// Input is a Kannada consonant + vowel sign pair: U+0CB9 (letter HA) followed
// by U+0CBF (vowel sign I).
// NOTE(review): no assertions are visible in this chunk (embedded source
// lines after 43 are missing).
41TEST(ValidateGraphemeTest, SimpleCV) {
42 std::string str =
"\u0cb9\u0cbf";
43 std::vector<std::string> glyphs;
// Input is four Kannada code points: U+0CB9 (letter HA), U+0CCD (sign virama),
// U+0C95 (letter KA), U+0CBF (vowel sign I).
// The one visible expectation is that glyphs[1] equals virama + KA, i.e. the
// virama is grouped with the consonant that follows it.
// NOTE(review): embedded source lines 54-63 are missing here; which
// segmentation mode yields this grouping is not visible -- confirm.
51TEST(ValidateGraphemeTest, SubscriptConjunct) {
52 std::string str =
"\u0cb9\u0ccd\u0c95\u0cbf";
53 std::vector<std::string> glyphs;
64 EXPECT_EQ(glyphs[1], std::string(
"\u0ccd\u0c95"));
// Input is Malayalam U+0D15 (letter KA), U+0D4D (sign virama), U+200D (zero
// width joiner), U+0D24 (letter TA).
// The one visible expectation is that glyphs[0] equals KA + virama + ZWJ, i.e.
// the joiner stays attached to the preceding consonant and virama.
// NOTE(review): embedded source lines 70-79 are missing here; the call that
// produces this segmentation is not visible -- confirm.
67TEST(ValidateGraphemeTest, HalfFormJoiner) {
68 std::string str =
"\u0d15\u0d4d\u200d\u0d24";
69 std::vector<std::string> glyphs;
80 EXPECT_EQ(glyphs[0], std::string(
"\u0d15\u0d4d\u200d"));
// Input is Malayalam U+0D15 (letter KA), U+200D (zero width joiner), U+0D4D
// (sign virama), U+0D24 (letter TA) -- the joiner precedes the virama.
// The one visible expectation is that glyphs[1] equals ZWJ + virama.
// NOTE(review): embedded source lines 86-95 are missing here; the call that
// produces this segmentation is not visible -- confirm.
83TEST(ValidateGraphemeTest, TraditionalConjunctJoiner) {
84 std::string str =
"\u0d15\u200d\u0d4d\u0d24";
85 std::vector<std::string> glyphs;
96 EXPECT_EQ(glyphs[1], std::string(
"\u200d\u0d4d"));
// First input is Malayalam U+0D15 (letter KA), U+200C (zero width non-joiner),
// U+0D4D (sign virama), U+0D24 (letter TA) -- the non-joiner precedes the
// virama. The one visible expectation is that glyphs[1] equals ZWNJ + virama.
// `str` is then reassigned to the same pattern in Telugu: U+0C15 (letter KA),
// U+200C, U+0C4D (sign virama), U+0C24 (letter TA).
// NOTE(review): what is asserted for the Telugu string is not visible in this
// chunk (embedded source lines after 114 are missing) -- confirm.
99TEST(ValidateGraphemeTest, OpenConjunctNonJoiner) {
100 std::string str =
"\u0d15\u200c\u0d4d\u0d24";
101 std::vector<std::string> glyphs;
112 EXPECT_EQ(glyphs[1], std::string(
"\u200c\u0d4d"));
114 str =
"\u0c15\u200c\u0c4d\u0c24";
// Input is Malayalam U+0D15 (letter KA), U+0D4D (sign virama), U+200C (zero
// width non-joiner), U+0D24 (letter TA) -- the non-joiner follows the virama.
// Two different expectations for glyphs[1] are visible: first the letter TA
// alone, later virama + ZWNJ.
// NOTE(review): the lines between embedded source lines 128 and 134 are
// missing; presumably `glyphs` is recomputed with a different segmentation
// mode in between -- confirm against the full file.
121TEST(ValidateGraphemeTest, ExplicitViramaNonJoiner) {
122 std::string str =
"\u0d15\u0d4d\u200c\u0d24";
123 std::vector<std::string> glyphs;
128 EXPECT_EQ(glyphs[1], std::string(
"\u0d24"));
134 EXPECT_EQ(glyphs[1], std::string(
"\u0d4d\u200c"));
// Input is three Thai code points: U+0E14 (character DO DEK), U+0E38
// (character SARA U), U+0E4A (character MAI TRI).
// The one visible expectation is that glyphs[0] equals the base consonant
// alone, without the following vowel and tone mark.
// NOTE(review): embedded source lines 141-150 are missing here; which
// segmentation mode separates the marks from the base is not visible.
137TEST(ValidateGraphemeTest, ThaiGraphemes) {
139 std::string str =
"\u0e14\u0e38\u0e4a";
140 std::vector<std::string> glyphs;
151 EXPECT_EQ(glyphs[0], std::string(
"\u0e14"));
// Input is an ASCII apostrophe, Malayalam U+0D24 (letter TA), U+0D23 (letter
// NNA), U+0D32 (letter LA), U+0D4D (sign virama), a second apostrophe, and a
// trailing U+200D (zero width joiner) that follows the closing quote.
// Visible expectations: glyphs[1] is TA, glyphs[2] is NNA, and glyphs[3] is
// LA + virama + U+200C (zero width non-joiner).
// NOTE(review): the expected glyphs[3] contains U+200C, which does not occur
// in the input (the input only has U+200D, after the quote). Presumably the
// normalizer adds it after the word-final virama and drops the stray joiner
// -- confirm against the validator; the lines that would show this (embedded
// source lines 157-162 and after 165) are missing from this chunk.
154TEST(ValidateGraphemeTest, NoLonelyJoinersQuote) {
155 std::string str =
"'\u0d24\u0d23\u0d32\u0d4d'\u200d";
156 std::vector<std::string> glyphs;
163 EXPECT_EQ(glyphs[1], std::string(
"\u0d24"));
164 EXPECT_EQ(glyphs[2], std::string(
"\u0d23"));
165 EXPECT_EQ(glyphs[3], std::string(
"\u0d32\u0d4d\u200c"));
// NOTE(review): as written here the three macros below have empty replacement
// lists, and the three signatures that follow have neither a body nor a
// terminating semicolon. They look like signature summaries carried over from
// a cross-reference listing rather than real definitions -- confirm before
// relying on them.
#define EXPECT_EQ(val1, val2)
#define EXPECT_TRUE(condition)
#define EXPECT_FALSE(condition)
// Takes a string by const reference and returns a std::string.
std::string PrintString32WithUnicodes(const std::string &str)
// Takes a vector of strings by const reference and returns a std::string.
std::string PrintStringVectorWithUnicodes(const std::vector< std::string > &glyphs)
// Takes three mode selectors, an error-reporting flag, a C string `str8`, and
// a pointer to a vector of strings `graphemes`; returns bool.
// NOTE(review): presumably `graphemes` is the output parameter that the tests
// above read back as `glyphs` -- the call sites are not visible in this chunk.
bool NormalizeCleanAndSegmentUTF8(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)
TEST(TesseractInstanceTest, TestMultipleTessInstances)