tesseract v5.3.3.20231005
validate_myanmar_test.cc
Go to the documentation of this file.
1// (C) Copyright 2017, Google Inc.
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5// http://www.apache.org/licenses/LICENSE-2.0
6// Unless required by applicable law or agreed to in writing, software
7// distributed under the License is distributed on an "AS IS" BASIS,
8// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9// See the License for the specific language governing permissions and
10// limitations under the License.
11
12#include "include_gunit.h"
13#include "normstrngs.h"
14#include "normstrngs_test.h"
15
16namespace tesseract {
17
18// Test some random Myanmar words.
// GoogleTest case that assigns two sample Myanmar strings to `str` in turn.
// NOTE(review): this text is an extract of a rendered source listing. Listing
// lines 21 and 23 (the statement that follows each assignment) are absent, so
// the check applied to each string is not visible here. Presumably each
// missing line calls ExpectGraphemeModeResults -- the page's cross-reference
// list names it but no visible line invokes it -- confirm against the
// original file before relying on this.
19TEST(ValidateMyanmarTest, GoodMyanmarWords) {
20 std::string str = "လျှာကသိသည် "; // No viramas in this one.
// NOTE(review): listing line 21 is missing from this extract.
22 str = "တုန္လႈပ္မႈ ";
// NOTE(review): listing line 23 is missing from this extract.
24}
25
26// Test some random Myanmar words with dotted circles.
// GoogleTest case that feeds two Myanmar strings to normalization calls and
// finishes each half by checking that `result` equals the input `str`.
// NOTE(review): this text is an extract of a rendered source listing. Listing
// lines 30, 34, 37, 41, 44 and 47 -- the opening of every multi-line call --
// are absent, so only the trailing arguments of each call are visible. Which
// EXPECT_* macro wraps each call, and the leading enum arguments, cannot be
// determined from here; confirm against the original file.
27TEST(ValidateMyanmarTest, BadMyanmarWords) {
28 std::string str = "က်န္းမာေရး";
29 std::vector<std::string> glyphs;
// Tail of a call whose opener (listing line 30) is missing. The visible
// arguments line up with the trailing parameters (g_mode, report_errors,
// str8, graphemes) of NormalizeCleanAndSegmentUTF8 as listed in the page's
// cross-references -- presumably that is the callee; verify.
31 GraphemeNormMode::kCombined, true, str.c_str(),
32 &glyphs));
33 std::string result;
// Tail of a call whose opener (listing line 34) is missing. The visible
// arguments line up with the trailing parameters (str8, normalized) of
// NormalizeUTF8String -- presumably that is the callee; verify.
35 str.c_str(), &result));
36 // It works if the grapheme normalization is turned off.
// Tail of a second call writing into `result`; opener (listing line 37) is
// missing. Per the comment above, presumably this variant disables grapheme
// normalization and is expected to succeed -- TODO confirm.
38 str.c_str(), &result));
// The output written to `result` must equal the input string.
39 EXPECT_EQ(str, result);
// Second sample string; the same sequence of checks follows, this time with
// GraphemeNormMode::kGlyphSplit in the first visible call tail.
40 str = "ခုႏွစ္";
// Opener (listing line 41) is missing.
42 GraphemeNormMode::kGlyphSplit, true, str.c_str(),
43 &glyphs));
// Opener (listing line 44) is missing.
45 str.c_str(), &result));
46 // It works if the grapheme normalization is turned off.
// Opener (listing line 47) is missing.
48 str.c_str(), &result));
// The output written to `result` must equal the input string.
49 EXPECT_EQ(str, result);
50}
51
52} // namespace tesseract
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:2043
#define EXPECT_TRUE(condition)
Definition: gtest.h:1982
#define EXPECT_FALSE(condition)
Definition: gtest.h:1986
void ExpectGraphemeModeResults(const std::string &str, UnicodeNormMode u_mode, int unicode_count, int glyph_count, int grapheme_count, const std::string &target_str)
bool NormalizeCleanAndSegmentUTF8(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)
Definition: normstrngs.cpp:179
bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
Definition: normstrngs.cpp:152
TEST(TesseractInstanceTest, TestMultipleTessInstances)