tesseract v5.3.3.20231005
validate_khmer_test.cc
Go to the documentation of this file.
// (C) Copyright 2017, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
11
12#include "include_gunit.h"
13#include "normstrngs.h"
14#include "normstrngs_test.h"
15
16namespace tesseract {
17
18// Test some random Khmer words.
19TEST(ValidateKhmerTest, GoodKhmerWords) {
20 std::string str = "ព័ត៏មានប្លែកៗ";
22 str = "ទំនុកច្រៀង";
24 str = "កាលីហ្វូញ៉ា";
26 str = "ចាប់ពីផ្លូវ";
28}
29
30// Test some random Khmer words with dotted circles.
31TEST(ValidateKhmerTest, BadKhmerWords) {
32 std::string result;
33 // Multiple dependent vowels not allowed
34 std::string str = "\u1796\u17b6\u17b7";
36 str.c_str(), &result));
37 // Multiple shifters not allowed
38 str = "\u1798\u17c9\u17ca";
40 str.c_str(), &result));
41 // Multiple signs not allowed
42 str = "\u1780\u17b6\u17cb\u17cd";
44 str.c_str(), &result));
45}
46
47} // namespace tesseract
#define EXPECT_FALSE(condition)
Definition: gtest.h:1986
void ExpectGraphemeModeResults(const std::string &str, UnicodeNormMode u_mode, int unicode_count, int glyph_count, int grapheme_count, const std::string &target_str)
bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
Definition: normstrngs.cpp:152
TEST(TesseractInstanceTest, TestMultipleTessInstances)