// Test AddACharacter. NOTE: this is an extract with gaps -- the number fused to
// the front of each statement is the original file's line number, and several
// original lines (including the closing braces) are not present here.
// Visible flow: decode and log three label vectors, build a copy of labels1
// with null1 replaced by null2, and expect that copy to match labels2.
32TEST(LangModelTest, AddACharacter) {
// Plain ASCII sample text.
33 constexpr char kTestString[] =
"Simple ASCII string to encode !@#$%&";
// ASCII sample text that ends with the Rupee sign.
34 constexpr char kTestStringRupees[] =
"ASCII string with Rupee symbol ₹";
36 std::string script_dir = LANGDATA_DIR;
41 std::string version_str =
"TestVersion";
// Output goes under the test temp directory; the path is logged.
43 std::string output_dir = FLAGS_test_tmpdir;
44 LOG(
INFO) <<
"Output dir=" << output_dir <<
"\n";
// Name of the first language.
45 std::string lang1 =
"eng";
46 bool pass_through_recoder =
false;
54 bool lang_is_rtl =
false;
// Tail of an argument list; the first line of the call is not in this extract.
// NOTE(review): presumably a CombineLangModel call for lang1 -- confirm.
57 pass_through_recoder, words, puncs, numbers, lang_is_rtl,
nullptr,
// Path of the form <output_dir>/<lang1>/<lang1>.traineddata.
60 std::string traineddata1 =
file::JoinPath(output_dir, lang1, lang1) +
".traineddata";
// The statement that fills labels1 is not visible in this extract.
63 std::vector<int> labels1;
// Decode labels1 back to text with trainer1 and log it.
65 std::string test1_decoded = trainer1.
DecodeLabels(labels1);
66 std::string test1_str(&test1_decoded[0], test1_decoded.length());
67 LOG(
INFO) <<
"Labels1=" << test1_str <<
"\n";
// Snapshot of the unicharset size; where it is compared is not visible here.
70 int size_before = unicharset.
size();
// Name of the second language.
75 std::string lang2 =
"extended";
// Tail of a second argument list (call start and remaining arguments missing).
77 pass_through_recoder, words, puncs, numbers, lang_is_rtl,
// Path of the form <output_dir>/<lang2>/<lang2>.traineddata.
80 std::string traineddata2 =
file::JoinPath(output_dir, lang2, lang2) +
".traineddata";
// The statement that fills labels2 is not visible in this extract.
83 std::vector<int> labels2;
// Decode labels2 with trainer2 and log it.
85 std::string test2_decoded = trainer2.
DecodeLabels(labels2);
86 std::string test2_str(&test2_decoded[0], test2_decoded.length());
87 LOG(
INFO) <<
"Labels2=" << test2_str <<
"\n";
// The statement that fills labels3 is not visible in this extract.
89 std::vector<int> labels3;
// Decode labels3 with trainer2 and log it.
91 std::string test3_decoded = trainer2.
DecodeLabels(labels3);
92 std::string test3_str(&test3_decoded[0], test3_decoded.length());
93 LOG(
INFO) <<
"labels3=" << test3_str <<
"\n";
// labels1_v has the same length as labels1. Entries equal to null1 are
// written as null2; the definitions of null1/null2 are not visible here.
101 std::vector<int> labels1_v(labels1.size());
102 for (
unsigned i = 0;
i < labels1.size(); ++
i) {
103 if (labels1[
i] == null1) {
104 labels1_v[
i] = null2;
// Copies the entry unchanged. NOTE(review): the `else` that presumably
// separates this from the assignment above is missing from the extract.
106 labels1_v[
i] = labels1[
i];
// The remapped labels1 copy is matched against the contents of labels2.
109 EXPECT_THAT(labels1_v, testing::ElementsAreArray(&labels2[0], labels2.size()));
// Test AddACharacterHindi. NOTE: this is an extract with gaps -- the number
// fused to the front of each statement is the original file's line number, and
// several original lines (including the closing braces) are not present here.
// Visible flow: decode and log three label vectors, build a copy of labels1
// with null1 replaced by null2, and expect that copy to match labels2.
117TEST(LangModelTest, AddACharacterHindi) {
// Devanagari sample text.
118 constexpr char kTestString[] =
"हिन्दी में एक लाइन लिखें";
// Devanagari sample text that also contains the Rupee sign and Devanagari digits.
119 constexpr char kTestStringRupees[] =
"हिंदी में रूपये का चिन्ह प्रयोग करें ₹१००.००";
121 std::string script_dir = LANGDATA_DIR;
126 std::string version_str =
"TestVersion";
// Output goes under the test temp directory; the path is logged.
128 std::string output_dir = FLAGS_test_tmpdir;
129 LOG(
INFO) <<
"Output dir=" << output_dir <<
"\n";
// Name of the first language.
130 std::string lang1 =
"hin";
131 bool pass_through_recoder =
false;
139 bool lang_is_rtl =
false;
// Tail of an argument list; the first line of the call is not in this extract.
// NOTE(review): presumably a CombineLangModel call for lang1 -- confirm.
142 pass_through_recoder, words, puncs, numbers, lang_is_rtl,
nullptr,
// Path of the form <output_dir>/<lang1>/<lang1>.traineddata.
145 std::string traineddata1 =
file::JoinPath(output_dir, lang1, lang1) +
".traineddata";
// The statement that fills labels1 is not visible in this extract.
148 std::vector<int> labels1;
// Decode labels1 back to text with trainer1 and log it.
150 std::string test1_decoded = trainer1.
DecodeLabels(labels1);
151 std::string test1_str(&test1_decoded[0], test1_decoded.length());
152 LOG(
INFO) <<
"Labels1=" << test1_str <<
"\n";
// Snapshot of the unicharset size; where it is compared is not visible here.
155 int size_before = unicharset.
size();
// Name of the second language.
160 std::string lang2 =
"extendedhin";
// Tail of a second argument list (call start and remaining arguments missing).
162 pass_through_recoder, words, puncs, numbers, lang_is_rtl,
// Path of the form <output_dir>/<lang2>/<lang2>.traineddata.
165 std::string traineddata2 =
file::JoinPath(output_dir, lang2, lang2) +
".traineddata";
// The statement that fills labels2 is not visible in this extract.
168 std::vector<int> labels2;
// Decode labels2 with trainer2 and log it.
170 std::string test2_decoded = trainer2.
DecodeLabels(labels2);
171 std::string test2_str(&test2_decoded[0], test2_decoded.length());
172 LOG(
INFO) <<
"Labels2=" << test2_str <<
"\n";
// The statement that fills labels3 is not visible in this extract.
174 std::vector<int> labels3;
// Decode labels3 with trainer2 and log it.
176 std::string test3_decoded = trainer2.
DecodeLabels(labels3);
177 std::string test3_str(&test3_decoded[0], test3_decoded.length());
178 LOG(
INFO) <<
"labels3=" << test3_str <<
"\n";
// labels1_v has the same length as labels1. Entries equal to null1 are
// written as null2; the definitions of null1/null2 are not visible here.
186 std::vector<int> labels1_v(labels1.size());
187 for (
unsigned i = 0;
i < labels1.size(); ++
i) {
188 if (labels1[
i] == null1) {
189 labels1_v[
i] = null2;
// Copies the entry unchanged. NOTE(review): the `else` that presumably
// separates this from the assignment above is missing from the extract.
191 labels1_v[
i] = labels1[
i];
// The remapped labels1 copy is matched against the contents of labels2.
194 EXPECT_THAT(labels1_v, testing::ElementsAreArray(&labels2[0], labels2.size()));
// Reference listing: macro names and function signatures only. No macro
// expansion text and no function body is shown in this extract.
// Assertion macros; of these, only EXPECT_THAT appears in the visible test lines.
#define EXPECT_THAT(value, matcher)
#define EXPECT_EQ(val1, val2)
#define EXPECT_GT(val1, val2)
#define EXPECT_TRUE(condition)
#define EXPECT_FALSE(condition)
void SetupBasicProperties(bool report_errors, bool decompose, UNICHARSET *unicharset)
std::string TestDataNameToPath(const std::string &name)
std::string ReadFile(const std::string &filename, FileReader reader)
// The parameters from pass_through_recoder through lang_is_rtl, followed by
// reader/writer, line up with the argument tails visible in the tests.
int CombineLangModel(const UNICHARSET &unicharset, const std::string &script_dir, const std::string &version_str, const std::string &output_dir, const std::string &lang, bool pass_through_recoder, const std::vector< std::string > &words, const std::vector< std::string > &puncs, const std::vector< std::string > &numbers, bool lang_is_rtl, FileReader reader, FileWriter writer)
const std::vector< std::string > split(const std::string &s, char c)
TEST(TesseractInstanceTest, TestMultipleTessInstances)
void unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style)
bool load_from_file(const char *const filename, bool skip_fragments)
// Called in the tests as trainer1.DecodeLabels(...) and trainer2.DecodeLabels(...).
std::string DecodeLabels(const std::vector< int > &labels)
bool EncodeString(const std::string &str, std::vector< int > *labels) const
bool InitCharSet(const std::string &traineddata_path)
// NOTE(review): the tests call file::JoinPath with three arguments, while this
// signature lists two -- confirm which overload is actually used.
static std::string JoinPath(const std::string &s1, const std::string &s2)