tesseract v5.3.3.20231005
lstm_recode_test.cc
Go to the documentation of this file.
1// (C) Copyright 2017, Google Inc.
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5// http://www.apache.org/licenses/LICENSE-2.0
6// Unless required by applicable law or agreed to in writing, software
7// distributed under the License is distributed on an "AS IS" BASIS,
8// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9// See the License for the specific language governing permissions and
10// limitations under the License.
11
12#include "lstm_test.h"
13
14namespace tesseract {
15
16// Tests that training with unicharset recoding learns faster than without,
17// for Korean. This test is split in two, so it can be run sharded.
18
19TEST_F(LSTMTrainerTest, RecodeTestKorBase) {
20 // A basic single-layer, bi-di 1d LSTM on Korean.
21 SetupTrainer("[1,1,0,32 Lbx96 O1c1]", "kor-full", "kor/kor.unicharset",
22 "kor.Arial_Unicode_MS.exp0.lstmf", false, true, 5e-4, false, "kor");
23 double kor_full_err = TrainIterations(kTrainerIterations * 2);
24 EXPECT_LT(kor_full_err, 88);
25 // EXPECT_GT(kor_full_err, 85);
26 LOG(INFO) << "********** Expected < 88 ************\n";
27}
28
29TEST_F(LSTMTrainerTest, RecodeTestKor) {
30 // A basic single-layer, bi-di 1d LSTM on Korean.
31 SetupTrainer("[1,1,0,32 Lbx96 O1c1]", "kor-recode", "kor/kor.unicharset",
32 "kor.Arial_Unicode_MS.exp0.lstmf", true, true, 5e-4, false, "kor");
33 double kor_recode_err = TrainIterations(kTrainerIterations);
34 EXPECT_LT(kor_recode_err, 60);
35 LOG(INFO) << "********** Expected < 60 ************\n";
36}
37
38// Tests that the given string encodes and decodes back to the same string,
39// with recoding both on and off, for Korean.
40
41TEST_F(LSTMTrainerTest, EncodeDecodeBothTestKor) {
42 TestEncodeDecodeBoth("kor", "한국어 위키백과에 오신 것을 환영합니다!");
43}
44
45} // namespace tesseract.
@ LOG
@ INFO
Definition: log.h:28
#define EXPECT_LT(val1, val2)
Definition: gtest.h:2049
const int kTrainerIterations
Definition: lstm_test.h:34
TEST_F(EuroText, FastLatinOCR)