tesseract v5.3.3.20231005
lstm_test.cc
Go to the documentation of this file.
1// (C) Copyright 2017, Google Inc.
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5// http://www.apache.org/licenses/LICENSE-2.0
6// Unless required by applicable law or agreed to in writing, software
7// distributed under the License is distributed on an "AS IS" BASIS,
8// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9// See the License for the specific language governing permissions and
10// limitations under the License.
11
12// Generating the training data:
13// If the format of the lstmf (ImageData) file changes, the training data will
14// have to be regenerated as follows:
15//
16// Use --xsize 800 for text2image to be similar to original training data.
17//
18// tesstrain.py --fonts_dir /usr/share/fonts --lang eng \
19// --linedata_only --noextract_font_properties --langdata_dir ../langdata_lstm \
20// --tessdata_dir ../tessdata --output_dir ~/tesseract/test/testdata \
21// --fontlist "Arial" --maxpages 10
22//
23
24#include "lstm_test.h"
25
26namespace tesseract {
27
28// Tests that some simple networks can learn Arial and meet accuracy targets.
30 // A Convolver sliding window classifier without LSTM.
31 SetupTrainer(
32 "[1,32,0,1 Ct5,5,16 Mp4,4 Ct1,1,16 Ct3,3,128 Mp4,1 Ct1,1,64 S2,1 "
33 "Ct1,1,64O1c1]",
34 "no-lstm", "eng/eng.unicharset", "eng.Arial.exp0.lstmf", false, false, 2e-4, false, "eng");
35 double non_lstm_err = TrainIterations(kTrainerIterations * 4);
36 EXPECT_LT(non_lstm_err, 98);
37 LOG(INFO) << "********** Expected < 98 ************\n";
38
39 // A basic single-layer, single direction LSTM.
40 SetupTrainerEng("[1,1,0,32 Lfx100 O1c1]", "1D-lstm", false, false);
41 double lstm_uni_err = TrainIterations(kTrainerIterations * 2);
42 EXPECT_LT(lstm_uni_err, 86);
43 LOG(INFO) << "********** Expected < 86 ************\n";
44 // Beats the convolver. (Although it does have a lot more weights, it still
45 // iterates faster.)
46 EXPECT_LT(lstm_uni_err, non_lstm_err);
47}
48
49// Color learns almost as fast as normalized grey/2D.
51 // A basic single-layer, single direction LSTM.
52 SetupTrainerEng("[1,32,0,3 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2D-color-lstm", true, true);
53 double lstm_uni_err = TrainIterations(kTrainerIterations);
54 EXPECT_LT(lstm_uni_err, 85);
55 // EXPECT_GT(lstm_uni_err, 66);
56 LOG(INFO) << "********** Expected < 85 ************\n";
57}
58
60 // A basic single-layer, bi-di 1d LSTM.
61 SetupTrainerEng("[1,1,0,32 Lbx100 O1c1]", "bidi-lstm", false, false);
62 double lstm_bi_err = TrainIterations(kTrainerIterations);
63 EXPECT_LT(lstm_bi_err, 75);
64 LOG(INFO) << "********** Expected < 75 ************\n";
65 // Int mode training is dead, so convert the trained network to int and check
66 // that its error rate is close to the float version.
67 TestIntMode(kTrainerIterations);
68}
69
70// Tests that a 2d-2-layer network learns correctly.
71// It takes a lot of iterations to get there.
73 // A 2-layer LSTM with a 2-D feature-extracting LSTM on the bottom.
74 SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false,
75 false);
76 double lstm_2d_err = TrainIterations(kTrainerIterations * 3 / 2);
77 EXPECT_LT(lstm_2d_err, 98);
78 // EXPECT_GT(lstm_2d_err, 90);
79 LOG(INFO) << "********** Expected < 98 ************\n";
80 // Int mode training is dead, so convert the trained network to int and check
81 // that its error rate is close to the float version.
82 TestIntMode(kTrainerIterations);
83}
84
85// Tests that a 2d-2-layer network with Adam does *a lot* better than
86// without it.
88 // A 2-layer LSTM with a 2-D feature-extracting LSTM on the bottom.
89 SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false,
90 true);
91 double lstm_2d_err = TrainIterations(kTrainerIterations);
92 EXPECT_LT(lstm_2d_err, 70);
93 LOG(INFO) << "********** Expected < 70 ************\n";
94 TestIntMode(kTrainerIterations);
95}
96
97// Trivial test of training speed on a fairly complex network.
99 SetupTrainerEng(
100 "[1,30,0,1 Ct5,5,16 Mp2,2 L2xy24 Ct1,1,48 Mp5,1 Ct1,1,32 S3,1 Lbx64 "
101 "O1c1]",
102 "2-D-2-layer-lstm", false, true);
103 TrainIterations(kTrainerIterations);
104 LOG(INFO) << "********** *** ************\n";
105}
106
107// Tests that two identical networks trained the same get the same results.
108// Also tests that the same happens with a serialize/deserialize in the middle.
109TEST_F(LSTMTrainerTest, DeterminismTest) {
110 SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false,
111 false);
112 double lstm_2d_err_a = TrainIterations(kTrainerIterations);
113 double act_error_a = trainer_->ActivationError();
114 double char_error_a = trainer_->CharError();
115 std::vector<char> trainer_a_data;
116 EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, *trainer_, &trainer_a_data));
117 SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false,
118 false);
119 double lstm_2d_err_b = TrainIterations(kTrainerIterations);
120 double act_error_b = trainer_->ActivationError();
121 double char_error_b = trainer_->CharError();
122 EXPECT_FLOAT_EQ(lstm_2d_err_a, lstm_2d_err_b);
123 EXPECT_FLOAT_EQ(act_error_a, act_error_b);
124 EXPECT_FLOAT_EQ(char_error_a, char_error_b);
125 // Now train some more iterations.
126 lstm_2d_err_b = TrainIterations(kTrainerIterations / 3);
127 act_error_b = trainer_->ActivationError();
128 char_error_b = trainer_->CharError();
129 // Unpack into a new trainer and train that some more too.
130 SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false,
131 false);
132 EXPECT_TRUE(trainer_->ReadTrainingDump(trainer_a_data, *trainer_));
133 lstm_2d_err_a = TrainIterations(kTrainerIterations / 3);
134 act_error_a = trainer_->ActivationError();
135 char_error_a = trainer_->CharError();
136 EXPECT_FLOAT_EQ(lstm_2d_err_a, lstm_2d_err_b);
137 EXPECT_FLOAT_EQ(act_error_a, act_error_b);
138 EXPECT_FLOAT_EQ(char_error_a, char_error_b);
139 LOG(INFO) << "********** *** ************\n";
140}
141
142// The baseline network against which to test the built-in softmax.
143TEST_F(LSTMTrainerTest, SoftmaxBaselineTest) {
144 // A basic single-layer, single direction LSTM.
145 SetupTrainerEng("[1,1,0,32 Lfx96 O1c1]", "1D-lstm", false, true);
146 double lstm_uni_err = TrainIterations(kTrainerIterations * 2);
147 EXPECT_LT(lstm_uni_err, 60);
148 // EXPECT_GT(lstm_uni_err, 48);
149 LOG(INFO) << "********** Expected < 60 ************\n";
150 // Check that it works in int mode too.
151 TestIntMode(kTrainerIterations);
152 // If we run TestIntMode again, it tests that int_mode networks can
153 // serialize and deserialize correctly.
154 double delta = TestIntMode(kTrainerIterations);
155 // The two tests (both of int mode this time) should be almost identical.
156 LOG(INFO) << "Delta in Int mode error rates = " << delta << "\n";
157 EXPECT_LT(delta, 0.01);
158}
159
160// Tests that the built-in softmax does better than the external one,
161// which has an error rate slightly less than 55%, as tested by
162// SoftmaxBaselineTest.
163TEST_F(LSTMTrainerTest, SoftmaxTest) {
164 // LSTM with a built-in softmax can beat the external softmax.
165 SetupTrainerEng("[1,1,0,32 LS96]", "Lstm-+-softmax", false, true);
166 double lstm_sm_err = TrainIterations(kTrainerIterations * 2);
167 EXPECT_LT(lstm_sm_err, 49.0);
168 LOG(INFO) << "********** Expected < 49 ************\n";
169 // Check that it works in int mode too.
170 TestIntMode(kTrainerIterations);
171}
172
173// Tests that the built-in encoded softmax does better than the external one.
174// It takes a lot of iterations to get there.
175TEST_F(LSTMTrainerTest, EncodedSoftmaxTest) {
176 // LSTM with a built-in encoded softmax can beat the external softmax.
177 SetupTrainerEng("[1,1,0,32 LE96]", "Lstm-+-softmax", false, true);
178 double lstm_sm_err = TrainIterations(kTrainerIterations * 2);
179 EXPECT_LT(lstm_sm_err, 62.0);
180 LOG(INFO) << "********** Expected < 62 ************\n";
181 // Check that it works in int mode too.
182 TestIntMode(kTrainerIterations);
183}
184
185// Tests that layer access methods work correctly.
186TEST_F(LSTMTrainerTest, TestLayerAccess) {
187 // A 2-layer LSTM with a Squashed feature-extracting LSTM on the bottom.
188 SetupTrainerEng("[1,32,0,1 Ct5,5,16 Mp2,2 Lfys32 Lbx128 O1c1]", "SQU-lstm", false, false);
189 // Number of layers.
190 const size_t kNumLayers = 8;
191 // Expected layer names.
192 const char *kLayerIds[kNumLayers] = {":0", ":1:0", ":1:1", ":2", ":3:0", ":4:0", ":4:1:0", ":5"};
193 const char *kLayerNames[kNumLayers] = {"Input", "Convolve", "ConvNL", "Maxpool",
194 "Lfys32", "Lbx128LTR", "Lbx128", "Output"};
195 // Expected number of weights.
196 const int kNumWeights[kNumLayers] = {0,
197 0,
198 16 * (25 + 1),
199 0,
200 32 * (4 * (32 + 16 + 1)),
201 128 * (4 * (128 + 32 + 1)),
202 128 * (4 * (128 + 32 + 1)),
203 112 * (2 * 128 + 1)};
204
205 auto layers = trainer_->EnumerateLayers();
206 EXPECT_EQ(kNumLayers, layers.size());
207 for (unsigned i = 0; i < kNumLayers && i < layers.size(); ++i) {
208 EXPECT_STREQ(kLayerIds[i], layers[i].c_str());
209 EXPECT_STREQ(kLayerNames[i], trainer_->GetLayer(layers[i])->name().c_str());
210 EXPECT_EQ(kNumWeights[i], trainer_->GetLayer(layers[i])->num_weights());
211 }
212}
213
214} // namespace tesseract.
@ LOG
@ INFO
Definition: log.h:28
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:2043
#define EXPECT_FLOAT_EQ(val1, val2)
Definition: gtest.h:2144
#define EXPECT_TRUE(condition)
Definition: gtest.h:1982
#define EXPECT_STREQ(s1, s2)
Definition: gtest.h:2112
#define EXPECT_LT(val1, val2)
Definition: gtest.h:2049
const int kTrainerIterations
Definition: lstm_test.h:34
TEST_F(EuroText, FastLatinOCR)
@ NO_BEST_TRAINER
Definition: lstmtrainer.h:62