tesseract v5.3.3.20231005
trainingsample.h
Go to the documentation of this file.
1// Copyright 2010 Google Inc. All Rights Reserved.
2// Author: rays@google.com (Ray Smith)
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7// http://www.apache.org/licenses/LICENSE-2.0
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13//
15
16#ifndef TESSERACT_TRAINING_TRAININGSAMPLE_H_
17#define TESSERACT_TRAINING_TRAININGSAMPLE_H_
18
19#include "elst.h"
20#include "featdefs.h"
21#include "intfx.h"
22#include "intmatcher.h"
23#include "matrix.h"
24#include "mf.h"
25#include "mfdefs.h"
26#include "picofeat.h"
27#include "shapetable.h"
28#include "unicharset.h"
29
30struct Pix;
31
32namespace tesseract {
33
34class IntFeatureMap;
35class IntFeatureSpace;
36class ShapeTable;
37
// Number of elements of cn_feature_.
static const int kNumCNParams = 4;
// Number of ways to shift the features when randomizing.
static const int kSampleYShiftSize = 5;
// Number of ways to scale the features when randomizing.
static const int kSampleScaleSize = 3;

// Compile-time primality test used to enforce the invariant on
// kSampleRandomSize below. Returns true iff value is a prime number.
// divisor is the next trial divisor and should be left at its default by
// callers. Written as a single return expression so that it is also a valid
// C++11 constexpr function; the comparison is phrased as divisor > value /
// divisor to avoid overflowing divisor * divisor.
constexpr bool IsPrimeSampleCount(int value, int divisor = 2) {
  return value < 2                    ? false
         : divisor > value / divisor  ? true
         : value % divisor == 0       ? false
                                      : IsPrimeSampleCount(value, divisor + 1);
}

// Total number of different ways to manipulate the features when randomizing.
// The first and last combinations are removed to avoid an excessive
// top movement (first) and an identity transformation (last).
// WARNING: To avoid patterned duplication of samples, be sure to keep
// kSampleRandomSize prime!
// Eg with current values (kSampleYShiftSize = 5 and kSampleScaleSize = 3)
// kSampleRandomSize is 13, which is prime.
static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2;
// Enforce the warning above at compile time instead of relying on a comment.
static_assert(IsPrimeSampleCount(kSampleRandomSize),
              "kSampleRandomSize must be prime to avoid patterned duplication of samples");
53
55public:
57 : class_id_(INVALID_UNICHAR_ID)
58 , font_id_(0)
59 , page_num_(0)
60 , num_features_(0)
61 , num_micro_features_(0)
62 , outline_length_(0)
63 , features_(nullptr)
64 , micro_features_(nullptr)
65 , weight_(1.0)
66 , max_dist_(0.0)
67 , sample_index_(0)
68 , features_are_indexed_(false)
69 , features_are_mapped_(false)
70 , is_error_(false) {}
72
73 // Saves the given features into a TrainingSample. The features are copied,
74 // so may be deleted afterwards. Delete the return value after use.
75 static TrainingSample *CopyFromFeatures(const INT_FX_RESULT_STRUCT &fx_info,
76 const TBOX &bounding_box,
77 const INT_FEATURE_STRUCT *features, int num_features);
78 // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining.
79 FEATURE_STRUCT *GetCNFeature() const;
80 // Constructs and returns a copy "randomized" by the method given by
81 // the randomizer index. If index is out of [0, kSampleRandomSize) then
82 // an exact copy is returned.
83 TrainingSample *RandomizedCopy(int index) const;
84 // Constructs and returns an exact copy.
85 TrainingSample *Copy() const;
86
87 // WARNING! Serialize/DeSerialize do not save/restore the "cache" data
88 // members, which is mostly the mapped features, and the weight.
89 // It is assumed these can all be reconstructed from what is saved.
90 // Writes to the given file. Returns false in case of error.
91 bool Serialize(FILE *fp) const;
92 // Creates from the given file. Returns nullptr in case of error.
93 // If swap is true, assumes a big/little-endian swap is needed.
94 static TrainingSample *DeSerializeCreate(bool swap, FILE *fp);
95 // Reads from the given file. Returns false in case of error.
96 // If swap is true, assumes a big/little-endian swap is needed.
97 bool DeSerialize(bool swap, FILE *fp);
98
99 // Extracts the needed information from the CHAR_DESC_STRUCT.
100 void ExtractCharDesc(int feature_type, int micro_type, int cn_type, int geo_type,
101 CHAR_DESC_STRUCT *char_desc);
102
103 // Sets the mapped_features_ from the features_ using the provided
104 // feature_space to the indexed versions of the features.
105 void IndexFeatures(const IntFeatureSpace &feature_space);
106
107 // Returns a pix representing the sample. (Int features only.)
108 Image RenderToPix(const UNICHARSET *unicharset) const;
109 // Displays the features in the given window with the given color.
110 void DisplayFeatures(ScrollView::Color color, ScrollView *window) const;
111
112 // Returns a pix of the original sample image. The pix is padded all round
113 // by padding wherever possible.
114 // The returned Pix must be pixDestroyed after use.
115 // If the input page_pix is nullptr, nullptr is returned.
116 Image GetSamplePix(int padding, Image page_pix) const;
117
118 // Accessors.
120 return class_id_;
121 }
// Sets class_id_, the unichar id that this sample represents.
122 void set_class_id(int id) {
123 class_id_ = id;
124 }
// Returns font_id_, the id of the font in which this sample was printed.
125 int font_id() const {
126 return font_id_;
127 }
// Sets font_id_, the id of the font in which this sample was printed.
128 void set_font_id(int id) {
129 font_id_ = id;
130 }
// Returns page_num_, the number of the page that the sample came from.
131 int page_num() const {
132 return page_num_;
133 }
// Sets page_num_, the number of the page that the sample came from.
134 void set_page_num(int page) {
135 page_num_ = page;
136 }
// Returns a const reference to bounding_box_, the bounding box of the sample
// in the original image.
137 const TBOX &bounding_box() const {
138 return bounding_box_;
139 }
// Copies box into bounding_box_, the bounding box of the sample in the
// original image.
140 void set_bounding_box(const TBOX &box) {
141 bounding_box_ = box;
142 }
// Returns num_features_, the number of INT_FEATURE_STRUCT entries in the
// features_ array.
143 uint32_t num_features() const {
144 return num_features_;
145 }
147 return features_;
148 }
// Returns num_micro_features_, the number of MicroFeature entries in the
// micro_features_ array.
149 uint32_t num_micro_features() const {
150 return num_micro_features_;
151 }
153 return micro_features_;
154 }
// Returns outline_length_, the total length of the outline in the baseline
// normalized coordinate space.
155 int outline_length() const {
156 return outline_length_;
157 }
// Returns element index of cn_feature_ (indexed by the NORM_PARAM_NAME enum).
// The index is not range-checked here; cn_feature_ has kNumCNParams elements,
// so the caller must pass an index in [0, kNumCNParams).
158 float cn_feature(int index) const {
159 return cn_feature_[index];
160 }
// Returns element index of geo_feature_ (indexed by the GeoParams enum in
// picofeat.h). The index is not range-checked here; geo_feature_ has GeoCount
// elements, so the caller must pass an index in [0, GeoCount).
161 int geo_feature(int index) const {
162 return geo_feature_[index];
163 }
// Returns weight_, the weight used for boosting training. This is part of the
// non-serialized cache data.
164 double weight() const {
165 return weight_;
166 }
// Sets weight_, the weight used for boosting training.
167 void set_weight(double value) {
168 weight_ = value;
169 }
// Returns max_dist_, the maximum distance to other samples of the same
// class/font used in computing the canonical sample. This is part of the
// non-serialized cache data.
170 double max_dist() const {
171 return max_dist_;
172 }
// Sets max_dist_, the maximum distance to other samples of the same
// class/font used in computing the canonical sample.
173 void set_max_dist(double value) {
174 max_dist_ = value;
175 }
// Returns sample_index_, the global index of this sample.
176 int sample_index() const {
177 return sample_index_;
178 }
180 sample_index_ = value;
181 }
// Returns the features_are_mapped_ flag. mapped_features() asserts that this
// flag is set before handing out mapped_features_.
182 bool features_are_mapped() const {
183 return features_are_mapped_;
184 }
// Returns a const reference to mapped_features_.
// ASSERT_HOST fails if features_are_mapped_ is not set.
185 const std::vector<int> &mapped_features() const {
186 ASSERT_HOST(features_are_mapped_);
187 return mapped_features_;
188 }
// Returns a const reference to mapped_features_, the same vector that
// mapped_features() returns: it holds either indexed or mapped features, as
// indicated by the two flags. ASSERT_HOST fails if features_are_indexed_ is
// not set.
189 const std::vector<int> &indexed_features() const {
190 ASSERT_HOST(features_are_indexed_);
191 return mapped_features_;
192 }
// Returns is_error_: true if the last classification was an error by the
// current definition.
193 bool is_error() const {
194 return is_error_;
195 }
// Sets is_error_, the flag recording whether the last classification was an
// error by the current definition.
196 void set_is_error(bool value) {
197 is_error_ = value;
198 }
199
200private:
201 // Unichar id that this sample represents. There obviously must be a
202 // reference UNICHARSET somewhere. Usually in TrainingSampleSet.
203 UNICHAR_ID class_id_;
204 // Font id in which this sample was printed. Refers to a fontinfo_table_ in
205 // MasterTrainer.
206 int font_id_;
207 // Number of page that the sample came from.
208 int page_num_;
209 // Bounding box of sample in original image.
210 TBOX bounding_box_;
211 // Number of INT_FEATURE_STRUCT in features_ array.
212 uint32_t num_features_;
213 // Number of MicroFeature in micro_features_ array.
214 uint32_t num_micro_features_;
215 // Total length of outline in the baseline normalized coordinate space.
216 // See comment in WERD_RES class definition for a discussion of coordinate
217 // spaces.
218 int outline_length_;
219 // Array of features.
220 INT_FEATURE_STRUCT *features_;
221 // Array of micro-features.
222 MicroFeature *micro_features_;
223 // The one and only CN feature. Indexed by NORM_PARAM_NAME enum.
224 float cn_feature_[kNumCNParams];
225 // The one and only geometric feature. (Aims at replacing cn_feature_).
226 // Indexed by GeoParams enum in picofeat.h
227 int geo_feature_[GeoCount];
228
229 // Non-serialized cache data.
230 // Weight used for boosting training.
231 double weight_;
232 // Maximum distance to other samples of same class/font used in computing
233 // the canonical sample.
234 double max_dist_;
235 // Global index of this sample.
236 int sample_index_;
237
238public:
239 // both are used in training tools
240 // hide after refactoring
241
242 // Indexed/mapped features, as indicated by the bools below.
243 std::vector<int> mapped_features_;
246
247private:
248 // True if the last classification was an error by the current definition.
249 bool is_error_;
250
251 // Randomizing factors.
252 static const int kYShiftValues[kSampleYShiftSize];
253 static const double kScaleValues[kSampleScaleSize];
254};
255
257
258} // namespace tesseract
259
260#endif // TESSERACT_TRAINING_TRAININGSAMPLE_H_
#define ELISTIZEH(CLASSNAME)
Definition: elst.h:803
#define ASSERT_HOST(x)
Definition: errcode.h:54
int value
bool DeSerialize(bool swap, FILE *fp, std::vector< T > &data)
Definition: helpers.h:205
bool Serialize(FILE *fp, const std::vector< T > &data)
Definition: helpers.h:236
@ GeoCount
Definition: picofeat.h:40
int UNICHAR_ID
Definition: unichar.h:34
std::array< float,(int) MicroFeatureParameter::MFCount > MicroFeature
Definition: mfdefs.h:36
const INT_FEATURE_STRUCT * features() const
const TBOX & bounding_box() const
void set_max_dist(double value)
const std::vector< int > & mapped_features() const
UNICHAR_ID class_id() const
uint32_t num_features() const
const std::vector< int > & indexed_features() const
void set_weight(double value)
uint32_t num_micro_features() const
int geo_feature(int index) const
void set_is_error(bool value)
float cn_feature(int index) const
const MicroFeature * micro_features() const
bool features_are_mapped() const
void set_bounding_box(const TBOX &box)
std::vector< int > mapped_features_
void set_sample_index(int value)
#define TESS_API
Definition: export.h:32