tesseract v5.3.3.20231005
normalis.h
Go to the documentation of this file.
1/**********************************************************************
2 * File: normalis.h (Formerly denorm.h)
3 * Description: Code for the DENORM class.
4 * Author: Ray Smith
5 *
6 * (C) Copyright 1992, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19#ifndef NORMALIS_H
20#define NORMALIS_H
21
22#include "image.h"
23
24#include <tesseract/export.h>
25
26#include <vector>
27
28struct Pix;
29
30namespace tesseract {
31
// Geometry of the baseline-normalized (BLN) character cell.
// All three values are compile-time constants so that they can be used in
// constant expressions (array bounds, static_assert, template arguments).
constexpr int kBlnCellHeight = 256;    // Full-height for baseline normalization.
constexpr int kBlnXHeight = 128;       // x-height for baseline normalization.
constexpr int kBlnBaselineOffset = 64; // offset for baseline normalization.
35
36class BLOCK;
37class FCOORD;
38class TBOX;
39class UNICHARSET;
40
41struct TBLOB;
42struct TPOINT;
43
// Possible normalization methods. Use NEGATIVE values as these also
// double up as markers for the last sub-classifier.
enum NormalizationMode {
  NM_BASELINE = -3,         // The original BL normalization mode.
  NM_CHAR_ISOTROPIC = -2,   // Character normalization but isotropic.
  NM_CHAR_ANISOTROPIC = -1  // The original CN normalization mode.
};
51
53public:
54 DENORM();
55
56 // Copying a DENORM is allowed.
57 DENORM(const DENORM &);
58 DENORM &operator=(const DENORM &);
59 ~DENORM();
60
61 // Setup the normalization transformation parameters.
62 // The normalizations applied to a blob are as follows:
63 // 1. An optional block layout rotation that was applied during layout
64 // analysis to make the textlines horizontal.
65 // 2. A normalization transformation (LocalNormTransform):
66 // Subtract the "origin"
67 // Apply an x,y scaling.
68 // Apply an optional rotation.
69 // Add back a final translation.
70 // The origin is in the block-rotated space, and is usually something like
71 // the x-middle of the word at the baseline.
72 // 3. Zero or more further normalization transformations that are applied
73 // in sequence, with a similar pattern to the first normalization transform.
74 //
75 // A DENORM holds the parameters of a single normalization, and can execute
76 // both the LocalNormTransform (a forwards normalization), and the
77 // LocalDenormTransform which is an inverse transform or de-normalization.
78 // A DENORM may point to a predecessor DENORM, which is actually the earlier
79 // normalization, so the full normalization sequence involves executing all
80 // predecessors first and then the transform in "this".
81 // Let x be image coordinates and that we have normalization classes A, B, C
82 // where we first apply A then B then C to get normalized x':
83 // x' = CBAx
84 // Then the backwards (to original coordinates) would be:
85 // x = A^-1 B^-1 C^-1 x'
86 // and A = B->predecessor_ and B = C->predecessor_
87 // NormTransform executes all predecessors recursively, and then this.
88 // NormTransform would be used to transform an image-based feature to
89 // normalized space for use in a classifier
90 // DenormTransform inverts this and then all predecessors. It can be
91 // used to get back to the original image coordinates from normalized space.
92 // The LocalNormTransform member executes just the transformation
93 // in "this" without the layout rotation or any predecessors. It would be
94 // used to run each successive normalization, eg the word normalization,
95 // and later the character normalization.
96
97 // Arguments:
98 // block: if not nullptr, then this is the first transformation, and
99 // block->re_rotation() needs to be used after the Denorm
100 // transformation to get back to the image coords.
101 // rotation: if not nullptr, apply this rotation after translation to the
102 // origin and scaling. (Usually a classify rotation.)
103 // predecessor: if not nullptr, then predecessor has been applied to the
104 // input space and needs to be undone to complete the inverse.
105 // The above pointers are not owned by this DENORM and are assumed to live
106 // longer than this denorm, except rotation, which is deep copied on input.
107 //
108 // x_origin: The x origin which will be mapped to final_xshift in the result.
109 // y_origin: The y origin which will be mapped to final_yshift in the result.
110 // Added to result of row->baseline(x) if not nullptr.
111 //
112 // x_scale: scale factor for the x-coordinate.
113 // y_scale: scale factor for the y-coordinate. Ignored if segs is given.
114 // Note that these scale factors apply to the same x and y system as the
115 // x-origin and y-origin apply, ie after any block rotation, but before
116 // the rotation argument is applied.
117 //
118 // final_xshift: The x component of the final translation.
119 // final_yshift: The y component of the final translation.
120 //
121 // In theory, any of the commonly used normalizations can be setup here:
122 // * Traditional baseline normalization on a word:
123 // SetupNormalization(block, nullptr, nullptr,
124 // box.x_middle(), baseline,
125 // kBlnXHeight / x_height, kBlnXHeight / x_height,
126 // 0, kBlnBaselineOffset);
127 // * "Numeric mode" baseline normalization on a word, in which the blobs
128 // are positioned with the bottom as the baseline is achieved by making
129 // a separate DENORM for each blob.
130 // SetupNormalization(block, nullptr, nullptr,
131 // box.x_middle(), box.bottom(),
132 // kBlnXHeight / x_height, kBlnXHeight / x_height,
133 // 0, kBlnBaselineOffset);
134 // * Anisotropic character normalization used by IntFx.
135 // SetupNormalization(nullptr, nullptr, denorm,
136 // centroid_x, centroid_y,
137 // 51.2 / ry, 51.2 / rx, 128, 128);
138 // * Normalize blob height to x-height (current OSD):
139 // SetupNormalization(nullptr, &rotation, nullptr,
140 // box.rotational_x_middle(rotation),
141 // box.rotational_y_middle(rotation),
142 // kBlnXHeight / box.rotational_height(rotation),
143 // kBlnXHeight / box.rotational_height(rotation),
144 // 0, kBlnBaselineOffset);
145 // * Secondary normalization for classification rotation (current):
146 // FCOORD rotation = block->classify_rotation();
147 // float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio;
148 // SetupNormalization(nullptr, &rotation, denorm,
149 // box.rotational_x_middle(rotation),
150 // box.rotational_y_middle(rotation),
151 // target_height / box.rotational_height(rotation),
152 // target_height / box.rotational_height(rotation),
153 // 0, kBlnBaselineOffset);
154 // * Proposed new normalizations for CJK: Between them there is then
155 // no need for further normalization at all, and the character fills the cell.
156 // ** Replacement for baseline normalization on a word:
157 // Scales height and width independently so that modal height and pitch
158 // fill the cell respectively.
159 // float cap_height = x_height / CCStruct::kXHeightCapRatio;
160 // SetupNormalization(block, nullptr, nullptr,
161 // box.x_middle(), cap_height / 2.0f,
162 // kBlnCellHeight / fixed_pitch,
163 // kBlnCellHeight / cap_height,
164 // 0, 0);
165 // ** Secondary normalization for classification (with rotation) (proposed):
166 // Requires a simple translation to the center of the appropriate character
167 // cell, no further scaling and a simple rotation (or nothing) about the
168 // cell center.
169 // FCOORD rotation = block->classify_rotation();
170 // SetupNormalization(nullptr, &rotation, denorm,
171 // fixed_pitch_cell_center,
172 // 0.0f,
173 // 1.0f,
174 // 1.0f,
175 // 0, 0);
176 void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor,
177 float x_origin, float y_origin, float x_scale, float y_scale,
178 float final_xshift, float final_yshift);
179
180 // Sets up the DENORM to execute a non-linear transformation based on
181 // preserving an even distribution of stroke edges. The transformation
182 // operates only within the given box, scaling input coords within the box
183 // non-linearly to a box of target_width by target_height, with all other
184 // coords being clipped to the box edge. As with SetupNormalization above,
185 // final_xshift and final_yshift are applied after scaling, and the bottom-
186 // left of box is used as a pre-scaling origin.
187 // x_coords is a collection of the x-coords of vertical edges for each
188 // y-coord starting at box.bottom().
189 // y_coords is a collection of the y-coords of horizontal edges for each
190 // x-coord starting at box.left().
191 // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
192 // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
193 // The second-level vectors must all be sorted in ascending order.
194 void SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width,
195 float target_height, float final_xshift, float final_yshift,
196 const std::vector<std::vector<int>> &x_coords,
197 const std::vector<std::vector<int>> &y_coords);
198
199 // Transforms the given coords one step forward to normalized space, without
200 // using any block rotation or predecessor.
201 void LocalNormTransform(const TPOINT &pt, TPOINT *transformed) const;
202 void LocalNormTransform(const FCOORD &pt, FCOORD *transformed) const;
203 // Transforms the given coords forward to normalized space using the
204 // full transformation sequence defined by the block rotation, the
205 // predecessors, deepest first, and finally this. If first_norm is not
206 // nullptr, then the first and deepest transformation used is first_norm,
207 // ending with this, and the block rotation will not be applied.
208 void NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const;
209 void NormTransform(const DENORM *first_norm, const FCOORD &pt, FCOORD *transformed) const;
210 // Transforms the given coords one step back to source space, without
211 // using to any block rotation or predecessor.
212 void LocalDenormTransform(const TPOINT &pt, TPOINT *original) const;
213 void LocalDenormTransform(const FCOORD &pt, FCOORD *original) const;
214 // Transforms the given coords all the way back to source image space using
215 // the full transformation sequence defined by this and its predecessors
216 // recursively, shallowest first, and finally any block re_rotation.
217 // If last_denorm is not nullptr, then the last transformation used will
218 // be last_denorm, and the block re_rotation will never be executed.
219 void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const;
220 void DenormTransform(const DENORM *last_denorm, const FCOORD &pt, FCOORD *original) const;
221
222 // Normalize a blob using blob transformations. Less accurate, but
223 // more accurately copies the old way.
224 void LocalNormBlob(TBLOB *blob) const;
225
226 // Fills in the x-height range accepted by the given unichar_id in blob
227 // coordinates, given its bounding box in the usual baseline-normalized
228 // coordinates, with some initial crude x-height estimate (such as word
229 // size) and this denoting the transformation that was used.
230 // Also returns the amount the character must have shifted up or down.
231 void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht,
232 float *max_xht, float *yshift) const;
233
234 // Prints the content of the DENORM for debug purposes.
235 void Print() const;
236
237 Image pix() const {
238 return pix_;
239 }
240 void set_pix(Image pix) {
241 pix_ = pix;
242 }
243 bool inverse() const {
244 return inverse_;
245 }
246 void set_inverse(bool value) {
247 inverse_ = value;
248 }
249 const DENORM *RootDenorm() const {
250 if (predecessor_ != nullptr) {
251 return predecessor_->RootDenorm();
252 }
253 return this;
254 }
255 const DENORM *predecessor() const {
256 return predecessor_;
257 }
258 // Accessors - perhaps should not be needed.
259 float x_scale() const {
260 return x_scale_;
261 }
262 float y_scale() const {
263 return y_scale_;
264 }
265 const BLOCK *block() const {
266 return block_;
267 }
268 void set_block(const BLOCK *block) {
269 block_ = block;
270 }
271
272private:
273 // Free allocated memory and clear pointers.
274 void Clear();
275 // Setup default values.
276 void Init();
277
278 // Best available image.
279 Image pix_;
280 // True if the source image is white-on-black.
281 bool inverse_;
282 // Block the word came from. If not null, block->re_rotation() takes the
283 // "untransformed" coordinates even further back to the original image.
284 // Used only on the first DENORM in a chain.
285 const BLOCK *block_;
286 // Rotation to apply between translation to the origin and scaling.
287 const FCOORD *rotation_;
288 // Previous transformation in a chain.
289 const DENORM *predecessor_;
290 // Non-linear transformation maps directly from each integer offset from the
291 // origin to the corresponding x-coord. Owned by the DENORM.
292 std::vector<float> *x_map_;
293 // Non-linear transformation maps directly from each integer offset from the
294 // origin to the corresponding y-coord. Owned by the DENORM.
295 std::vector<float> *y_map_;
296 // x-coordinate to be mapped to final_xshift_ in the result.
297 float x_origin_;
298 // y-coordinate to be mapped to final_yshift_ in the result.
299 float y_origin_;
300 // Scale factors for x and y coords. Applied to pre-rotation system.
301 float x_scale_;
302 float y_scale_;
303 // Destination coords of the x_origin_ and y_origin_.
304 float final_xshift_;
305 float final_yshift_;
306};
307
308} // namespace tesseract
309
310#endif
int value
const int kBlnXHeight
Definition: normalis.h:33
const int kBlnCellHeight
Definition: normalis.h:32
NormalizationMode
Definition: normalis.h:46
@ NM_BASELINE
Definition: normalis.h:47
@ NM_CHAR_ANISOTROPIC
Definition: normalis.h:49
@ NM_CHAR_ISOTROPIC
Definition: normalis.h:48
const int kBlnBaselineOffset
Definition: normalis.h:34
std::string Print(const T &value)
const DENORM * predecessor() const
Definition: normalis.h:255
float y_scale() const
Definition: normalis.h:262
const DENORM * RootDenorm() const
Definition: normalis.h:249
void set_block(const BLOCK *block)
Definition: normalis.h:268
void set_inverse(bool value)
Definition: normalis.h:246
void set_pix(Image pix)
Definition: normalis.h:240
Image pix() const
Definition: normalis.h:237
bool inverse() const
Definition: normalis.h:243
const BLOCK * block() const
Definition: normalis.h:265
float x_scale() const
Definition: normalis.h:259
#define TESS_API
Definition: export.h:32