tesseract v5.3.3.20231005
baselinedetect.h
Go to the documentation of this file.
1
2// File: baselinedetect.h
3// Description: Initial Baseline Determination.
4// Copyright 2012 Google Inc. All Rights Reserved.
5// Author: rays@google.com (Ray Smith)
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
18
19#ifndef TESSERACT_TEXTORD_BASELINEDETECT_H_
20#define TESSERACT_TEXTORD_BASELINEDETECT_H_
21
22#include "detlinefit.h"
23#include "points.h"
24#include "rect.h"
25
26struct Pix;
27
28namespace tesseract {
29
30class Textord;
31class BLOBNBOX_LIST;
32class TO_BLOCK;
33class TO_BLOCK_LIST;
34class TO_ROW;
35
36// Class to compute and hold baseline data for a TO_ROW.
38public:
39 BaselineRow(double line_size, TO_ROW *to_row);
40
41 const TBOX &bounding_box() const {
42 return bounding_box_;
43 }
44 // Sets the TO_ROW with the output straight line.
45 void SetupOldLineParameters(TO_ROW *row) const;
46
47 // Outputs diagnostic information.
48 void Print() const;
49
50 // Returns the skew angle (in radians) of the current baseline in [-pi,pi].
51 double BaselineAngle() const;
52 // Computes and returns the linespacing at the middle of the overlap
53 // between this and other.
54 double SpaceBetween(const BaselineRow &other) const;
55 // Computes and returns the displacement of the center of the line
56 // perpendicular to the given direction.
57 double PerpDisp(const FCOORD &direction) const;
58 // Computes the y coordinate at the given x using the straight baseline
59 // defined by baseline1_ and baseline2_.
60 double StraightYAtX(double x) const;
61
62 // Fits a straight baseline to the points. Returns true if it had enough
63 // points to be reasonably sure of the fitted baseline.
64 // If use_box_bottoms is false, baselines positions are formed by
65 // considering the outlines of the blobs.
66 bool FitBaseline(bool use_box_bottoms);
67 // Modifies an existing result of FitBaseline to be parallel to the given
68 // vector if that produces a better result.
69 void AdjustBaselineToParallel(int debug, const FCOORD &direction);
70 // Modifies the baseline to snap to the textline grid if the existing
71 // result is not good enough.
72 double AdjustBaselineToGrid(int debug, const FCOORD &direction, double line_spacing,
73 double line_offset);
74
75private:
76 // Sets up displacement_modes_ with the top few modes of the perpendicular
77 // distance of each blob from the given direction vector, after rounding.
78 void SetupBlobDisplacements(const FCOORD &direction);
79
80 // Fits a line in the given direction to blobs that are close to the given
81 // target_offset perpendicular displacement from the direction. The fit
82 // error is allowed to be cheat_allowance worse than the existing fit, and
83 // will still be used.
84 // If cheat_allowance > 0, the new fit will be good and replace the current
85 // fit if it has better fit (with cheat) OR its error is below
86 // max_baseline_error_ and the old fit is marked bad.
87 // Otherwise the new fit will only replace the old if it is really better,
88 // or the old fit is marked bad and the new fit has sufficient points, as
89 // well as being within the max_baseline_error_.
90 void FitConstrainedIfBetter(int debug, const FCOORD &direction, double cheat_allowance,
91 double target_offset);
92 // Returns the perpendicular distance of the point from the straight
93 // baseline.
94 float PerpDistanceFromBaseline(const FCOORD &pt) const;
95 // Computes the bounding box of the row.
96 void ComputeBoundingBox();
97
98 // The blobs of the row to which this BaselineRow adds extra information
99 // during baseline fitting. Note that blobs_ could easily come from either
100 // a TO_ROW or a ColPartition.
101 BLOBNBOX_LIST *blobs_;
102 // Bounding box of all the blobs.
103 TBOX bounding_box_;
104 // Fitter used to fit lines to the blobs.
105 DetLineFit fitter_;
106 // 2 points on the straight baseline.
107 FCOORD baseline_pt1_;
108 FCOORD baseline_pt2_;
109 // Set of modes of displacements. They indicate preferable baseline positions.
110 std::vector<double> displacement_modes_;
111 // Quantization factor used for displacement_modes_.
112 double disp_quant_factor_;
113 // Half the acceptance range of blob displacements for computing the
114 // error during a constrained fit.
115 double fit_halfrange_;
116 // Max baseline error before a line is regarded as fitting badly.
117 double max_baseline_error_;
118 // The error of fit of the baseline.
119 double baseline_error_;
120 // True if this row seems to have a good baseline.
121 bool good_baseline_;
122};
123
124// Class to compute and hold baseline data for a TO_BLOCK.
126public:
127 BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block);
128
130 for (auto row : rows_) {
131 delete row;
132 }
133 }
134
135 TO_BLOCK *block() const {
136 return block_;
137 }
138 double skew_angle() const {
139 return skew_angle_;
140 }
141
142 // Computes and returns the absolute error of the given perp_disp from the
143 // given linespacing model.
144 static double SpacingModelError(double perp_disp, double line_spacing, double line_offset);
145
146 // Fits straight line baselines and computes the skew angle from the
147 // median angle. Returns true if a good angle is found.
148 // If use_box_bottoms is false, baseline positions are formed by
149 // considering the outlines of the blobs.
150 bool FitBaselinesAndFindSkew(bool use_box_bottoms);
151
152 // Refits the baseline to a constrained angle, using the stored block
153 // skew if good enough, otherwise the supplied default skew.
154 void ParallelizeBaselines(double default_block_skew);
155
156 // Sets the parameters in TO_BLOCK that are needed by subsequent processes.
157 void SetupBlockParameters() const;
158
159 // Processing that is required before fitting baseline splines, but requires
160 // linear baselines in order to be successful:
161 // Removes noise if required
162 // Separates out underlines
163 // Pre-associates blob fragments.
164 // TODO(rays/joeliu) This entire section of code is inherited from the past
165 // and could be improved/eliminated.
166 // page_tr is used to size a debug window.
167 void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise);
168
169 // Fits splines to the textlines, or creates fake QSPLINES from the straight
170 // baselines that are already on the TO_ROWs.
171 // As a side-effect, computes the xheights of the rows and the block.
172 // Although x-height estimation is conceptually separate, it is part of
173 // detecting perspective distortion and therefore baseline fitting.
174 void FitBaselineSplines(bool enable_splines, bool show_final_rows, Textord *textord);
175
176 // Draws the (straight) baselines and final blobs colored according to
177 // what was discarded as noise and what is associated with each row.
178 void DrawFinalRows(const ICOORD &page_tr);
179
180 // Render the generated spline baselines for this block on pix_in.
181 void DrawPixSpline(Image pix_in);
182
183private:
184 // Top-level line-spacing calculation. Computes an estimate of the line-
185 // spacing, using the current baselines in the TO_ROWS of the block, and
186 // then refines it by fitting a regression line to the baseline positions
187 // as a function of their integer index.
188 // Returns true if it seems that the model is a reasonable fit to the
189 // observations.
190 bool ComputeLineSpacing();
191
192 // Computes the deskewed vertical position of each baseline in the block and
193 // stores them in the given vector.
194 void ComputeBaselinePositions(const FCOORD &direction, std::vector<double> *positions);
195
196 // Computes an estimate of the line spacing of the block from the median
197 // of the spacings between adjacent overlapping textlines.
198 void EstimateLineSpacing();
199
200 // Refines the line spacing of the block by fitting a regression
201 // line to the deskewed y-position of each baseline as a function of its
202 // estimated line index, allowing for a small error in the initial linespacing
203 // and choosing the best available model.
204 void RefineLineSpacing(const std::vector<double> &positions);
205
206 // Given an initial estimate of line spacing (m_in) and the positions of each
207 // baseline, computes the line spacing of the block more accurately in m_out,
208 // and the corresponding intercept in c_out, and the number of spacings seen
209 // in index_delta. Returns the error of fit to the line spacing model.
210 double FitLineSpacingModel(const std::vector<double> &positions, double m_in, double *m_out,
211 double *c_out, int *index_delta);
212
213 // The block to which this class adds extra information used during baseline
214 // calculation.
215 TO_BLOCK *block_;
216 // The rows in the block that we will be working with.
217 std::vector<BaselineRow *> rows_;
218 // Amount of debugging output to provide.
219 int debug_level_;
220 // True if the block is non-text (graphic).
221 bool non_text_block_;
222 // True if the block has at least one good enough baseline to compute the
223 // skew angle and therefore skew_angle_ is valid.
224 bool good_skew_angle_;
225 // Angle of skew in radians using the conventional anticlockwise from x-axis.
226 double skew_angle_;
227 // Current best estimate line spacing in pixels perpendicular to skew_angle_.
228 double line_spacing_;
229 // Offset for baseline positions, in pixels. Each baseline is at
230 // line_spacing_ * n + line_offset_ for integer n, which represents
231 // [textline] line number in a line numbering system that has line 0 on or
232 // at least near the x-axis. Not equal to the actual line number of a line
233 // within a block as most blocks are not near the x-axis.
234 double line_offset_;
235 // The error of the line spacing model.
236 double model_error_;
237};
238
240public:
241 BaselineDetect(int debug_level, const FCOORD &page_skew, TO_BLOCK_LIST *blocks);
242
244 for (auto block : blocks_) {
245 delete block;
246 }
247 }
248
249 // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers
250 // block-wise and page-wise data to smooth small blocks/rows, and applies
251 // smoothing based on block/page-level skew and block-level linespacing.
252 void ComputeStraightBaselines(bool use_box_bottoms);
253
254 // Computes the baseline splines for each TO_ROW in each TO_BLOCK and
255 // other associated side-effects, including pre-associating blobs, computing
256 // x-heights and displaying debug information.
257 // NOTE that ComputeStraightBaselines must have been called first as this
258 // sets up data in the TO_ROWs upon which this function depends.
259 void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines,
260 bool remove_noise, bool show_final_rows, Textord *textord);
261
262private:
263 // Average (median) skew of the blocks on the page among those that have
264 // a good angle of their own.
265 FCOORD page_skew_;
266 // Amount of debug output to produce.
267 int debug_level_;
268 // The blocks that we are working with.
269 std::vector<BaselineBlock *> blocks_;
270};
271
272} // namespace tesseract
273
274#endif // TESSERACT_TEXTORD_BASELINEDETECT_H_
integer coordinate
Definition: points.h:36
bool FitBaseline(bool use_box_bottoms)
double PerpDisp(const FCOORD &direction) const
const TBOX & bounding_box() const
double BaselineAngle() const
void AdjustBaselineToParallel(int debug, const FCOORD &direction)
double SpaceBetween(const BaselineRow &other) const
BaselineRow(double line_size, TO_ROW *to_row)
double StraightYAtX(double x) const
double AdjustBaselineToGrid(int debug, const FCOORD &direction, double line_spacing, double line_offset)
void SetupOldLineParameters(TO_ROW *row) const
void FitBaselineSplines(bool enable_splines, bool show_final_rows, Textord *textord)
bool FitBaselinesAndFindSkew(bool use_box_bottoms)
BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block)
void DrawFinalRows(const ICOORD &page_tr)
void ParallelizeBaselines(double default_block_skew)
void DrawPixSpline(Image pix_in)
void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise)
static double SpacingModelError(double perp_disp, double line_spacing, double line_offset)
TO_BLOCK * block() const
BaselineDetect(int debug_level, const FCOORD &page_skew, TO_BLOCK_LIST *blocks)
void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, bool remove_noise, bool show_final_rows, Textord *textord)
void ComputeStraightBaselines(bool use_box_bottoms)