All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
baselinedetect.cpp
Go to the documentation of this file.
1 // File: baselinedetect.cpp
3 // Description: Initial Baseline Determination.
4 // Copyright 2012 Google Inc. All Rights Reserved.
5 // Author: rays@google.com (Ray Smith)
6 // Created: Mon Apr 30 10:15:31 PDT 2012
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifdef _MSC_VER
21 #define _USE_MATH_DEFINES
22 #endif // _MSC_VER
23 
24 #ifdef HAVE_CONFIG_H
25 #include "config_auto.h"
26 #endif
27 
28 #include "baselinedetect.h"
29 
30 #include <math.h>
31 #include "allheaders.h"
32 #include "blobbox.h"
33 #include "detlinefit.h"
34 #include "drawtord.h"
35 #include "helpers.h"
36 #include "linlsq.h"
37 #include "makerow.h"
38 #include "textord.h"
39 #include "tprintf.h"
40 #include "underlin.h"
41 
42 // Number of displacement modes kept in displacement_modes_;
43 const int kMaxDisplacementsModes = 3;
44 // Number of points to skip when retrying initial fit.
45 const int kNumSkipPoints = 3;
46 // Max angle deviation (in radians) allowed to keep the independent baseline.
47 const double kMaxSkewDeviation = 1.0 / 64;
48 // Fraction of line spacing estimate for quantization of blob displacements.
49 const double kOffsetQuantizationFactor = 3.0 / 64;
50 // Fraction of line spacing estimate for computing blob fit error.
51 const double kFitHalfrangeFactor = 6.0 / 64;
52 // Max fraction of line spacing allowed before a baseline counts as badly fitting.
53 const double kMaxBaselineError = 3.0 / 64;
54 // Multiple of linespacing that sets max_blob_size in TO_BLOCK.
55 // Copied from textord_excess_blobsize.
56 const double kMaxBlobSizeMultiple = 1.3;
57 // Min fraction of linespacing gaps that should be close to the model before
58 // we will force the linespacing model on all the lines.
59 const double kMinFittingLinespacings = 0.25;
60 // A y-coordinate within a textline that is to be debugged.
61 //#define kDebugYCoord 1525
62 
63 namespace tesseract {
64 
65 BaselineRow::BaselineRow(double line_spacing, TO_ROW* to_row)
66  : blobs_(to_row->blob_list()),
67  baseline_pt1_(0.0f, 0.0f), baseline_pt2_(0.0f, 0.0f),
68  baseline_error_(0.0), good_baseline_(false) {
69  ComputeBoundingBox();
70  // Compute a scale factor for rounding to ints.
71  disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing;
72  fit_halfrange_ = kFitHalfrangeFactor * line_spacing;
73  max_baseline_error_ = kMaxBaselineError * line_spacing;
74 }
75 
76 // Sets the TO_ROW with the output straight line.
78  // TODO(rays) get rid of this when m and c are no longer used.
79  double gradient = tan(BaselineAngle());
80  // para_c is the actual intercept of the baseline on the y-axis.
81  float para_c = StraightYAtX(0.0);
82  row->set_line(gradient, para_c, baseline_error_);
83  row->set_parallel_line(gradient, para_c, baseline_error_);
84 }
85 
86 // Outputs diagnostic information.
87 void BaselineRow::Print() const {
88  tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n",
89  baseline_pt1_.x(), baseline_pt1_.y(),
90  baseline_pt2_.x(), baseline_pt2_.y(),
91  BaselineAngle(), StraightYAtX(0.0));
92  tprintf("Quant factor=%g, error=%g, good=%d, box:",
93  disp_quant_factor_, baseline_error_, good_baseline_);
94  bounding_box_.print();
95 }
96 
97 // Returns the skew angle (in radians) of the current baseline in [-pi,pi].
99  FCOORD baseline_dir(baseline_pt2_ - baseline_pt1_);
100  double angle = baseline_dir.angle();
101  // Baseline directions are only unique in a range of pi so constrain to
102  // [-pi/2, pi/2].
103  return fmod(angle + M_PI * 1.5, M_PI) - M_PI * 0.5;
104 }
105 
106 // Computes and returns the linespacing at the middle of the overlap
107 // between this and other.
108 double BaselineRow::SpaceBetween(const BaselineRow& other) const {
109  // Find the x-centre of overlap of the lines.
110  float x = (MAX(bounding_box_.left(), other.bounding_box_.left()) +
111  MIN(bounding_box_.right(), other.bounding_box_.right())) / 2.0f;
112  // Find the vertical centre between them.
113  float y = (StraightYAtX(x) + other.StraightYAtX(x)) / 2.0f;
114  // Find the perpendicular distance of (x,y) from each line.
115  FCOORD pt(x, y);
116  return PerpDistanceFromBaseline(pt) + other.PerpDistanceFromBaseline(pt);
117 }
118 
119 // Computes and returns the displacement of the center of the line
120 // perpendicular to the given direction.
121 double BaselineRow::PerpDisp(const FCOORD& direction) const {
122  float middle_x = (bounding_box_.left() + bounding_box_.right()) / 2.0f;
123  FCOORD middle_pos(middle_x, StraightYAtX(middle_x));
124  return direction * middle_pos / direction.length();
125 }
126 
127 // Computes the y coordinate at the given x using the straight baseline
128 // defined by baseline_pt1_ and baseline_pt2__.
129 double BaselineRow::StraightYAtX(double x) const {
130  double denominator = baseline_pt2_.x() - baseline_pt1_.x();
131  if (denominator == 0.0)
132  return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0;
133  return baseline_pt1_.y() +
134  (x - baseline_pt1_.x()) * (baseline_pt2_.y() - baseline_pt1_.y()) /
135  denominator;
136 }
137 
138 // Fits a straight baseline to the points. Returns true if it had enough
139 // points to be reasonably sure of the fitted baseline.
140 // If use_box_bottoms is false, baselines positions are formed by
141 // considering the outlines of the blobs.
142 bool BaselineRow::FitBaseline(bool use_box_bottoms) {
143  // Deterministic fitting is used wherever possible.
144  fitter_.Clear();
145  // Linear least squares is a backup if the DetLineFit produces a bad line.
146  LLSQ llsq;
147  BLOBNBOX_IT blob_it(blobs_);
148 
149  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
150  BLOBNBOX* blob = blob_it.data();
151  if (!use_box_bottoms) blob->EstimateBaselinePosition();
152  const TBOX& box = blob->bounding_box();
153  int x_middle = (box.left() + box.right()) / 2;
154 #ifdef kDebugYCoord
155  if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) {
156  tprintf("Box bottom = %d, baseline pos=%d for box at:",
157  box.bottom(), blob->baseline_position());
158  box.print();
159  }
160 #endif
161  fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2);
162  llsq.add(x_middle, blob->baseline_position());
163  }
164  // Fit the line.
165  ICOORD pt1, pt2;
166  baseline_error_ = fitter_.Fit(&pt1, &pt2);
167  baseline_pt1_ = pt1;
168  baseline_pt2_ = pt2;
169  if (baseline_error_ > max_baseline_error_ &&
171  // The fit was bad but there were plenty of points, so try skipping
172  // the first and last few, and use the new line if it dramatically improves
173  // the error of fit.
174  double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2);
175  if (error < baseline_error_ / 2.0) {
176  baseline_error_ = error;
177  baseline_pt1_ = pt1;
178  baseline_pt2_ = pt2;
179  }
180  }
181  int debug = 0;
182 #ifdef kDebugYCoord
183  Print();
184  debug = bounding_box_.bottom() < kDebugYCoord &&
185  bounding_box_.top() > kDebugYCoord
186  ? 3 : 2;
187 #endif
188  // Now we obtained a direction from that fit, see if we can improve the
189  // fit using the same direction and some other start point.
190  FCOORD direction(pt2 - pt1);
191  double target_offset = direction * pt1;
192  good_baseline_ = false;
193  FitConstrainedIfBetter(debug, direction, 0.0, target_offset);
194  // Wild lines can be produced because DetLineFit allows vertical lines, but
195  // vertical text has been rotated so angles over pi/4 should be disallowed.
196  // Near vertical lines can still be produced by vertically aligned components
197  // on very short lines.
198  double angle = BaselineAngle();
199  if (fabs(angle) > M_PI * 0.25) {
200  // Use the llsq fit as a backup.
201  baseline_pt1_ = llsq.mean_point();
202  baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m());
203  // TODO(rays) get rid of this when m and c are no longer used.
204  double m = llsq.m();
205  double c = llsq.c(m);
206  baseline_error_ = llsq.rms(m, c);
207  good_baseline_ = false;
208  }
209  return good_baseline_;
210 }
211 
212 // Modifies an existing result of FitBaseline to be parallel to the given
213 // direction vector if that produces a better result.
215  const FCOORD& direction) {
216  SetupBlobDisplacements(direction);
217  if (displacement_modes_.empty())
218  return;
219 #ifdef kDebugYCoord
220  if (bounding_box_.bottom() < kDebugYCoord &&
221  bounding_box_.top() > kDebugYCoord && debug < 3)
222  debug = 3;
223 #endif
224  FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]);
225 }
226 
227 // Modifies the baseline to snap to the textline grid if the existing
228 // result is not good enough.
230  const FCOORD& direction,
231  double line_spacing,
232  double line_offset) {
233  if (blobs_->empty()) {
234  if (debug > 1) {
235  tprintf("Row empty at:");
236  bounding_box_.print();
237  }
238  return line_offset;
239  }
240  // Find the displacement_modes_ entry nearest to the grid.
241  double best_error = 0.0;
242  int best_index = -1;
243  for (int i = 0; i < displacement_modes_.size(); ++i) {
244  double blob_y = displacement_modes_[i];
245  double error = BaselineBlock::SpacingModelError(blob_y, line_spacing,
246  line_offset);
247  if (debug > 1) {
248  tprintf("Mode at %g has error %g from model \n", blob_y, error);
249  }
250  if (best_index < 0 || error < best_error) {
251  best_error = error;
252  best_index = i;
253  }
254  }
255  // We will move the baseline only if the chosen mode is close enough to the
256  // model.
257  double model_margin = max_baseline_error_ - best_error;
258  if (best_index >= 0 && model_margin > 0.0) {
259  // But if the current baseline is already close to the mode there is no
260  // point, and only the potential to damage accuracy by changing its angle.
261  double perp_disp = PerpDisp(direction);
262  double shift = displacement_modes_[best_index] - perp_disp;
263  if (fabs(shift) > max_baseline_error_) {
264  if (debug > 1) {
265  tprintf("Attempting linespacing model fit with mode %g to row at:",
266  displacement_modes_[best_index]);
267  bounding_box_.print();
268  }
269  FitConstrainedIfBetter(debug, direction, model_margin,
270  displacement_modes_[best_index]);
271  } else if (debug > 1) {
272  tprintf("Linespacing model only moves current line by %g for row at:",
273  shift);
274  bounding_box_.print();
275  }
276  } else if (debug > 1) {
277  tprintf("Linespacing model not close enough to any mode for row at:");
278  bounding_box_.print();
279  }
280  return fmod(PerpDisp(direction), line_spacing);
281 }
282 
283 // Sets up displacement_modes_ with the top few modes of the perpendicular
284 // distance of each blob from the given direction vector, after rounding.
285 void BaselineRow::SetupBlobDisplacements(const FCOORD& direction) {
286  // Set of perpendicular displacements of the blob bottoms from the required
287  // baseline direction.
288  GenericVector<double> perp_blob_dists;
289  displacement_modes_.truncate(0);
290  // Gather the skew-corrected position of every blob.
291  double min_dist = MAX_FLOAT32;
292  double max_dist = -MAX_FLOAT32;
293  BLOBNBOX_IT blob_it(blobs_);
294  bool debug = false;
295  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
296  BLOBNBOX* blob = blob_it.data();
297  const TBOX& box = blob->bounding_box();
298 #ifdef kDebugYCoord
299  if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) debug = true;
300 #endif
301  FCOORD blob_pos((box.left() + box.right()) / 2.0f,
302  blob->baseline_position());
303  double offset = direction * blob_pos;
304  perp_blob_dists.push_back(offset);
305  if (debug) {
306  tprintf("Displacement %g for blob at:", offset);
307  box.print();
308  }
309  UpdateRange(offset, &min_dist, &max_dist);
310  }
311  // Set up a histogram using disp_quant_factor_ as the bucket size.
312  STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_),
313  IntCastRounded(max_dist / disp_quant_factor_) + 1);
314  for (int i = 0; i < perp_blob_dists.size(); ++i) {
315  dist_stats.add(IntCastRounded(perp_blob_dists[i] / disp_quant_factor_), 1);
316  }
318  dist_stats.top_n_modes(kMaxDisplacementsModes, &scaled_modes);
319  if (debug) {
320  for (int i = 0; i < scaled_modes.size(); ++i) {
321  tprintf("Top mode = %g * %d\n",
322  scaled_modes[i].key * disp_quant_factor_, scaled_modes[i].data);
323  }
324  }
325  for (int i = 0; i < scaled_modes.size(); ++i)
326  displacement_modes_.push_back(disp_quant_factor_ * scaled_modes[i].key);
327 }
328 
329 // Fits a line in the given direction to blobs that are close to the given
330 // target_offset perpendicular displacement from the direction. The fit
331 // error is allowed to be cheat_allowance worse than the existing fit, and
332 // will still be used.
333 // If cheat_allowance > 0, the new fit will be good and replace the current
334 // fit if it has better fit (with cheat) OR its error is below
335 // max_baseline_error_ and the old fit is marked bad.
336 // Otherwise the new fit will only replace the old if it is really better,
337 // or the old fit is marked bad and the new fit has sufficient points, as
338 // well as being within the max_baseline_error_.
339 void BaselineRow::FitConstrainedIfBetter(int debug,
340  const FCOORD& direction,
341  double cheat_allowance,
342  double target_offset) {
343  double halfrange = fit_halfrange_ * direction.length();
344  double min_dist = target_offset - halfrange;
345  double max_dist = target_offset + halfrange;
346  ICOORD line_pt;
347  double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist,
348  debug > 2, &line_pt);
349  // Allow cheat_allowance off the new error
350  new_error -= cheat_allowance;
351  double old_angle = BaselineAngle();
352  double new_angle = direction.angle();
353  if (debug > 1) {
354  tprintf("Constrained error = %g, original = %g",
355  new_error, baseline_error_);
356  tprintf(" angles = %g, %g, delta=%g vs threshold %g\n",
357  old_angle, new_angle,
358  new_angle - old_angle, kMaxSkewDeviation);
359  }
360  bool new_good_baseline = new_error <= max_baseline_error_ &&
361  (cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit());
362  // The new will replace the old if any are true:
363  // 1. the new error is better
364  // 2. the old is NOT good, but the new is
365  // 3. there is a wild angular difference between them (assuming that the new
366  // is a better guess at the angle.)
367  if (new_error <= baseline_error_ ||
368  (!good_baseline_ && new_good_baseline) ||
369  fabs(new_angle - old_angle) > kMaxSkewDeviation) {
370  baseline_error_ = new_error;
371  baseline_pt1_ = line_pt;
372  baseline_pt2_ = baseline_pt1_ + direction;
373  good_baseline_ = new_good_baseline;
374  if (debug > 1) {
375  tprintf("Replacing with constrained baseline, good = %d\n",
376  good_baseline_);
377  }
378  } else if (debug > 1) {
379  tprintf("Keeping old baseline\n");
380  }
381 }
382 
383 // Returns the perpendicular distance of the point from the straight
384 // baseline.
385 double BaselineRow::PerpDistanceFromBaseline(const FCOORD& pt) const {
386  FCOORD baseline_vector(baseline_pt2_ - baseline_pt1_);
387  FCOORD offset_vector(pt - baseline_pt1_);
388  double distance = baseline_vector * offset_vector;
389  return sqrt(distance * distance / baseline_vector.sqlength());
390 }
391 
392 // Computes the bounding box of the row.
393 void BaselineRow::ComputeBoundingBox() {
394  BLOBNBOX_IT it(blobs_);
395  TBOX box;
396  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
397  box += it.data()->bounding_box();
398  }
399  bounding_box_ = box;
400 }
401 
402 
403 BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block)
404  : block_(block), debug_level_(debug_level), non_text_block_(non_text),
405  good_skew_angle_(false), skew_angle_(0.0),
406  line_spacing_(block->line_spacing), line_offset_(0.0), model_error_(0.0) {
407  TO_ROW_IT row_it(block_->get_rows());
408  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
409  // Sort the blobs on the rows.
410  row_it.data()->blob_list()->sort(blob_x_order);
411  rows_.push_back(new BaselineRow(block->line_spacing, row_it.data()));
412  }
413 }
414 
415 // Computes and returns the absolute error of the given perp_disp from the
416 // given linespacing model.
417 double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing,
418  double line_offset) {
419  // Round to the nearest multiple of line_spacing + line offset.
420  int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing);
421  double model_y = line_spacing * multiple + line_offset;
422  return fabs(perp_disp - model_y);
423 }
424 
425 // Fits straight line baselines and computes the skew angle from the
426 // median angle. Returns true if a good angle is found.
427 // If use_box_bottoms is false, baseline positions are formed by
428 // considering the outlines of the blobs.
429 bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) {
430  if (non_text_block_) return false;
431  GenericVector<double> angles;
432  for (int r = 0; r < rows_.size(); ++r) {
433  BaselineRow* row = rows_[r];
434  if (row->FitBaseline(use_box_bottoms)) {
435  double angle = row->BaselineAngle();
436  angles.push_back(angle);
437  }
438  if (debug_level_ > 1)
439  row->Print();
440  }
441 
442  if (!angles.empty()) {
443  skew_angle_ = MedianOfCircularValues(M_PI, &angles);
444  good_skew_angle_ = true;
445  } else {
446  skew_angle_ = 0.0f;
447  good_skew_angle_ = false;
448  }
449  if (debug_level_ > 0) {
450  tprintf("Initial block skew angle = %g, good = %d\n",
451  skew_angle_, good_skew_angle_);
452  }
453  return good_skew_angle_;
454 }
455 
456 // Refits the baseline to a constrained angle, using the stored block
457 // skew if good enough, otherwise the supplied default skew.
458 void BaselineBlock::ParallelizeBaselines(double default_block_skew) {
459  if (non_text_block_) return;
460  if (!good_skew_angle_) skew_angle_ = default_block_skew;
461  if (debug_level_ > 0)
462  tprintf("Adjusting block to skew angle %g\n", skew_angle_);
463  FCOORD direction(cos(skew_angle_), sin(skew_angle_));
464  for (int r = 0; r < rows_.size(); ++r) {
465  BaselineRow* row = rows_[r];
466  row->AdjustBaselineToParallel(debug_level_, direction);
467  if (debug_level_ > 1)
468  row->Print();
469  }
470  if (rows_.size() < 3 || !ComputeLineSpacing())
471  return;
472  // Enforce the line spacing model on all lines that don't yet have a good
473  // baseline.
474  // Start by finding the row that is best fitted to the model.
475  int best_row = 0;
476  double best_error = SpacingModelError(rows_[0]->PerpDisp(direction),
477  line_spacing_, line_offset_);
478  for (int r = 1; r < rows_.size(); ++r) {
479  double error = SpacingModelError(rows_[r]->PerpDisp(direction),
480  line_spacing_, line_offset_);
481  if (error < best_error) {
482  best_error = error;
483  best_row = r;
484  }
485  }
486  // Starting at the best fitting row, work outwards, syncing the offset.
487  double offset = line_offset_;
488  for (int r = best_row + 1; r < rows_.size(); ++r) {
489  offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
490  line_spacing_, offset);
491  }
492  offset = line_offset_;
493  for (int r = best_row - 1; r >= 0; --r) {
494  offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
495  line_spacing_, offset);
496  }
497 }
498 
499 // Sets the parameters in TO_BLOCK that are needed by subsequent processes.
501  if (line_spacing_ > 0.0) {
502  // Where was block_line_spacing set before?
503  float min_spacing = MIN(block_->line_spacing, line_spacing_);
504  if (min_spacing < block_->line_size)
505  block_->line_size = min_spacing;
506  block_->line_spacing = line_spacing_;
507  block_->baseline_offset = line_offset_;
508  block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple;
509  }
510  // Setup the parameters on all the rows.
511  TO_ROW_IT row_it(block_->get_rows());
512  for (int r = 0; r < rows_.size(); ++r, row_it.forward()) {
513  BaselineRow* row = rows_[r];
514  TO_ROW* to_row = row_it.data();
515  row->SetupOldLineParameters(to_row);
516  }
517 }
518 
519 // Processing that is required before fitting baseline splines, but requires
520 // linear baselines in order to be successful:
521 // Removes noise if required
522 // Separates out underlines
523 // Pre-associates blob fragments.
524 // TODO(rays/joeliu) This entire section of code is inherited from the past
525 // and could be improved/eliminated.
526 // page_tr is used to size a debug window.
527 void BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise) {
528  if (non_text_block_) return;
529  if (remove_noise) {
530  vigorous_noise_removal(block_);
531  }
532  FCOORD rotation(1.0f, 0.0f);
533  double gradient = tan(skew_angle_);
534  separate_underlines(block_, gradient, rotation, true);
535  pre_associate_blobs(page_tr, block_, rotation, true);
536 }
537 
538 // Fits splines to the textlines, or creates fake QSPLINES from the straight
539 // baselines that are already on the TO_ROWs.
540 // As a side-effect, computes the xheights of the rows and the block.
541 // Although x-height estimation is conceptually separate, it is part of
542 // detecting perspective distortion and therefore baseline fitting.
543 void BaselineBlock::FitBaselineSplines(bool enable_splines,
544  bool show_final_rows,
545  Textord* textord) {
546  double gradient = tan(skew_angle_);
547  FCOORD rotation(1.0f, 0.0f);
548 
549  if (enable_splines) {
550  textord->make_spline_rows(block_, gradient, show_final_rows);
551  } else {
552  // Make a fake spline from the existing line.
553  TBOX block_box= block_->block->bounding_box();
554  TO_ROW_IT row_it = block_->get_rows();
555  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
556  TO_ROW* row = row_it.data();
557  inT32 xstarts[2] = { block_box.left(), block_box.right() };
558  double coeffs[3] = { 0.0, row->line_m(), row->line_c() };
559  row->baseline = QSPLINE(1, xstarts, coeffs);
560  textord->compute_row_xheight(row, block_->block->classify_rotation(),
561  row->line_m(), block_->line_size);
562  }
563  }
564  textord->compute_block_xheight(block_, gradient);
565  block_->block->set_xheight(block_->xheight);
566  if (textord_restore_underlines) // fix underlines
567  restore_underlined_blobs(block_);
568 }
569 
570 // Draws the (straight) baselines and final blobs colored according to
571 // what was discarded as noise and what is associated with each row.
572 void BaselineBlock::DrawFinalRows(const ICOORD& page_tr) {
573 #ifndef GRAPHICS_DISABLED
574  if (non_text_block_) return;
575  double gradient = tan(skew_angle_);
576  FCOORD rotation(1.0f, 0.0f);
577  int left_edge = block_->block->bounding_box().left();
578  ScrollView* win = create_to_win(page_tr);
580  TO_ROW_IT row_it = block_->get_rows();
581  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
582  plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation);
583  colour = static_cast<ScrollView::Color>(colour + 1);
584  if (colour > ScrollView::MAGENTA)
585  colour = ScrollView::RED;
586  }
588  // Show discarded blobs.
589  plot_blob_list(win, &block_->underlines,
591  if (block_->blobs.length() > 0)
592  tprintf("%d blobs discarded as noise\n", block_->blobs.length());
593  draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation);
594 #endif
595 }
596 
597 void BaselineBlock::DrawPixSpline(Pix* pix_in) {
598  if (non_text_block_) return;
599  TO_ROW_IT row_it = block_->get_rows();
600  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
601  row_it.data()->baseline.plot(pix_in);
602  }
603 }
604 
605 // Top-level line-spacing calculation. Computes an estimate of the line-
606 // spacing, using the current baselines in the TO_ROWS of the block, and
607 // then refines it by fitting a regression line to the baseline positions
608 // as a function of their integer index.
609 // Returns true if it seems that the model is a reasonable fit to the
610 // observations.
611 bool BaselineBlock::ComputeLineSpacing() {
612  FCOORD direction(cos(skew_angle_), sin(skew_angle_));
613  GenericVector<double> row_positions;
614  ComputeBaselinePositions(direction, &row_positions);
615  if (row_positions.size() < 2) return false;
616  EstimateLineSpacing();
617  RefineLineSpacing(row_positions);
618  // Verify that the model is reasonable.
619  double max_baseline_error = kMaxBaselineError * line_spacing_;
620  int non_trivial_gaps = 0;
621  int fitting_gaps = 0;
622  for (int i = 1; i < row_positions.size(); ++i) {
623  double row_gap = fabs(row_positions[i - 1] - row_positions[i]);
624  if (row_gap > max_baseline_error) {
625  ++non_trivial_gaps;
626  if (fabs(row_gap - line_spacing_) <= max_baseline_error)
627  ++fitting_gaps;
628  }
629  }
630  if (debug_level_ > 0) {
631  tprintf("Spacing %g, in %d rows, %d gaps fitted out of %d non-trivial\n",
632  line_spacing_, row_positions.size(), fitting_gaps,
633  non_trivial_gaps);
634  }
635  return fitting_gaps > non_trivial_gaps * kMinFittingLinespacings;
636 }
637 
638 // Computes the deskewed vertical position of each baseline in the block and
639 // stores them in the given vector.
640 // This is calculated as the perpendicular distance of the middle of each
641 // baseline (in case it has a different skew angle) from the line passing
642 // through the origin parallel to the block baseline angle.
643 // NOTE that "distance" above is a signed quantity so we can tell which side
644 // of the block baseline a line sits, hence the function and argument name
645 // positions not distances.
646 void BaselineBlock::ComputeBaselinePositions(const FCOORD& direction,
647  GenericVector<double>* positions) {
648  positions->clear();
649  for (int r = 0; r < rows_.size(); ++r) {
650  BaselineRow* row = rows_[r];
651  const TBOX& row_box = row->bounding_box();
652  float x_middle = (row_box.left() + row_box.right()) / 2.0f;
653  FCOORD row_pos(x_middle, static_cast<float>(row->StraightYAtX(x_middle)));
654  float offset = direction * row_pos;
655  positions->push_back(offset);
656  }
657 }
658 
659 // Computes an estimate of the line spacing of the block from the median
660 // of the spacings between adjacent overlapping textlines.
661 void BaselineBlock::EstimateLineSpacing() {
662  GenericVector<float> spacings;
663  for (int r = 0; r < rows_.size(); ++r) {
664  BaselineRow* row = rows_[r];
665  // Exclude silly lines.
666  if (fabs(row->BaselineAngle()) > M_PI * 0.25) continue;
667  // Find the first row after row that overlaps it significantly.
668  const TBOX& row_box = row->bounding_box();
669  int r2;
670  for (r2 = r + 1; r2 < rows_.size() &&
671  !row_box.major_x_overlap(rows_[r2]->bounding_box());
672  ++r2);
673  if (r2 < rows_.size()) {
674  BaselineRow* row2 = rows_[r2];
675  // Exclude silly lines.
676  if (fabs(row2->BaselineAngle()) > M_PI * 0.25) continue;
677  float spacing = row->SpaceBetween(*row2);
678  spacings.push_back(spacing);
679  }
680  }
681  // If we have at least one value, use it, otherwise leave the previous
682  // value unchanged.
683  if (!spacings.empty()) {
684  line_spacing_ = spacings[spacings.choose_nth_item(spacings.size() / 2)];
685  if (debug_level_ > 1)
686  tprintf("Estimate of linespacing = %g\n", line_spacing_);
687  }
688 }
689 
690 // Refines the line spacing of the block by fitting a regression
691 // line to the deskewed y-position of each baseline as a function of its
692 // estimated line index, allowing for a small error in the initial linespacing
693 // and choosing the best available model.
694 void BaselineBlock::RefineLineSpacing(const GenericVector<double>& positions) {
695  double spacings[3], offsets[3], errors[3];
696  int index_range;
697  errors[0] = FitLineSpacingModel(positions, line_spacing_,
698  &spacings[0], &offsets[0], &index_range);
699  if (index_range > 1) {
700  double spacing_plus = line_spacing_ / (1.0 + 1.0 / index_range);
701  // Try the hypotheses that there might be index_range +/- 1 line spaces.
702  errors[1] = FitLineSpacingModel(positions, spacing_plus,
703  &spacings[1], &offsets[1], NULL);
704  double spacing_minus = line_spacing_ / (1.0 - 1.0 / index_range);
705  errors[2] = FitLineSpacingModel(positions, spacing_minus,
706  &spacings[2], &offsets[2], NULL);
707  for (int i = 1; i <= 2; ++i) {
708  if (errors[i] < errors[0]) {
709  spacings[0] = spacings[i];
710  offsets[0] = offsets[i];
711  errors[0] = errors[i];
712  }
713  }
714  }
715  if (spacings[0] > 0.0) {
716  line_spacing_ = spacings[0];
717  line_offset_ = offsets[0];
718  model_error_ = errors[0];
719  if (debug_level_ > 0) {
720  tprintf("Final linespacing model = %g + offset %g, error %g\n",
721  line_spacing_, line_offset_, model_error_);
722  }
723  }
724 }
725 
726 // Given an initial estimate of line spacing (m_in) and the positions of each
727 // baseline, computes the line spacing of the block more accurately in m_out,
728 // and the corresponding intercept in c_out, and the number of spacings seen
729 // in index_delta. Returns the error of fit to the line spacing model.
730 // Uses a simple linear regression, but optimized the offset using the median.
731 double BaselineBlock::FitLineSpacingModel(
732  const GenericVector<double>& positions, double m_in,
733  double* m_out, double* c_out, int* index_delta) {
734  if (m_in == 0.0f || positions.size() < 2) {
735  *m_out = m_in;
736  *c_out = 0.0;
737  if (index_delta != NULL) *index_delta = 0;
738  return 0.0;
739  }
740  GenericVector<double> offsets;
741  // Get the offset (remainder) linespacing for each line and choose the median.
742  for (int i = 0; i < positions.size(); ++i)
743  offsets.push_back(fmod(positions[i], m_in));
744  // Get the median offset.
745  double median_offset = MedianOfCircularValues(m_in, &offsets);
746  // Now fit a line to quantized line number and offset.
747  LLSQ llsq;
748  int min_index = MAX_INT32;
749  int max_index = -MAX_INT32;
750  for (int i = 0; i < positions.size(); ++i) {
751  double y_pos = positions[i];
752  int row_index = IntCastRounded((y_pos - median_offset) / m_in);
753  UpdateRange(row_index, &min_index, &max_index);
754  llsq.add(row_index, y_pos);
755  }
756  // Get the refined line spacing.
757  *m_out = llsq.m();
758  // Use the median offset rather than the mean.
759  offsets.truncate(0);
760  for (int i = 0; i < positions.size(); ++i)
761  offsets.push_back(fmod(positions[i], *m_out));
762  // Get the median offset.
763  if (debug_level_ > 2) {
764  for (int i = 0; i < offsets.size(); ++i)
765  tprintf("%d: %g\n", i, offsets[i]);
766  }
767  *c_out = MedianOfCircularValues(*m_out, &offsets);
768  if (debug_level_ > 1) {
769  tprintf("Median offset = %g, compared to mean of %g.\n",
770  *c_out, llsq.c(*m_out));
771  }
772  // Index_delta is the number of hypothesized line gaps present.
773  if (index_delta != NULL)
774  *index_delta = max_index - min_index;
775  // Use the regression model's intercept to compute the error, as it may be
776  // a full line-spacing in disagreement with the median.
777  double rms_error = llsq.rms(*m_out, llsq.c(*m_out));
778  if (debug_level_ > 1) {
779  tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n",
780  m_in, median_offset, *m_out, *c_out, rms_error);
781  }
782  return rms_error;
783 }
784 
785 
786 BaselineDetect::BaselineDetect(int debug_level, const FCOORD& page_skew,
787  TO_BLOCK_LIST* blocks)
788  : page_skew_(page_skew), debug_level_(debug_level), pix_debug_(NULL),
789  debug_file_prefix_("") {
790  TO_BLOCK_IT it(blocks);
791  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
792  TO_BLOCK* to_block = it.data();
793  BLOCK* block = to_block->block;
794  POLY_BLOCK* pb = block->poly_block();
795  // A note about non-text blocks.
796  // On output, non-text blocks are supposed to contain a single empty word
797  // in each incoming text line. These mark out the polygonal bounds of the
798  // block. Ideally no baselines should be required, but currently
799  // make_words crashes if a baseline and xheight are not provided, so we
800  // include non-text blocks here, but flag them for special treatment.
801  bool non_text = pb != NULL && !pb->IsText();
802  blocks_.push_back(new BaselineBlock(debug_level_, non_text, to_block));
803  }
804 }
805 
807  pixDestroy(&pix_debug_);
808 }
809 
810 // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers
811 // block-wise and page-wise data to smooth small blocks/rows, and applies
812 // smoothing based on block/page-level skew and block-level linespacing.
813 void BaselineDetect::ComputeStraightBaselines(bool use_box_bottoms) {
814  GenericVector<double> block_skew_angles;
815  for (int i = 0; i < blocks_.size(); ++i) {
816  BaselineBlock* bl_block = blocks_[i];
817  if (debug_level_ > 0)
818  tprintf("Fitting initial baselines...\n");
819  if (bl_block->FitBaselinesAndFindSkew(use_box_bottoms)) {
820  block_skew_angles.push_back(bl_block->skew_angle());
821  }
822  }
823  // Compute a page-wide default skew for blocks with too little information.
824  double default_block_skew = page_skew_.angle();
825  if (!block_skew_angles.empty()) {
826  default_block_skew = MedianOfCircularValues(M_PI, &block_skew_angles);
827  }
828  if (debug_level_ > 0) {
829  tprintf("Page skew angle = %g\n", default_block_skew);
830  }
831  // Set bad lines in each block to the default block skew and then force fit
832  // a linespacing model where it makes sense to do so.
833  for (int i = 0; i < blocks_.size(); ++i) {
834  BaselineBlock* bl_block = blocks_[i];
835  bl_block->ParallelizeBaselines(default_block_skew);
836  bl_block->SetupBlockParameters(); // This replaced compute_row_stats.
837  }
838 }
839 
840 // Computes the baseline splines for each TO_ROW in each TO_BLOCK and
841 // other associated side-effects, including pre-associating blobs, computing
842 // x-heights and displaying debug information.
843 // NOTE that ComputeStraightBaselines must have been called first as this
844 // sets up data in the TO_ROWs upon which this function depends.
846  bool enable_splines,
847  bool remove_noise,
848  bool show_final_rows,
849  Textord* textord) {
850  Pix* pix_spline = pix_debug_ ? pixConvertTo32(pix_debug_) : NULL;
851  for (int i = 0; i < blocks_.size(); ++i) {
852  BaselineBlock* bl_block = blocks_[i];
853  bl_block->PrepareForSplineFitting(page_tr, remove_noise);
854  bl_block->FitBaselineSplines(enable_splines, show_final_rows, textord);
855  if (pix_spline) {
856  bl_block->DrawPixSpline(pix_spline);
857  }
858  if (show_final_rows) {
859  bl_block->DrawFinalRows(page_tr);
860  }
861  }
862 
863  if (pix_spline) {
864  STRING outfile_name = debug_file_prefix_ + "_spline.png";
865  pixWrite(outfile_name.string(), pix_spline, IFF_PNG);
866  pixDestroy(&pix_spline);
867  }
868 }
869 
870 void BaselineDetect::SetDebugImage(Pix* pixIn, const STRING& output_path) {
871  pixDestroy(&pix_debug_);
872  pix_debug_ = pixClone(pixIn);
873  debug_file_prefix_ = output_path;
874 }
875 
876 } // namespace tesseract.
T MedianOfCircularValues(T modulus, GenericVector< T > *v)
Definition: linlsq.h:111
int size() const
Definition: genericvector.h:72
void truncate(int size)
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
#define MAX(x, y)
Definition: ndminx.h:24
void DrawPixSpline(Pix *pix_in)
float x() const
Definition: points.h:209
int push_back(T object)
double BaselineAngle() const
void set_parallel_line(float gradient, float new_c, float new_error)
Definition: blobbox.h:607
void compute_block_xheight(TO_BLOCK *block, float gradient)
Definition: makerow.cpp:1285
BaselineDetect(int debug_level, const FCOORD &page_skew, TO_BLOCK_LIST *blocks)
#define tprintf(...)
Definition: tprintf.h:31
#define MIN(x, y)
Definition: ndminx.h:28
int direction(EDGEPT *point)
Definition: vecfuncs.cpp:43
void separate_underlines(TO_BLOCK *block, float gradient, FCOORD rotation, BOOL8 testing_on)
Definition: makerow.cpp:1803
Definition: statistc.h:33
void restore_underlined_blobs(TO_BLOCK *block)
Definition: underlin.cpp:38
void FitBaselineSplines(bool enable_splines, bool show_final_rows, Textord *textord)
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2605
BLOBNBOX_LIST underlines
Definition: blobbox.h:769
void print() const
Definition: rect.h:270
BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block)
const double kMaxSkewDeviation
double AdjustBaselineToGrid(int debug, const FCOORD &direction, double line_spacing, double line_offset)
float line_c() const
Definition: blobbox.h:569
bool SufficientPointsForIndependentFit() const
Definition: detlinefit.cpp:163
void plot_parallel_row(TO_ROW *row, float gradient, inT32 left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:125
void set_xheight(inT32 height)
set char size
Definition: ocrblock.h:72
void ParallelizeBaselines(double default_block_skew)
void DrawFinalRows(const ICOORD &page_tr)
void draw_meanlines(TO_BLOCK *block, float gradient, inT32 left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:210
void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise)
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:125
double rms(double m, double c) const
Definition: linlsq.cpp:131
QSPLINE baseline
Definition: blobbox.h:666
bool IsText() const
Definition: polyblk.h:52
double skew_angle() const
inT16 right() const
Definition: rect.h:75
Definition: linlsq.h:26
void SetDebugImage(Pix *pixIn, const STRING &output_path)
bool FitBaseline(bool use_box_bottoms)
void plot_blob_list(ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour, ScrollView::Color child_colour)
Definition: blobbox.cpp:1080
const int kMaxDisplacementsModes
float xheight
Definition: blobbox.h:784
void SetupOldLineParameters(TO_ROW *row) const
double m() const
Definition: linlsq.cpp:101
float angle() const
find angle
Definition: points.h:249
void set_line(float new_m, float new_c, float new_error)
Definition: blobbox.h:599
float line_m() const
Definition: blobbox.h:566
void ComputeStraightBaselines(bool use_box_bottoms)
void pre_associate_blobs(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
Definition: makerow.cpp:1876
void EstimateBaselinePosition()
Definition: blobbox.cpp:350
FCOORD classify_rotation() const
Definition: ocrblock.h:144
const double kOffsetQuantizationFactor
static double SpacingModelError(double perp_disp, double line_spacing, double line_offset)
inT16 left() const
Definition: rect.h:68
double c(double m) const
Definition: linlsq.cpp:117
Definition: ocrblock.h:30
void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, bool remove_noise, bool show_final_rows, Textord *textord)
int baseline_position() const
Definition: blobbox.h:374
int choose_nth_item(int target_index)
float length() const
find length
Definition: points.h:230
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67
const double kMaxBaselineError
void add(double x, double y)
Definition: linlsq.cpp:49
const int kNumSkipPoints
#define MAX_INT32
Definition: host.h:120
integer coordinate
Definition: points.h:30
bool major_x_overlap(const TBOX &box) const
Definition: rect.h:402
void compute_row_xheight(TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
Definition: makerow.cpp:1397
inT16 bottom() const
Definition: rect.h:61
EXTERN bool textord_restore_underlines
Definition: underlin.cpp:30
bool empty() const
Definition: genericvector.h:84
float baseline_offset
Definition: blobbox.h:783
bool FitBaselinesAndFindSkew(bool use_box_bottoms)
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
inT16 width() const
Definition: rect.h:111
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:52
const double kMaxBaselineError
FCOORD mean_point() const
Definition: linlsq.cpp:167
const double kMaxBlobSizeMultiple
void vigorous_noise_removal(TO_BLOCK *block)
Definition: makerow.cpp:473
int IntCastRounded(double x)
Definition: helpers.h:172
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
Definition: rect.h:30
float line_spacing
Definition: blobbox.h:775
double PerpDisp(const FCOORD &direction) const
float y() const
Definition: points.h:212
double StraightYAtX(double x) const
#define MAX_FLOAT32
Definition: host.h:124
Definition: strngs.h:44
#define NULL
Definition: host.h:144
double SpaceBetween(const BaselineRow &other) const
const TBOX & bounding_box() const
Definition: blobbox.h:215
const char * string() const
Definition: strngs.cpp:193
inT16 top() const
Definition: rect.h:54
POLY_BLOCK * poly_block() const
Definition: pdblock.h:59
double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug, ICOORD *line_pt)
Definition: detlinefit.cpp:131
const double kMinFittingLinespacings
void make_spline_rows(TO_BLOCK *block, float gradient, BOOL8 testing_on)
Definition: makerow.cpp:2034
Definition: points.h:189
float max_blob_size
Definition: blobbox.h:782
const double kFitHalfrangeFactor
void AdjustBaselineToParallel(int debug, const FCOORD &direction)
BLOCK * block
Definition: blobbox.h:773
void SetupBlockParameters() const
float line_size
Definition: blobbox.h:781
BaselineRow(double line_size, TO_ROW *to_row)
int inT32
Definition: host.h:102
BLOBNBOX_LIST blobs
Definition: blobbox.h:768