tesseract v5.3.3.20231005
textlineprojection.cpp
Go to the documentation of this file.
1// Copyright 2011 Google Inc. All Rights Reserved.
2// Author: rays@google.com (Ray Smith)
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7// http://www.apache.org/licenses/LICENSE-2.0
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14#ifdef HAVE_CONFIG_H
15# include "config_auto.h"
16#endif
17
18#include <allheaders.h>
19#include "bbgrid.h" // Base class.
20#include "blobbox.h" // BlobNeighourDir.
21#include "blobs.h"
22#include "colpartition.h"
23#include "helpers.h" // for IntCastRounded
24#include "normalis.h"
25#include "textlineprojection.h"
26
27#include <algorithm>
28
// Padding factor to use on definitely oriented blobs.
const int kOrientedPadFactor = 8;
// Padding factor to use on not definitely oriented blobs.
const int kDefaultPadFactor = 2;
// Penalty factor for going away from the line center.
const int kWrongWayPenalty = 4;
// Ratio between parallel gap and perpendicular gap used to measure total
// distance of a box from a target box in curved textline space.
// parallel-gap is treated more favorably by this factor to allow catching
// quotes and ellipsis at the end of textlines.
const int kParaPerpDistRatio = 4;
// Multiple of scale_factor_ that the inter-line gap must be before we start
// padding the increment box perpendicular to the text line.
// NOTE(review): this declaration was absent from the listing although
// PadBlobBox uses the constant; the value 4 should be confirmed upstream.
const int kMinLineSpacingFactor = 4;
// Maximum tab-stop overrun for horizontal padding, in projection pixels.
const int kMaxTabStopOverrun = 6;
45
46namespace tesseract {
47
48TextlineProjection::TextlineProjection(int resolution) : x_origin_(0), y_origin_(0), pix_(nullptr) {
49 // The projection map should be about 100 ppi, whatever the input.
50 scale_factor_ = IntCastRounded(resolution / 100.0);
51 if (scale_factor_ < 1) {
52 scale_factor_ = 1;
53 }
54}
56 pix_.destroy();
57}
58
59// Build the projection profile given the input_block containing lists of
60// blobs, a rotation to convert to image coords,
61// and a full-resolution nontext_map, marking out areas to avoid.
62// During construction, we have the following assumptions:
63// The rotation is a multiple of 90 degrees, ie no deskew yet.
64// The blobs have had their left and right rules set to also limit
65// the range of projection.
66void TextlineProjection::ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation,
67 Image nontext_map) {
68 pix_.destroy();
69 TBOX image_box(0, 0, pixGetWidth(nontext_map), pixGetHeight(nontext_map));
70 x_origin_ = 0;
71 y_origin_ = image_box.height();
72 int width = (image_box.width() + scale_factor_ - 1) / scale_factor_;
73 int height = (image_box.height() + scale_factor_ - 1) / scale_factor_;
74
75 pix_ = pixCreate(width, height, 8);
76 ProjectBlobs(&input_block->blobs, rotation, image_box, nontext_map);
77 ProjectBlobs(&input_block->large_blobs, rotation, image_box, nontext_map);
78 Image final_pix = pixBlockconv(pix_, 1, 1);
79 // Pix* final_pix = pixBlockconv(pix_, 2, 2);
80 pix_.destroy();
81 pix_ = final_pix;
82}
83
84#ifndef GRAPHICS_DISABLED
85
86// Display the blobs in the window colored according to textline quality.
87void TextlineProjection::PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win) {
88 BLOBNBOX_IT it(blobs);
89 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
90 BLOBNBOX *blob = it.data();
91 const TBOX &box = blob->bounding_box();
92 bool bad_box = BoxOutOfHTextline(box, nullptr, false);
93 if (blob->UniquelyVertical()) {
95 } else {
96 win->Pen(bad_box ? ScrollView::RED : ScrollView::BLUE);
97 }
98 win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
99 }
100 win->Update();
101}
102
103#endif // !GRAPHICS_DISABLED
104
105// Moves blobs that look like they don't sit well on a textline from the
106// input blobs list to the output small_blobs list.
107// This gets them away from initial textline finding to stop diacritics
108// from forming incorrect textlines. (Introduced mainly to fix Thai.)
110 BLOBNBOX_LIST *small_blobs) const {
111 BLOBNBOX_IT it(blobs);
112 BLOBNBOX_IT small_it(small_blobs);
113 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
114 BLOBNBOX *blob = it.data();
115 const TBOX &box = blob->bounding_box();
116 bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
117 if (BoxOutOfHTextline(box, nullptr, debug) && !blob->UniquelyVertical()) {
118 blob->ClearNeighbours();
119 small_it.add_to_end(it.extract());
120 }
121 }
122}
123
124#ifndef GRAPHICS_DISABLED
125
126// Create a window and display the projection in it.
128 int width = pixGetWidth(pix_);
129 int height = pixGetHeight(pix_);
130 Image pixc = pixCreate(width, height, 32);
131 int src_wpl = pixGetWpl(pix_);
132 int col_wpl = pixGetWpl(pixc);
133 uint32_t *src_data = pixGetData(pix_);
134 uint32_t *col_data = pixGetData(pixc);
135 for (int y = 0; y < height; ++y, src_data += src_wpl, col_data += col_wpl) {
136 for (int x = 0; x < width; ++x) {
137 int pixel = GET_DATA_BYTE(src_data, x);
138 l_uint32 result;
139 if (pixel <= 17) {
140 composeRGBPixel(0, 0, pixel * 15, &result);
141 } else if (pixel <= 145) {
142 composeRGBPixel(0, (pixel - 17) * 2, 255, &result);
143 } else {
144 composeRGBPixel((pixel - 145) * 2, 255, 255, &result);
145 }
146 col_data[x] = result;
147 }
148 }
149 auto *win = new ScrollView("Projection", 0, 0, width, height, width, height);
150 win->Draw(pixc, 0, 0);
151 win->Update();
152 pixc.destroy();
153}
154
155#endif // !GRAPHICS_DISABLED
156
157// Compute the distance of the box from the partition using curved projection
158// space. As DistanceOfBoxFromBox, except that the direction is taken from
159// the ColPartition and the median bounds of the ColPartition are used as
160// the to_box.
162 const DENORM *denorm, bool debug) const {
163 // Compute a partition box that uses the median top/bottom of the blobs
164 // within and median left/right for vertical.
165 TBOX part_box = part.bounding_box();
166 if (part.IsHorizontalType()) {
167 part_box.set_top(part.median_top());
168 part_box.set_bottom(part.median_bottom());
169 } else {
170 part_box.set_left(part.median_left());
171 part_box.set_right(part.median_right());
172 }
173 // Now use DistanceOfBoxFromBox to make the actual calculation.
174 return DistanceOfBoxFromBox(box, part_box, part.IsHorizontalType(), denorm, debug);
175}
176
177// Compute the distance from the from_box to the to_box using curved
178// projection space. Separation that involves a decrease in projection
179// density (moving from the from_box to the to_box) is weighted more heavily
180// than constant density, and an increase is weighted less.
181// If horizontal_textline is true, then curved space is used vertically,
182// as for a diacritic on the edge of a textline.
183// The projection uses original image coords, so denorm is used to get
184// back to the image coords from box/part space.
185// How the calculation works: Think of a diacritic near a textline.
186// Distance is measured from the far side of the from_box to the near side of
187// the to_box. Shown is the horizontal textline case.
188// |------^-----|
189// | from | box |
190// |------|-----|
191// perpendicular |
192// <------v-------->|--------------------|
193// parallel | to box |
194// |--------------------|
195// Perpendicular distance uses "curved space" See VerticalDistance below.
196// Parallel distance is linear.
197// Result is perpendicular_gap + parallel_gap / kParaPerpDistRatio.
198int TextlineProjection::DistanceOfBoxFromBox(const TBOX &from_box, const TBOX &to_box,
199 bool horizontal_textline, const DENORM *denorm,
200 bool debug) const {
201 // The parallel_gap is the horizontal gap between a horizontal textline and
202 // the box. Analogous for vertical.
203 int parallel_gap = 0;
204 // start_pt is the box end of the line to be modified for curved space.
205 TPOINT start_pt;
206 // end_pt is the partition end of the line to be modified for curved space.
207 TPOINT end_pt;
208 if (horizontal_textline) {
209 parallel_gap = from_box.x_gap(to_box) + from_box.width();
210 start_pt.x = (from_box.left() + from_box.right()) / 2;
211 end_pt.x = start_pt.x;
212 if (from_box.top() - to_box.top() >= to_box.bottom() - from_box.bottom()) {
213 start_pt.y = from_box.top();
214 end_pt.y = std::min(to_box.top(), start_pt.y);
215 } else {
216 start_pt.y = from_box.bottom();
217 end_pt.y = std::max(to_box.bottom(), start_pt.y);
218 }
219 } else {
220 parallel_gap = from_box.y_gap(to_box) + from_box.height();
221 if (from_box.right() - to_box.right() >= to_box.left() - from_box.left()) {
222 start_pt.x = from_box.right();
223 end_pt.x = std::min(to_box.right(), start_pt.x);
224 } else {
225 start_pt.x = from_box.left();
226 end_pt.x = std::max(to_box.left(), start_pt.x);
227 }
228 start_pt.y = (from_box.bottom() + from_box.top()) / 2;
229 end_pt.y = start_pt.y;
230 }
231 // The perpendicular gap is the max vertical distance gap out of:
232 // top of from_box to to_box top and bottom of from_box to to_box bottom.
233 // This value is then modified for curved projection space.
234 // Analogous for vertical.
235 int perpendicular_gap = 0;
236 // If start_pt == end_pt, then the from_box lies entirely within the to_box
237 // (in the perpendicular direction), so we don't need to calculate the
238 // perpendicular_gap.
239 if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) {
240 if (denorm != nullptr) {
241 // Denormalize the start and end.
242 denorm->DenormTransform(nullptr, start_pt, &start_pt);
243 denorm->DenormTransform(nullptr, end_pt, &end_pt);
244 }
245 if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) {
246 perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y, end_pt.y);
247 } else {
248 perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x, start_pt.y);
249 }
250 }
251 // The parallel_gap weighs less than the perpendicular_gap.
252 return perpendicular_gap + parallel_gap / kParaPerpDistRatio;
253}
254
255// Compute the distance between (x, y1) and (x, y2) using the rule that
256// a decrease in textline density is weighted more heavily than an increase.
257// The coordinates are in source image space, ie processed by any denorm
258// already, but not yet scaled by scale_factor_.
259// Going from the outside of a textline to the inside should measure much
260// less distance than going from the inside of a textline to the outside.
261// How it works:
262// An increase is cheap (getting closer to a textline).
263// Constant costs unity.
264// A decrease is expensive (getting further from a textline).
265// Pixels in projection map Counted distance
266// 2
267// 3 1/x
268// 3 1
269// 2 x
270// 5 1/x
271// 7 1/x
272// Total: 1 + x + 3/x where x = kWrongWayPenalty.
273int TextlineProjection::VerticalDistance(bool debug, int x, int y1, int y2) const {
274 x = ImageXToProjectionX(x);
275 y1 = ImageYToProjectionY(y1);
276 y2 = ImageYToProjectionY(y2);
277 if (y1 == y2) {
278 return 0;
279 }
280 int wpl = pixGetWpl(pix_);
281 int step = y1 < y2 ? 1 : -1;
282 uint32_t *data = pixGetData(pix_) + y1 * wpl;
283 wpl *= step;
284 int prev_pixel = GET_DATA_BYTE(data, x);
285 int distance = 0;
286 int right_way_steps = 0;
287 for (int y = y1; y != y2; y += step) {
288 data += wpl;
289 int pixel = GET_DATA_BYTE(data, x);
290 if (debug) {
291 tprintf("At (%d,%d), pix = %d, prev=%d\n", x, y + step, pixel, prev_pixel);
292 }
293 if (pixel < prev_pixel) {
295 } else if (pixel > prev_pixel) {
296 ++right_way_steps;
297 } else {
298 ++distance;
299 }
300 prev_pixel = pixel;
301 }
302 return distance * scale_factor_ + right_way_steps * scale_factor_ / kWrongWayPenalty;
303}
304
305// Compute the distance between (x1, y) and (x2, y) using the rule that
306// a decrease in textline density is weighted more heavily than an increase.
307int TextlineProjection::HorizontalDistance(bool debug, int x1, int x2, int y) const {
308 x1 = ImageXToProjectionX(x1);
309 x2 = ImageXToProjectionX(x2);
310 y = ImageYToProjectionY(y);
311 if (x1 == x2) {
312 return 0;
313 }
314 int wpl = pixGetWpl(pix_);
315 int step = x1 < x2 ? 1 : -1;
316 uint32_t *data = pixGetData(pix_) + y * wpl;
317 int prev_pixel = GET_DATA_BYTE(data, x1);
318 int distance = 0;
319 int right_way_steps = 0;
320 for (int x = x1; x != x2; x += step) {
321 int pixel = GET_DATA_BYTE(data, x + step);
322 if (debug) {
323 tprintf("At (%d,%d), pix = %d, prev=%d\n", x + step, y, pixel, prev_pixel);
324 }
325 if (pixel < prev_pixel) {
327 } else if (pixel > prev_pixel) {
328 ++right_way_steps;
329 } else {
330 ++distance;
331 }
332 prev_pixel = pixel;
333 }
334 return distance * scale_factor_ + right_way_steps * scale_factor_ / kWrongWayPenalty;
335}
336
337// Returns true if the blob appears to be outside of a textline.
338// Such blobs are potentially diacritics (even if large in Thai) and should
339// be kept away from initial textline finding.
340bool TextlineProjection::BoxOutOfHTextline(const TBOX &box, const DENORM *denorm,
341 bool debug) const {
342 int grad1 = 0;
343 int grad2 = 0;
344 EvaluateBoxInternal(box, denorm, debug, &grad1, &grad2, nullptr, nullptr);
345 int worst_result = std::min(grad1, grad2);
346 int total_result = grad1 + grad2;
347 if (total_result >= 6) {
348 return false; // Strongly in textline.
349 }
350 // Medium strength: if either gradient is negative, it is likely outside
351 // the body of the textline.
352 if (worst_result < 0) {
353 return true;
354 }
355 return false;
356}
357
358// Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below,
359// but uses the median top/bottom for horizontal and median left/right for
360// vertical instead of the bounding box edges.
361// Evaluates for both horizontal and vertical and returns the best result,
362// with a positive value for horizontal and a negative value for vertical.
364 bool debug) const {
365 if (part.IsSingleton()) {
366 return EvaluateBox(part.bounding_box(), denorm, debug);
367 }
368 // Test vertical orientation.
369 TBOX box = part.bounding_box();
370 // Use the partition median for left/right.
371 box.set_left(part.median_left());
372 box.set_right(part.median_right());
373 int vresult = EvaluateBox(box, denorm, debug);
374
375 // Test horizontal orientation.
376 box = part.bounding_box();
377 // Use the partition median for top/bottom.
378 box.set_top(part.median_top());
379 box.set_bottom(part.median_bottom());
380 int hresult = EvaluateBox(box, denorm, debug);
381 if (debug) {
382 tprintf("Partition hresult=%d, vresult=%d from:", hresult, vresult);
383 part.bounding_box().print();
384 part.Print();
385 }
386 return hresult >= -vresult ? hresult : vresult;
387}
388
389// Computes the mean projection gradients over the horizontal and vertical
390// edges of the box:
391// -h-h-h-h-h-h
392// |------------| mean=htop -v|+v--------+v|-v
393// |+h+h+h+h+h+h| -v|+v +v|-v
394// | | -v|+v +v|-v
395// | box | -v|+v box +v|-v
396// | | -v|+v +v|-v
397// |+h+h+h+h+h+h| -v|+v +v|-v
398// |------------| mean=hbot -v|+v--------+v|-v
399// -h-h-h-h-h-h
400// mean=vleft mean=vright
401//
402// Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number
403// for a horizontal textline, a negative number for a vertical textline,
404// and near zero for undecided. Undecided is most likely non-text.
405// All the gradients are truncated to remain non-negative, since negative
406// horizontal gradients don't give any indication of being vertical and
407// vice versa.
408// Additional complexity: The coordinates have to be transformed to original
409// image coordinates with denorm (if not null), scaled to match the projection
410// pix, and THEN step out 2 pixels each way from the edge to compute the
411// gradient, and tries 3 positions, each measuring the gradient over a
412// 4-pixel spread: (+3/-1), (+2/-2), (+1/-3). This complexity is handled by
413// several layers of helpers below.
414int TextlineProjection::EvaluateBox(const TBOX &box, const DENORM *denorm, bool debug) const {
415 return EvaluateBoxInternal(box, denorm, debug, nullptr, nullptr, nullptr, nullptr);
416}
417
418// Internal version of EvaluateBox returns the unclipped gradients as well
419// as the result of EvaluateBox.
420// hgrad1 and hgrad2 are the gradients for the horizontal textline.
421int TextlineProjection::EvaluateBoxInternal(const TBOX &box, const DENORM *denorm, bool debug,
422 int *hgrad1, int *hgrad2, int *vgrad1,
423 int *vgrad2) const {
424 int top_gradient = BestMeanGradientInRow(denorm, box.left(), box.right(), box.top(), true);
425 int bottom_gradient =
426 -BestMeanGradientInRow(denorm, box.left(), box.right(), box.bottom(), false);
427 int left_gradient = BestMeanGradientInColumn(denorm, box.left(), box.bottom(), box.top(), true);
428 int right_gradient =
429 -BestMeanGradientInColumn(denorm, box.right(), box.bottom(), box.top(), false);
430 int top_clipped = std::max(top_gradient, 0);
431 int bottom_clipped = std::max(bottom_gradient, 0);
432 int left_clipped = std::max(left_gradient, 0);
433 int right_clipped = std::max(right_gradient, 0);
434 if (debug) {
435 tprintf("Gradients: top = %d, bottom = %d, left= %d, right= %d for box:", top_gradient,
436 bottom_gradient, left_gradient, right_gradient);
437 box.print();
438 }
439 int result = std::max(top_clipped, bottom_clipped) - std::max(left_clipped, right_clipped);
440 if (hgrad1 != nullptr && hgrad2 != nullptr) {
441 *hgrad1 = top_gradient;
442 *hgrad2 = bottom_gradient;
443 }
444 if (vgrad1 != nullptr && vgrad2 != nullptr) {
445 *vgrad1 = left_gradient;
446 *vgrad2 = right_gradient;
447 }
448 return result;
449}
450
451// Helper returns the mean gradient value for the horizontal row at the given
452// y, (in the external coordinates) by subtracting the mean of the transformed
453// row 2 pixels above from the mean of the transformed row 2 pixels below.
454// This gives a positive value for a good top edge and negative for bottom.
455// Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
456int TextlineProjection::BestMeanGradientInRow(const DENORM *denorm, int16_t min_x, int16_t max_x,
457 int16_t y, bool best_is_max) const {
458 TPOINT start_pt(min_x, y);
459 TPOINT end_pt(max_x, y);
460 int upper = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt);
461 int lower = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt);
462 int best_gradient = lower - upper;
463 upper = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt);
464 lower = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt);
465 int gradient = lower - upper;
466 if ((gradient > best_gradient) == best_is_max) {
467 best_gradient = gradient;
468 }
469 upper = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt);
470 lower = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt);
471 gradient = lower - upper;
472 if ((gradient > best_gradient) == best_is_max) {
473 best_gradient = gradient;
474 }
475 return best_gradient;
476}
477
478// Helper returns the mean gradient value for the vertical column at the
479// given x, (in the external coordinates) by subtracting the mean of the
480// transformed column 2 pixels left from the mean of the transformed column
481// 2 pixels to the right.
482// This gives a positive value for a good left edge and negative for right.
483// Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
484int TextlineProjection::BestMeanGradientInColumn(const DENORM *denorm, int16_t x, int16_t min_y,
485 int16_t max_y, bool best_is_max) const {
486 TPOINT start_pt(x, min_y);
487 TPOINT end_pt(x, max_y);
488 int left = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt);
489 int right = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt);
490 int best_gradient = right - left;
491 left = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt);
492 right = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt);
493 int gradient = right - left;
494 if ((gradient > best_gradient) == best_is_max) {
495 best_gradient = gradient;
496 }
497 left = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt);
498 right = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt);
499 gradient = right - left;
500 if ((gradient > best_gradient) == best_is_max) {
501 best_gradient = gradient;
502 }
503 return best_gradient;
504}
505
506// Helper returns the mean pixel value over the line between the start_pt and
507// end_pt (inclusive), but shifted perpendicular to the line in the projection
508// image by offset pixels. For simplicity, it is assumed that the vector is
509// either nearly horizontal or nearly vertical. It works on skewed textlines!
510// The end points are in external coordinates, and will be denormalized with
511// the denorm if not nullptr before further conversion to pix coordinates.
512// After all the conversions, the offset is added to the direction
513// perpendicular to the line direction. The offset is thus in projection image
514// coordinates, which allows the caller to get a guaranteed displacement
515// between pixels used to calculate gradients.
516int TextlineProjection::MeanPixelsInLineSegment(const DENORM *denorm, int offset, TPOINT start_pt,
517 TPOINT end_pt) const {
518 TransformToPixCoords(denorm, &start_pt);
519 TransformToPixCoords(denorm, &end_pt);
520 TruncateToImageBounds(&start_pt);
521 TruncateToImageBounds(&end_pt);
522 int wpl = pixGetWpl(pix_);
523 uint32_t *data = pixGetData(pix_);
524 int total = 0;
525 int count = 0;
526 int x_delta = end_pt.x - start_pt.x;
527 int y_delta = end_pt.y - start_pt.y;
528 if (abs(x_delta) >= abs(y_delta)) {
529 if (x_delta == 0) {
530 return 0;
531 }
532 // Horizontal line. Add the offset vertically.
533 int x_step = x_delta > 0 ? 1 : -1;
534 // Correct offset for rotation, keeping it anti-clockwise of the delta.
535 offset *= x_step;
536 start_pt.y += offset;
537 end_pt.y += offset;
538 TruncateToImageBounds(&start_pt);
539 TruncateToImageBounds(&end_pt);
540 x_delta = end_pt.x - start_pt.x;
541 y_delta = end_pt.y - start_pt.y;
542 count = x_delta * x_step + 1;
543 for (int x = start_pt.x; x != end_pt.x; x += x_step) {
544 int y = start_pt.y + DivRounded(y_delta * (x - start_pt.x), x_delta);
545 total += GET_DATA_BYTE(data + wpl * y, x);
546 }
547 } else {
548 // Vertical line. Add the offset horizontally.
549 int y_step = y_delta > 0 ? 1 : -1;
550 // Correct offset for rotation, keeping it anti-clockwise of the delta.
551 // Pix holds the image with y=0 at the top, so the offset is negated.
552 offset *= -y_step;
553 start_pt.x += offset;
554 end_pt.x += offset;
555 TruncateToImageBounds(&start_pt);
556 TruncateToImageBounds(&end_pt);
557 x_delta = end_pt.x - start_pt.x;
558 y_delta = end_pt.y - start_pt.y;
559 count = y_delta * y_step + 1;
560 for (int y = start_pt.y; y != end_pt.y; y += y_step) {
561 int x = start_pt.x + DivRounded(x_delta * (y - start_pt.y), y_delta);
562 total += GET_DATA_BYTE(data + wpl * y, x);
563 }
564 }
565 return DivRounded(total, count);
566}
567
568// Given an input pix, and a box, the sides of the box are shrunk inwards until
569// they bound any black pixels found within the original box.
570// The function converts between tesseract coords and the pix coords assuming
571// that this pix is full resolution equal in size to the original image.
572// Returns an empty box if there are no black pixels in the source box.
573static TBOX BoundsWithinBox(Image pix, const TBOX &box) {
574 int im_height = pixGetHeight(pix);
575 Box *input_box = boxCreate(box.left(), im_height - box.top(), box.width(), box.height());
576 Box *output_box = nullptr;
577 pixClipBoxToForeground(pix, input_box, nullptr, &output_box);
578 TBOX result_box;
579 if (output_box != nullptr) {
580 l_int32 x, y, width, height;
581 boxGetGeometry(output_box, &x, &y, &width, &height);
582 result_box.set_left(x);
583 result_box.set_right(x + width);
584 result_box.set_top(im_height - y);
585 result_box.set_bottom(result_box.top() - height);
586 boxDestroy(&output_box);
587 }
588 boxDestroy(&input_box);
589 return result_box;
590}
591
592// Splits the given box in half at x_middle or y_middle according to split_on_x
593// and checks for nontext_map pixels in each half. Reduces the bbox so that it
594// still includes the middle point, but does not touch any fg pixels in
595// nontext_map. An empty box may be returned if there is no such box.
596static void TruncateBoxToMissNonText(int x_middle, int y_middle, bool split_on_x, Image nontext_map,
597 TBOX *bbox) {
598 TBOX box1(*bbox);
599 TBOX box2(*bbox);
600 TBOX im_box;
601 if (split_on_x) {
602 box1.set_right(x_middle);
603 im_box = BoundsWithinBox(nontext_map, box1);
604 if (!im_box.null_box()) {
605 box1.set_left(im_box.right());
606 }
607 box2.set_left(x_middle);
608 im_box = BoundsWithinBox(nontext_map, box2);
609 if (!im_box.null_box()) {
610 box2.set_right(im_box.left());
611 }
612 } else {
613 box1.set_bottom(y_middle);
614 im_box = BoundsWithinBox(nontext_map, box1);
615 if (!im_box.null_box()) {
616 box1.set_top(im_box.bottom());
617 }
618 box2.set_top(y_middle);
619 im_box = BoundsWithinBox(nontext_map, box2);
620 if (!im_box.null_box()) {
621 box2.set_bottom(im_box.top());
622 }
623 }
624 box1 += box2;
625 *bbox = box1;
626}
627
628// Helper function to add 1 to a rectangle in source image coords to the
629// internal projection pix_.
630void TextlineProjection::IncrementRectangle8Bit(const TBOX &box) {
631 int scaled_left = ImageXToProjectionX(box.left());
632 int scaled_top = ImageYToProjectionY(box.top());
633 int scaled_right = ImageXToProjectionX(box.right());
634 int scaled_bottom = ImageYToProjectionY(box.bottom());
635 int wpl = pixGetWpl(pix_);
636 uint32_t *data = pixGetData(pix_) + scaled_top * wpl;
637 for (int y = scaled_top; y <= scaled_bottom; ++y) {
638 for (int x = scaled_left; x <= scaled_right; ++x) {
639 int pixel = GET_DATA_BYTE(data, x);
640 if (pixel < 255) {
641 SET_DATA_BYTE(data, x, pixel + 1);
642 }
643 }
644 data += wpl;
645 }
646}
647
648// Inserts a list of blobs into the projection.
649// Rotation is a multiple of 90 degrees to get from blob coords to
650// nontext_map coords, nontext_map_box is the bounds of the nontext_map.
651// Blobs are spread horizontally or vertically according to their internal
652// flags, but the spreading is truncated by set pixels in the nontext_map
653// and also by the horizontal rule line limits on the blobs.
654void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST *blobs, const FCOORD &rotation,
655 const TBOX &nontext_map_box, Image nontext_map) {
656 BLOBNBOX_IT blob_it(blobs);
657 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
658 BLOBNBOX *blob = blob_it.data();
659 TBOX bbox = blob->bounding_box();
660 ICOORD middle((bbox.left() + bbox.right()) / 2, (bbox.bottom() + bbox.top()) / 2);
661 bool spreading_horizontally = PadBlobBox(blob, &bbox);
662 // Rotate to match the nontext_map.
663 bbox.rotate(rotation);
664 middle.rotate(rotation);
665 if (rotation.x() == 0.0f) {
666 spreading_horizontally = !spreading_horizontally;
667 }
668 // Clip to the image before applying the increments.
669 bbox &= nontext_map_box; // This is in-place box intersection.
670 // Check for image pixels before spreading.
671 TruncateBoxToMissNonText(middle.x(), middle.y(), spreading_horizontally, nontext_map, &bbox);
672 if (bbox.area() > 0) {
673 IncrementRectangle8Bit(bbox);
674 }
675 }
676}
677
678// Pads the bounding box of the given blob according to whether it is on
679// a horizontal or vertical text line, taking into account tab-stops near
680// the blob. Returns true if padding was in the horizontal direction.
681bool TextlineProjection::PadBlobBox(BLOBNBOX *blob, TBOX *bbox) {
682 // Determine which direction to spread.
683 // If text is well spaced out, it can be useful to pad perpendicular to
684 // the textline direction, so as to ensure diacritics get absorbed
685 // correctly, but if the text is tightly spaced, this will destroy the
686 // blank space between textlines in the projection map, and that would
687 // be very bad.
688 int pad_limit = scale_factor_ * kMinLineSpacingFactor;
689 int xpad = 0;
690 int ypad = 0;
691 bool padding_horizontally = false;
692 if (blob->UniquelyHorizontal()) {
693 xpad = bbox->height() * kOrientedPadFactor;
694 padding_horizontally = true;
695 // If the text appears to be very well spaced, pad the other direction by a
696 // single pixel in the projection profile space to help join diacritics to
697 // the textline.
698 if ((blob->neighbour(BND_ABOVE) == nullptr ||
699 bbox->y_gap(blob->neighbour(BND_ABOVE)->bounding_box()) > pad_limit) &&
700 (blob->neighbour(BND_BELOW) == nullptr ||
701 bbox->y_gap(blob->neighbour(BND_BELOW)->bounding_box()) > pad_limit)) {
702 ypad = scale_factor_;
703 }
704 } else if (blob->UniquelyVertical()) {
705 ypad = bbox->width() * kOrientedPadFactor;
706 if ((blob->neighbour(BND_LEFT) == nullptr ||
707 bbox->x_gap(blob->neighbour(BND_LEFT)->bounding_box()) > pad_limit) &&
708 (blob->neighbour(BND_RIGHT) == nullptr ||
709 bbox->x_gap(blob->neighbour(BND_RIGHT)->bounding_box()) > pad_limit)) {
710 xpad = scale_factor_;
711 }
712 } else {
713 if ((blob->neighbour(BND_ABOVE) != nullptr &&
714 blob->neighbour(BND_ABOVE)->neighbour(BND_BELOW) == blob) ||
715 (blob->neighbour(BND_BELOW) != nullptr &&
716 blob->neighbour(BND_BELOW)->neighbour(BND_ABOVE) == blob)) {
717 ypad = bbox->width() * kDefaultPadFactor;
718 }
719 if ((blob->neighbour(BND_RIGHT) != nullptr &&
720 blob->neighbour(BND_RIGHT)->neighbour(BND_LEFT) == blob) ||
721 (blob->neighbour(BND_LEFT) != nullptr &&
722 blob->neighbour(BND_LEFT)->neighbour(BND_RIGHT) == blob)) {
723 xpad = bbox->height() * kDefaultPadFactor;
724 padding_horizontally = true;
725 }
726 }
727 bbox->pad(xpad, ypad);
728 pad_limit = scale_factor_ * kMaxTabStopOverrun;
729 // Now shrink horizontally to avoid stepping more than pad_limit over a
730 // tab-stop.
731 if (bbox->left() < blob->left_rule() - pad_limit) {
732 bbox->set_left(blob->left_rule() - pad_limit);
733 }
734 if (bbox->right() > blob->right_rule() + pad_limit) {
735 bbox->set_right(blob->right_rule() + pad_limit);
736 }
737 return padding_horizontally;
738}
739
740// Helper denormalizes the TPOINT with the denorm if not nullptr, then
741// converts to pix_ coordinates.
742void TextlineProjection::TransformToPixCoords(const DENORM *denorm, TPOINT *pt) const {
743 if (denorm != nullptr) {
744 // Denormalize the point.
745 denorm->DenormTransform(nullptr, *pt, pt);
746 }
747 pt->x = ImageXToProjectionX(pt->x);
748 pt->y = ImageYToProjectionY(pt->y);
749}
750
751#if defined(_MSC_VER) && !defined(__clang__)
752# pragma optimize("g", off)
753#endif // _MSC_VER
754// Helper truncates the TPOINT to be within the pix_.
755void TextlineProjection::TruncateToImageBounds(TPOINT *pt) const {
756 pt->x = ClipToRange<int>(pt->x, 0, pixGetWidth(pix_) - 1);
757 pt->y = ClipToRange<int>(pt->y, 0, pixGetHeight(pix_) - 1);
758}
759#if defined(_MSC_VER) && !defined(__clang__)
760# pragma optimize("", on)
761#endif // _MSC_VER
762
763// Transform tesseract image coordinates to coordinates used in the projection.
764int TextlineProjection::ImageXToProjectionX(int x) const {
765 x = ClipToRange((x - x_origin_) / scale_factor_, 0, pixGetWidth(pix_) - 1);
766 return x;
767}
768int TextlineProjection::ImageYToProjectionY(int y) const {
769 y = ClipToRange((y_origin_ - y) / scale_factor_, 0, pixGetHeight(pix_) - 1);
770 return y;
771}
772
773} // namespace tesseract.
const int kWrongWayPenalty
const int kMaxTabStopOverrun
const int kParaPerpDistRatio
const int kMinLineSpacingFactor
const int kOrientedPadFactor
const int kDefaultPadFactor
@ TBOX
@ TPOINT
UnicodeText::const_iterator::difference_type distance(const UnicodeText::const_iterator &first, const UnicodeText::const_iterator &last)
Definition: unicodetext.cc:44
const double y
int * count
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
int IntCastRounded(double x)
Definition: helpers.h:170
int DivRounded(int a, int b)
Definition: helpers.h:162
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:105
@ BND_LEFT
Definition: blobbox.h:89
@ BND_RIGHT
Definition: blobbox.h:89
@ BND_BELOW
Definition: blobbox.h:89
@ BND_ABOVE
Definition: blobbox.h:89
const TBOX & bounding_box() const
Definition: blobbox.h:239
bool UniquelyVertical() const
Definition: blobbox.h:427
void ClearNeighbours()
Definition: blobbox.h:510
BLOBNBOX_LIST blobs
Definition: blobbox.h:776
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:780
TDimension x
Definition: blobs.h:89
TDimension y
Definition: blobs.h:90
void destroy()
Definition: image.cpp:32
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const
Definition: normalis.cpp:401
TDimension left() const
Definition: rect.h:82
int y_gap(const TBOX &box) const
Definition: rect.h:245
TDimension height() const
Definition: rect.h:118
TDimension width() const
Definition: rect.h:126
void set_right(int x)
Definition: rect.h:92
void set_left(int x)
Definition: rect.h:85
TDimension top() const
Definition: rect.h:68
int x_gap(const TBOX &box) const
Definition: rect.h:238
void set_bottom(int y)
Definition: rect.h:78
void print() const
Definition: rect.h:289
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
void set_top(int y)
Definition: rect.h:71
static bool WithinTestRegion(int detail_level, int x, int y)
const TBOX & bounding_box() const
Definition: colpartition.h:108
bool IsSingleton() const
Definition: colpartition.h:361
bool IsHorizontalType() const
Definition: colpartition.h:445
void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Image nontext_map)
bool BoxOutOfHTextline(const TBOX &box, const DENORM *denorm, bool debug) const
int DistanceOfBoxFromPartition(const TBOX &box, const ColPartition &part, const DENORM *denorm, bool debug) const
int EvaluateBox(const TBOX &box, const DENORM *denorm, bool debug) const
void MoveNonTextlineBlobs(BLOBNBOX_LIST *blobs, BLOBNBOX_LIST *small_blobs) const
int EvaluateColPartition(const ColPartition &part, const DENORM *denorm, bool debug) const
int DistanceOfBoxFromBox(const TBOX &from_box, const TBOX &to_box, bool horizontal_textline, const DENORM *denorm, bool debug) const
void PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win)
int HorizontalDistance(bool debug, int x1, int x2, int y) const
int VerticalDistance(bool debug, int x, int y1, int y2) const
void Pen(Color color)
Definition: scrollview.cpp:710
static void Update()
Definition: scrollview.cpp:700
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:576