tesseract v5.3.3.20231005
linefind.cpp
Go to the documentation of this file.
1
2// File: linefind.cpp
3// Description: Class to find vertical lines in an image and create
4// a corresponding list of empty blobs.
5// Author: Ray Smith
6//
7// (C) Copyright 2008, Google Inc.
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11// http://www.apache.org/licenses/LICENSE-2.0
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17//
19
20#ifdef HAVE_CONFIG_H
21# include "config_auto.h"
22#endif
23
24#include "alignedblob.h"
25#include "blobbox.h"
26#include "crakedge.h" // for CRACKEDGE
27#include "edgblob.h"
28#include "linefind.h"
29#include "tabvector.h"
30#if defined(USE_OPENCL)
31# include "openclwrapper.h" // for OpenclDevice
32#endif
33
34#include <algorithm>
35
36namespace tesseract {
37
// Denominator of resolution makes max pixel width to allow thin lines.
39const int kThinLineFraction = 20;
// NOTE(review): kMinLineLengthFraction is used by GetLineMasks but no
// definition of it is visible in this listing - confirm against the original
// file that it is still declared here.
// Spacing of cracks across the page to break up tall vertical lines.
43const int kCrackSpacing = 100;
// Grid size used by line finder. Not very critical.
45const int kLineFindGridSize = 50;
46// Min width of a line in pixels to be considered thick.
47const int kMinThickLineWidth = 12;
48// Max size of line residue. (The pixels that fail the long thin opening, and
49// therefore don't make it to the candidate line mask, but are nevertheless
50// part of the line.)
51const int kMaxLineResidue = 6;
52// Min length in inches of a line segment that exceeds kMinThickLineWidth in
53// thickness. (Such lines shouldn't break by simple image degradation.)
54const double kThickLengthMultiple = 0.75;
55// Max fraction of line box area that can be occupied by non-line pixels.
56const double kMaxNonLineDensity = 0.25;
57// Max height of a music stave in inches.
58const double kMaxStaveHeight = 1.0;
59// Minimum fraction of pixels in a music rectangle connected to the staves.
60const double kMinMusicPixelFraction = 0.75;
61
62// Erases the unused blobs from the line_pix image, taking into account
63// whether this was a horizontal or vertical line set.
64static void RemoveUnusedLineSegments(bool horizontal_lines, BLOBNBOX_LIST *line_bblobs,
65 Image line_pix) {
66 int height = pixGetHeight(line_pix);
67 BLOBNBOX_IT bbox_it(line_bblobs);
68 for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
69 BLOBNBOX *blob = bbox_it.data();
70 if (blob->left_tab_type() != TT_VLINE) {
71 const TBOX &box = blob->bounding_box();
72 Box *pixbox = nullptr;
73 if (horizontal_lines) {
74 // Horizontal lines are in tess format and also have x and y flipped
75 // (to use FindVerticalAlignment) so we have to flip x and y and then
76 // convert to Leptonica by height - flipped x (ie the right edge).
77 // See GetLineBoxes for more explanation.
78 pixbox = boxCreate(box.bottom(), height - box.right(), box.height(), box.width());
79 } else {
80 // For vertical lines, just flip upside-down to convert to Leptonica.
81 // The y position of the box in Leptonica terms is the distance from
82 // the top of the image to the top of the box.
83 pixbox = boxCreate(box.left(), height - box.top(), box.width(), box.height());
84 }
85 pixClearInRect(line_pix, pixbox);
86 boxDestroy(&pixbox);
87 }
88 }
89}
90
91// Helper subtracts the line_pix image from the src_pix, and removes residue
92// as well by removing components that touch the line, but are not in the
93// non_line_pix mask. It is assumed that the non_line_pix mask has already
94// been prepared to required accuracy.
95static void SubtractLinesAndResidue(Image line_pix, Image non_line_pix,
96 Image src_pix) {
97 // First remove the lines themselves.
98 pixSubtract(src_pix, src_pix, line_pix);
99 // Subtract the non-lines from the image to get the residue.
100 Image residue_pix = pixSubtract(nullptr, src_pix, non_line_pix);
101 // Dilate the lines so they touch the residue.
102 Image fat_line_pix = pixDilateBrick(nullptr, line_pix, 3, 3);
103 // Seed fill the fat lines to get all the residue.
104 pixSeedfillBinary(fat_line_pix, fat_line_pix, residue_pix, 8);
105 // Subtract the residue from the original image.
106 pixSubtract(src_pix, src_pix, fat_line_pix);
107 fat_line_pix.destroy();
108 residue_pix.destroy();
109}
110
111// Returns the maximum strokewidth in the given binary image by doubling
112// the maximum of the distance function.
113static int MaxStrokeWidth(Image pix) {
114 Image dist_pix = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG);
115 int width = pixGetWidth(dist_pix);
116 int height = pixGetHeight(dist_pix);
117 int wpl = pixGetWpl(dist_pix);
118 l_uint32 *data = pixGetData(dist_pix);
119 // Find the maximum value in the distance image.
120 int max_dist = 0;
121 for (int y = 0; y < height; ++y) {
122 for (int x = 0; x < width; ++x) {
123 int pixel = GET_DATA_BYTE(data, x);
124 if (pixel > max_dist) {
125 max_dist = pixel;
126 }
127 }
128 data += wpl;
129 }
130 dist_pix.destroy();
131 return max_dist * 2;
132}
133
134// Returns the number of components in the intersection_pix touched by line_box.
135static int NumTouchingIntersections(Box *line_box, Image intersection_pix) {
136 if (intersection_pix == nullptr) {
137 return 0;
138 }
139 Image rect_pix = pixClipRectangle(intersection_pix, line_box, nullptr);
140 Boxa *boxa = pixConnComp(rect_pix, nullptr, 8);
141 rect_pix.destroy();
142 if (boxa == nullptr) {
143 return false;
144 }
145 int result = boxaGetCount(boxa);
146 boxaDestroy(&boxa);
147 return result;
148}
149
150// Returns the number of black pixels found in the box made by adding the line
151// width to both sides of the line bounding box. (Increasing the smallest
152// dimension of the bounding box.)
153static int CountPixelsAdjacentToLine(int line_width, Box *line_box, Image nonline_pix) {
154 l_int32 x, y, box_width, box_height;
155 boxGetGeometry(line_box, &x, &y, &box_width, &box_height);
156 if (box_width > box_height) {
157 // horizontal line.
158 int bottom = std::min(pixGetHeight(nonline_pix), y + box_height + line_width);
159 y = std::max(0, y - line_width);
160 box_height = bottom - y;
161 } else {
162 // Vertical line.
163 int right = std::min(pixGetWidth(nonline_pix), x + box_width + line_width);
164 x = std::max(0, x - line_width);
165 box_width = right - x;
166 }
167 Box *box = boxCreate(x, y, box_width, box_height);
168 Image rect_pix = pixClipRectangle(nonline_pix, box, nullptr);
169 boxDestroy(&box);
170 l_int32 result;
171 pixCountPixels(rect_pix, &result, nullptr);
172 rect_pix.destroy();
173 return result;
174}
175
176// Helper erases false-positive line segments from the input/output line_pix.
177// 1. Since thick lines shouldn't really break up, we can eliminate some false
178// positives by marking segments that are at least kMinThickLineWidth
179// thickness, yet have a length less than min_thick_length.
180// 2. Lines that don't have at least 2 intersections with other lines and have
181// a lot of neighbouring non-lines are probably not lines (perhaps arabic
182// or Hindi words, or underlines.)
183// Bad line components are erased from line_pix.
184// Returns the number of remaining connected components.
185static int FilterFalsePositives(int resolution, Image nonline_pix, Image intersection_pix,
186 Image line_pix) {
187 int min_thick_length = static_cast<int>(resolution * kThickLengthMultiple);
188 Pixa *pixa = nullptr;
189 Boxa *boxa = pixConnComp(line_pix, &pixa, 8);
190 // Iterate over the boxes to remove false positives.
191 int nboxes = boxaGetCount(boxa);
192 int remaining_boxes = nboxes;
193 for (int i = 0; i < nboxes; ++i) {
194 Box *box = boxaGetBox(boxa, i, L_CLONE);
195 l_int32 x, y, box_width, box_height;
196 boxGetGeometry(box, &x, &y, &box_width, &box_height);
197 Image comp_pix = pixaGetPix(pixa, i, L_CLONE);
198 int max_width = MaxStrokeWidth(comp_pix);
199 comp_pix.destroy();
200 bool bad_line = false;
201 // If the length is too short to stand-alone as a line, and the box width
202 // is thick enough, and the stroke width is thick enough it is bad.
203 if (box_width >= kMinThickLineWidth && box_height >= kMinThickLineWidth &&
204 box_width < min_thick_length && box_height < min_thick_length &&
205 max_width > kMinThickLineWidth) {
206 // Too thick for the length.
207 bad_line = true;
208 }
209 if (!bad_line && (NumTouchingIntersections(box, intersection_pix) < 2)) {
210 // Test non-line density near the line.
211 int nonline_count = CountPixelsAdjacentToLine(max_width, box, nonline_pix);
212 if (nonline_count > box_height * box_width * kMaxNonLineDensity) {
213 bad_line = true;
214 }
215 }
216 if (bad_line) {
217 // Not a good line.
218 pixClearInRect(line_pix, box);
219 --remaining_boxes;
220 }
221 boxDestroy(&box);
222 }
223 pixaDestroy(&pixa);
224 boxaDestroy(&boxa);
225 return remaining_boxes;
226}
227
228// Converts the Boxa array to a list of C_BLOB, getting rid of severely
229// overlapping outlines and those that are children of a bigger one.
230// The output is a list of C_BLOBs that are owned by the list.
231// The C_OUTLINEs in the C_BLOBs contain no outline data - just empty
232// bounding boxes. The Boxa is consumed and destroyed.
233static void ConvertBoxaToBlobs(int image_width, int image_height, Boxa **boxes,
234 C_BLOB_LIST *blobs) {
235 C_OUTLINE_LIST outlines;
236 C_OUTLINE_IT ol_it = &outlines;
237 // Iterate the boxes to convert to outlines.
238 int nboxes = boxaGetCount(*boxes);
239 for (int i = 0; i < nboxes; ++i) {
240 l_int32 x, y, width, height;
241 boxaGetBoxGeometry(*boxes, i, &x, &y, &width, &height);
242 // Make a C_OUTLINE from the leptonica box. This is a bit of a hack,
243 // as there is no outline, just a bounding box, but with some very
244 // small changes to coutln.cpp, it works nicely.
245 ICOORD top_left(x, y);
246 ICOORD bot_right(x + width, y + height);
247 CRACKEDGE startpt;
248 startpt.pos = top_left;
249 auto *outline = new C_OUTLINE(&startpt, top_left, bot_right, 0);
250 ol_it.add_after_then_move(outline);
251 }
252 // Use outlines_to_blobs to convert the outlines to blobs and find
253 // overlapping and contained objects. The output list of blobs in the block
254 // has all the bad ones filtered out and deleted.
255 BLOCK block;
256 ICOORD page_tl(0, 0);
257 ICOORD page_br(image_width, image_height);
258 outlines_to_blobs(&block, page_tl, page_br, &outlines);
259 // Transfer the created blobs to the output list.
260 C_BLOB_IT blob_it(blobs);
261 blob_it.add_list_after(block.blob_list());
262 // The boxes aren't needed any more.
263 boxaDestroy(boxes);
264}
265
266// Returns a list of boxes corresponding to the candidate line segments. Sets
267// the line_crossings member of the boxes so we can later determine the number
268// of intersections touched by a full line.
269static void GetLineBoxes(bool horizontal_lines, Image pix_lines, Image pix_intersections,
270 C_BLOB_LIST *line_cblobs, BLOBNBOX_LIST *line_bblobs) {
271 // Put a single pixel crack in every line at an arbitrary spacing,
272 // so they break up and the bounding boxes can be used to get the
273 // direction accurately enough without needing outlines.
274 int wpl = pixGetWpl(pix_lines);
275 int width = pixGetWidth(pix_lines);
276 int height = pixGetHeight(pix_lines);
277 l_uint32 *data = pixGetData(pix_lines);
278 if (horizontal_lines) {
279 for (int y = 0; y < height; ++y, data += wpl) {
280 for (int x = kCrackSpacing; x < width; x += kCrackSpacing) {
281 CLEAR_DATA_BIT(data, x);
282 }
283 }
284 } else {
285 for (int y = kCrackSpacing; y < height; y += kCrackSpacing) {
286 memset(data + wpl * y, 0, wpl * sizeof(*data));
287 }
288 }
289 // Get the individual connected components
290 Boxa *boxa = pixConnComp(pix_lines, nullptr, 8);
291 ConvertBoxaToBlobs(width, height, &boxa, line_cblobs);
292 // Make the BLOBNBOXes from the C_BLOBs.
293 C_BLOB_IT blob_it(line_cblobs);
294 BLOBNBOX_IT bbox_it(line_bblobs);
295 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
296 C_BLOB *cblob = blob_it.data();
297 auto *bblob = new BLOBNBOX(cblob);
298 bbox_it.add_to_end(bblob);
299 // Determine whether the line segment touches two intersections.
300 const TBOX &bbox = bblob->bounding_box();
301 Box *box = boxCreate(bbox.left(), bbox.bottom(), bbox.width(), bbox.height());
302 bblob->set_line_crossings(NumTouchingIntersections(box, pix_intersections));
303 boxDestroy(&box);
304 // Transform the bounding box prior to finding lines. To save writing
305 // two line finders, flip x and y for horizontal lines and re-use the
306 // tab-stop detection code. For vertical lines we still have to flip the
307 // y-coordinates to switch from leptonica coords to tesseract coords.
308 if (horizontal_lines) {
309 // Note that we have Leptonica coords stored in a Tesseract box, so that
310 // bbox.bottom(), being the MIN y coord, is actually the top, so to get
311 // back to Leptonica coords in RemoveUnusedLineSegments, we have to
312 // use height - box.right() as the top, which looks very odd.
313 TBOX new_box(height - bbox.top(), bbox.left(), height - bbox.bottom(), bbox.right());
314 bblob->set_bounding_box(new_box);
315 } else {
316 TBOX new_box(bbox.left(), height - bbox.top(), bbox.right(), height - bbox.bottom());
317 bblob->set_bounding_box(new_box);
318 }
319 }
320}
321
322// Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
323// are the bounds of the image on which the input line_bblobs were found.
324// The input line_bblobs list is const really.
325// The output vertical_x and vertical_y are the total of all the vectors.
326// The output list of TabVector makes no reference to the input BLOBNBOXes.
327static void FindLineVectors(const ICOORD &bleft, const ICOORD &tright,
328 BLOBNBOX_LIST *line_bblobs, int *vertical_x, int *vertical_y,
329 TabVector_LIST *vectors) {
330 BLOBNBOX_IT bbox_it(line_bblobs);
331 int b_count = 0;
332 // Put all the blobs into the grid to find the lines, and move the blobs
333 // to the output lists.
334 AlignedBlob blob_grid(kLineFindGridSize, bleft, tright);
335 for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
336 BLOBNBOX *bblob = bbox_it.data();
337 bblob->set_left_tab_type(TT_MAYBE_ALIGNED);
338 bblob->set_left_rule(bleft.x());
339 bblob->set_right_rule(tright.x());
340 bblob->set_left_crossing_rule(bleft.x());
341 bblob->set_right_crossing_rule(tright.x());
342 blob_grid.InsertBBox(false, true, bblob);
343 ++b_count;
344 }
345 if (b_count == 0) {
346 return;
347 }
348
349 // Search the entire grid, looking for vertical line vectors.
350 BlobGridSearch lsearch(&blob_grid);
351 BLOBNBOX *bbox;
352 TabVector_IT vector_it(vectors);
353 *vertical_x = 0;
354 *vertical_y = 1;
355 lsearch.StartFullSearch();
356 while ((bbox = lsearch.NextFullSearch()) != nullptr) {
357 if (bbox->left_tab_type() == TT_MAYBE_ALIGNED) {
358 const TBOX &box = bbox->bounding_box();
359 if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) {
360 tprintf("Finding line vector starting at bbox (%d,%d)\n", box.left(), box.bottom());
361 }
362 AlignedBlobParams align_params(*vertical_x, *vertical_y, box.width());
363 TabVector *vector =
364 blob_grid.FindVerticalAlignment(align_params, bbox, vertical_x, vertical_y);
365 if (vector != nullptr) {
366 vector->Freeze();
367 vector_it.add_to_end(vector);
368 }
369 }
370 }
371}
372
373// Returns a Pix music mask if music is detected.
374// Any vertical line that has at least 5 intersections in sufficient density
375// is taken to be a bar. Bars are used as a seed and the entire touching
376// component is added to the output music mask and subtracted from the lines.
377// Returns nullptr and does minimal work if no music is found.
378static Image FilterMusic(int resolution, Image pix_closed, Image pix_vline, Image pix_hline,
379 bool &v_empty, bool &h_empty) {
380 int max_stave_height = static_cast<int>(resolution * kMaxStaveHeight);
381 Image intersection_pix = pix_vline & pix_hline;
382 Boxa *boxa = pixConnComp(pix_vline, nullptr, 8);
383 // Iterate over the boxes to find music bars.
384 int nboxes = boxaGetCount(boxa);
385 Image music_mask = nullptr;
386 for (int i = 0; i < nboxes; ++i) {
387 Box *box = boxaGetBox(boxa, i, L_CLONE);
388 l_int32 x, y, box_width, box_height;
389 boxGetGeometry(box, &x, &y, &box_width, &box_height);
390 int joins = NumTouchingIntersections(box, intersection_pix);
391 // Test for the join density being at least 5 per max_stave_height,
392 // ie (joins-1)/box_height >= (5-1)/max_stave_height.
393 if (joins >= 5 && (joins - 1) * max_stave_height >= 4 * box_height) {
394 // This is a music bar. Add to the mask.
395 if (music_mask == nullptr) {
396 music_mask = pixCreate(pixGetWidth(pix_vline), pixGetHeight(pix_vline), 1);
397 }
398 pixSetInRect(music_mask, box);
399 }
400 boxDestroy(&box);
401 }
402 boxaDestroy(&boxa);
403 intersection_pix.destroy();
404 if (music_mask != nullptr) {
405 // The mask currently contains just the bars. Use the mask as a seed
406 // and the pix_closed as the mask for a seedfill to get all the
407 // intersecting staves.
408 pixSeedfillBinary(music_mask, music_mask, pix_closed, 8);
409 // Filter out false positives. CCs in the music_mask should be the vast
410 // majority of the pixels in their bounding boxes, as we expect just a
411 // tiny amount of text, a few phrase marks, and crescendo etc left.
412 Boxa *boxa = pixConnComp(music_mask, nullptr, 8);
413 // Iterate over the boxes to find music components.
414 int nboxes = boxaGetCount(boxa);
415 for (int i = 0; i < nboxes; ++i) {
416 Box *box = boxaGetBox(boxa, i, L_CLONE);
417 Image rect_pix = pixClipRectangle(music_mask, box, nullptr);
418 l_int32 music_pixels;
419 pixCountPixels(rect_pix, &music_pixels, nullptr);
420 rect_pix.destroy();
421 rect_pix = pixClipRectangle(pix_closed, box, nullptr);
422 l_int32 all_pixels;
423 pixCountPixels(rect_pix, &all_pixels, nullptr);
424 rect_pix.destroy();
425 if (music_pixels < kMinMusicPixelFraction * all_pixels) {
426 // False positive. Delete from the music mask.
427 pixClearInRect(music_mask, box);
428 }
429 boxDestroy(&box);
430 }
431 boxaDestroy(&boxa);
432 if (music_mask.isZero()) {
433 music_mask.destroy();
434 } else {
435 pixSubtract(pix_vline, pix_vline, music_mask);
436 pixSubtract(pix_hline, pix_hline, music_mask);
437 // We may have deleted all the lines
438 v_empty = pix_vline.isZero();
439 h_empty = pix_hline.isZero();
440 }
441 }
442 return music_mask;
443}
444
445// Most of the heavy lifting of line finding. Given src_pix and its separate
446// resolution, returns image masks:
447// pix_vline candidate vertical lines.
448// pix_non_vline pixels that didn't look like vertical lines.
449// pix_hline candidate horizontal lines.
450// pix_non_hline pixels that didn't look like horizontal lines.
451// pix_intersections pixels where vertical and horizontal lines meet.
452// pix_music_mask candidate music staves.
453// This function promises to initialize all the output (2nd level) pointers,
454// but any of the returns that are empty will be nullptr on output.
455// None of the input (1st level) pointers may be nullptr except
456// pix_music_mask, which will disable music detection, and pixa_display, which
457// is for debug.
458static void GetLineMasks(int resolution, Image src_pix, Image *pix_vline, Image *pix_non_vline,
459 Image *pix_hline, Image *pix_non_hline, Image *pix_intersections,
460 Image *pix_music_mask, Pixa *pixa_display) {
461 Image pix_closed = nullptr;
462 Image pix_hollow = nullptr;
463
464 int max_line_width = resolution / kThinLineFraction;
465 int min_line_length = resolution / kMinLineLengthFraction;
466 if (pixa_display != nullptr) {
467 tprintf("Image resolution = %d, max line width = %d, min length=%d\n", resolution,
468 max_line_width, min_line_length);
469 }
470 int closing_brick = max_line_width / 3;
471
472// only use opencl if compiled w/ OpenCL and selected device is opencl
473#ifdef USE_OPENCL
474 if (OpenclDevice::selectedDeviceIsOpenCL()) {
475 // OpenCL pixGetLines Operation
476 int clStatus =
477 OpenclDevice::initMorphCLAllocations(pixGetWpl(src_pix), pixGetHeight(src_pix), src_pix);
478 bool getpixclosed = pix_music_mask != nullptr;
479 OpenclDevice::pixGetLinesCL(nullptr, src_pix, pix_vline, pix_hline, &pix_closed, getpixclosed,
480 closing_brick, closing_brick, max_line_width, max_line_width,
481 min_line_length, min_line_length);
482 } else {
483#endif
484 // Close up small holes, making it less likely that false alarms are found
485 // in thickened text (as it will become more solid) and also smoothing over
486 // some line breaks and nicks in the edges of the lines.
487 pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick);
488 if (pixa_display != nullptr) {
489 pixaAddPix(pixa_display, pix_closed, L_CLONE);
490 }
491 // Open up with a big box to detect solid areas, which can then be
492 // subtracted. This is very generous and will leave in even quite wide
493 // lines.
494 Image pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width);
495 if (pixa_display != nullptr) {
496 pixaAddPix(pixa_display, pix_solid, L_CLONE);
497 }
498 pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid);
499
500 pix_solid.destroy();
501
502 // Now open up in both directions independently to find lines of at least
503 // 1 inch/kMinLineLengthFraction in length.
504 if (pixa_display != nullptr) {
505 pixaAddPix(pixa_display, pix_hollow, L_CLONE);
506 }
507 *pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length);
508 *pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1);
509
510 pix_hollow.destroy();
511#ifdef USE_OPENCL
512 }
513#endif
514
515 // Lines are sufficiently rare, that it is worth checking for a zero image.
516 bool v_empty = pix_vline->isZero();
517 bool h_empty = pix_hline->isZero();
518 if (pix_music_mask != nullptr) {
519 if (!v_empty && !h_empty) {
520 *pix_music_mask =
521 FilterMusic(resolution, pix_closed, *pix_vline, *pix_hline, v_empty, h_empty);
522 } else {
523 *pix_music_mask = nullptr;
524 }
525 }
526 pix_closed.destroy();
527 Image pix_nonlines = nullptr;
528 *pix_intersections = nullptr;
529 Image extra_non_hlines = nullptr;
530 if (!v_empty) {
531 // Subtract both line candidates from the source to get definite non-lines.
532 pix_nonlines = pixSubtract(nullptr, src_pix, *pix_vline);
533 if (!h_empty) {
534 pixSubtract(pix_nonlines, pix_nonlines, *pix_hline);
535 // Intersections are a useful indicator for likelihood of being a line.
536 *pix_intersections = *pix_vline & *pix_hline;
537 // Candidate vlines are not hlines (apart from the intersections)
538 // and vice versa.
539 extra_non_hlines = pixSubtract(nullptr, *pix_vline, *pix_intersections);
540 }
541 *pix_non_vline = pixErodeBrick(nullptr, pix_nonlines, kMaxLineResidue, 1);
542 pixSeedfillBinary(*pix_non_vline, *pix_non_vline, pix_nonlines, 8);
543 if (!h_empty) {
544 // Candidate hlines are not vlines.
545 *pix_non_vline |= *pix_hline;
546 pixSubtract(*pix_non_vline, *pix_non_vline, *pix_intersections);
547 }
548 if (!FilterFalsePositives(resolution, *pix_non_vline, *pix_intersections, *pix_vline)) {
549 pix_vline->destroy(); // No candidates left.
550 }
551 } else {
552 // No vertical lines.
553 pix_vline->destroy();
554 *pix_non_vline = nullptr;
555 if (!h_empty) {
556 pix_nonlines = pixSubtract(nullptr, src_pix, *pix_hline);
557 }
558 }
559 if (h_empty) {
560 pix_hline->destroy();
561 *pix_non_hline = nullptr;
562 if (v_empty) {
563 return;
564 }
565 } else {
566 *pix_non_hline = pixErodeBrick(nullptr, pix_nonlines, 1, kMaxLineResidue);
567 pixSeedfillBinary(*pix_non_hline, *pix_non_hline, pix_nonlines, 8);
568 if (extra_non_hlines != nullptr) {
569 *pix_non_hline |= extra_non_hlines;
570 extra_non_hlines.destroy();
571 }
572 if (!FilterFalsePositives(resolution, *pix_non_hline, *pix_intersections, *pix_hline)) {
573 pix_hline->destroy(); // No candidates left.
574 }
575 }
576 if (pixa_display != nullptr) {
577 if (*pix_vline != nullptr) {
578 pixaAddPix(pixa_display, *pix_vline, L_CLONE);
579 }
580 if (*pix_hline != nullptr) {
581 pixaAddPix(pixa_display, *pix_hline, L_CLONE);
582 }
583 if (pix_nonlines != nullptr) {
584 pixaAddPix(pixa_display, pix_nonlines, L_CLONE);
585 }
586 if (*pix_non_vline != nullptr) {
587 pixaAddPix(pixa_display, *pix_non_vline, L_CLONE);
588 }
589 if (*pix_non_hline != nullptr) {
590 pixaAddPix(pixa_display, *pix_non_hline, L_CLONE);
591 }
592 if (*pix_intersections != nullptr) {
593 pixaAddPix(pixa_display, *pix_intersections, L_CLONE);
594 }
595 if (pix_music_mask != nullptr && *pix_music_mask != nullptr) {
596 pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
597 }
598 }
599 pix_nonlines.destroy();
600}
601
602// Finds vertical line objects in pix_vline and removes them from src_pix.
603// Uses the given resolution to determine size thresholds instead of any
604// that may be present in the pix.
605// The output vertical_x and vertical_y contain a sum of the output vectors,
606// thereby giving the mean vertical direction.
607// The output vectors are owned by the list and Frozen (cannot refit) by
608// having no boxes, as there is no need to refit or merge separator lines.
609// If no good lines are found, pix_vline is destroyed.
610// None of the input pointers may be nullptr, and if *pix_vline is nullptr then
611// the function does nothing.
612static void FindAndRemoveVLines(Image pix_intersections, int *vertical_x,
613 int *vertical_y, Image *pix_vline, Image pix_non_vline,
614 Image src_pix, TabVector_LIST *vectors) {
615 if (pix_vline == nullptr || *pix_vline == nullptr) {
616 return;
617 }
618 C_BLOB_LIST line_cblobs;
619 BLOBNBOX_LIST line_bblobs;
620 GetLineBoxes(false, *pix_vline, pix_intersections, &line_cblobs, &line_bblobs);
621 int width = pixGetWidth(src_pix);
622 int height = pixGetHeight(src_pix);
623 ICOORD bleft(0, 0);
624 ICOORD tright(width, height);
625 FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors);
626 if (!vectors->empty()) {
627 RemoveUnusedLineSegments(false, &line_bblobs, *pix_vline);
628 SubtractLinesAndResidue(*pix_vline, pix_non_vline, src_pix);
629 ICOORD vertical;
630 vertical.set_with_shrink(*vertical_x, *vertical_y);
631 TabVector::MergeSimilarTabVectors(vertical, vectors, nullptr);
632 } else {
633 pix_vline->destroy();
634 }
635}
636
637// Finds horizontal line objects in pix_hline and removes them from src_pix.
638// Uses the given resolution to determine size thresholds instead of any
639// that may be present in the pix.
640// The output vertical_x and vertical_y contain a sum of the output vectors,
641// thereby giving the mean vertical direction.
642// The output vectors are owned by the list and Frozen (cannot refit) by
643// having no boxes, as there is no need to refit or merge separator lines.
644// If no good lines are found, pix_hline is destroyed.
645// None of the input pointers may be nullptr, and if *pix_hline is nullptr then
646// the function does nothing.
647static void FindAndRemoveHLines(Image pix_intersections, int vertical_x,
648 int vertical_y, Image *pix_hline, Image pix_non_hline,
649 Image src_pix, TabVector_LIST *vectors) {
650 if (pix_hline == nullptr || *pix_hline == nullptr) {
651 return;
652 }
653 C_BLOB_LIST line_cblobs;
654 BLOBNBOX_LIST line_bblobs;
655 GetLineBoxes(true, *pix_hline, pix_intersections, &line_cblobs, &line_bblobs);
656 int width = pixGetWidth(src_pix);
657 int height = pixGetHeight(src_pix);
658 ICOORD bleft(0, 0);
659 ICOORD tright(height, width);
660 FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y, vectors);
661 if (!vectors->empty()) {
662 RemoveUnusedLineSegments(true, &line_bblobs, *pix_hline);
663 SubtractLinesAndResidue(*pix_hline, pix_non_hline, src_pix);
664 ICOORD vertical;
665 vertical.set_with_shrink(vertical_x, vertical_y);
666 TabVector::MergeSimilarTabVectors(vertical, vectors, nullptr);
667 // Iterate the vectors to flip them. x and y were flipped for horizontal
668 // lines, so FindLineVectors can work just with the vertical case.
669 // See GetLineBoxes for more on the flip.
670 TabVector_IT h_it(vectors);
671 for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
672 h_it.data()->XYFlip();
673 }
674 } else {
675 pix_hline->destroy();
676 }
677}
678
679// Finds vertical and horizontal line objects in the given pix.
680// Uses the given resolution to determine size thresholds instead of any
681// that may be present in the pix.
682// The output vertical_x and vertical_y contain a sum of the output vectors,
683// thereby giving the mean vertical direction.
684// If pix_music_mask != nullptr, and music is detected, a mask of the staves
685// and anything that is connected (bars, notes etc.) will be returned in
686// pix_music_mask, the mask subtracted from pix, and the lines will not
687// appear in v_lines or h_lines.
688// The output vectors are owned by the list and Frozen (cannot refit) by
689// having no boxes, as there is no need to refit or merge separator lines.
690// The detected lines are removed from the pix.
691void LineFinder::FindAndRemoveLines(int resolution, bool debug, Image pix, int *vertical_x,
692 int *vertical_y, Image *pix_music_mask, TabVector_LIST *v_lines,
693 TabVector_LIST *h_lines) {
694 if (pix == nullptr || vertical_x == nullptr || vertical_y == nullptr) {
695 tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n");
696 return;
697 }
698 Image pix_vline = nullptr;
699 Image pix_non_vline = nullptr;
700 Image pix_hline = nullptr;
701 Image pix_non_hline = nullptr;
702 Image pix_intersections = nullptr;
703 Pixa *pixa_display = debug ? pixaCreate(0) : nullptr;
704 GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline, &pix_non_hline,
705 &pix_intersections, pix_music_mask, pixa_display);
706 // Find lines, convert to TabVector_LIST and remove those that are used.
707 FindAndRemoveVLines(pix_intersections, vertical_x, vertical_y, &pix_vline,
708 pix_non_vline, pix, v_lines);
709 pix_intersections.destroy();
710 if (pix_hline != nullptr) {
711 // Recompute intersections and re-filter false positive h-lines.
712 if (pix_vline != nullptr) {
713 pix_intersections = pix_vline & pix_hline;
714 }
715 if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections, pix_hline)) {
716 pix_hline.destroy();
717 }
718 }
719 FindAndRemoveHLines(pix_intersections, *vertical_x, *vertical_y, &pix_hline,
720 pix_non_hline, pix, h_lines);
721 if (pixa_display != nullptr && pix_vline != nullptr) {
722 pixaAddPix(pixa_display, pix_vline, L_CLONE);
723 }
724 if (pixa_display != nullptr && pix_hline != nullptr) {
725 pixaAddPix(pixa_display, pix_hline, L_CLONE);
726 }
727 pix_intersections.destroy();
728 if (pix_vline != nullptr && pix_hline != nullptr) {
729 // Remove joins (intersections) where lines cross, and the residue.
730 // Recalculate the intersections, since some lines have been deleted.
731 pix_intersections = pix_vline & pix_hline;
732 // Fatten up the intersections and seed-fill to get the intersection
733 // residue.
734 Image pix_join_residue = pixDilateBrick(nullptr, pix_intersections, 5, 5);
735 pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8);
736 // Now remove the intersection residue.
737 pixSubtract(pix, pix, pix_join_residue);
738 pix_join_residue.destroy();
739 }
740 // Remove any detected music.
741 if (pix_music_mask != nullptr && *pix_music_mask != nullptr) {
742 if (pixa_display != nullptr) {
743 pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
744 }
745 pixSubtract(pix, pix, *pix_music_mask);
746 }
747 if (pixa_display != nullptr) {
748 pixaAddPix(pixa_display, pix, L_CLONE);
749 }
750
751 pix_vline.destroy();
752 pix_non_vline.destroy();
753 pix_hline.destroy();
754 pix_non_hline.destroy();
755 pix_intersections.destroy();
756 if (pixa_display != nullptr) {
757 pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding", "vhlinefinding.pdf");
758 pixaDestroy(&pixa_display);
759 }
760}
761
762} // namespace tesseract.
@ TBOX
const double y
const double kMinMusicPixelFraction
Definition: linefind.cpp:60
const double kMaxStaveHeight
Definition: linefind.cpp:58
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
const int kCrackSpacing
Spacing of cracks across the page to break up tall vertical lines.
Definition: linefind.cpp:43
const double kThickLengthMultiple
Definition: linefind.cpp:54
const int kMinThickLineWidth
Definition: linefind.cpp:47
const double kMaxNonLineDensity
Definition: linefind.cpp:56
void outlines_to_blobs(BLOCK *block, ICOORD bleft, ICOORD tright, C_OUTLINE_LIST *outlines)
Definition: edgblob.cpp:460
const int kMinLineLengthFraction
Denominator of resolution makes min pixels to demand line lengths to be.
Definition: linefind.cpp:41
const int kMaxLineResidue
Definition: linefind.cpp:51
const int kLineFindGridSize
Grid size used by line finder. Not very critical.
Definition: linefind.cpp:45
@ TT_VLINE
Definition: blobbox.h:67
@ TT_MAYBE_ALIGNED
Definition: blobbox.h:65
const int kThinLineFraction
Denominator of resolution makes max pixel width to allow thin lines.
Definition: linefind.cpp:39
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:30
const TBOX & bounding_box() const
Definition: blobbox.h:239
TabType left_tab_type() const
Definition: blobbox.h:286
void destroy()
Definition: image.cpp:32
TDimension left() const
Definition: rect.h:82
TDimension height() const
Definition: rect.h:118
TDimension width() const
Definition: rect.h:126
TDimension top() const
Definition: rect.h:68
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
static bool WithinTestRegion(int detail_level, int x, int y)
static void FindAndRemoveLines(int resolution, bool debug, Image pix, int *vertical_x, int *vertical_y, Image *pix_music_mask, TabVector_LIST *v_lines, TabVector_LIST *h_lines)
Definition: linefind.cpp:691
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:352