tesseract  4.00.00dev
imagefind.cpp
Go to the documentation of this file.
1 // File: imagefind.cpp
3 // Description: Function to find image and drawing regions in an image
4 // and create a corresponding list of empty blobs.
5 // Author: Ray Smith
6 // Created: Thu Mar 20 09:49:01 PDT 2008
7 //
8 // (C) Copyright 2008, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #ifdef _MSC_VER
22 #pragma warning(disable:4244) // Conversion warnings
23 #endif
24 
25 #ifdef HAVE_CONFIG_H
26 #include "config_auto.h"
27 #endif
28 
29 #include "imagefind.h"
30 #include "colpartitiongrid.h"
31 #include "linlsq.h"
32 #include "ndminx.h"
33 #include "statistc.h"
34 #include "params.h"
35 
36 #include "allheaders.h"
37 
38 INT_VAR(textord_tabfind_show_images, false, "Show image blobs");
39 
40 namespace tesseract {
41 
42 // Fraction of width or height of on pixels that can be discarded from a
43 // roughly rectangular image.
44 const double kMinRectangularFraction = 0.125;
45 // Fraction of width or height to consider image completely used.
46 const double kMaxRectangularFraction = 0.75;
47 // Fraction of width or height to allow transition from kMinRectangularFraction
48 // to kMaxRectangularFraction, equivalent to a dy/dx skew.
49 const double kMaxRectangularGradient = 0.1; // About 6 degrees.
50 // Minimum image size to be worth looking for images on.
51 const int kMinImageFindSize = 100;
52 // Scale factor for the rms color fit error.
53 const double kRMSFitScaling = 8.0;
54 // Min color difference to call it two colors.
55 const int kMinColorDifference = 16;
56 // Pixel padding for noise blobs and partitions when rendering on the image
57 // mask to encourage them to join together. Make it too big and images
58 // will fatten out too much and have to be clipped to text.
59 const int kNoisePadding = 4;
60 
61 // Finds image regions within the BINARY source pix (page image) and returns
62 // the image regions as a mask image.
63 // The returned pix may be NULL, meaning no images found.
64 // If not NULL, it must be PixDestroyed by the caller.
65 // If textord_tabfind_show_images, debug images are appended to pixa_debug.
66 Pix* ImageFind::FindImages(Pix* pix, DebugPixa* pixa_debug) {
67  // Not worth looking at small images.
68  if (pixGetWidth(pix) < kMinImageFindSize ||
69  pixGetHeight(pix) < kMinImageFindSize)
70  return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
71 
72  // Reduce by factor 2.
73  Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
74  if (textord_tabfind_show_images && pixa_debug != nullptr)
75  pixa_debug->AddPix(pixr, "CascadeReduced");
76 
77  // Get the halftone mask directly from Leptonica.
78  //
79  // Leptonica will print an error message and return NULL if we call
80  // pixGenHalftoneMask(pixr, NULL, ...) with too small image, so we
81  // want to bypass that.
82  if (pixGetWidth(pixr) < kMinImageFindSize ||
83  pixGetHeight(pixr) < kMinImageFindSize) {
84  pixDestroy(&pixr);
85  return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
86  }
87  // Get the halftone mask.
88  l_int32 ht_found = 0;
89  Pixa* pixadb = (textord_tabfind_show_images && pixa_debug != nullptr)
90  ? pixaCreate(0)
91  : nullptr;
92  Pix* pixht2 = pixGenerateHalftoneMask(pixr, NULL, &ht_found, pixadb);
93  if (pixadb) {
94  Pix* pixdb = pixaDisplayTiledInColumns(pixadb, 3, 1.0, 20, 2);
95  if (textord_tabfind_show_images && pixa_debug != nullptr)
96  pixa_debug->AddPix(pixdb, "HalftoneMask");
97  pixDestroy(&pixdb);
98  pixaDestroy(&pixadb);
99  }
100  pixDestroy(&pixr);
101  if (!ht_found && pixht2 != NULL)
102  pixDestroy(&pixht2);
103  if (pixht2 == NULL)
104  return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
105 
106  // Expand back up again.
107  Pix *pixht = pixExpandReplicate(pixht2, 2);
108  if (textord_tabfind_show_images && pixa_debug != nullptr)
109  pixa_debug->AddPix(pixht, "HalftoneReplicated");
110  pixDestroy(&pixht2);
111 
112  // Fill to capture pixels near the mask edges that were missed
113  Pix *pixt = pixSeedfillBinary(NULL, pixht, pix, 8);
114  pixOr(pixht, pixht, pixt);
115  pixDestroy(&pixt);
116 
117  // Eliminate lines and bars that may be joined to images.
118  Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
119  pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
120  if (textord_tabfind_show_images && pixa_debug != nullptr)
121  pixa_debug->AddPix(pixfinemask, "FineMask");
122  Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
123  Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
124  pixDestroy(&pixreduced);
125  pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
126  Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
127  pixDestroy(&pixreduced2);
128  if (textord_tabfind_show_images && pixa_debug != nullptr)
129  pixa_debug->AddPix(pixcoarsemask, "CoarseMask");
130  // Combine the coarse and fine image masks.
131  pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask);
132  pixDestroy(&pixfinemask);
133  // Dilate a bit to make sure we get everything.
134  pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
135  Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16);
136  pixDestroy(&pixcoarsemask);
137  if (textord_tabfind_show_images && pixa_debug != nullptr)
138  pixa_debug->AddPix(pixmask, "MaskDilated");
139  // And the image mask with the line and bar remover.
140  pixAnd(pixht, pixht, pixmask);
141  pixDestroy(&pixmask);
142  if (textord_tabfind_show_images && pixa_debug != nullptr)
143  pixa_debug->AddPix(pixht, "FinalMask");
144  // Make the result image the same size as the input.
145  Pix* result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
146  pixOr(result, result, pixht);
147  pixDestroy(&pixht);
148  return result;
149 }
150 
151 // Generates a Boxa, Pixa pair from the input binary (image mask) pix,
152 // analgous to pixConnComp, except that connected components which are nearly
153 // rectangular are replaced with solid rectangles.
154 // The returned boxa, pixa may be NULL, meaning no images found.
155 // If not NULL, they must be destroyed by the caller.
156 // Resolution of pix should match the source image (Tesseract::pix_binary_)
157 // so the output coordinate systems match.
159  Boxa** boxa, Pixa** pixa) {
160  *boxa = NULL;
161  *pixa = NULL;
162 
163  if (textord_tabfind_show_images && pixa_debug != nullptr)
164  pixa_debug->AddPix(pix, "Conncompimage");
165  // Find the individual image regions in the mask image.
166  *boxa = pixConnComp(pix, pixa, 8);
167  // Rectangularize the individual images. If a sharp edge in vertical and/or
168  // horizontal occupancy can be found, it indicates a probably rectangular
169  // image with unwanted bits merged on, so clip to the approximate rectangle.
170  int npixes = 0;
171  if (*boxa != nullptr && *pixa != nullptr) npixes = pixaGetCount(*pixa);
172  for (int i = 0; i < npixes; ++i) {
173  int x_start, x_end, y_start, y_end;
174  Pix* img_pix = pixaGetPix(*pixa, i, L_CLONE);
175  if (textord_tabfind_show_images && pixa_debug != nullptr)
176  pixa_debug->AddPix(img_pix, "A component");
177  if (pixNearlyRectangular(img_pix, kMinRectangularFraction,
178  kMaxRectangularFraction,
179  kMaxRectangularGradient,
180  &x_start, &y_start, &x_end, &y_end)) {
181  Pix* simple_pix = pixCreate(x_end - x_start, y_end - y_start, 1);
182  pixSetAll(simple_pix);
183  pixDestroy(&img_pix);
184  // pixaReplacePix takes ownership of the simple_pix.
185  pixaReplacePix(*pixa, i, simple_pix, NULL);
186  img_pix = pixaGetPix(*pixa, i, L_CLONE);
187  // Fix the box to match the new pix.
188  l_int32 x, y, width, height;
189  boxaGetBoxGeometry(*boxa, i, &x, &y, &width, &height);
190  Box* simple_box = boxCreate(x + x_start, y + y_start,
191  x_end - x_start, y_end - y_start);
192  boxaReplaceBox(*boxa, i, simple_box);
193  }
194  pixDestroy(&img_pix);
195  }
196 }
197 
198 // Scans horizontally on x=[x_start,x_end), starting with y=*y_start,
199 // stepping y+=y_step, until y=y_end. *ystart is input/output.
200 // If the number of black pixels in a row, pix_count fits this pattern:
201 // 0 or more rows with pix_count < min_count then
202 // <= mid_width rows with min_count <= pix_count <= max_count then
203 // a row with pix_count > max_count then
204 // true is returned, and *y_start = the first y with pix_count >= min_count.
205 static bool HScanForEdge(uinT32* data, int wpl, int x_start, int x_end,
206  int min_count, int mid_width, int max_count,
207  int y_end, int y_step, int* y_start) {
208  int mid_rows = 0;
209  for (int y = *y_start; y != y_end; y += y_step) {
210  // Need pixCountPixelsInRow(pix, y, &pix_count, NULL) to count in a subset.
211  int pix_count = 0;
212  uinT32* line = data + wpl * y;
213  for (int x = x_start; x < x_end; ++x) {
214  if (GET_DATA_BIT(line, x))
215  ++pix_count;
216  }
217  if (mid_rows == 0 && pix_count < min_count)
218  continue; // In the min phase.
219  if (mid_rows == 0)
220  *y_start = y; // Save the y_start where we came out of the min phase.
221  if (pix_count > max_count)
222  return true; // Found the pattern.
223  ++mid_rows;
224  if (mid_rows > mid_width)
225  break; // Middle too big.
226  }
227  return false; // Never found max_count.
228 }
229 
230 // Scans vertically on y=[y_start,y_end), starting with x=*x_start,
231 // stepping x+=x_step, until x=x_end. *x_start is input/output.
232 // If the number of black pixels in a column, pix_count fits this pattern:
233 // 0 or more cols with pix_count < min_count then
234 // <= mid_width cols with min_count <= pix_count <= max_count then
235 // a column with pix_count > max_count then
236 // true is returned, and *x_start = the first x with pix_count >= min_count.
237 static bool VScanForEdge(uinT32* data, int wpl, int y_start, int y_end,
238  int min_count, int mid_width, int max_count,
239  int x_end, int x_step, int* x_start) {
240  int mid_cols = 0;
241  for (int x = *x_start; x != x_end; x += x_step) {
242  int pix_count = 0;
243  uinT32* line = data + y_start * wpl;
244  for (int y = y_start; y < y_end; ++y, line += wpl) {
245  if (GET_DATA_BIT(line, x))
246  ++pix_count;
247  }
248  if (mid_cols == 0 && pix_count < min_count)
249  continue; // In the min phase.
250  if (mid_cols == 0)
251  *x_start = x; // Save the place where we came out of the min phase.
252  if (pix_count > max_count)
253  return true; // found the pattern.
254  ++mid_cols;
255  if (mid_cols > mid_width)
256  break; // Middle too big.
257  }
258  return false; // Never found max_count.
259 }
260 
261 // Returns true if there is a rectangle in the source pix, such that all
262 // pixel rows and column slices outside of it have less than
263 // min_fraction of the pixels black, and within max_skew_gradient fraction
264 // of the pixels on the inside, there are at least max_fraction of the
265 // pixels black. In other words, the inside of the rectangle looks roughly
266 // rectangular, and the outside of it looks like extra bits.
267 // On return, the rectangle is defined by x_start, y_start, x_end and y_end.
268 // Note: the algorithm is iterative, allowing it to slice off pixels from
269 // one edge, allowing it to then slice off more pixels from another edge.
271  double min_fraction, double max_fraction,
272  double max_skew_gradient,
273  int* x_start, int* y_start,
274  int* x_end, int* y_end) {
275  ASSERT_HOST(pix != NULL);
276  *x_start = 0;
277  *x_end = pixGetWidth(pix);
278  *y_start = 0;
279  *y_end = pixGetHeight(pix);
280 
281  uinT32* data = pixGetData(pix);
282  int wpl = pixGetWpl(pix);
283  bool any_cut = false;
284  bool left_done = false;
285  bool right_done = false;
286  bool top_done = false;
287  bool bottom_done = false;
288  do {
289  any_cut = false;
290  // Find the top/bottom edges.
291  int width = *x_end - *x_start;
292  int min_count = static_cast<int>(width * min_fraction);
293  int max_count = static_cast<int>(width * max_fraction);
294  int edge_width = static_cast<int>(width * max_skew_gradient);
295  if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
296  max_count, *y_end, 1, y_start) && !top_done) {
297  top_done = true;
298  any_cut = true;
299  }
300  --(*y_end);
301  if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
302  max_count, *y_start, -1, y_end) && !bottom_done) {
303  bottom_done = true;
304  any_cut = true;
305  }
306  ++(*y_end);
307 
308  // Find the left/right edges.
309  int height = *y_end - *y_start;
310  min_count = static_cast<int>(height * min_fraction);
311  max_count = static_cast<int>(height * max_fraction);
312  edge_width = static_cast<int>(height * max_skew_gradient);
313  if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
314  max_count, *x_end, 1, x_start) && !left_done) {
315  left_done = true;
316  any_cut = true;
317  }
318  --(*x_end);
319  if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
320  max_count, *x_start, -1, x_end) && !right_done) {
321  right_done = true;
322  any_cut = true;
323  }
324  ++(*x_end);
325  } while (any_cut);
326 
327  // All edges must satisfy the condition of sharp gradient in pixel density
328  // in order for the full rectangle to be present.
329  return left_done && right_done && top_done && bottom_done;
330 }
331 
332 // Given an input pix, and a bounding rectangle, the sides of the rectangle
333 // are shrunk inwards until they bound any black pixels found within the
334 // original rectangle. Returns false if the rectangle contains no black
335 // pixels at all.
336 bool ImageFind::BoundsWithinRect(Pix* pix, int* x_start, int* y_start,
337  int* x_end, int* y_end) {
338  Box* input_box = boxCreate(*x_start, *y_start, *x_end - *x_start,
339  *y_end - *y_start);
340  Box* output_box = NULL;
341  pixClipBoxToForeground(pix, input_box, NULL, &output_box);
342  bool result = output_box != NULL;
343  if (result) {
344  l_int32 x, y, width, height;
345  boxGetGeometry(output_box, &x, &y, &width, &height);
346  *x_start = x;
347  *y_start = y;
348  *x_end = x + width;
349  *y_end = y + height;
350  boxDestroy(&output_box);
351  }
352  boxDestroy(&input_box);
353  return result;
354 }
355 
356 // Given a point in 3-D (RGB) space, returns the squared Euclidean distance
357 // of the point from the given line, defined by a pair of points in the 3-D
358 // (RGB) space, line1 and line2.
360  const uinT8* line2,
361  const uinT8* point) {
362  int line_vector[kRGBRMSColors];
363  int point_vector[kRGBRMSColors];
364  for (int i = 0; i < kRGBRMSColors; ++i) {
365  line_vector[i] = static_cast<int>(line2[i]) - static_cast<int>(line1[i]);
366  point_vector[i] = static_cast<int>(point[i]) - static_cast<int>(line1[i]);
367  }
368  line_vector[L_ALPHA_CHANNEL] = 0;
369  // Now the cross product in 3d.
370  int cross[kRGBRMSColors];
371  cross[COLOR_RED] = line_vector[COLOR_GREEN] * point_vector[COLOR_BLUE]
372  - line_vector[COLOR_BLUE] * point_vector[COLOR_GREEN];
373  cross[COLOR_GREEN] = line_vector[COLOR_BLUE] * point_vector[COLOR_RED]
374  - line_vector[COLOR_RED] * point_vector[COLOR_BLUE];
375  cross[COLOR_BLUE] = line_vector[COLOR_RED] * point_vector[COLOR_GREEN]
376  - line_vector[COLOR_GREEN] * point_vector[COLOR_RED];
377  cross[L_ALPHA_CHANNEL] = 0;
378  // Now the sums of the squares.
379  double cross_sq = 0.0;
380  double line_sq = 0.0;
381  for (int j = 0; j < kRGBRMSColors; ++j) {
382  cross_sq += static_cast<double>(cross[j]) * cross[j];
383  line_sq += static_cast<double>(line_vector[j]) * line_vector[j];
384  }
385  if (line_sq == 0.0) {
386  return 0.0;
387  }
388  return cross_sq / line_sq; // This is the squared distance.
389 }
390 
391 
392 // Returns the leptonica combined code for the given RGB triplet.
394  l_uint32 result;
395  composeRGBPixel(r, g, b, &result);
396  return result;
397 }
398 
399 // Returns the input value clipped to a uinT8.
401  if (pixel < 0.0)
402  return 0;
403  else if (pixel >= 255.0)
404  return 255;
405  return static_cast<uinT8>(pixel);
406 }
407 
408 // Computes the light and dark extremes of color in the given rectangle of
409 // the given pix, which is factor smaller than the coordinate system in rect.
410 // The light and dark points are taken to be the upper and lower 8th-ile of
411 // the most deviant of R, G and B. The value of the other 2 channels are
412 // computed by linear fit against the most deviant.
413 // The colors of the two points are returned in color1 and color2, with the
414 // alpha channel set to a scaled mean rms of the fits.
415 // If color_map1 is not null then it and color_map2 get rect pasted in them
416 // with the two calculated colors, and rms map gets a pasted rect of the rms.
417 // color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
418 void ImageFind::ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor,
419  Pix* color_map1, Pix* color_map2,
420  Pix* rms_map,
421  uinT8* color1, uinT8* color2) {
422  ASSERT_HOST(pix != NULL && pixGetDepth(pix) == 32);
423  // Pad the rectangle outwards by 2 (scaled) pixels if possible to get more
424  // background.
425  int width = pixGetWidth(pix);
426  int height = pixGetHeight(pix);
427  int left_pad = MAX(rect.left() - 2 * factor, 0) / factor;
428  int top_pad = (rect.top() + 2 * factor + (factor - 1)) / factor;
429  top_pad = MIN(height, top_pad);
430  int right_pad = (rect.right() + 2 * factor + (factor - 1)) / factor;
431  right_pad = MIN(width, right_pad);
432  int bottom_pad = MAX(rect.bottom() - 2 * factor, 0) / factor;
433  int width_pad = right_pad - left_pad;
434  int height_pad = top_pad - bottom_pad;
435  if (width_pad < 1 || height_pad < 1 || width_pad + height_pad < 4)
436  return;
437  // Now crop the pix to the rectangle.
438  Box* scaled_box = boxCreate(left_pad, height - top_pad,
439  width_pad, height_pad);
440  Pix* scaled = pixClipRectangle(pix, scaled_box, NULL);
441 
442  // Compute stats over the whole image.
443  STATS red_stats(0, 256);
444  STATS green_stats(0, 256);
445  STATS blue_stats(0, 256);
446  uinT32* data = pixGetData(scaled);
447  ASSERT_HOST(pixGetWpl(scaled) == width_pad);
448  for (int y = 0; y < height_pad; ++y) {
449  for (int x = 0; x < width_pad; ++x, ++data) {
450  int r = GET_DATA_BYTE(data, COLOR_RED);
451  int g = GET_DATA_BYTE(data, COLOR_GREEN);
452  int b = GET_DATA_BYTE(data, COLOR_BLUE);
453  red_stats.add(r, 1);
454  green_stats.add(g, 1);
455  blue_stats.add(b, 1);
456  }
457  }
458  // Find the RGB component with the greatest 8th-ile-range.
459  // 8th-iles are used instead of quartiles to get closer to the true
460  // foreground color, which is going to be faint at best because of the
461  // pre-scaling of the input image.
462  int best_l8 = static_cast<int>(red_stats.ile(0.125f));
463  int best_u8 = static_cast<int>(ceil(red_stats.ile(0.875f)));
464  int best_i8r = best_u8 - best_l8;
465  int x_color = COLOR_RED;
466  int y1_color = COLOR_GREEN;
467  int y2_color = COLOR_BLUE;
468  int l8 = static_cast<int>(green_stats.ile(0.125f));
469  int u8 = static_cast<int>(ceil(green_stats.ile(0.875f)));
470  if (u8 - l8 > best_i8r) {
471  best_i8r = u8 - l8;
472  best_l8 = l8;
473  best_u8 = u8;
474  x_color = COLOR_GREEN;
475  y1_color = COLOR_RED;
476  }
477  l8 = static_cast<int>(blue_stats.ile(0.125f));
478  u8 = static_cast<int>(ceil(blue_stats.ile(0.875f)));
479  if (u8 - l8 > best_i8r) {
480  best_i8r = u8 - l8;
481  best_l8 = l8;
482  best_u8 = u8;
483  x_color = COLOR_BLUE;
484  y1_color = COLOR_GREEN;
485  y2_color = COLOR_RED;
486  }
487  if (best_i8r >= kMinColorDifference) {
488  LLSQ line1;
489  LLSQ line2;
490  uinT32* data = pixGetData(scaled);
491  for (int im_y = 0; im_y < height_pad; ++im_y) {
492  for (int im_x = 0; im_x < width_pad; ++im_x, ++data) {
493  int x = GET_DATA_BYTE(data, x_color);
494  int y1 = GET_DATA_BYTE(data, y1_color);
495  int y2 = GET_DATA_BYTE(data, y2_color);
496  line1.add(x, y1);
497  line2.add(x, y2);
498  }
499  }
500  double m1 = line1.m();
501  double c1 = line1.c(m1);
502  double m2 = line2.m();
503  double c2 = line2.c(m2);
504  double rms = line1.rms(m1, c1) + line2.rms(m2, c2);
505  rms *= kRMSFitScaling;
506  // Save the results.
507  color1[x_color] = ClipToByte(best_l8);
508  color1[y1_color] = ClipToByte(m1 * best_l8 + c1 + 0.5);
509  color1[y2_color] = ClipToByte(m2 * best_l8 + c2 + 0.5);
510  color1[L_ALPHA_CHANNEL] = ClipToByte(rms);
511  color2[x_color] = ClipToByte(best_u8);
512  color2[y1_color] = ClipToByte(m1 * best_u8 + c1 + 0.5);
513  color2[y2_color] = ClipToByte(m2 * best_u8 + c2 + 0.5);
514  color2[L_ALPHA_CHANNEL] = ClipToByte(rms);
515  } else {
516  // There is only one color.
517  color1[COLOR_RED] = ClipToByte(red_stats.median());
518  color1[COLOR_GREEN] = ClipToByte(green_stats.median());
519  color1[COLOR_BLUE] = ClipToByte(blue_stats.median());
520  color1[L_ALPHA_CHANNEL] = 0;
521  memcpy(color2, color1, 4);
522  }
523  if (color_map1 != NULL) {
524  pixSetInRectArbitrary(color_map1, scaled_box,
525  ComposeRGB(color1[COLOR_RED],
526  color1[COLOR_GREEN],
527  color1[COLOR_BLUE]));
528  pixSetInRectArbitrary(color_map2, scaled_box,
529  ComposeRGB(color2[COLOR_RED],
530  color2[COLOR_GREEN],
531  color2[COLOR_BLUE]));
532  pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]);
533  }
534  pixDestroy(&scaled);
535  boxDestroy(&scaled_box);
536 }
537 
538 // ================ CUTTING POLYGONAL IMAGES FROM A RECTANGLE ================
539 // The following functions are responsible for cutting a polygonal image from
540 // a rectangle: CountPixelsInRotatedBox, AttemptToShrinkBox, CutChunkFromParts
541 // with DivideImageIntoParts as the master.
542 // Problem statement:
543 // We start with a single connected component from the image mask: we get
544 // a Pix of the component, and its location on the page (im_box).
545 // The objective of cutting a polygonal image from its rectangle is to avoid
546 // interfering text, but not text that completely overlaps the image.
547 // ------------------------------ ------------------------------
548 // | Single input partition | | 1 Cut up output partitions |
549 // | | ------------------------------
550 // Av|oid | Avoid | |
551 // | | |________________________|
552 // Int|erfering | Interfering | |
553 // | | _____|__________________|
554 // T|ext | Text | |
555 // | Text-on-image | | Text-on-image |
556 // ------------------------------ --------------------------
557 // DivideImageIntoParts does this by building a ColPartition_LIST (not in the
558 // grid) with each ColPartition representing one of the rectangles needed,
559 // starting with a single rectangle for the whole image component, and cutting
560 // bits out of it with CutChunkFromParts as needed to avoid text. The output
561 // ColPartitions are supposed to be ordered from top to bottom.
562 
563 // The problem is complicated by the fact that we have rotated the coordinate
564 // system to make text lines horizontal, so if we need to look at the component
565 // image, we have to rotate the coordinates. Throughout the functions in this
566 // section im_box is the rectangle representing the image component in the
567 // rotated page coordinates (where we are building our output ColPartitions),
568 // rotation is the rotation that we used to get there, and rerotation is the
569 // rotation required to get back to original page image coordinates.
570 // To get to coordinates in the component image, pix, we rotate the im_box,
571 // the point we want to locate, and subtract the rotated point from the top-left
572 // of the rotated im_box.
573 // im_box is therefore essential to calculating coordinates within the pix.
574 
575 // Returns true if there are no black pixels in between the boxes.
576 // The im_box must represent the bounding box of the pix in tesseract
577 // coordinates, which may be negative, due to rotations to make the textlines
578 // horizontal. The boxes are rotated by rotation, which should undo such
579 // rotations, before mapping them onto the pix.
580 bool ImageFind::BlankImageInBetween(const TBOX& box1, const TBOX& box2,
581  const TBOX& im_box, const FCOORD& rotation,
582  Pix* pix) {
583  TBOX search_box(box1);
584  search_box += box2;
585  if (box1.x_gap(box2) >= box1.y_gap(box2)) {
586  if (box1.x_gap(box2) <= 0)
587  return true;
588  search_box.set_left(MIN(box1.right(), box2.right()));
589  search_box.set_right(MAX(box1.left(), box2.left()));
590  } else {
591  if (box1.y_gap(box2) <= 0)
592  return true;
593  search_box.set_top(MAX(box1.bottom(), box2.bottom()));
594  search_box.set_bottom(MIN(box1.top(), box2.top()));
595  }
596  return CountPixelsInRotatedBox(search_box, im_box, rotation, pix) == 0;
597 }
598 
599 // Returns the number of pixels in box in the pix.
600 // rotation, pix and im_box are defined in the large comment above.
602  const FCOORD& rotation, Pix* pix) {
603  // Intersect it with the image box.
604  box &= im_box; // This is in-place box intersection.
605  if (box.null_box())
606  return 0;
607  box.rotate(rotation);
608  TBOX rotated_im_box(im_box);
609  rotated_im_box.rotate(rotation);
610  Pix* rect_pix = pixCreate(box.width(), box.height(), 1);
611  pixRasterop(rect_pix, 0, 0, box.width(), box.height(),
612  PIX_SRC, pix, box.left() - rotated_im_box.left(),
613  rotated_im_box.top() - box.top());
614  l_int32 result;
615  pixCountPixels(rect_pix, &result, NULL);
616  pixDestroy(&rect_pix);
617  return result;
618 }
619 
620 // The box given by slice contains some black pixels, but not necessarily
621 // over the whole box. Shrink the x bounds of slice, but not the y bounds
622 // until there is at least one black pixel in the outermost columns.
623 // rotation, rerotation, pix and im_box are defined in the large comment above.
624 static void AttemptToShrinkBox(const FCOORD& rotation, const FCOORD& rerotation,
625  const TBOX& im_box, Pix* pix, TBOX* slice) {
626  TBOX rotated_box(*slice);
627  rotated_box.rotate(rerotation);
628  TBOX rotated_im_box(im_box);
629  rotated_im_box.rotate(rerotation);
630  int left = rotated_box.left() - rotated_im_box.left();
631  int right = rotated_box.right() - rotated_im_box.left();
632  int top = rotated_im_box.top() - rotated_box.top();
633  int bottom = rotated_im_box.top() - rotated_box.bottom();
634  ImageFind::BoundsWithinRect(pix, &left, &top, &right, &bottom);
635  top = rotated_im_box.top() - top;
636  bottom = rotated_im_box.top() - bottom;
637  left += rotated_im_box.left();
638  right += rotated_im_box.left();
639  rotated_box.set_to_given_coords(left, bottom, right, top);
640  rotated_box.rotate(rotation);
641  slice->set_left(rotated_box.left());
642  slice->set_right(rotated_box.right());
643 }
644 
645 // The meat of cutting a polygonal image around text.
646 // This function covers the general case of cutting a box out of a box
647 // as shown:
648 // Input Output
649 // ------------------------------ ------------------------------
650 // | Single input partition | | 1 Cut up output partitions |
651 // | | ------------------------------
652 // | ---------- | --------- ----------
653 // | | box | | | 2 | box | 3 |
654 // | | | | | | is cut | |
655 // | ---------- | --------- out ----------
656 // | | ------------------------------
657 // | | | 4 |
658 // ------------------------------ ------------------------------
659 // In the context that this function is used, at most 3 of the above output
660 // boxes will be created, as the overlapping box is never contained by the
661 // input.
662 // The above cutting operation is executed for each element of part_list that
663 // is overlapped by the input box. Each modified ColPartition is replaced
664 // in place in the list by the output of the cutting operation in the order
665 // shown above, so iff no holes are ever created, the output will be in
666 // top-to-bottom order, but in extreme cases, hole creation is possible.
667 // In such cases, the output order may cause strange block polygons.
668 // rotation, rerotation, pix and im_box are defined in the large comment above.
669 static void CutChunkFromParts(const TBOX& box, const TBOX& im_box,
670  const FCOORD& rotation, const FCOORD& rerotation,
671  Pix* pix, ColPartition_LIST* part_list) {
672  ASSERT_HOST(!part_list->empty());
673  ColPartition_IT part_it(part_list);
674  do {
675  ColPartition* part = part_it.data();
676  TBOX part_box = part->bounding_box();
677  if (part_box.overlap(box)) {
678  // This part must be cut and replaced with the remains. There are
679  // up to 4 pieces to be made. Start with the first one and use
680  // add_before_stay_put. For each piece if it has no black pixels
681  // left, just don't make the box.
682  // Above box.
683  if (box.top() < part_box.top()) {
684  TBOX slice(part_box);
685  slice.set_bottom(box.top());
686  if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
687  pix) > 0) {
688  AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
689  part_it.add_before_stay_put(
691  BTFT_NONTEXT));
692  }
693  }
694  // Left of box.
695  if (box.left() > part_box.left()) {
696  TBOX slice(part_box);
697  slice.set_right(box.left());
698  if (box.top() < part_box.top())
699  slice.set_top(box.top());
700  if (box.bottom() > part_box.bottom())
701  slice.set_bottom(box.bottom());
702  if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
703  pix) > 0) {
704  AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
705  part_it.add_before_stay_put(
707  BTFT_NONTEXT));
708  }
709  }
710  // Right of box.
711  if (box.right() < part_box.right()) {
712  TBOX slice(part_box);
713  slice.set_left(box.right());
714  if (box.top() < part_box.top())
715  slice.set_top(box.top());
716  if (box.bottom() > part_box.bottom())
717  slice.set_bottom(box.bottom());
718  if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
719  pix) > 0) {
720  AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
721  part_it.add_before_stay_put(
723  BTFT_NONTEXT));
724  }
725  }
726  // Below box.
727  if (box.bottom() > part_box.bottom()) {
728  TBOX slice(part_box);
729  slice.set_top(box.bottom());
730  if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
731  pix) > 0) {
732  AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
733  part_it.add_before_stay_put(
735  BTFT_NONTEXT));
736  }
737  }
738  part->DeleteBoxes();
739  delete part_it.extract();
740  }
741  part_it.forward();
742  } while (!part_it.at_first());
743 }
744 
745 // Starts with the bounding box of the image component and cuts it up
746 // so that it doesn't intersect text where possible.
747 // Strong fully contained horizontal text is marked as text on image,
748 // and does not cause a division of the image.
749 // For more detail see the large comment above on cutting polygonal images
750 // from a rectangle.
751 // rotation, rerotation, pix and im_box are defined in the large comment above.
752 static void DivideImageIntoParts(const TBOX& im_box, const FCOORD& rotation,
753  const FCOORD& rerotation, Pix* pix,
754  ColPartitionGridSearch* rectsearch,
755  ColPartition_LIST* part_list) {
756  // Add the full im_box partition to the list to begin with.
759  BTFT_NONTEXT);
760  ColPartition_IT part_it(part_list);
761  part_it.add_after_then_move(pix_part);
762 
763  rectsearch->StartRectSearch(im_box);
764  ColPartition* part;
765  while ((part = rectsearch->NextRectSearch()) != NULL) {
766  TBOX part_box = part->bounding_box();
767  if (part_box.contains(im_box) && part->flow() >= BTFT_CHAIN) {
768  // This image is completely covered by an existing text partition.
769  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
770  ColPartition* pix_part = part_it.extract();
771  pix_part->DeleteBoxes();
772  delete pix_part;
773  }
774  } else if (part->flow() == BTFT_STRONG_CHAIN) {
775  // Text intersects the box.
776  TBOX overlap_box = part_box.intersection(im_box);
777  // Intersect it with the image box.
778  int black_area = ImageFind::CountPixelsInRotatedBox(overlap_box, im_box,
779  rerotation, pix);
780  if (black_area * 2 < part_box.area() || !im_box.contains(part_box)) {
781  // Eat a piece out of the image.
782  // Pad it so that pieces eaten out look decent.
783  int padding = part->blob_type() == BRT_VERT_TEXT
784  ? part_box.width() : part_box.height();
785  part_box.set_top(part_box.top() + padding / 2);
786  part_box.set_bottom(part_box.bottom() - padding / 2);
787  CutChunkFromParts(part_box, im_box, rotation, rerotation,
788  pix, part_list);
789  } else {
790  // Strong overlap with the black area, so call it text on image.
792  }
793  }
794  if (part_list->empty()) {
795  break;
796  }
797  }
798 }
799 
800 // Search for the rightmost text that overlaps vertically and is to the left
801 // of the given box, but within the given left limit.
802 static int ExpandImageLeft(const TBOX& box, int left_limit,
803  ColPartitionGrid* part_grid) {
804  ColPartitionGridSearch search(part_grid);
805  ColPartition* part;
806  // Search right to left for any text that overlaps.
807  search.StartSideSearch(box.left(), box.bottom(), box.top());
808  while ((part = search.NextSideSearch(true)) != NULL) {
809  if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
810  const TBOX& part_box(part->bounding_box());
811  if (part_box.y_gap(box) < 0) {
812  if (part_box.right() > left_limit && part_box.right() < box.left())
813  left_limit = part_box.right();
814  break;
815  }
816  }
817  }
818  if (part != NULL) {
819  // Search for the nearest text up to the one we already found.
820  TBOX search_box(left_limit, box.bottom(), box.left(), box.top());
821  search.StartRectSearch(search_box);
822  while ((part = search.NextRectSearch()) != NULL) {
823  if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
824  const TBOX& part_box(part->bounding_box());
825  if (part_box.y_gap(box) < 0) {
826  if (part_box.right() > left_limit && part_box.right() < box.left()) {
827  left_limit = part_box.right();
828  }
829  }
830  }
831  }
832  }
833  return left_limit;
834 }
835 
836 // Search for the leftmost text that overlaps vertically and is to the right
837 // of the given box, but within the given right limit.
838 static int ExpandImageRight(const TBOX& box, int right_limit,
839  ColPartitionGrid* part_grid) {
840  ColPartitionGridSearch search(part_grid);
841  ColPartition* part;
842  // Search left to right for any text that overlaps.
843  search.StartSideSearch(box.right(), box.bottom(), box.top());
844  while ((part = search.NextSideSearch(false)) != NULL) {
845  if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
846  const TBOX& part_box(part->bounding_box());
847  if (part_box.y_gap(box) < 0) {
848  if (part_box.left() < right_limit && part_box.left() > box.right())
849  right_limit = part_box.left();
850  break;
851  }
852  }
853  }
854  if (part != NULL) {
855  // Search for the nearest text up to the one we already found.
856  TBOX search_box(box.left(), box.bottom(), right_limit, box.top());
857  search.StartRectSearch(search_box);
858  while ((part = search.NextRectSearch()) != NULL) {
859  if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
860  const TBOX& part_box(part->bounding_box());
861  if (part_box.y_gap(box) < 0) {
862  if (part_box.left() < right_limit && part_box.left() > box.right())
863  right_limit = part_box.left();
864  }
865  }
866  }
867  }
868  return right_limit;
869 }
870 
871 // Search for the topmost text that overlaps horizontally and is below
872 // the given box, but within the given bottom limit.
873 static int ExpandImageBottom(const TBOX& box, int bottom_limit,
874  ColPartitionGrid* part_grid) {
875  ColPartitionGridSearch search(part_grid);
876  ColPartition* part;
877  // Search right to left for any text that overlaps.
878  search.StartVerticalSearch(box.left(), box.right(), box.bottom());
879  while ((part = search.NextVerticalSearch(true)) != NULL) {
880  if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
881  const TBOX& part_box(part->bounding_box());
882  if (part_box.x_gap(box) < 0) {
883  if (part_box.top() > bottom_limit && part_box.top() < box.bottom())
884  bottom_limit = part_box.top();
885  break;
886  }
887  }
888  }
889  if (part != NULL) {
890  // Search for the nearest text up to the one we already found.
891  TBOX search_box(box.left(), bottom_limit, box.right(), box.bottom());
892  search.StartRectSearch(search_box);
893  while ((part = search.NextRectSearch()) != NULL) {
894  if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
895  const TBOX& part_box(part->bounding_box());
896  if (part_box.x_gap(box) < 0) {
897  if (part_box.top() > bottom_limit && part_box.top() < box.bottom())
898  bottom_limit = part_box.top();
899  }
900  }
901  }
902  }
903  return bottom_limit;
904 }
905 
906 // Search for the bottommost text that overlaps horizontally and is above
907 // the given box, but within the given top limit.
908 static int ExpandImageTop(const TBOX& box, int top_limit,
909  ColPartitionGrid* part_grid) {
910  ColPartitionGridSearch search(part_grid);
911  ColPartition* part;
912  // Search right to left for any text that overlaps.
913  search.StartVerticalSearch(box.left(), box.right(), box.top());
914  while ((part = search.NextVerticalSearch(false)) != NULL) {
915  if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
916  const TBOX& part_box(part->bounding_box());
917  if (part_box.x_gap(box) < 0) {
918  if (part_box.bottom() < top_limit && part_box.bottom() > box.top())
919  top_limit = part_box.bottom();
920  break;
921  }
922  }
923  }
924  if (part != NULL) {
925  // Search for the nearest text up to the one we already found.
926  TBOX search_box(box.left(), box.top(), box.right(), top_limit);
927  search.StartRectSearch(search_box);
928  while ((part = search.NextRectSearch()) != NULL) {
929  if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
930  const TBOX& part_box(part->bounding_box());
931  if (part_box.x_gap(box) < 0) {
932  if (part_box.bottom() < top_limit && part_box.bottom() > box.top())
933  top_limit = part_box.bottom();
934  }
935  }
936  }
937  }
938  return top_limit;
939 }
940 
941 // Expands the image box in the given direction until it hits text,
942 // limiting the expansion to the given limit box, returning the result
943 // in the expanded box, and
944 // returning the increase in area resulting from the expansion.
945 static int ExpandImageDir(BlobNeighbourDir dir, const TBOX& im_box,
946  const TBOX& limit_box,
947  ColPartitionGrid* part_grid, TBOX* expanded_box) {
948  *expanded_box = im_box;
949  switch (dir) {
950  case BND_LEFT:
951  expanded_box->set_left(ExpandImageLeft(im_box, limit_box.left(),
952  part_grid));
953  break;
954  case BND_RIGHT:
955  expanded_box->set_right(ExpandImageRight(im_box, limit_box.right(),
956  part_grid));
957  break;
958  case BND_ABOVE:
959  expanded_box->set_top(ExpandImageTop(im_box, limit_box.top(), part_grid));
960  break;
961  case BND_BELOW:
962  expanded_box->set_bottom(ExpandImageBottom(im_box, limit_box.bottom(),
963  part_grid));
964  break;
965  default:
966  return 0;
967  }
968  return expanded_box->area() - im_box.area();
969 }
970 
971 // Expands the image partition into any non-text until it touches text.
972 // The expansion proceeds in the order of increasing increase in area
973 // as a heuristic to find the best rectangle by expanding in the most
974 // constrained direction first.
975 static void MaximalImageBoundingBox(ColPartitionGrid* part_grid, TBOX* im_box) {
976  bool dunnit[BND_COUNT];
977  memset(dunnit, 0, sizeof(dunnit));
978  TBOX limit_box(part_grid->bleft().x(), part_grid->bleft().y(),
979  part_grid->tright().x(), part_grid->tright().y());
980  TBOX text_box(*im_box);
981  for (int iteration = 0; iteration < BND_COUNT; ++iteration) {
982  // Find the direction with least area increase.
983  int best_delta = -1;
984  BlobNeighbourDir best_dir = BND_LEFT;
985  TBOX expanded_boxes[BND_COUNT];
986  for (int dir = 0; dir < BND_COUNT; ++dir) {
987  BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir);
988  if (!dunnit[bnd]) {
989  TBOX expanded_box;
990  int area_delta = ExpandImageDir(bnd, text_box, limit_box, part_grid,
991  &expanded_boxes[bnd]);
992  if (best_delta < 0 || area_delta < best_delta) {
993  best_delta = area_delta;
994  best_dir = bnd;
995  }
996  }
997  }
998  // Run the best and remember the direction.
999  dunnit[best_dir] = true;
1000  text_box = expanded_boxes[best_dir];
1001  }
1002  *im_box = text_box;
1003 }
1004 
1005 // Helper deletes the given partition but first marks up all the blobs as
1006 // noise, so they get deleted later, and disowns them.
1007 // If the initial type of the partition is image, then it actually deletes
1008 // the blobs, as the partition owns them in that case.
1009 static void DeletePartition(ColPartition* part) {
1010  BlobRegionType type = part->blob_type();
1011  if (type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) {
1012  // The partition owns the boxes of these types, so just delete them.
1013  part->DeleteBoxes(); // From a previous iteration.
1014  } else {
1015  // Once marked, the blobs will be swept up by TidyBlobs.
1016  part->set_flow(BTFT_NONTEXT);
1017  part->set_blob_type(BRT_NOISE);
1018  part->SetBlobTypes();
1019  part->DisownBoxes(); // Created before FindImagePartitions.
1020  }
1021  delete part;
1022 }
1023 
1024 // The meat of joining fragmented images and consuming ColPartitions of
1025 // uncertain type.
1026 // *part_ptr is an input/output BRT_RECTIMAGE ColPartition that is to be
1027 // expanded to consume overlapping and nearby ColPartitions of uncertain type
1028 // and other BRT_RECTIMAGE partitions, but NOT to be expanded beyond
1029 // max_image_box. *part_ptr is NOT in the part_grid.
1030 // rectsearch is already constructed on the part_grid, and is used for
1031 // searching for overlapping and nearby ColPartitions.
1032 // ExpandImageIntoParts is called iteratively until it returns false. Each
1033 // time it absorbs the nearest non-contained candidate, and everything that
1034 // is fully contained within part_ptr's bounding box.
1035 // TODO(rays) what if it just eats everything inside max_image_box in one go?
1036 static bool ExpandImageIntoParts(const TBOX& max_image_box,
1037  ColPartitionGridSearch* rectsearch,
1038  ColPartitionGrid* part_grid,
1039  ColPartition** part_ptr) {
1040  ColPartition* image_part = *part_ptr;
1041  TBOX im_part_box = image_part->bounding_box();
1042  if (textord_tabfind_show_images > 1) {
1043  tprintf("Searching for merge with image part:");
1044  im_part_box.print();
1045  tprintf("Text box=");
1046  max_image_box.print();
1047  }
1048  rectsearch->StartRectSearch(max_image_box);
1049  ColPartition* part;
1050  ColPartition* best_part = NULL;
1051  int best_dist = 0;
1052  while ((part = rectsearch->NextRectSearch()) != NULL) {
1053  if (textord_tabfind_show_images > 1) {
1054  tprintf("Considering merge with part:");
1055  part->Print();
1056  if (im_part_box.contains(part->bounding_box()))
1057  tprintf("Fully contained\n");
1058  else if (!max_image_box.contains(part->bounding_box()))
1059  tprintf("Not within text box\n");
1060  else if (part->flow() == BTFT_STRONG_CHAIN)
1061  tprintf("Too strong text\n");
1062  else
1063  tprintf("Real candidate\n");
1064  }
1065  if (part->flow() == BTFT_STRONG_CHAIN ||
1066  part->flow() == BTFT_TEXT_ON_IMAGE ||
1067  part->blob_type() == BRT_POLYIMAGE)
1068  continue;
1069  TBOX box = part->bounding_box();
1070  if (max_image_box.contains(box) && part->blob_type() != BRT_NOISE) {
1071  if (im_part_box.contains(box)) {
1072  // Eat it completely.
1073  rectsearch->RemoveBBox();
1074  DeletePartition(part);
1075  continue;
1076  }
1077  int x_dist = MAX(0, box.x_gap(im_part_box));
1078  int y_dist = MAX(0, box.y_gap(im_part_box));
1079  int dist = x_dist * x_dist + y_dist * y_dist;
1080  if (dist > box.area() || dist > im_part_box.area())
1081  continue; // Not close enough.
1082  if (best_part == NULL || dist < best_dist) {
1083  // We keep the nearest qualifier, which is not necessarily the nearest.
1084  best_part = part;
1085  best_dist = dist;
1086  }
1087  }
1088  }
1089  if (best_part != NULL) {
1090  // It needs expanding. We can do it without touching text.
1091  TBOX box = best_part->bounding_box();
1092  if (textord_tabfind_show_images > 1) {
1093  tprintf("Merging image part:");
1094  im_part_box.print();
1095  tprintf("with part:");
1096  box.print();
1097  }
1098  im_part_box += box;
1099  *part_ptr = ColPartition::FakePartition(im_part_box, PT_UNKNOWN,
1100  BRT_RECTIMAGE,
1101  BTFT_NONTEXT);
1102  DeletePartition(image_part);
1103  part_grid->RemoveBBox(best_part);
1104  DeletePartition(best_part);
1105  rectsearch->RepositionIterator();
1106  return true;
1107  }
1108  return false;
1109 }
1110 
1111 // Helper function to compute the overlap area between the box and the
1112 // given list of partitions.
1113 static int IntersectArea(const TBOX& box, ColPartition_LIST* part_list) {
1114  int intersect_area = 0;
1115  ColPartition_IT part_it(part_list);
1116  // Iterate the parts and subtract intersecting area.
1117  for (part_it.mark_cycle_pt(); !part_it.cycled_list();
1118  part_it.forward()) {
1119  ColPartition* image_part = part_it.data();
1120  TBOX intersect = box.intersection(image_part->bounding_box());
1121  intersect_area += intersect.area();
1122  }
1123  return intersect_area;
1124 }
1125 
1126 // part_list is a set of ColPartitions representing a polygonal image, and
1127 // im_box is the union of the bounding boxes of all the parts in part_list.
1128 // Tests whether part is to be consumed by the polygonal image.
1129 // Returns true if part is weak text and more than half of its area is
1130 // intersected by parts from the part_list, and it is contained within im_box.
1131 static bool TestWeakIntersectedPart(const TBOX& im_box,
1132  ColPartition_LIST* part_list,
1133  ColPartition* part) {
1134  if (part->flow() < BTFT_STRONG_CHAIN) {
1135  // A weak partition intersects the box.
1136  const TBOX& part_box = part->bounding_box();
1137  if (im_box.contains(part_box)) {
1138  int area = part_box.area();
1139  int intersect_area = IntersectArea(part_box, part_list);
1140  if (area < 2 * intersect_area) {
1141  return true;
1142  }
1143  }
1144  }
1145  return false;
1146 }
1147 
1148 // A rectangular or polygonal image has been completed, in part_list, bounding
1149 // box in im_box. We want to eliminate weak text or other uncertain partitions
1150 // (basically anything that is not BRT_STRONG_CHAIN or better) from both the
1151 // part_grid and the big_parts list that are contained within im_box and
1152 // overlapped enough by the possibly polygonal image.
1153 static void EliminateWeakParts(const TBOX& im_box,
1154  ColPartitionGrid* part_grid,
1155  ColPartition_LIST* big_parts,
1156  ColPartition_LIST* part_list) {
1157  ColPartitionGridSearch rectsearch(part_grid);
1158  ColPartition* part;
1159  rectsearch.StartRectSearch(im_box);
1160  while ((part = rectsearch.NextRectSearch()) != NULL) {
1161  if (TestWeakIntersectedPart(im_box, part_list, part)) {
1162  BlobRegionType type = part->blob_type();
1163  if (type == BRT_POLYIMAGE || type == BRT_RECTIMAGE) {
1164  rectsearch.RemoveBBox();
1165  DeletePartition(part);
1166  } else {
1167  // The part is mostly covered, so mark it. Non-image partitions are
1168  // kept hanging around to mark the image for pass2
1169  part->set_flow(BTFT_NONTEXT);
1170  part->set_blob_type(BRT_NOISE);
1171  part->SetBlobTypes();
1172  }
1173  }
1174  }
1175  ColPartition_IT big_it(big_parts);
1176  for (big_it.mark_cycle_pt(); !big_it.cycled_list(); big_it.forward()) {
1177  part = big_it.data();
1178  if (TestWeakIntersectedPart(im_box, part_list, part)) {
1179  // Once marked, the blobs will be swept up by TidyBlobs.
1180  DeletePartition(big_it.extract());
1181  }
1182  }
1183 }
1184 
1185 // Helper scans for good text partitions overlapping the given box.
1186 // If there are no good text partitions overlapping an expanded box, then
1187 // the box is expanded, otherwise, the original box is returned.
1188 // If good text overlaps the box, true is returned.
1189 static bool ScanForOverlappingText(ColPartitionGrid* part_grid, TBOX* box) {
1190  ColPartitionGridSearch rectsearch(part_grid);
1191  TBOX padded_box(*box);
1192  padded_box.pad(kNoisePadding, kNoisePadding);
1193  rectsearch.StartRectSearch(padded_box);
1194  ColPartition* part;
1195  bool any_text_in_padded_rect = false;
1196  while ((part = rectsearch.NextRectSearch()) != NULL) {
1197  if (part->flow() == BTFT_CHAIN ||
1198  part->flow() == BTFT_STRONG_CHAIN) {
1199  // Text intersects the box.
1200  any_text_in_padded_rect = true;
1201  const TBOX& part_box = part->bounding_box();
1202  if (box->overlap(part_box)) {
1203  return true;
1204  }
1205  }
1206  }
1207  if (!any_text_in_padded_rect)
1208  *box = padded_box;
1209  return false;
1210 }
1211 
1212 // Renders the boxes of image parts from the supplied list onto the image_pix,
1213 // except where they interfere with existing strong text in the part_grid,
1214 // and then deletes them.
1215 // Box coordinates are rotated by rerotate to match the image.
1216 static void MarkAndDeleteImageParts(const FCOORD& rerotate,
1217  ColPartitionGrid* part_grid,
1218  ColPartition_LIST* image_parts,
1219  Pix* image_pix) {
1220  if (image_pix == NULL)
1221  return;
1222  int imageheight = pixGetHeight(image_pix);
1223  ColPartition_IT part_it(image_parts);
1224  for (; !part_it.empty(); part_it.forward()) {
1225  ColPartition* part = part_it.extract();
1226  TBOX part_box = part->bounding_box();
1227  BlobRegionType type = part->blob_type();
1228  if (!ScanForOverlappingText(part_grid, &part_box) ||
1229  type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) {
1230  // Mark the box on the image.
1231  // All coords need to be rotated to match the image.
1232  part_box.rotate(rerotate);
1233  int left = part_box.left();
1234  int top = part_box.top();
1235  pixRasterop(image_pix, left, imageheight - top,
1236  part_box.width(), part_box.height(), PIX_SET, NULL, 0, 0);
1237  }
1238  DeletePartition(part);
1239  }
1240 }
1241 
1242 // Locates all the image partitions in the part_grid, that were found by a
1243 // previous call to FindImagePartitions, marks them in the image_mask,
1244 // removes them from the grid, and deletes them. This makes it possble to
1245 // call FindImagePartitions again to produce less broken-up and less
1246 // overlapping image partitions.
1247 // rerotation specifies how to rotate the partition coords to match
1248 // the image_mask, since this function is used after orientation correction.
1250  ColPartitionGrid* part_grid,
1251  Pix* image_mask) {
1252  // Extract the noise parts from the grid and put them on a temporary list.
1253  ColPartition_LIST parts_list;
1254  ColPartition_IT part_it(&parts_list);
1255  ColPartitionGridSearch gsearch(part_grid);
1256  gsearch.StartFullSearch();
1257  ColPartition* part;
1258  while ((part = gsearch.NextFullSearch()) != NULL) {
1259  BlobRegionType type = part->blob_type();
1260  if (type == BRT_NOISE || type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) {
1261  part_it.add_after_then_move(part);
1262  gsearch.RemoveBBox();
1263  }
1264  }
1265  // Render listed noise partitions to the image mask.
1266  MarkAndDeleteImageParts(rerotation, part_grid, &parts_list, image_mask);
1267 }
1268 
1269 // Removes and deletes all image partitions that are too small to be worth
1270 // keeping. We have to do this as a separate phase after creating the image
1271 // partitions as the small images are needed to join the larger ones together.
1272 static void DeleteSmallImages(ColPartitionGrid* part_grid) {
1273  if (part_grid != NULL) return;
1274  ColPartitionGridSearch gsearch(part_grid);
1275  gsearch.StartFullSearch();
1276  ColPartition* part;
1277  while ((part = gsearch.NextFullSearch()) != NULL) {
1278  // Only delete rectangular images, since if it became a poly image, it
1279  // is more evidence that it is somehow important.
1280  if (part->blob_type() == BRT_RECTIMAGE) {
1281  const TBOX& part_box = part->bounding_box();
1282  if (part_box.width() < kMinImageFindSize ||
1283  part_box.height() < kMinImageFindSize) {
1284  // It is too small to keep. Just make it disappear.
1285  gsearch.RemoveBBox();
1286  DeletePartition(part);
1287  }
1288  }
1289  }
1290 }
1291 
1292 // Runs a CC analysis on the image_pix mask image, and creates
1293 // image partitions from them, cutting out strong text, and merging with
1294 // nearby image regions such that they don't interfere with text.
1295 // Rotation and rerotation specify how to rotate image coords to match
1296 // the blob and partition coords and back again.
1297 // The input/output part_grid owns all the created partitions, and
1298 // the partitions own all the fake blobs that belong in the partitions.
1299 // Since the other blobs in the other partitions will be owned by the block,
1300 // ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
1301 // situation and collect the image blobs.
1302 void ImageFind::FindImagePartitions(Pix* image_pix, const FCOORD& rotation,
1303  const FCOORD& rerotation, TO_BLOCK* block,
1304  TabFind* tab_grid, DebugPixa* pixa_debug,
1305  ColPartitionGrid* part_grid,
1306  ColPartition_LIST* big_parts) {
1307  int imageheight = pixGetHeight(image_pix);
1308  Boxa* boxa;
1309  Pixa* pixa;
1310  ConnCompAndRectangularize(image_pix, pixa_debug, &boxa, &pixa);
1311  // Iterate the connected components in the image regions mask.
1312  int nboxes = 0;
1313  if (boxa != nullptr && pixa != nullptr) nboxes = boxaGetCount(boxa);
1314  for (int i = 0; i < nboxes; ++i) {
1315  l_int32 x, y, width, height;
1316  boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height);
1317  Pix* pix = pixaGetPix(pixa, i, L_CLONE);
1318  TBOX im_box(x, imageheight -y - height, x + width, imageheight - y);
1319  im_box.rotate(rotation); // Now matches all partitions and blobs.
1320  ColPartitionGridSearch rectsearch(part_grid);
1321  rectsearch.SetUniqueMode(true);
1322  ColPartition_LIST part_list;
1323  DivideImageIntoParts(im_box, rotation, rerotation, pix,
1324  &rectsearch, &part_list);
1325  if (textord_tabfind_show_images && pixa_debug != nullptr) {
1326  pixa_debug->AddPix(pix, "ImageComponent");
1327  tprintf("Component has %d parts\n", part_list.length());
1328  }
1329  pixDestroy(&pix);
1330  if (!part_list.empty()) {
1331  ColPartition_IT part_it(&part_list);
1332  if (part_list.singleton()) {
1333  // We didn't have to chop it into a polygon to fit around text, so
1334  // try expanding it to merge fragmented image parts, as long as it
1335  // doesn't touch strong text.
1336  ColPartition* part = part_it.extract();
1337  TBOX text_box(im_box);
1338  MaximalImageBoundingBox(part_grid, &text_box);
1339  while (ExpandImageIntoParts(text_box, &rectsearch, part_grid, &part));
1340  part_it.set_to_list(&part_list);
1341  part_it.add_after_then_move(part);
1342  im_box = part->bounding_box();
1343  }
1344  EliminateWeakParts(im_box, part_grid, big_parts, &part_list);
1345  // Iterate the part_list and put the parts into the grid.
1346  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
1347  ColPartition* image_part = part_it.extract();
1348  im_box = image_part->bounding_box();
1349  part_grid->InsertBBox(true, true, image_part);
1350  if (!part_it.at_last()) {
1351  ColPartition* neighbour = part_it.data_relative(1);
1352  image_part->AddPartner(false, neighbour);
1353  neighbour->AddPartner(true, image_part);
1354  }
1355  }
1356  }
1357  }
1358  boxaDestroy(&boxa);
1359  pixaDestroy(&pixa);
1360  DeleteSmallImages(part_grid);
1362  ScrollView* images_win_ = part_grid->MakeWindow(1000, 400, "With Images");
1363  part_grid->DisplayBoxes(images_win_);
1364  }
1365 }
1366 
1367 
1368 } // namespace tesseract.
1369 
Definition: points.h:189
static bool BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box, const FCOORD &rotation, Pix *pix)
Definition: imagefind.cpp:580
#define MIN(x, y)
Definition: ndminx.h:28
void SetUniqueMode(bool mode)
Definition: bbgrid.h:255
const int kMinImageFindSize
Definition: imagefind.cpp:51
const TBOX & bounding_box() const
Definition: colpartition.h:109
void StartRectSearch(const TBOX &rect)
Definition: bbgrid.h:834
void set_blob_type(BlobRegionType t)
Definition: colpartition.h:151
void add(inT32 value, inT32 count)
Definition: statistc.cpp:99
void set_bottom(int y)
Definition: rect.h:64
void RemoveBBox(BBC *bbox)
Definition: bbgrid.h:537
void StartSideSearch(int x, int ymin, int ymax)
Definition: bbgrid.h:750
static uinT8 ClipToByte(double pixel)
Definition: imagefind.cpp:400
Definition: linlsq.h:26
TBOX intersection(const TBOX &box) const
Definition: rect.cpp:87
double median() const
Definition: statistc.cpp:237
const ICOORD & bleft() const
Definition: bbgrid.h:73
const double kMinRectangularFraction
Definition: imagefind.cpp:44
uint32_t uinT32
Definition: host.h:39
static void ComputeRectangleColors(const TBOX &rect, Pix *pix, int factor, Pix *color_map1, Pix *color_map2, Pix *rms_map, uinT8 *color1, uinT8 *color2)
Definition: imagefind.cpp:418
#define MAX(x, y)
Definition: ndminx.h:24
BlobTextFlowType flow() const
Definition: colpartition.h:154
void StartFullSearch()
Definition: bbgrid.h:669
inT16 x() const
access function
Definition: points.h:52
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:593
const double kMaxRectangularGradient
Definition: imagefind.cpp:49
const int kNoisePadding
void AddPix(const Pix *pix, const char *caption)
Definition: debugpixa.h:26
static void TransferImagePartsToImageMask(const FCOORD &rerotation, ColPartitionGrid *part_grid, Pix *image_mask)
Definition: imagefind.cpp:1249
static bool BoundsWithinRect(Pix *pix, int *x_start, int *y_start, int *x_end, int *y_end)
Definition: imagefind.cpp:336
void RepositionIterator()
Definition: bbgrid.h:896
bool null_box() const
Definition: rect.h:46
static ColPartition * FakePartition(const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
void print() const
Definition: rect.h:270
#define tprintf(...)
Definition: tprintf.h:31
int y_gap(const TBOX &box) const
Definition: rect.h:225
inT32 area() const
Definition: rect.h:118
double rms(double m, double c) const
Definition: linlsq.cpp:131
uint8_t uinT8
Definition: host.h:35
void set_to_given_coords(int x_min, int y_min, int x_max, int y_max)
Definition: rect.h:263
BBC * NextSideSearch(bool right_to_left)
Definition: bbgrid.h:765
BBC * NextVerticalSearch(bool top_to_bottom)
Definition: bbgrid.h:806
BlobRegionType blob_type() const
Definition: colpartition.h:148
void add(double x, double y)
Definition: linlsq.cpp:49
void DisplayBoxes(ScrollView *window)
Definition: bbgrid.h:617
const int kRGBRMSColors
Definition: colpartition.h:36
#define INT_VAR(name, val, comment)
Definition: params.h:276
inT16 top() const
Definition: rect.h:54
void AddPartner(bool upper, ColPartition *partner)
double c(double m) const
Definition: linlsq.cpp:117
LIST search(LIST list, void *key, int_compare is_equal)
Definition: oldlist.cpp:371
const double kRMSFitScaling
Definition: imagefind.cpp:53
BlobNeighbourDir
Definition: blobbox.h:72
static bool pixNearlyRectangular(Pix *pix, double min_fraction, double max_fraction, double max_skew_gradient, int *x_start, int *y_start, int *x_end, int *y_end)
Definition: imagefind.cpp:270
static void FindImagePartitions(Pix *image_pix, const FCOORD &rotation, const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid, DebugPixa *pixa_debug, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
Definition: imagefind.cpp:1302
void pad(int xpad, int ypad)
Definition: rect.h:127
void set_left(int x)
Definition: rect.h:71
inT16 bottom() const
Definition: rect.h:61
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
Definition: bbgrid.h:490
int x_gap(const TBOX &box) const
Definition: rect.h:217
static Pix * FindImages(Pix *pix, DebugPixa *pixa_debug)
Definition: imagefind.cpp:66
BBC * NextFullSearch()
Definition: bbgrid.h:679
void set_flow(BlobTextFlowType f)
Definition: colpartition.h:157
inT16 height() const
Definition: rect.h:104
Definition: rect.h:30
const ICOORD & tright() const
Definition: bbgrid.h:76
const int kMinColorDifference
Definition: imagefind.cpp:55
inT16 left() const
Definition: rect.h:68
static void ConnCompAndRectangularize(Pix *pix, DebugPixa *pixa_debug, Boxa **boxa, Pixa **pixa)
Definition: imagefind.cpp:158
bool contains(const FCOORD pt) const
Definition: rect.h:323
#define ASSERT_HOST(x)
Definition: errcode.h:84
static int CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOORD &rotation, Pix *pix)
Definition: imagefind.cpp:601
Definition: statistc.h:33
BlobRegionType
Definition: blobbox.h:57
inT16 y() const
access_function
Definition: points.h:56
void rotate(const FCOORD &vec)
Definition: rect.h:189
int textord_tabfind_show_images
Definition: imagefind.cpp:38
double m() const
Definition: linlsq.cpp:101
void set_right(int x)
Definition: rect.h:78
void set_top(int y)
Definition: rect.h:57
double ile(double frac) const
Definition: statistc.cpp:172
void StartVerticalSearch(int xmin, int xmax, int y)
Definition: bbgrid.h:792
inT16 right() const
Definition: rect.h:75
const double kMaxRectangularFraction
Definition: imagefind.cpp:46
static double ColorDistanceFromLine(const uinT8 *line1, const uinT8 *line2, const uinT8 *point)
Definition: imagefind.cpp:359
bool overlap(const TBOX &box) const
Definition: rect.h:345
BBC * NextRectSearch()
Definition: bbgrid.h:846
static uinT32 ComposeRGB(uinT32 r, uinT32 g, uinT32 b)
Definition: imagefind.cpp:393
inT16 width() const
Definition: rect.h:111