#include <imagefind.h>

Static Public Member Functions
static Pix *	FindImages (Pix *pix)

static void	ConnCompAndRectangularize (Pix *pix, Boxa **boxa, Pixa **pixa)

static bool	pixNearlyRectangular (Pix *pix, double min_fraction, double max_fraction, double max_skew_gradient, int *x_start, int *y_start, int *x_end, int *y_end)

static bool	BoundsWithinRect (Pix *pix, int *x_start, int *y_start, int *x_end, int *y_end)

static double	ColorDistanceFromLine (const uinT8 *line1, const uinT8 *line2, const uinT8 *point)

static uinT32	ComposeRGB (uinT32 r, uinT32 g, uinT32 b)

static uinT8	ClipToByte (double pixel)

static void	ComputeRectangleColors (const TBOX &rect, Pix *pix, int factor, Pix *color_map1, Pix *color_map2, Pix *rms_map, uinT8 *color1, uinT8 *color2)

static bool	BlankImageInBetween (const TBOX &box1, const TBOX &box2, const TBOX &im_box, const FCOORD &rotation, Pix *pix)

static int	CountPixelsInRotatedBox (TBOX box, const TBOX &im_box, const FCOORD &rotation, Pix *pix)

static void	TransferImagePartsToImageMask (const FCOORD &rerotation, ColPartitionGrid *part_grid, Pix *image_mask)

static void	FindImagePartitions (Pix *image_pix, const FCOORD &rotation, const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)

Detailed Description

Definition at line 42 of file imagefind.h.

Member Function Documentation

bool tesseract::ImageFind::BlankImageInBetween	(	const TBOX &	box1,
		const TBOX &	box2,
		const TBOX &	im_box,
		const FCOORD &	rotation,
		Pix *	pix
	)

static

Definition at line 552 of file imagefind.cpp.

                                               {
   // Returns true when the strip separating box1 and box2 contains no set
   // pixels in pix, or when the larger of their x/y gaps is not positive.
   // im_box and rotation are passed through to CountPixelsInRotatedBox, which
   // maps the strip onto pix.
   const int gap_x = box1.x_gap(box2);
   const int gap_y = box1.y_gap(box2);
   // Start from the union of both boxes, then squeeze it down to the strip
   // between them along whichever axis has the larger gap.
   TBOX between(box1);
   between += box2;
   if (gap_x >= gap_y) {
     if (gap_x <= 0) {
       return true;  // No horizontal separation to examine.
     }
     between.set_left(MIN(box1.right(), box2.right()));
     between.set_right(MAX(box1.left(), box2.left()));
   } else {
     if (gap_y <= 0) {
       return true;  // No vertical separation to examine.
     }
     between.set_top(MAX(box1.bottom(), box2.bottom()));
     between.set_bottom(MIN(box1.top(), box2.top()));
   }
   const int pixels_in_gap =
       CountPixelsInRotatedBox(between, im_box, rotation, pix);
   return pixels_in_gap == 0;
 }

bool tesseract::ImageFind::BoundsWithinRect	(	Pix *	pix,
		int *	x_start,
		int *	y_start,
		int *	x_end,
		int *	y_end
	)

static

Definition at line 308 of file imagefind.cpp.

                                                          {
   // Asks Leptonica to clip the rectangle [*x_start, *x_end) x
   // [*y_start, *y_end) to the foreground of pix. When a clipped box comes
   // back, the four coordinates are overwritten with it and true is returned;
   // otherwise they are left untouched and false is returned.
   Box* search_box = boxCreate(*x_start, *y_start, *x_end - *x_start,
                               *y_end - *y_start);
   Box* clipped_box = NULL;
   pixClipBoxToForeground(pix, search_box, NULL, &clipped_box);
   boxDestroy(&search_box);
   if (clipped_box == NULL) {
     return false;
   }
   l_int32 left, top, box_width, box_height;
   boxGetGeometry(clipped_box, &left, &top, &box_width, &box_height);
   boxDestroy(&clipped_box);
   // The end coordinates are exclusive: start + extent.
   *x_start = left;
   *y_start = top;
   *x_end = left + box_width;
   *y_end = top + box_height;
   return true;
 }

uinT8 tesseract::ImageFind::ClipToByte ( double pixel )

static

Definition at line 372 of file imagefind.cpp.

                                         {
   // Saturating conversion of a double to a byte: values of 255 and above
   // map to 255, negative values map to 0, and anything in between is
   // truncated by the cast.
   if (pixel >= 255.0) {
     return 255;
   }
   if (pixel < 0.0) {
     return 0;
   }
   return static_cast<uinT8>(pixel);
 }

double tesseract::ImageFind::ColorDistanceFromLine	(	const uinT8 *	line1,
		const uinT8 *	line2,
		const uinT8 *	point
	)

static

Definition at line 331 of file imagefind.cpp.

                                                             {
   // Returns the SQUARED distance in RGB space from point to the line through
   // line1 and line2, computed as |line x offset|^2 / |line|^2 where both
   // vectors are measured from line1. Returns 0 when line1 and line2 have the
   // same RGB values, since no line direction exists in that case.
   int line_vector[kRGBRMSColors];
   int point_vector[kRGBRMSColors];
   for (int c = 0; c < kRGBRMSColors; ++c) {
     const int origin = static_cast<int>(line1[c]);
     line_vector[c] = static_cast<int>(line2[c]) - origin;
     point_vector[c] = static_cast<int>(point[c]) - origin;
   }
   // The alpha slot takes no part in the geometry.
   line_vector[L_ALPHA_CHANNEL] = 0;

   // 3-d cross product of the line direction with the offset of the point.
   const int line_r = line_vector[COLOR_RED];
   const int line_g = line_vector[COLOR_GREEN];
   const int line_b = line_vector[COLOR_BLUE];
   const int point_r = point_vector[COLOR_RED];
   const int point_g = point_vector[COLOR_GREEN];
   const int point_b = point_vector[COLOR_BLUE];
   int cross[kRGBRMSColors];
   cross[COLOR_RED] = line_g * point_b - line_b * point_g;
   cross[COLOR_GREEN] = line_b * point_r - line_r * point_b;
   cross[COLOR_BLUE] = line_r * point_g - line_g * point_r;
   cross[L_ALPHA_CHANNEL] = 0;

   // Squared length of the line direction; zero means a degenerate line.
   double line_sq = 0.0;
   for (int c = 0; c < kRGBRMSColors; ++c) {
     line_sq += static_cast<double>(line_vector[c]) * line_vector[c];
   }
   if (line_sq == 0.0) {
     return 0.0;
   }
   // Squared length of the cross product.
   double cross_sq = 0.0;
   for (int c = 0; c < kRGBRMSColors; ++c) {
     cross_sq += static_cast<double>(cross[c]) * cross[c];
   }
   return cross_sq / line_sq;  // Squared distance, not the distance itself.
 }

uinT32 tesseract::ImageFind::ComposeRGB	(	uinT32	r,
		uinT32	g,
		uinT32	b
	)

static

Definition at line 365 of file imagefind.cpp.

                                                          {
   // Thin wrapper that turns Leptonica's out-parameter composeRGBPixel into
   // a function returning the packed pixel word for (r, g, b).
   l_uint32 packed_pixel;
   composeRGBPixel(r, g, b, &packed_pixel);
   return packed_pixel;
 }

void tesseract::ImageFind::ComputeRectangleColors	(	const TBOX &	rect,
		Pix *	pix,
		int	factor,
		Pix *	color_map1,
		Pix *	color_map2,
		Pix *	rms_map,
		uinT8 *	color1,
		uinT8 *	color2
	)

static

Definition at line 390 of file imagefind.cpp.

                                                                      {
   // Estimates two representative colors for the part of pix covered by rect.
   //   rect       - region of interest; its coordinates are divided by factor
   //                to index pix.
   //   pix        - 32 bit deep image (asserted) to sample from.
   //   factor     - scale divisor between rect coordinates and pix pixels.
   //   color_map1, color_map2, rms_map - optional maps painted with the
   //                results over the sampled box. Only color_map1 is checked
   //                against NULL; the other two are used whenever color_map1
   //                is non-NULL.
   //   color1, color2 - outputs, indexed by COLOR_RED, COLOR_GREEN, COLOR_BLUE
   //                and L_ALPHA_CHANNEL (the alpha slot carries the scaled rms
   //                fit error, or 0 in the single-color case).
   // If the padded region is too small the function returns early without
   // writing color1/color2 or any of the maps.
   ASSERT_HOST(pix != NULL && pixGetDepth(pix) == 32);
   // Pad the rectangle outwards by 2 (scaled) pixels if possible to get more
   // background.
   int width = pixGetWidth(pix);
   int height = pixGetHeight(pix);
   // Lower edges round down (and clamp at 0); upper edges round up (and clamp
   // at the image size) so that the padded box never shrinks.
   int left_pad = MAX(rect.left() - 2 * factor, 0) / factor;
   int top_pad = (rect.top() + 2 * factor + (factor - 1)) / factor;
   top_pad = MIN(height, top_pad);
   int right_pad = (rect.right() + 2 * factor + (factor - 1)) / factor;
   right_pad = MIN(width, right_pad);
   int bottom_pad = MAX(rect.bottom() - 2 * factor, 0) / factor;
   int width_pad = right_pad - left_pad;
   int height_pad = top_pad - bottom_pad;
   // Nothing has been allocated yet, so bailing out here leaks nothing.
   if (width_pad < 1 || height_pad < 1 || width_pad + height_pad < 4)
     return;
   // Now crop the pix to the rectangle.
   // The box y origin is height - top_pad: presumably rect is in bottom-up
   // coordinates while the pix is top-down -- TODO confirm.
   Box* scaled_box = boxCreate(left_pad, height - top_pad,
                               width_pad, height_pad);
   Pix* scaled = pixClipRectangle(pix, scaled_box, NULL);
 
   // Compute stats over the whole image.
   STATS red_stats(0, 256);
   STATS green_stats(0, 256);
   STATS blue_stats(0, 256);
   uinT32* data = pixGetData(scaled);
   // The loops below advance data by one word per pixel and never skip to the
   // next row explicitly, so they require exactly width_pad words per line.
   ASSERT_HOST(pixGetWpl(scaled) == width_pad);
   for (int y = 0; y < height_pad; ++y) {
     for (int x = 0; x < width_pad; ++x, ++data) {
       int r = GET_DATA_BYTE(data, COLOR_RED);
       int g = GET_DATA_BYTE(data, COLOR_GREEN);
       int b = GET_DATA_BYTE(data, COLOR_BLUE);
       red_stats.add(r, 1);
       green_stats.add(g, 1);
       blue_stats.add(b, 1);
     }
   }
   // Find the RGB component with the greatest 8th-ile-range.
   // 8th-iles are used instead of quartiles to get closer to the true
   // foreground color, which is going to be faint at best because of the
   // pre-scaling of the input image.
   // x_color is the channel with the widest range; y1_color and y2_color are
   // the remaining two channels, later fitted as linear functions of x_color.
   int best_l8 = static_cast<int>(red_stats.ile(0.125f));
   int best_u8 = static_cast<int>(ceil(red_stats.ile(0.875f)));
   int best_i8r = best_u8 - best_l8;
   int x_color = COLOR_RED;
   int y1_color = COLOR_GREEN;
   int y2_color = COLOR_BLUE;
   int l8 = static_cast<int>(green_stats.ile(0.125f));
   int u8 = static_cast<int>(ceil(green_stats.ile(0.875f)));
   if (u8 - l8 > best_i8r) {
     best_i8r = u8 - l8;
     best_l8 = l8;
     best_u8 = u8;
     x_color = COLOR_GREEN;
     y1_color = COLOR_RED;
   }
   l8 = static_cast<int>(blue_stats.ile(0.125f));
   u8 = static_cast<int>(ceil(blue_stats.ile(0.875f)));
   if (u8 - l8 > best_i8r) {
     best_i8r = u8 - l8;
     best_l8 = l8;
     best_u8 = u8;
     x_color = COLOR_BLUE;
     y1_color = COLOR_GREEN;
     y2_color = COLOR_RED;
   }
   if (best_i8r >= kMinColorDifference) {
     // Enough spread for two colors: fit y1 and y2 against x, then evaluate
     // the fitted lines at the lower and upper 8th-iles of x.
     LLSQ line1;
     LLSQ line2;
     // Note: this data deliberately shadows the outer cursor, restarting the
     // walk from the first pixel of scaled.
     uinT32* data = pixGetData(scaled);
     for (int im_y = 0; im_y < height_pad; ++im_y) {
       for (int im_x = 0; im_x < width_pad; ++im_x, ++data) {
         int x = GET_DATA_BYTE(data, x_color);
         int y1 = GET_DATA_BYTE(data, y1_color);
         int y2 = GET_DATA_BYTE(data, y2_color);
         line1.add(x, y1);
         line2.add(x, y2);
       }
     }
     double m1 = line1.m();
     double c1 = line1.c(m1);
     double m2 = line2.m();
     double c2 = line2.c(m2);
     // Combined fit error of both lines, scaled before being stored in a byte.
     double rms = line1.rms(m1, c1) + line2.rms(m2, c2);
     rms *= kRMSFitScaling;
     // Save the results.
     // The + 0.5 rounds to nearest before ClipToByte truncates.
     color1[x_color] = ClipToByte(best_l8);
     color1[y1_color] = ClipToByte(m1 * best_l8 + c1 + 0.5);
     color1[y2_color] = ClipToByte(m2 * best_l8 + c2 + 0.5);
     color1[L_ALPHA_CHANNEL] = ClipToByte(rms);
     color2[x_color] = ClipToByte(best_u8);
     color2[y1_color] = ClipToByte(m1 * best_u8 + c1 + 0.5);
     color2[y2_color] = ClipToByte(m2 * best_u8 + c2 + 0.5);
     color2[L_ALPHA_CHANNEL] = ClipToByte(rms);
   } else {
     // There is only one color.
     color1[COLOR_RED] = ClipToByte(red_stats.median());
     color1[COLOR_GREEN] = ClipToByte(green_stats.median());
     color1[COLOR_BLUE] = ClipToByte(blue_stats.median());
     color1[L_ALPHA_CHANNEL] = 0;
     // Copies the 4 bytes (R, G, B and the alpha/rms slot) so both colors match.
     memcpy(color2, color1, 4);
   }
   if (color_map1 != NULL) {
     // Paint the results over the sampled box in each of the output maps.
     pixSetInRectArbitrary(color_map1, scaled_box,
                           ComposeRGB(color1[COLOR_RED],
                               color1[COLOR_GREEN],
                               color1[COLOR_BLUE]));
     pixSetInRectArbitrary(color_map2, scaled_box,
                           ComposeRGB(color2[COLOR_RED],
                               color2[COLOR_GREEN],
                               color2[COLOR_BLUE]));
     pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]);
   }
   // Release the temporaries created by boxCreate/pixClipRectangle above.
   pixDestroy(&scaled);
   boxDestroy(&scaled_box);
 }

void tesseract::ImageFind::ConnCompAndRectangularize	(	Pix *	pix,
		Boxa **	boxa,
		Pixa **	pixa
	)

static

Definition at line 133 of file imagefind.cpp.

                                                                             {
   // Splits the mask pix into 8-connected components, returning their boxes
   // in *boxa and their images in *pixa. Any component that
   // pixNearlyRectangular accepts is replaced by a solid (all-set) pix of the
   // detected rectangle, and its box is moved/resized to match.
   *boxa = NULL;
   *pixa = NULL;
 
   if (textord_tabfind_show_images)
     pixWrite("junkconncompimage.png", pix, IFF_PNG);
   // Extract the individual image regions from the mask.
   *boxa = pixConnComp(pix, pixa, 8);
   // A sharp edge in vertical and/or horizontal occupancy suggests a
   // rectangular image with unwanted bits merged on, so such components are
   // clipped to the approximate rectangle.
   const int component_count = pixaGetCount(*pixa);
   for (int index = 0; index < component_count; ++index) {
     Pix* component = pixaGetPix(*pixa, index, L_CLONE);
     pixDisplayWrite(component, textord_tabfind_show_images);
     int rect_x0, rect_x1, rect_y0, rect_y1;
     const bool is_rectangular =
         pixNearlyRectangular(component, kMinRectangularFraction,
                              kMaxRectangularFraction,
                              kMaxRectangularGradient,
                              &rect_x0, &rect_y0, &rect_x1, &rect_y1);
     if (is_rectangular) {
       const int rect_width = rect_x1 - rect_x0;
       const int rect_height = rect_y1 - rect_y0;
       Pix* solid_pix = pixCreate(rect_width, rect_height, 1);
       pixSetAll(solid_pix);
       // Drop our clone of the old component before it is swapped out.
       pixDestroy(&component);
       // pixaReplacePix takes ownership of solid_pix.
       pixaReplacePix(*pixa, index, solid_pix, NULL);
       component = pixaGetPix(*pixa, index, L_CLONE);
       // The rectangle is relative to the old box, so offset by its origin.
       l_int32 box_x, box_y, box_width, box_height;
       boxaGetBoxGeometry(*boxa, index, &box_x, &box_y,
                          &box_width, &box_height);
       Box* solid_box = boxCreate(box_x + rect_x0, box_y + rect_y0,
                                  rect_width, rect_height);
       boxaReplaceBox(*boxa, index, solid_box);
     }
     pixDestroy(&component);
   }
 }

int tesseract::ImageFind::CountPixelsInRotatedBox	(	TBOX	box,
		const TBOX &	im_box,
		const FCOORD &	rotation,
		Pix *	pix
	)

static

Definition at line 573 of file imagefind.cpp.

                                                                          {
   // Counts the set pixels of pix that fall inside box. box is first clipped
   // to im_box, then both are rotated by rotation so that the clipped box can
   // be located inside pix relative to the rotated im_box. Returns 0 when box
   // and im_box do not intersect.
   box &= im_box;  // In-place intersection with the image box.
   if (box.null_box()) {
     return 0;
   }
   TBOX rotated_im_box(im_box);
   rotated_im_box.rotate(rotation);
   box.rotate(rotation);

   const int region_width = box.width();
   const int region_height = box.height();
   // Offsets of the region within pix: x from the left edge of the rotated
   // image box, y measured downwards from its top edge.
   const int source_x = box.left() - rotated_im_box.left();
   const int source_y = rotated_im_box.top() - box.top();

   // Copy the region into a scratch pix and count what landed there.
   Pix* region_pix = pixCreate(region_width, region_height, 1);
   pixRasterop(region_pix, 0, 0, region_width, region_height,
               PIX_SRC, pix, source_x, source_y);
   l_int32 pixel_count;
   pixCountPixels(region_pix, &pixel_count, NULL);
   pixDestroy(&region_pix);
   return pixel_count;
 }

void tesseract::ImageFind::FindImagePartitions	(	Pix *	image_pix,
		const FCOORD &	rotation,
		const FCOORD &	rerotation,
		TO_BLOCK *	block,
		TabFind *	tab_grid,
		ColPartitionGrid *	part_grid,
		ColPartition_LIST *	big_parts
	)

static

Definition at line 1274 of file imagefind.cpp.

                                                                  {
   // Converts the image mask image_pix into image partitions in part_grid.
   // Each connected component of the mask is divided into parts around the
   // existing contents of part_grid, weak parts are eliminated, and the
   // survivors are inserted into part_grid and chained together as partners.
   // Finally DeleteSmallImages is run over the grid.
   //   rotation/rerotation - forwarded to DivideImageIntoParts; rotation is
   //                         also applied to each component's bounding box.
   //   big_parts           - forwarded to EliminateWeakParts.
   // Note: block and tab_grid are not referenced anywhere in this body.
   int imageheight = pixGetHeight(image_pix);
   Boxa* boxa;
   Pixa* pixa;
   ConnCompAndRectangularize(image_pix, &boxa, &pixa);
   // Iterate the connected components in the image regions mask.
   int nboxes = boxaGetCount(boxa);
   for (int i = 0; i < nboxes; ++i) {
     l_int32 x, y, width, height;
     boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height);
     Pix* pix = pixaGetPix(pixa, i, L_CLONE);
     // Flip y: the Leptonica box measures y from the top of the image, while
     // the TBOX is built with y measured from the bottom.
     TBOX im_box(x, imageheight -y - height, x + width, imageheight - y);
     im_box.rotate(rotation);  // Now matches all partitions and blobs.
     ColPartitionGridSearch rectsearch(part_grid);
     rectsearch.SetUniqueMode(true);
     ColPartition_LIST part_list;
     DivideImageIntoParts(im_box, rotation, rerotation, pix,
                          &rectsearch, &part_list);
     if (textord_tabfind_show_images) {
       pixWrite("junkimagecomponent.png", pix, IFF_PNG);
       tprintf("Component has %d parts\n", part_list.length());
     }
     // Done with the component image; release the clone taken above.
     pixDestroy(&pix);
     if (!part_list.empty()) {
       ColPartition_IT part_it(&part_list);
       if (part_list.singleton()) {
         // We didn't have to chop it into a polygon to fit around text, so
         // try expanding it to merge fragmented image parts, as long as it
         // doesn't touch strong text.
         ColPartition* part = part_it.extract();
         TBOX text_box(im_box);
         MaximalImageBoundingBox(part_grid, &text_box);
         // Empty-bodied loop: keep expanding until the call reports false.
         // part is passed by address, so the call may replace it.
         while (ExpandImageIntoParts(text_box, &rectsearch, part_grid, &part));
         // Put the (possibly replaced) part back into the now-empty list.
         part_it.set_to_list(&part_list);
         part_it.add_after_then_move(part);
         im_box = part->bounding_box();
       }
       EliminateWeakParts(im_box, part_grid, big_parts, &part_list);
       // Iterate the part_list and put the parts into the grid.
       // Every pass extracts the current element, so the loop ends when the
       // list has been emptied rather than when the iterator cycles.
       for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
         ColPartition* image_part = part_it.extract();
         im_box = image_part->bounding_box();
         part_grid->InsertBBox(true, true, image_part);
         if (!part_it.at_last()) {
           // Link this part and the next one in the list as partners of each
           // other, in opposite directions.
           ColPartition* neighbour = part_it.data_relative(1);
           image_part->AddPartner(false, neighbour);
           neighbour->AddPartner(true, image_part);
         }
       }
     }
   }
   boxaDestroy(&boxa);
   pixaDestroy(&pixa);
   DeleteSmallImages(part_grid);
   if (textord_tabfind_show_images) {
     // NOTE(review): the window pointer is not released in this function --
     // confirm who owns the ScrollView returned by MakeWindow.
     ScrollView* images_win_ = part_grid->MakeWindow(1000, 400, "With Images");
     part_grid->DisplayBoxes(images_win_);
   }
 }

Pix * tesseract::ImageFind::FindImages ( Pix * pix )

static

Definition at line 65 of file imagefind.cpp.

                                    {
   // Builds a mask of the image (halftone) regions of pix.
   // Returns a newly created 1 bit deep Pix with the same width and height as
   // pix. For inputs smaller than kMinImageFindSize in either dimension, or
   // when no halftone region is found, the freshly created pix is returned
   // without anything being drawn into it. pix itself is only read.
   // Not worth looking at small images.
   if (pixGetWidth(pix) < kMinImageFindSize ||
       pixGetHeight(pix) < kMinImageFindSize)
     return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
   // Reduce by factor 2.
   Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
   pixDisplayWrite(pixr, textord_tabfind_show_images);
 
   // Get the halftone mask directly from Leptonica.
   l_int32 ht_found = 0;
   Pix *pixht2 = pixGenHalftoneMask(pixr, NULL, &ht_found,
                                    textord_tabfind_show_images);
   pixDestroy(&pixr);
   // A mask may be returned even when nothing was found; discard it so that
   // the NULL test below covers both "no mask" and "no halftone".
   if (!ht_found && pixht2 != NULL)
     pixDestroy(&pixht2);
   if (pixht2 == NULL)
     return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
 
   // Expand back up again.
   Pix *pixht = pixExpandReplicate(pixht2, 2);
   pixDisplayWrite(pixht, textord_tabfind_show_images);
   pixDestroy(&pixht2);
 
   // Fill to capture pixels near the mask edges that were missed
   // (seed = halftone mask, filling mask = original image, 8-connected).
   Pix *pixt = pixSeedfillBinary(NULL, pixht, pix, 8);
   pixOr(pixht, pixht, pixt);
   pixDestroy(&pixt);
 
   // Eliminate lines and bars that may be joined to images.
   // Fine mask: a four-level cascade of pixht, then dilated in place.
   Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
   pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
   pixDisplayWrite(pixfinemask, textord_tabfind_show_images);
   // Coarse mask: two further cascades, dilated, then replicated by 8 so that
   // it can be ANDed with the fine mask (presumably bringing both to the same
   // scale -- see Leptonica's cascade semantics).
   Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
   Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
   pixDestroy(&pixreduced);
   pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
   Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
   pixDestroy(&pixreduced2);
   pixDisplayWrite(pixcoarsemask, textord_tabfind_show_images);
   // Combine the coarse and fine image masks.
   pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask);
   pixDestroy(&pixfinemask);
   // Dilate a bit to make sure we get everything.
   pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
   // Replicate by 16 before combining with the full-size halftone mask.
   Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16);
   pixDestroy(&pixcoarsemask);
   if (textord_tabfind_show_images)
     pixWrite("junkexpandedcoarsemask.png", pixmask, IFF_PNG);
   // And the image mask with the line and bar remover.
   pixAnd(pixht, pixht, pixmask);
   pixDestroy(&pixmask);
   if (textord_tabfind_show_images)
     pixWrite("junkfinalimagemask.png", pixht, IFF_PNG);
   // Make the result image the same size as the input.
   // The mask is ORed into a pix created with the input's exact dimensions
   // instead of returning pixht directly.
   Pix* result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
   pixOr(result, result, pixht);
   pixDestroy(&pixht);
   return result;
 }

bool tesseract::ImageFind::pixNearlyRectangular	(	Pix *	pix,
		double	min_fraction,
		double	max_fraction,
		double	max_skew_gradient,
		int *	x_start,
		int *	y_start,
		int *	x_end,
		int *	y_end
	)

static

Definition at line 242 of file imagefind.cpp.

                                                              {
   // Searches pix for a rectangle bounded by four sharp edges.
   //   min_fraction, max_fraction - multiplied by the current width (or
   //       height) to give the min_count/max_count thresholds handed to the
   //       edge scanners.
   //   max_skew_gradient - multiplied by the current width (or height) to give
   //       the edge_width handed to the edge scanners.
   //   x_start, y_start, x_end, y_end - outputs. They start as the full extent
   //       of pix (ends exclusive) and are passed to the scanners as
   //       out-parameters, presumably to be moved inwards.
   // Returns true only if an edge was found on all four sides. The output
   // coordinates are written in every case.
   ASSERT_HOST(pix != NULL);
   *x_start = 0;
   *x_end = pixGetWidth(pix);
   *y_start = 0;
   *y_end = pixGetHeight(pix);
 
   uinT32* data = pixGetData(pix);
   int wpl = pixGetWpl(pix);
   bool any_cut = false;
   bool left_done = false;
   bool right_done = false;
   bool top_done = false;
   bool bottom_done = false;
   // Repeat while any side reports its first edge. Each *_done flag can be set
   // only once, so any_cut is true on at most four passes and the loop ends.
   do {
     any_cut = false;
     // Find the top/bottom edges.
     int width = *x_end - *x_start;
     int min_count = static_cast<int>(width * min_fraction);
     int max_count = static_cast<int>(width * max_fraction);
     int edge_width = static_cast<int>(width * max_skew_gradient);
     // The scan is the left operand of &&, so it runs on every pass even when
     // the side is already done, and may update its output coordinate again.
     if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
                      max_count, *y_end, 1, y_start) && !top_done) {
       top_done = true;
       any_cut = true;
     }
     // Step *y_end back by one for the upward scan (step -1) and restore it
     // afterwards; presumably this makes the exclusive end an inclusive row.
     --(*y_end);
     if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
                      max_count, *y_start, -1, y_end) && !bottom_done) {
       bottom_done = true;
       any_cut = true;
     }
     ++(*y_end);
 
     // Find the left/right edges.
     // Thresholds are recomputed from the height as just updated above.
     int height = *y_end - *y_start;
     min_count = static_cast<int>(height * min_fraction);
     max_count = static_cast<int>(height * max_fraction);
     edge_width = static_cast<int>(height * max_skew_gradient);
     if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
                      max_count, *x_end, 1, x_start) && !left_done) {
       left_done = true;
       any_cut = true;
     }
     // Same adjust-and-restore of the end coordinate as for *y_end above.
     --(*x_end);
     if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
                      max_count, *x_start, -1, x_end) && !right_done) {
       right_done = true;
       any_cut = true;
     }
     ++(*x_end);
   } while (any_cut);
 
   // All edges must satisfy the condition of sharp gradient in pixel density
   // in order for the full rectangle to be present.
   return left_done && right_done && top_done && bottom_done;
 }

void tesseract::ImageFind::TransferImagePartsToImageMask	(	const FCOORD &	rerotation,
		ColPartitionGrid *	part_grid,
		Pix *	image_mask
	)

static

Definition at line 1221 of file imagefind.cpp.

                                                                {
   // Pulls every partition whose blob type is BRT_NOISE, BRT_RECTIMAGE or
   // BRT_POLYIMAGE out of part_grid onto a temporary list, then hands that
   // list to MarkAndDeleteImageParts together with rerotation and image_mask.
   ColPartition_LIST parts_list;
   ColPartition_IT part_it(&parts_list);
   ColPartitionGridSearch gsearch(part_grid);
   gsearch.StartFullSearch();
   for (ColPartition* part = gsearch.NextFullSearch(); part != NULL;
        part = gsearch.NextFullSearch()) {
     switch (part->blob_type()) {
       case BRT_NOISE:
       case BRT_RECTIMAGE:
       case BRT_POLYIMAGE:
         // Move the partition from the grid to the temporary list.
         part_it.add_after_then_move(part);
         gsearch.RemoveBBox();
         break;
       default:
         break;  // Every other type stays in the grid.
     }
   }
   // Render the collected partitions to the image mask.
   MarkAndDeleteImageParts(rerotation, part_grid, &parts_list, image_mask);
 }

The documentation for this class was generated from the following files:

textord/imagefind.h
textord/imagefind.cpp

Static Public Member Functions

Detailed Description

Member Function Documentation