All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
thresholder.cpp
Go to the documentation of this file.
1 // File: thresholder.cpp
3 // Description: Base API for thresholding images in tesseract.
4 // Author: Ray Smith
5 // Created: Mon May 12 11:28:15 PDT 2008
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #include "allheaders.h"
21 
22 #include "thresholder.h"
23 
24 #include <string.h>
25 
26 #include "otsuthr.h"
27 
28 #include "openclwrapper.h"
29 
30 namespace tesseract {
31 
33  : pix_(NULL),
34  image_width_(0), image_height_(0),
35  pix_channels_(0), pix_wpl_(0),
36  scale_(1), yres_(300), estimated_res_(300) {
37  SetRectangle(0, 0, 0, 0);
38 }
39 
41  Clear();
42 }
43 
44 // Destroy the Pix if there is one, freeing memory.
46  pixDestroy(&pix_);
47 }
48 
49 // Return true if no image has been set.
51  return pix_ == NULL;
52 }
53 
54 // SetImage makes a copy of all the image data, so it may be deleted
55 // immediately after this call.
56 // Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
57 // Palette color images will not work properly and must be converted to
58 // 24 bit.
59 // Binary images of 1 bit per pixel may also be given but they must be
60 // byte packed with the MSB of the first byte being the first pixel, and a
61 // one pixel is WHITE. For binary images set bytes_per_pixel=0.
62 void ImageThresholder::SetImage(const unsigned char* imagedata,
63  int width, int height,
64  int bytes_per_pixel, int bytes_per_line) {
65  int bpp = bytes_per_pixel * 8;
66  if (bpp == 0) bpp = 1;
67  Pix* pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
68  l_uint32* data = pixGetData(pix);
69  int wpl = pixGetWpl(pix);
70  switch (bpp) {
71  case 1:
72  for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
73  for (int x = 0; x < width; ++x) {
74  if (imagedata[x / 8] & (0x80 >> (x % 8)))
75  CLEAR_DATA_BIT(data, x);
76  else
77  SET_DATA_BIT(data, x);
78  }
79  }
80  break;
81 
82  case 8:
83  // Greyscale just copies the bytes in the right order.
84  for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
85  for (int x = 0; x < width; ++x)
86  SET_DATA_BYTE(data, x, imagedata[x]);
87  }
88  break;
89 
90  case 24:
91  // Put the colors in the correct places in the line buffer.
92  for (int y = 0; y < height; ++y, imagedata += bytes_per_line) {
93  for (int x = 0; x < width; ++x, ++data) {
94  SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]);
95  SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]);
96  SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]);
97  }
98  }
99  break;
100 
101  case 32:
102  // Maintain byte order consistency across different endianness.
103  for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) {
104  for (int x = 0; x < width; ++x) {
105  data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) |
106  (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3];
107  }
108  }
109  break;
110 
111  default:
112  tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp);
113  }
114  pixSetYRes(pix, 300);
115  SetImage(pix);
116  pixDestroy(&pix);
117 }
118 
119 // Store the coordinates of the rectangle to process for later use.
120 // Doesn't actually do any thresholding.
121 void ImageThresholder::SetRectangle(int left, int top, int width, int height) {
122  rect_left_ = left;
123  rect_top_ = top;
124  rect_width_ = width;
125  rect_height_ = height;
126 }
127 
128 // Get enough parameters to be able to rebuild bounding boxes in the
129 // original image (not just within the rectangle).
130 // Left and top are enough with top-down coordinates, but
131 // the height of the rectangle and the image are needed for bottom-up.
132 void ImageThresholder::GetImageSizes(int* left, int* top,
133  int* width, int* height,
134  int* imagewidth, int* imageheight) {
135  *left = rect_left_;
136  *top = rect_top_;
137  *width = rect_width_;
138  *height = rect_height_;
139  *imagewidth = image_width_;
140  *imageheight = image_height_;
141 }
142 
143 // Pix vs raw, which to use? Pix is the preferred input for efficiency,
144 // since raw buffers are copied.
145 // SetImage for Pix clones its input, so the source pix may be pixDestroyed
146 // immediately after, but may not go away until after the Thresholder has
147 // finished with it.
148 void ImageThresholder::SetImage(const Pix* pix) {
149  if (pix_ != NULL)
150  pixDestroy(&pix_);
151  Pix* src = const_cast<Pix*>(pix);
152  int depth;
153  pixGetDimensions(src, &image_width_, &image_height_, &depth);
154  // Convert the image as necessary so it is one of binary, plain RGB, or
155  // 8 bit with no colormap.
156  if (depth > 1 && depth < 8) {
157  pix_ = pixConvertTo8(src, false);
158  } else if (pixGetColormap(src)) {
159  pix_ = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
160  } else {
161  pix_ = pixClone(src);
162  }
163  depth = pixGetDepth(pix_);
164  pix_channels_ = depth / 8;
165  pix_wpl_ = pixGetWpl(pix_);
166  scale_ = 1;
167  estimated_res_ = yres_ = pixGetYRes(src);
168  Init();
169 }
170 
171 // Threshold the source image as efficiently as possible to the output Pix.
172 // Creates a Pix and sets pix to point to the resulting pointer.
173 // Caller must use pixDestroy to free the created Pix.
174 void ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) {
175  if (pix_channels_ == 0) {
176  // We have a binary image, so it just has to be cloned.
177  *pix = GetPixRect();
178  } else {
180  }
181 }
182 
183 // Gets a pix that contains an 8 bit threshold value at each pixel. The
184 // returned pix may be an integer reduction of the binary image such that
185 // the scale factor may be inferred from the ratio of the sizes, even down
186 // to the extreme of a 1x1 pixel thresholds image.
187 // Ideally the 8 bit threshold should be the exact threshold used to generate
188 // the binary image in ThresholdToPix, but this is not a hard constraint.
189 // Returns NULL if the input is binary. PixDestroy after use.
191  if (IsBinary()) return NULL;
192  Pix* pix_grey = GetPixRectGrey();
193  int width = pixGetWidth(pix_grey);
194  int height = pixGetHeight(pix_grey);
195  int* thresholds;
196  int* hi_values;
197  OtsuThreshold(pix_grey, 0, 0, width, height, &thresholds, &hi_values);
198  pixDestroy(&pix_grey);
199  Pix* pix_thresholds = pixCreate(width, height, 8);
200  int threshold = thresholds[0] > 0 ? thresholds[0] : 128;
201  pixSetAllArbitrary(pix_thresholds, threshold);
202  delete [] thresholds;
203  delete [] hi_values;
204  return pix_thresholds;
205 }
206 
207 // Common initialization shared between SetImage methods.
210 }
211 
212 // Get a clone/copy of the source image rectangle.
213 // The returned Pix must be pixDestroyed.
214 // This function will be used in the future by the page layout analysis, and
215 // the layout analysis that uses it will only be available with Leptonica,
216 // so there is no raw equivalent.
218  if (IsFullImage()) {
219  // Just clone the whole thing.
220  return pixClone(pix_);
221  } else {
222  // Crop to the given rectangle.
223  Box* box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
224  Pix* cropped = pixClipRectangle(pix_, box, NULL);
225  boxDestroy(&box);
226  return cropped;
227  }
228 }
229 
230 // Get a clone/copy of the source image rectangle, reduced to greyscale,
231 // and at the same resolution as the output binary.
232 // The returned Pix must be pixDestroyed.
233 // Provided to the classifier to extract features from the greyscale image.
235  Pix* pix = GetPixRect(); // May have to be reduced to grey.
236  int depth = pixGetDepth(pix);
237  if (depth != 8) {
238  Pix* result = depth < 8 ? pixConvertTo8(pix, false)
239  : pixConvertRGBToLuminance(pix);
240  pixDestroy(&pix);
241  return result;
242  }
243  return pix;
244 }
245 
246 // Otsu thresholds the rectangle, taking the rectangle from *this.
248  Pix** out_pix) const {
249  PERF_COUNT_START("OtsuThresholdRectToPix")
250  int* thresholds;
251  int* hi_values;
252 
253  int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_,
254  rect_height_, &thresholds, &hi_values);
255  // only use opencl if compiled w/ OpenCL and selected device is opencl
256 #ifdef USE_OPENCL
257  OpenclDevice od;
258  if ((num_channels == 4 || num_channels == 1) &&
259  od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0 ) {
260  od.ThresholdRectToPixOCL((const unsigned char*)pixGetData(src_pix),
261  num_channels, pixGetWpl(src_pix) * 4,
262  thresholds, hi_values, out_pix /*pix_OCL*/,
264  } else {
265 #endif
266  ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);
267 #ifdef USE_OPENCL
268  }
269 #endif
270  delete [] thresholds;
271  delete [] hi_values;
272 
274 }
275 
279 // arrays and also the bytes per pixel in src_pix.
281  int num_channels,
282  const int* thresholds,
283  const int* hi_values,
284  Pix** pix) const {
285  PERF_COUNT_START("ThresholdRectToPix")
286  *pix = pixCreate(rect_width_, rect_height_, 1);
287  uinT32* pixdata = pixGetData(*pix);
288  int wpl = pixGetWpl(*pix);
289  int src_wpl = pixGetWpl(src_pix);
290  uinT32* srcdata = pixGetData(src_pix);
291  for (int y = 0; y < rect_height_; ++y) {
292  const uinT32* linedata = srcdata + (y + rect_top_) * src_wpl;
293  uinT32* pixline = pixdata + y * wpl;
294  for (int x = 0; x < rect_width_; ++x) {
295  bool white_result = true;
296  for (int ch = 0; ch < num_channels; ++ch) {
297  int pixel = GET_DATA_BYTE(const_cast<void*>(
298  reinterpret_cast<const void *>(linedata)),
299  (x + rect_left_) * num_channels + ch);
300  if (hi_values[ch] >= 0 &&
301  (pixel > thresholds[ch]) == (hi_values[ch] == 0)) {
302  white_result = false;
303  break;
304  }
305  }
306  if (white_result)
307  CLEAR_DATA_BIT(pixline, x);
308  else
309  SET_DATA_BIT(pixline, x);
310  }
311  }
312 
314 }
315 
316 } // namespace tesseract.
317 
virtual Pix * GetPixRectThresholds()
#define tprintf(...)
Definition: tprintf.h:31
virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, int *imageheight)
virtual void Init()
Common initialization shared between SetImage methods.
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:62
bool IsBinary() const
Returns true if the source image is binary.
Definition: thresholder.h:75
void SetRectangle(int left, int top, int width, int height)
virtual Pix * GetPixRectGrey()
unsigned int uinT32
Definition: host.h:103
#define PERF_COUNT_START(FUNCT_NAME)
void OtsuThresholdRectToPix(Pix *src_pix, Pix **out_pix) const
virtual void ThresholdToPix(PageSegMode pageseg_mode, Pix **pix)
bool IsEmpty() const
Return true if no image has been set.
Definition: thresholder.cpp:50
#define PERF_COUNT_END
bool IsFullImage() const
Return true if we are processing the full image.
Definition: thresholder.h:152
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
Definition: thresholder.cpp:45
#define NULL
Definition: host.h:144
int OtsuThreshold(Pix *src_pix, int left, int top, int width, int height, int **thresholds, int **hi_values)
Definition: otsuthr.cpp:39
void ThresholdRectToPix(Pix *src_pix, int num_channels, const int *thresholds, const int *hi_values, Pix **pix) const