All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
devanagari_processing.h
Go to the documentation of this file.
1 // Copyright 2008 Google Inc. All Rights Reserved.
2 // Author: shobhitsaxena@google.com (Shobhit Saxena)
3 
4 #ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
5 #define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
6 
7 #include "ocrblock.h"
8 #include "params.h"
9 
10 struct Pix;
11 struct Box;
12 struct Boxa;
13 
14 extern
16  "Debug level for split shiro-rekha process.");
17 
18 extern
20  "Whether to create a debug image for split shiro-rekha process.");
21 
22 class TBOX;
23 
24 namespace tesseract {
25 
27  public:
29  hist_ = NULL;
30  length_ = 0;
31  }
32 
34  Clear();
35  }
36 
37  void Clear() {  // Frees the histogram buffer and resets the length to zero.
38  // delete[] on a null pointer is a no-op, so no guard is required.
39  delete[] hist_;
40  hist_ = NULL;  // A later Clear() must not delete the same buffer twice.
41  length_ = 0;
42  }
43 
44  int* hist() const {  // Returns the internal hist_ array; this object keeps ownership.
45  return hist_;  // A top-level const on a by-value return is ignored, so it is omitted.
46  }
47 
48  int length() const {  // Returns length_ (presumably the number of bins in hist_ -- TODO confirm).
49  return length_;
50  }
51 
52  // Methods to construct histograms from images. These clear any existing data.
53  void ConstructVerticalCountHist(Pix* pix);
54  void ConstructHorizontalCountHist(Pix* pix);
55 
56  // This method returns the global maximum of the histogram. The frequency of
57  // the global maximum is returned in count, if specified.
58  int GetHistogramMaximum(int* count) const;
59 
60  private:
61  int* hist_;
62  int length_;
63 };
64 
66  public:
68  NO_SPLIT = 0, // No splitting is performed for the phase.
69  MINIMAL_SPLIT, // Blobs are split minimally.
70  MAXIMAL_SPLIT // Blobs are split maximally.
71  };
72 
74  virtual ~ShiroRekhaSplitter();
75 
76  // Top-level method to perform splitting based on current settings.
77  // Returns true if a split was actually performed.
78  // If split_for_pageseg is true, the pageseg_split_strategy_ is used for
79  // splitting. If false, the ocr_split_strategy_ is used.
80  bool Split(bool split_for_pageseg);
81 
82  // Clears the memory held by this object.
83  void Clear();
84 
85  // Refreshes the words in the segmentation block list by using blobs in the
86  // input blob list.
87  // The segmentation block list must be set.
88  void RefreshSegmentationWithNewBlobs(C_BLOB_LIST* new_blobs);
89 
90  // Returns true if the split strategies for pageseg and ocr are different.
92  return pageseg_split_strategy_ != ocr_split_strategy_;
93  }
94 
95  // Stores only the block list pointer; no copy of the list is made. The list
96  // object must still be alive when Split() is called. This block list is used
97  // as a golden segmentation when performing splitting.
98  void set_segmentation_block_list(BLOCK_LIST* block_list) {
99  segmentation_block_list_ = block_list;
100  }
101 
102  static const int kUnspecifiedXheight = -1;
103 
104  void set_global_xheight(int xheight) {  // Stores the global xheight estimate (see GetXheightForCC).
105  global_xheight_ = xheight;
106  }
107 
108  void set_perform_close(bool perform) {  // Sets whether a morphological close is done before splitting.
109  perform_close_ = perform;
110  }
111 
112  // Returns the image obtained from shiro-rekha splitting. The returned pix
113  // is owned by this class; callers should clone it if they need it to stay
114  // alive beyond the life of the ShiroRekhaSplitter object.
115  Pix* splitted_image() {
116  return splitted_image_;
117  }
118 
119  // On setting the input image, a clone of it is owned by this class.
120  void set_orig_pix(Pix* pix);
121 
122  // Returns this object's clone of the input image. The pix is owned by this
123  // class; callers may want to clone the returned pix to work with it.
124  Pix* orig_pix() {
125  return orig_pix_;
126  }
127 
129  return ocr_split_strategy_;
130  }
131 
133  ocr_split_strategy_ = strategy;
134  }
135 
137  return pageseg_split_strategy_;
138  }
139 
141  pageseg_split_strategy_ = strategy;
142  }
143 
144  BLOCK_LIST* segmentation_block_list() {  // Returns the stored block list pointer (not a copy).
145  return segmentation_block_list_;
146  }
147 
148  // This method dumps a debug image to the specified location.
149  void DumpDebugImage(const char* filename) const;
150 
151  // This method returns the computed mode-height of blobs in the pix.
152  // It also prunes very small blobs from calculation. Could be used to provide
153  // a global xheight estimate for images which have the same point-size text.
154  static int GetModeHeight(Pix* pix);
155 
156  private:
157  // Method to perform a close operation on the input image. The xheight
158  // estimate decides the size of sel used.
159  static void PerformClose(Pix* pix, int xheight_estimate);
160 
161  // This method resolves the cc bbox to a particular row and returns the row's
162  // xheight. This uses segmentation_block_list_ if available, else just returns
163  // the global_xheight_ estimate currently set in the object.
164  int GetXheightForCC(Box* cc_bbox);
165 
166  // Outputs, via regions_to_clear, a list of regions (boxes) which should be
167  // cleared in the original image so as to perform shiro-rekha splitting. Pix
168  // is assumed to carry at most one word. The xheight measure could be the
169  // global estimate, the row estimate, or unspecified. If unspecified,
170  // over-splitting may occur, since a conservative estimate of stroke width
171  // along with an associated multiplier is used in its place. It is advisable
172  // to have a specified xheight when splitting for classification/training.
173  void SplitWordShiroRekha(SplitStrategy split_strategy,
174  Pix* pix,
175  int xheight,
176  int word_left,
177  int word_top,
178  Boxa* regions_to_clear);
179 
180  // Returns a new box object for the corresponding TBOX, based on the original
181  // image's coordinate system.
182  Box* GetBoxForTBOX(const TBOX& tbox) const;
183 
184  // This method returns y-extents of the shiro-rekha computed from the input
185  // word image.
186  static void GetShiroRekhaYExtents(Pix* word_pix,
187  int* shirorekha_top,
188  int* shirorekha_bottom,
189  int* shirorekha_ylevel);
190 
191  Pix* orig_pix_; // Just a clone of the input image passed.
192  Pix* splitted_image_; // Image produced after the last splitting round. The
193  // object is owned by this class.
194  SplitStrategy pageseg_split_strategy_;
195  SplitStrategy ocr_split_strategy_;
196  Pix* debug_image_;
197  // This block list is used as a golden segmentation when performing splitting.
198  BLOCK_LIST* segmentation_block_list_;
199  int global_xheight_;
200  bool perform_close_; // Whether a morphological close operation should be
201  // performed before CCs are run through splitting.
202 };
203 
204 } // namespace tesseract.
205 
206 #endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
bool devanagari_split_debugimage
void RefreshSegmentationWithNewBlobs(C_BLOB_LIST *new_blobs)
#define INT_VAR_H(name, val, comment)
Definition: params.h:265
SplitStrategy ocr_split_strategy() const
SplitStrategy pageseg_split_strategy() const
void set_pageseg_split_strategy(SplitStrategy strategy)
void set_segmentation_block_list(BLOCK_LIST *block_list)
bool Split(bool split_for_pageseg)
int count(LIST var_list)
Definition: oldlist.cpp:108
Definition: rect.h:30
void set_ocr_split_strategy(SplitStrategy strategy)
int GetHistogramMaximum(int *count) const
#define NULL
Definition: host.h:144
int devanagari_split_debuglevel
void DumpDebugImage(const char *filename) const
#define BOOL_VAR_H(name, val, comment)
Definition: params.h:268