All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
char_samp.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: char_samp.h
3  * Description: Declaration of a Character Bitmap Sample Class
4  * Author: Ahmad Abdulkader
5  * Created: 2007
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 // The CharSamp class inherits from the Bmp8 class, which represents images
21 // of words, characters and segments throughout Cube.
22 // CharSamp adds data members that hold the physical location of the image
23 // in a page and, if available, the page number in a book.
24 // It also holds the label (GT) of the image, which might correspond to a
25 // single character or to a word.
26 // It also provides methods for segmenting, scaling and cropping the sample.
27 
28 #ifndef CHAR_SAMP_H
29 #define CHAR_SAMP_H
30 
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <string>
34 #include "bmp_8.h"
35 #include "string_32.h"
36 
37 namespace tesseract {
38 
39 class CharSamp : public Bmp8 {
40  public:
41  CharSamp();
42  CharSamp(int wid, int hgt);
43  CharSamp(int left, int top, int wid, int hgt);
44  ~CharSamp();
45  // accessor methods
46  unsigned short Left() const { return left_; }
47  unsigned short Right() const { return left_ + wid_; }
48  unsigned short Top() const { return top_; }
49  unsigned short Bottom() const { return top_ + hgt_; }
50  unsigned short Page() const { return page_; }
51  unsigned short NormTop() const { return norm_top_; }
52  unsigned short NormBottom() const { return norm_bottom_; }
53  unsigned short NormAspectRatio() const { return norm_aspect_ratio_; }
54  unsigned short FirstChar() const { return first_char_; }
55  unsigned short LastChar() const { return last_char_; }
56  char_32 Label() const {
57  if (label32_ == NULL || LabelLen() != 1) {
58  return 0;
59  }
60  return label32_[0];
61  }
62  char_32 * StrLabel() const { return label32_; }
63  string stringLabel() const;
64 
65  void SetLeft(unsigned short left) { left_ = left; }
66  void SetTop(unsigned short top) { top_ = top; }
67  void SetPage(unsigned short page) { page_ = page; }
68  void SetLabel(char_32 label) {
69  if (label32_ != NULL) {
70  delete []label32_;
71  }
72  label32_ = new char_32[2];
73  if (label32_ != NULL) {
74  label32_[0] = label;
75  label32_[1] = 0;
76  }
77  }
78  void SetLabel(const char_32 *label32) {
79  if (label32_ != NULL) {
80  delete []label32_;
81  label32_ = NULL;
82  }
83  if (label32 != NULL) {
84  // remove any byte order markes if any
85  if (label32[0] == 0xfeff) {
86  label32++;
87  }
88  int len = LabelLen(label32);
89  label32_ = new char_32[len + 1];
90  if (label32_ != NULL) {
91  memcpy(label32_, label32, len * sizeof(*label32));
92  label32_[len] = 0;
93  }
94  }
95  }
96  void SetLabel(string str);
97  void SetNormTop(unsigned short norm_top) { norm_top_ = norm_top; }
98  void SetNormBottom(unsigned short norm_bottom) {
99  norm_bottom_ = norm_bottom;
100  }
101  void SetNormAspectRatio(unsigned short norm_aspect_ratio) {
102  norm_aspect_ratio_ = norm_aspect_ratio;
103  }
104  void SetFirstChar(unsigned short first_char) {
105  first_char_ = first_char;
106  }
107  void SetLastChar(unsigned short last_char) {
108  last_char_ = last_char;
109  }
110 
111  // Saves the charsamp to a dump file
112  bool Save2CharDumpFile(FILE *fp) const;
113  // Crops the underlying image and returns a new CharSamp with the
114  // same character information but new dimensions. Warning: does not
115  // necessarily set the normalized top and bottom correctly since
116  // those depend on its location within the word (or CubeSearchObject).
117  CharSamp *Crop();
118  // Computes the connected components of the char sample
119  ConComp **Segment(int *seg_cnt, bool right_2_left, int max_hist_wnd,
120  int min_con_comp_size) const;
121  // returns a copy of the charsamp that is scaled to the
122  // specified width and height
123  CharSamp *Scale(int wid, int hgt, bool isotropic = true);
124  // returns a Clone of the charsample
125  CharSamp *Clone() const;
126  // computes the features corresponding to the char sample
127  bool ComputeFeatures(int conv_grid_size, float *features);
128  // Load a Char Samp from a dump file
129  static CharSamp *FromCharDumpFile(CachedFile *fp);
130  static CharSamp *FromCharDumpFile(FILE *fp);
131  static CharSamp *FromCharDumpFile(unsigned char **raw_data);
132  static CharSamp *FromRawData(int left, int top, int wid, int hgt,
133  unsigned char *data);
134  static CharSamp *FromConComps(ConComp **concomp_array,
135  int strt_concomp, int seg_flags_size,
136  int *seg_flags, bool *left_most,
137  bool *right_most, int word_hgt);
138  static int AuxFeatureCnt() { return (5); }
139  // Return the length of the label string
140  int LabelLen() const { return LabelLen(label32_); }
141  static int LabelLen(const char_32 *label32) {
142  if (label32 == NULL) {
143  return 0;
144  }
145  int len = 0;
146  while (label32[++len] != 0);
147  return len;
148  }
149  private:
150  char_32 * label32_;
151  unsigned short page_;
152  unsigned short left_;
153  unsigned short top_;
154  // top of sample normalized to a word height of 255
155  unsigned short norm_top_;
156  // bottom of sample normalized to a word height of 255
157  unsigned short norm_bottom_;
158  // 255 * ratio of character width to (width + height)
159  unsigned short norm_aspect_ratio_;
160  unsigned short first_char_;
161  unsigned short last_char_;
162 };
163 
164 }
165 
166 #endif // CHAR_SAMP_H
unsigned short Right() const
Definition: char_samp.h:47
unsigned short Page() const
Definition: char_samp.h:50
static int LabelLen(const char_32 *label32)
Definition: char_samp.h:141
unsigned short NormAspectRatio() const
Definition: char_samp.h:53
void SetTop(unsigned short top)
Definition: char_samp.h:66
void SetNormBottom(unsigned short norm_bottom)
Definition: char_samp.h:98
unsigned short Left() const
Definition: char_samp.h:46
static CharSamp * FromCharDumpFile(CachedFile *fp)
Definition: char_samp.cpp:82
static CharSamp * FromConComps(ConComp **concomp_array, int strt_concomp, int seg_flags_size, int *seg_flags, bool *left_most, bool *right_most, int word_hgt)
Definition: char_samp.cpp:457
unsigned short Bottom() const
Definition: char_samp.h:49
static CharSamp * FromRawData(int left, int top, int wid, int hgt, unsigned char *data)
Definition: char_samp.cpp:273
void SetPage(unsigned short page)
Definition: char_samp.h:67
void SetLastChar(unsigned short last_char)
Definition: char_samp.h:107
void SetFirstChar(unsigned short first_char)
Definition: char_samp.h:104
unsigned short hgt_
Definition: bmp_8.h:96
string stringLabel() const
Definition: char_samp.cpp:61
CharSamp * Scale(int wid, int hgt, bool isotropic=true)
Definition: char_samp.cpp:251
char_32 * StrLabel() const
Definition: char_samp.h:62
unsigned short NormBottom() const
Definition: char_samp.h:52
unsigned short wid_
Definition: bmp_8.h:95
static int AuxFeatureCnt()
Definition: char_samp.h:138
unsigned short NormTop() const
Definition: char_samp.h:51
unsigned short LastChar() const
Definition: char_samp.h:55
ConComp ** Segment(int *seg_cnt, bool right_2_left, int max_hist_wnd, int min_con_comp_size) const
Definition: char_samp.cpp:382
unsigned short Top() const
Definition: char_samp.h:48
void SetLabel(char_32 label)
Definition: char_samp.h:68
bool Save2CharDumpFile(FILE *fp) const
Definition: char_samp.cpp:288
CharSamp * Crop()
Definition: char_samp.cpp:348
void SetLeft(unsigned short left)
Definition: char_samp.h:65
void SetNormAspectRatio(unsigned short norm_aspect_ratio)
Definition: char_samp.h:101
bool ComputeFeatures(int conv_grid_size, float *features)
Definition: char_samp.cpp:646
CharSamp * Clone() const
Definition: char_samp.cpp:565
void SetNormTop(unsigned short norm_top)
Definition: char_samp.h:97
signed int char_32
Definition: string_32.h:40
#define NULL
Definition: host.h:144
int LabelLen() const
Definition: char_samp.h:140
unsigned short FirstChar() const
Definition: char_samp.h:54
char_32 Label() const
Definition: char_samp.h:56
void SetLabel(const char_32 *label32)
Definition: char_samp.h:78