All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tesseract::CharSamp Class Reference

#include <char_samp.h>

Inheritance diagram for tesseract::CharSamp:
tesseract::Bmp8

Public Member Functions

 CharSamp ()
 
 CharSamp (int wid, int hgt)
 
 CharSamp (int left, int top, int wid, int hgt)
 
 ~CharSamp ()
 
unsigned short Left () const
 
unsigned short Right () const
 
unsigned short Top () const
 
unsigned short Bottom () const
 
unsigned short Page () const
 
unsigned short NormTop () const
 
unsigned short NormBottom () const
 
unsigned short NormAspectRatio () const
 
unsigned short FirstChar () const
 
unsigned short LastChar () const
 
char_32 Label () const
 
char_32 * StrLabel () const
 
string stringLabel () const
 
void SetLeft (unsigned short left)
 
void SetTop (unsigned short top)
 
void SetPage (unsigned short page)
 
void SetLabel (char_32 label)
 
void SetLabel (const char_32 *label32)
 
void SetLabel (string str)
 
void SetNormTop (unsigned short norm_top)
 
void SetNormBottom (unsigned short norm_bottom)
 
void SetNormAspectRatio (unsigned short norm_aspect_ratio)
 
void SetFirstChar (unsigned short first_char)
 
void SetLastChar (unsigned short last_char)
 
bool Save2CharDumpFile (FILE *fp) const
 
CharSamp * Crop ()
 
ConComp ** Segment (int *seg_cnt, bool right_2_left, int max_hist_wnd, int min_con_comp_size) const
 
CharSamp * Scale (int wid, int hgt, bool isotropic=true)
 
CharSamp * Clone () const
 
bool ComputeFeatures (int conv_grid_size, float *features)
 
int LabelLen () const
 
- Public Member Functions inherited from tesseract::Bmp8
 Bmp8 (unsigned short wid, unsigned short hgt)
 
 ~Bmp8 ()
 
bool Clear ()
 
unsigned short Width () const
 
unsigned short Stride () const
 
unsigned short Height () const
 
unsigned char * RawData () const
 
bool ScaleFrom (Bmp8 *bmp, bool isotropic=true)
 
bool Deslant ()
 
bool HorizontalDeslant (double *deslant_angle)
 
bool IsIdentical (Bmp8 *pBmp) const
 
ConComp ** FindConComps (int *concomp_cnt, int min_size) const
 
float ForegroundRatio () const
 
float MeanHorizontalHistogramEntropy () const
 
int * HorizontalHistogram () const
 

Static Public Member Functions

static CharSamp * FromCharDumpFile (CachedFile *fp)
 
static CharSamp * FromCharDumpFile (FILE *fp)
 
static CharSamp * FromCharDumpFile (unsigned char **raw_data)
 
static CharSamp * FromRawData (int left, int top, int wid, int hgt, unsigned char *data)
 
static CharSamp * FromConComps (ConComp **concomp_array, int strt_concomp, int seg_flags_size, int *seg_flags, bool *left_most, bool *right_most, int word_hgt)
 
static int AuxFeatureCnt ()
 
static int LabelLen (const char_32 *label32)
 
- Static Public Member Functions inherited from tesseract::Bmp8
static Bmp8 * FromCharDumpFile (CachedFile *fp)
 
static Bmp8 * FromCharDumpFile (FILE *fp)
 

Additional Inherited Members

- Protected Member Functions inherited from tesseract::Bmp8
bool LoadFromCharDumpFile (CachedFile *fp)
 
bool LoadFromCharDumpFile (FILE *fp)
 
bool LoadFromCharDumpFile (unsigned char **raw_data)
 
bool LoadFromRawData (unsigned char *data)
 
bool SaveBmp2CharDumpFile (FILE *fp) const
 
bool IsBlankColumn (int x) const
 
bool IsBlankRow (int y) const
 
void Crop (int *xst_src, int *yst_src, int *wid, int *hgt)
 
void Copy (int x, int y, int wid, int hgt, Bmp8 *bmp_dest) const
 
- Protected Attributes inherited from tesseract::Bmp8
unsigned short wid_
 
unsigned short hgt_
 
unsigned char ** line_buff_
 
- Static Protected Attributes inherited from tesseract::Bmp8
static const int kConCompAllocChunk = 16
 
static const int kDeslantAngleCount
 

Detailed Description

Definition at line 39 of file char_samp.h.

Constructor & Destructor Documentation

tesseract::CharSamp::CharSamp ( )

Definition at line 29 of file char_samp.cpp.

30  : Bmp8(0, 0) {
31  left_ = 0;
32  top_ = 0;
33  label32_ = NULL;
34  page_ = -1;
35 }
Bmp8(unsigned short wid, unsigned short hgt)
Definition: bmp_8.cpp:38
#define NULL
Definition: host.h:144
tesseract::CharSamp::CharSamp ( int  wid,
int  hgt 
)

Definition at line 37 of file char_samp.cpp.

38  : Bmp8(wid, hgt) {
39  left_ = 0;
40  top_ = 0;
41  label32_ = NULL;
42  page_ = -1;
43 }
Bmp8(unsigned short wid, unsigned short hgt)
Definition: bmp_8.cpp:38
#define NULL
Definition: host.h:144
tesseract::CharSamp::CharSamp ( int  left,
int  top,
int  wid,
int  hgt 
)

Definition at line 45 of file char_samp.cpp.

46  : Bmp8(wid, hgt)
47  , left_(left)
48  , top_(top) {
49  label32_ = NULL;
50  page_ = -1;
51 }
Bmp8(unsigned short wid, unsigned short hgt)
Definition: bmp_8.cpp:38
#define NULL
Definition: host.h:144
tesseract::CharSamp::~CharSamp ( )

Definition at line 53 of file char_samp.cpp.

53  {
54  if (label32_ != NULL) {
55  delete []label32_;
56  label32_ = NULL;
57  }
58 }
#define NULL
Definition: host.h:144

Member Function Documentation

static int tesseract::CharSamp::AuxFeatureCnt ( )
inline static

Definition at line 138 of file char_samp.h.

138 { return (5); }
unsigned short tesseract::CharSamp::Bottom ( ) const
inline

Definition at line 49 of file char_samp.h.

49 { return top_ + hgt_; }
unsigned short hgt_
Definition: bmp_8.h:96
CharSamp * tesseract::CharSamp::Clone ( ) const

Definition at line 565 of file char_samp.cpp.

565  {
566  // create the cropped char samp
567  CharSamp *samp = new CharSamp(left_, top_, wid_, hgt_);
568  samp->SetLabel(label32_);
569  samp->SetFirstChar(first_char_);
570  samp->SetLastChar(last_char_);
571  samp->SetNormTop(norm_top_);
572  samp->SetNormBottom(norm_bottom_);
573  samp->SetNormAspectRatio(norm_aspect_ratio_);
574  // copy the bitmap to the cropped img
575  Copy(0, 0, wid_, hgt_, samp);
576  return samp;
577 }
void Copy(int x, int y, int wid, int hgt, Bmp8 *bmp_dest) const
Definition: bmp_8.cpp:578
unsigned short hgt_
Definition: bmp_8.h:96
unsigned short wid_
Definition: bmp_8.h:95
bool tesseract::CharSamp::ComputeFeatures ( int  conv_grid_size,
float *  features 
)

Definition at line 646 of file char_samp.cpp.

646  {
647  // Create a scaled BMP
648  CharSamp *scaled_bmp = Scale(conv_grid_size, conv_grid_size);
649  if (!scaled_bmp) {
650  return false;
651  }
652  // prepare input
653  unsigned char *buff = scaled_bmp->RawData();
654  // bitmap features
655  int input;
656  int bmp_size = conv_grid_size * conv_grid_size;
657  for (input = 0; input < bmp_size; input++) {
658  features[input] = 255.0f - (1.0f * buff[input]);
659  }
660  // word context features
661  features[input++] = FirstChar();
662  features[input++] = LastChar();
663  features[input++] = NormTop();
664  features[input++] = NormBottom();
665  features[input++] = NormAspectRatio();
666  delete scaled_bmp;
667  return true;
668 }
unsigned short NormAspectRatio() const
Definition: char_samp.h:53
CharSamp * Scale(int wid, int hgt, bool isotropic=true)
Definition: char_samp.cpp:251
unsigned short NormBottom() const
Definition: char_samp.h:52
unsigned short NormTop() const
Definition: char_samp.h:51
unsigned short LastChar() const
Definition: char_samp.h:55
unsigned short FirstChar() const
Definition: char_samp.h:54
CharSamp * tesseract::CharSamp::Crop ( )

Definition at line 348 of file char_samp.cpp.

348  {
349  // get the dimesions of the cropped img
350  int cropped_left = 0;
351  int cropped_top = 0;
352  int cropped_wid = wid_;
353  int cropped_hgt = hgt_;
354  Bmp8::Crop(&cropped_left, &cropped_top,
355  &cropped_wid, &cropped_hgt);
356 
357  if (cropped_wid == 0 || cropped_hgt == 0) {
358  return NULL;
359  }
360  // create the cropped char samp
361  CharSamp *cropped_samp = new CharSamp(left_ + cropped_left,
362  top_ + cropped_top,
363  cropped_wid, cropped_hgt);
364  cropped_samp->SetLabel(label32_);
365  cropped_samp->SetFirstChar(first_char_);
366  cropped_samp->SetLastChar(last_char_);
367  // the following 3 fields may/should be reset by the calling function
368  // using context information, i.e., location of character box
369  // w.r.t. the word bounding box
370  cropped_samp->SetNormAspectRatio(255 *
371  cropped_wid / (cropped_wid + cropped_hgt));
372  cropped_samp->SetNormTop(0);
373  cropped_samp->SetNormBottom(255);
374 
375  // copy the bitmap to the cropped img
376  Copy(cropped_left, cropped_top, cropped_wid, cropped_hgt, cropped_samp);
377  return cropped_samp;
378 }
void Copy(int x, int y, int wid, int hgt, Bmp8 *bmp_dest) const
Definition: bmp_8.cpp:578
void Crop(int *xst_src, int *yst_src, int *wid, int *hgt)
Definition: bmp_8.cpp:348
unsigned short hgt_
Definition: bmp_8.h:96
unsigned short wid_
Definition: bmp_8.h:95
#define NULL
Definition: host.h:144
unsigned short tesseract::CharSamp::FirstChar ( ) const
inline

Definition at line 54 of file char_samp.h.

54 { return first_char_; }
CharSamp * tesseract::CharSamp::FromCharDumpFile ( CachedFile *  fp)
static

Definition at line 82 of file char_samp.cpp.

82  {
83  unsigned short left;
84  unsigned short top;
85  unsigned short page;
86  unsigned short first_char;
87  unsigned short last_char;
88  unsigned short norm_top;
89  unsigned short norm_bottom;
90  unsigned short norm_aspect_ratio;
91  unsigned int val32;
92 
93  char_32 *label32;
94 
95  // read and check 32 bit marker
96  if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
97  return NULL;
98  }
99  if (val32 != 0xabd0fefe) {
100  return NULL;
101  }
102  // read label length,
103  if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
104  return NULL;
105  }
106  // the label is not null terminated in the file
107  if (val32 > 0 && val32 < MAX_UINT32) {
108  label32 = new char_32[val32 + 1];
109  if (label32 == NULL) {
110  return NULL;
111  }
112  // read label
113  if (fp->Read(label32, val32 * sizeof(*label32)) !=
114  (val32 * sizeof(*label32))) {
115  return NULL;
116  }
117  // null terminate
118  label32[val32] = 0;
119  } else {
120  label32 = NULL;
121  }
122  // read coordinates
123  if (fp->Read(&page, sizeof(page)) != sizeof(page)) {
124  return NULL;
125  }
126  if (fp->Read(&left, sizeof(left)) != sizeof(left)) {
127  return NULL;
128  }
129  if (fp->Read(&top, sizeof(top)) != sizeof(top)) {
130  return NULL;
131  }
132  if (fp->Read(&first_char, sizeof(first_char)) != sizeof(first_char)) {
133  return NULL;
134  }
135  if (fp->Read(&last_char, sizeof(last_char)) != sizeof(last_char)) {
136  return NULL;
137  }
138  if (fp->Read(&norm_top, sizeof(norm_top)) != sizeof(norm_top)) {
139  return NULL;
140  }
141  if (fp->Read(&norm_bottom, sizeof(norm_bottom)) != sizeof(norm_bottom)) {
142  return NULL;
143  }
144  if (fp->Read(&norm_aspect_ratio, sizeof(norm_aspect_ratio)) !=
145  sizeof(norm_aspect_ratio)) {
146  return NULL;
147  }
148  // create the object
149  CharSamp *char_samp = new CharSamp();
150  if (char_samp == NULL) {
151  return NULL;
152  }
153  // init
154  char_samp->label32_ = label32;
155  char_samp->page_ = page;
156  char_samp->left_ = left;
157  char_samp->top_ = top;
158  char_samp->first_char_ = first_char;
159  char_samp->last_char_ = last_char;
160  char_samp->norm_top_ = norm_top;
161  char_samp->norm_bottom_ = norm_bottom;
162  char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
163  // load the Bmp8 part
164  if (char_samp->LoadFromCharDumpFile(fp) == false) {
165  delete char_samp;
166  return NULL;
167  }
168  return char_samp;
169 }
signed int char_32
Definition: string_32.h:40
#define NULL
Definition: host.h:144
#define MAX_UINT32
Definition: host.h:123
CharSamp * tesseract::CharSamp::FromCharDumpFile ( FILE *  fp)
static

Definition at line 172 of file char_samp.cpp.

172  {
173  unsigned short left;
174  unsigned short top;
175  unsigned short page;
176  unsigned short first_char;
177  unsigned short last_char;
178  unsigned short norm_top;
179  unsigned short norm_bottom;
180  unsigned short norm_aspect_ratio;
181  unsigned int val32;
182  char_32 *label32;
183 
184  // read and check 32 bit marker
185  if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
186  return NULL;
187  }
188  if (val32 != 0xabd0fefe) {
189  return NULL;
190  }
191  // read label length,
192  if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
193  return NULL;
194  }
195  // the label is not null terminated in the file
196  if (val32 > 0 && val32 < MAX_UINT32) {
197  label32 = new char_32[val32 + 1];
198  if (label32 == NULL) {
199  return NULL;
200  }
201  // read label
202  if (fread(label32, 1, val32 * sizeof(*label32), fp) !=
203  (val32 * sizeof(*label32))) {
204  delete [] label32;
205  return NULL;
206  }
207  // null terminate
208  label32[val32] = 0;
209  } else {
210  label32 = NULL;
211  }
212  // read coordinates
213  if (fread(&page, 1, sizeof(page), fp) != sizeof(page) ||
214  fread(&left, 1, sizeof(left), fp) != sizeof(left) ||
215  fread(&top, 1, sizeof(top), fp) != sizeof(top) ||
216  fread(&first_char, 1, sizeof(first_char), fp) != sizeof(first_char) ||
217  fread(&last_char, 1, sizeof(last_char), fp) != sizeof(last_char) ||
218  fread(&norm_top, 1, sizeof(norm_top), fp) != sizeof(norm_top) ||
219  fread(&norm_bottom, 1, sizeof(norm_bottom), fp) != sizeof(norm_bottom) ||
220  fread(&norm_aspect_ratio, 1, sizeof(norm_aspect_ratio), fp) !=
221  sizeof(norm_aspect_ratio)) {
222  delete [] label32;
223  return NULL;
224  }
225  // create the object
226  CharSamp *char_samp = new CharSamp();
227  if (char_samp == NULL) {
228  delete [] label32;
229  return NULL;
230  }
231  // init
232  char_samp->label32_ = label32;
233  char_samp->page_ = page;
234  char_samp->left_ = left;
235  char_samp->top_ = top;
236  char_samp->first_char_ = first_char;
237  char_samp->last_char_ = last_char;
238  char_samp->norm_top_ = norm_top;
239  char_samp->norm_bottom_ = norm_bottom;
240  char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
241  // load the Bmp8 part
242  if (char_samp->LoadFromCharDumpFile(fp) == false) {
243  delete char_samp; // It owns label32.
244  return NULL;
245  }
246  return char_samp;
247 }
signed int char_32
Definition: string_32.h:40
#define NULL
Definition: host.h:144
#define MAX_UINT32
Definition: host.h:123
CharSamp * tesseract::CharSamp::FromCharDumpFile ( unsigned char **  raw_data)
static

Definition at line 580 of file char_samp.cpp.

580  {
581  unsigned int val32;
582  char_32 *label32;
583  unsigned char *raw_data = *raw_data_ptr;
584 
585  // read and check 32 bit marker
586  memcpy(&val32, raw_data, sizeof(val32));
587  raw_data += sizeof(val32);
588  if (val32 != 0xabd0fefe) {
589  return NULL;
590  }
591  // read label length,
592  memcpy(&val32, raw_data, sizeof(val32));
593  raw_data += sizeof(val32);
594  // the label is not null terminated in the file
595  if (val32 > 0 && val32 < MAX_UINT32) {
596  label32 = new char_32[val32 + 1];
597  if (label32 == NULL) {
598  return NULL;
599  }
600  // read label
601  memcpy(label32, raw_data, val32 * sizeof(*label32));
602  raw_data += (val32 * sizeof(*label32));
603  // null terminate
604  label32[val32] = 0;
605  } else {
606  label32 = NULL;
607  }
608 
609  // create the object
610  CharSamp *char_samp = new CharSamp();
611  if (char_samp == NULL) {
612  return NULL;
613  }
614 
615  // read coordinates
616  char_samp->label32_ = label32;
617  memcpy(&char_samp->page_, raw_data, sizeof(char_samp->page_));
618  raw_data += sizeof(char_samp->page_);
619  memcpy(&char_samp->left_, raw_data, sizeof(char_samp->left_));
620  raw_data += sizeof(char_samp->left_);
621  memcpy(&char_samp->top_, raw_data, sizeof(char_samp->top_));
622  raw_data += sizeof(char_samp->top_);
623  memcpy(&char_samp->first_char_, raw_data, sizeof(char_samp->first_char_));
624  raw_data += sizeof(char_samp->first_char_);
625  memcpy(&char_samp->last_char_, raw_data, sizeof(char_samp->last_char_));
626  raw_data += sizeof(char_samp->last_char_);
627  memcpy(&char_samp->norm_top_, raw_data, sizeof(char_samp->norm_top_));
628  raw_data += sizeof(char_samp->norm_top_);
629  memcpy(&char_samp->norm_bottom_, raw_data, sizeof(char_samp->norm_bottom_));
630  raw_data += sizeof(char_samp->norm_bottom_);
631  memcpy(&char_samp->norm_aspect_ratio_, raw_data,
632  sizeof(char_samp->norm_aspect_ratio_));
633  raw_data += sizeof(char_samp->norm_aspect_ratio_);
634 
635  // load the Bmp8 part
636  if (char_samp->LoadFromCharDumpFile(&raw_data) == false) {
637  delete char_samp;
638  return NULL;
639  }
640 
641  (*raw_data_ptr) = raw_data;
642  return char_samp;
643 }
signed int char_32
Definition: string_32.h:40
#define NULL
Definition: host.h:144
#define MAX_UINT32
Definition: host.h:123
CharSamp * tesseract::CharSamp::FromConComps ( ConComp **  concomp_array,
int  strt_concomp,
int  seg_flags_size,
int *  seg_flags,
bool *  left_most,
bool *  right_most,
int  word_hgt 
)
static

Definition at line 457 of file char_samp.cpp.

460  {
461  int concomp;
462  int end_concomp;
463  int concomp_cnt = 0;
464  end_concomp = strt_concomp + seg_flags_size;
465  // determine ID range
466  bool once = false;
467  int min_id = -1;
468  int max_id = -1;
469  for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
470  if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
471  if (!once) {
472  min_id = concomp_array[concomp]->ID();
473  max_id = concomp_array[concomp]->ID();
474  once = true;
475  } else {
476  UpdateRange(concomp_array[concomp]->ID(), &min_id, &max_id);
477  }
478  concomp_cnt++;
479  }
480  }
481  if (concomp_cnt < 1 || !once || min_id == -1 || max_id == -1) {
482  return NULL;
483  }
484  // alloc memo for computing leftmost and right most attributes
485  int id_cnt = max_id - min_id + 1;
486  bool *id_exist = new bool[id_cnt];
487  bool *left_most_exist = new bool[id_cnt];
488  bool *right_most_exist = new bool[id_cnt];
489  if (!id_exist || !left_most_exist || !right_most_exist)
490  return NULL;
491  memset(id_exist, 0, id_cnt * sizeof(*id_exist));
492  memset(left_most_exist, 0, id_cnt * sizeof(*left_most_exist));
493  memset(right_most_exist, 0, id_cnt * sizeof(*right_most_exist));
494  // find the dimensions of the charsamp
495  once = false;
496  int left = -1;
497  int right = -1;
498  int top = -1;
499  int bottom = -1;
500  int unq_ids = 0;
501  int unq_left_most = 0;
502  int unq_right_most = 0;
503  for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
504  if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
505  if (!once) {
506  left = concomp_array[concomp]->Left();
507  right = concomp_array[concomp]->Right();
508  top = concomp_array[concomp]->Top();
509  bottom = concomp_array[concomp]->Bottom();
510  once = true;
511  } else {
512  UpdateRange(concomp_array[concomp]->Left(),
513  concomp_array[concomp]->Right(), &left, &right);
514  UpdateRange(concomp_array[concomp]->Top(),
515  concomp_array[concomp]->Bottom(), &top, &bottom);
516  }
517  // count unq ids, unq left most and right mosts ids
518  int concomp_id = concomp_array[concomp]->ID() - min_id;
519  if (!id_exist[concomp_id]) {
520  id_exist[concomp_id] = true;
521  unq_ids++;
522  }
523  if (concomp_array[concomp]->LeftMost()) {
524  if (left_most_exist[concomp_id] == false) {
525  left_most_exist[concomp_id] = true;
526  unq_left_most++;
527  }
528  }
529  if (concomp_array[concomp]->RightMost()) {
530  if (right_most_exist[concomp_id] == false) {
531  right_most_exist[concomp_id] = true;
532  unq_right_most++;
533  }
534  }
535  }
536  }
537  delete []id_exist;
538  delete []left_most_exist;
539  delete []right_most_exist;
540  if (!once || left == -1 || top == -1 || right == -1 || bottom == -1) {
541  return NULL;
542  }
543  (*left_most) = (unq_left_most >= unq_ids);
544  (*right_most) = (unq_right_most >= unq_ids);
545  // create the char sample object
546  CharSamp *samp = new CharSamp(left, top, right - left + 1, bottom - top + 1);
547  if (!samp) {
548  return NULL;
549  }
550 
551  // set the foreground pixels
552  for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
553  if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
554  ConCompPt *pt_ptr = concomp_array[concomp]->Head();
555  while (pt_ptr) {
556  samp->line_buff_[pt_ptr->y() - top][pt_ptr->x() - left] = 0;
557  pt_ptr = pt_ptr->Next();
558  }
559  }
560  }
561  return samp;
562 }
unsigned short Right() const
Definition: char_samp.h:47
unsigned short Left() const
Definition: char_samp.h:46
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:125
unsigned short Bottom() const
Definition: char_samp.h:49
unsigned short Top() const
Definition: char_samp.h:48
#define NULL
Definition: host.h:144
CharSamp * tesseract::CharSamp::FromRawData ( int  left,
int  top,
int  wid,
int  hgt,
unsigned char *  data 
)
static

Definition at line 273 of file char_samp.cpp.

274  {
275  // create the object
276  CharSamp *char_samp = new CharSamp(left, top, wid, hgt);
277  if (char_samp == NULL) {
278  return NULL;
279  }
280  if (char_samp->LoadFromRawData(data) == false) {
281  delete char_samp;
282  return NULL;
283  }
284  return char_samp;
285 }
#define NULL
Definition: host.h:144
char_32 tesseract::CharSamp::Label ( ) const
inline

Definition at line 56 of file char_samp.h.

56  {
57  if (label32_ == NULL || LabelLen() != 1) {
58  return 0;
59  }
60  return label32_[0];
61  }
#define NULL
Definition: host.h:144
int LabelLen() const
Definition: char_samp.h:140
int tesseract::CharSamp::LabelLen ( ) const
inline

Definition at line 140 of file char_samp.h.

140 { return LabelLen(label32_); }
int LabelLen() const
Definition: char_samp.h:140
static int tesseract::CharSamp::LabelLen ( const char_32 *  label32)
inline static

Definition at line 141 of file char_samp.h.

141  {
142  if (label32 == NULL) {
143  return 0;
144  }
145  int len = 0;
146  while (label32[++len] != 0);
147  return len;
148  }
#define NULL
Definition: host.h:144
unsigned short tesseract::CharSamp::LastChar ( ) const
inline

Definition at line 55 of file char_samp.h.

55 { return last_char_; }
unsigned short tesseract::CharSamp::Left ( ) const
inline

Definition at line 46 of file char_samp.h.

46 { return left_; }
unsigned short tesseract::CharSamp::NormAspectRatio ( ) const
inline

Definition at line 53 of file char_samp.h.

53 { return norm_aspect_ratio_; }
unsigned short tesseract::CharSamp::NormBottom ( ) const
inline

Definition at line 52 of file char_samp.h.

52 { return norm_bottom_; }
unsigned short tesseract::CharSamp::NormTop ( ) const
inline

Definition at line 51 of file char_samp.h.

51 { return norm_top_; }
unsigned short tesseract::CharSamp::Page ( ) const
inline

Definition at line 50 of file char_samp.h.

50 { return page_; }
unsigned short tesseract::CharSamp::Right ( ) const
inline

Definition at line 47 of file char_samp.h.

47 { return left_ + wid_; }
unsigned short wid_
Definition: bmp_8.h:95
bool tesseract::CharSamp::Save2CharDumpFile ( FILE *  fp) const

Definition at line 288 of file char_samp.cpp.

288  {
289  unsigned int val32;
290  // write and check 32 bit marker
291  val32 = 0xabd0fefe;
292  if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
293  return false;
294  }
295  // write label length
296  val32 = (label32_ == NULL) ? 0 : LabelLen(label32_);
297  if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
298  return false;
299  }
300  // write label
301  if (label32_ != NULL) {
302  if (fwrite(label32_, 1, val32 * sizeof(*label32_), fp) !=
303  (val32 * sizeof(*label32_))) {
304  return false;
305  }
306  }
307  // write coordinates
308  if (fwrite(&page_, 1, sizeof(page_), fp) != sizeof(page_)) {
309  return false;
310  }
311  if (fwrite(&left_, 1, sizeof(left_), fp) != sizeof(left_)) {
312  return false;
313  }
314  if (fwrite(&top_, 1, sizeof(top_), fp) != sizeof(top_)) {
315  return false;
316  }
317  if (fwrite(&first_char_, 1, sizeof(first_char_), fp) !=
318  sizeof(first_char_)) {
319  return false;
320  }
321  if (fwrite(&last_char_, 1, sizeof(last_char_), fp) != sizeof(last_char_)) {
322  return false;
323  }
324  if (fwrite(&norm_top_, 1, sizeof(norm_top_), fp) != sizeof(norm_top_)) {
325  return false;
326  }
327  if (fwrite(&norm_bottom_, 1, sizeof(norm_bottom_), fp) !=
328  sizeof(norm_bottom_)) {
329  return false;
330  }
331  if (fwrite(&norm_aspect_ratio_, 1, sizeof(norm_aspect_ratio_), fp) !=
332  sizeof(norm_aspect_ratio_)) {
333  return false;
334  }
335  if (SaveBmp2CharDumpFile(fp) == false) {
336  return false;
337  }
338  return true;
339 }
bool SaveBmp2CharDumpFile(FILE *fp) const
Definition: bmp_8.cpp:515
#define NULL
Definition: host.h:144
int LabelLen() const
Definition: char_samp.h:140
CharSamp * tesseract::CharSamp::Scale ( int  wid,
int  hgt,
bool  isotropic = true 
)

Definition at line 251 of file char_samp.cpp.

251  {
252  CharSamp *scaled_samp = new CharSamp(wid, hgt);
253  if (scaled_samp == NULL) {
254  return NULL;
255  }
256  if (scaled_samp->ScaleFrom(this, isotropic) == false) {
257  delete scaled_samp;
258  return NULL;
259  }
260  scaled_samp->left_ = left_;
261  scaled_samp->top_ = top_;
262  scaled_samp->page_ = page_;
263  scaled_samp->SetLabel(label32_);
264  scaled_samp->first_char_ = first_char_;
265  scaled_samp->last_char_ = last_char_;
266  scaled_samp->norm_top_ = norm_top_;
267  scaled_samp->norm_bottom_ = norm_bottom_;
268  scaled_samp->norm_aspect_ratio_ = norm_aspect_ratio_;
269  return scaled_samp;
270 }
#define NULL
Definition: host.h:144
ConComp ** tesseract::CharSamp::Segment ( int *  seg_cnt,
bool  right_2_left,
int  max_hist_wnd,
int  min_con_comp_size 
) const

Definition at line 382 of file char_samp.cpp.

383  {
384  // init
385  (*segment_cnt) = 0;
386  int concomp_cnt = 0;
387  int seg_cnt = 0;
388  // find the concomps of the image
389  ConComp **concomp_array = FindConComps(&concomp_cnt, min_con_comp_size);
390  if (concomp_cnt <= 0 || !concomp_array) {
391  if (concomp_array)
392  delete []concomp_array;
393  return NULL;
394  }
395  ConComp **seg_array = NULL;
396  // segment each concomp further using vertical histogram
397  for (int concomp = 0; concomp < concomp_cnt; concomp++) {
398  int concomp_seg_cnt = 0;
399  // segment the concomp
400  ConComp **concomp_seg_array = NULL;
401  ConComp **concomp_alloc_seg =
402  concomp_array[concomp]->Segment(max_hist_wnd, &concomp_seg_cnt);
403  // no segments, add the whole concomp
404  if (concomp_alloc_seg == NULL) {
405  concomp_seg_cnt = 1;
406  concomp_seg_array = concomp_array + concomp;
407  } else {
408  // delete the original concomp, we no longer need it
409  concomp_seg_array = concomp_alloc_seg;
410  delete concomp_array[concomp];
411  }
412  // add the resulting segments
413  for (int seg_idx = 0; seg_idx < concomp_seg_cnt; seg_idx++) {
414  // too small of a segment: ignore
415  if (concomp_seg_array[seg_idx]->Width() < 2 &&
416  concomp_seg_array[seg_idx]->Height() < 2) {
417  delete concomp_seg_array[seg_idx];
418  } else {
419  // add the new segment
420  // extend the segment array
421  if ((seg_cnt % kConCompAllocChunk) == 0) {
422  ConComp **temp_segm_array =
423  new ConComp *[seg_cnt + kConCompAllocChunk];
424  if (temp_segm_array == NULL) {
425  fprintf(stderr, "Cube ERROR (CharSamp::Segment): could not "
426  "allocate additional connected components\n");
427  delete []concomp_seg_array;
428  delete []concomp_array;
429  delete []seg_array;
430  return NULL;
431  }
432  if (seg_cnt > 0) {
433  memcpy(temp_segm_array, seg_array, seg_cnt * sizeof(*seg_array));
434  delete []seg_array;
435  }
436  seg_array = temp_segm_array;
437  }
438  seg_array[seg_cnt++] = concomp_seg_array[seg_idx];
439  }
440  } // segment
441  if (concomp_alloc_seg != NULL) {
442  delete []concomp_alloc_seg;
443  }
444  } // concomp
445  delete []concomp_array;
446 
447  // sort the concomps from Left2Right or Right2Left, based on the reading order
448  if (seg_cnt > 0 && seg_array != NULL) {
449  qsort(seg_array, seg_cnt, sizeof(*seg_array), right_2_left ?
450  ConComp::Right2LeftComparer : ConComp::Left2RightComparer);
451  }
452  (*segment_cnt) = seg_cnt;
453  return seg_array;
454 }
static const int kConCompAllocChunk
Definition: bmp_8.h:100
static int Left2RightComparer(const void *comp1, const void *comp2)
Definition: con_comp.h:73
unsigned short Width() const
Definition: bmp_8.h:48
static int Right2LeftComparer(const void *comp1, const void *comp2)
Definition: con_comp.h:82
ConComp ** FindConComps(int *concomp_cnt, int min_size) const
Definition: bmp_8.cpp:605
unsigned short Height() const
Definition: bmp_8.h:50
#define NULL
Definition: host.h:144
void tesseract::CharSamp::SetFirstChar ( unsigned short  first_char)
inline

Definition at line 104 of file char_samp.h.

104  {
105  first_char_ = first_char;
106  }
void tesseract::CharSamp::SetLabel ( char_32  label)
inline

Definition at line 68 of file char_samp.h.

68  {
69  if (label32_ != NULL) {
70  delete []label32_;
71  }
72  label32_ = new char_32[2];
73  if (label32_ != NULL) {
74  label32_[0] = label;
75  label32_[1] = 0;
76  }
77  }
signed int char_32
Definition: string_32.h:40
#define NULL
Definition: host.h:144
void tesseract::CharSamp::SetLabel ( const char_32 *  label32)
inline

Definition at line 78 of file char_samp.h.

78  {
79  if (label32_ != NULL) {
80  delete []label32_;
81  label32_ = NULL;
82  }
83  if (label32 != NULL) {
84  // remove any byte order markes if any
85  if (label32[0] == 0xfeff) {
86  label32++;
87  }
88  int len = LabelLen(label32);
89  label32_ = new char_32[len + 1];
90  if (label32_ != NULL) {
91  memcpy(label32_, label32, len * sizeof(*label32));
92  label32_[len] = 0;
93  }
94  }
95  }
signed int char_32
Definition: string_32.h:40
#define NULL
Definition: host.h:144
int LabelLen() const
Definition: char_samp.h:140
void tesseract::CharSamp::SetLabel ( string  str)

Definition at line 71 of file char_samp.cpp.

71  {
72  if (label32_ != NULL) {
73  delete []label32_;
74  label32_ = NULL;
75  }
76  string_32 str32;
77  CubeUtils::UTF8ToUTF32(str.c_str(), &str32);
78  SetLabel(reinterpret_cast<const char_32 *>(str32.c_str()));
79 }
basic_string< char_32 > string_32
Definition: string_32.h:41
static void UTF8ToUTF32(const char *utf8_str, string_32 *str32)
Definition: cube_utils.cpp:266
void SetLabel(char_32 label)
Definition: char_samp.h:68
#define NULL
Definition: host.h:144
void tesseract::CharSamp::SetLastChar ( unsigned short  last_char)
inline

Definition at line 107 of file char_samp.h.

107  {
108  last_char_ = last_char;
109  }
void tesseract::CharSamp::SetLeft ( unsigned short  left)
inline

Definition at line 65 of file char_samp.h.

65 { left_ = left; }
void tesseract::CharSamp::SetNormAspectRatio ( unsigned short  norm_aspect_ratio)
inline

Definition at line 101 of file char_samp.h.

101  {
102  norm_aspect_ratio_ = norm_aspect_ratio;
103  }
void tesseract::CharSamp::SetNormBottom ( unsigned short  norm_bottom)
inline

Definition at line 98 of file char_samp.h.

98  {
99  norm_bottom_ = norm_bottom;
100  }
void tesseract::CharSamp::SetNormTop ( unsigned short  norm_top)
inline

Definition at line 97 of file char_samp.h.

97 { norm_top_ = norm_top; }
void tesseract::CharSamp::SetPage ( unsigned short  page)
inline

Definition at line 67 of file char_samp.h.

67 { page_ = page; }
void tesseract::CharSamp::SetTop ( unsigned short  top)
inline

Definition at line 66 of file char_samp.h.

66 { top_ = top; }
string tesseract::CharSamp::stringLabel ( ) const

Definition at line 61 of file char_samp.cpp.

61  {
62  string str = "";
63  if (label32_ != NULL) {
64  string_32 str32(label32_);
65  CubeUtils::UTF32ToUTF8(str32.c_str(), &str);
66  }
67  return str;
68 }
basic_string< char_32 > string_32
Definition: string_32.h:41
static void UTF32ToUTF8(const char_32 *utf32_str, string *str)
Definition: cube_utils.cpp:282
#define NULL
Definition: host.h:144
char_32* tesseract::CharSamp::StrLabel ( ) const
inline

Definition at line 62 of file char_samp.h.

62 { return label32_; }
unsigned short tesseract::CharSamp::Top ( ) const
inline

Definition at line 48 of file char_samp.h.

48 { return top_; }

The documentation for this class was generated from the following files: