All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
char_samp.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: char_samp.cpp
3  * Description: Implementation of a Character Bitmap Sample Class
4  * Author: Ahmad Abdulkader
5  * Created: 2007
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include <string.h>
21 #include <string>
22 #include "char_samp.h"
23 #include "cube_utils.h"
24 
25 namespace tesseract {
26 
27 #define MAX_LINE_LEN 1024
28 
30  : Bmp8(0, 0) {
31  left_ = 0;
32  top_ = 0;
33  label32_ = NULL;
34  page_ = -1;
35 }
36 
37 CharSamp::CharSamp(int wid, int hgt)
38  : Bmp8(wid, hgt) {
39  left_ = 0;
40  top_ = 0;
41  label32_ = NULL;
42  page_ = -1;
43 }
44 
45 CharSamp::CharSamp(int left, int top, int wid, int hgt)
46  : Bmp8(wid, hgt)
47  , left_(left)
48  , top_(top) {
49  label32_ = NULL;
50  page_ = -1;
51 }
52 
54  if (label32_ != NULL) {
55  delete []label32_;
56  label32_ = NULL;
57  }
58 }
59 
60 // returns a UTF-8 version of the string label
61 string CharSamp::stringLabel() const {
62  string str = "";
63  if (label32_ != NULL) {
64  string_32 str32(label32_);
65  CubeUtils::UTF32ToUTF8(str32.c_str(), &str);
66  }
67  return str;
68 }
69 
70 // set a the string label using a UTF encoded string
71 void CharSamp::SetLabel(string str) {
72  if (label32_ != NULL) {
73  delete []label32_;
74  label32_ = NULL;
75  }
76  string_32 str32;
77  CubeUtils::UTF8ToUTF32(str.c_str(), &str32);
78  SetLabel(reinterpret_cast<const char_32 *>(str32.c_str()));
79 }
80 
81 // creates a CharSamp object from file
83  unsigned short left;
84  unsigned short top;
85  unsigned short page;
86  unsigned short first_char;
87  unsigned short last_char;
88  unsigned short norm_top;
89  unsigned short norm_bottom;
90  unsigned short norm_aspect_ratio;
91  unsigned int val32;
92 
93  char_32 *label32;
94 
95  // read and check 32 bit marker
96  if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
97  return NULL;
98  }
99  if (val32 != 0xabd0fefe) {
100  return NULL;
101  }
102  // read label length,
103  if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
104  return NULL;
105  }
106  // the label is not null terminated in the file
107  if (val32 > 0 && val32 < MAX_UINT32) {
108  label32 = new char_32[val32 + 1];
109  if (label32 == NULL) {
110  return NULL;
111  }
112  // read label
113  if (fp->Read(label32, val32 * sizeof(*label32)) !=
114  (val32 * sizeof(*label32))) {
115  return NULL;
116  }
117  // null terminate
118  label32[val32] = 0;
119  } else {
120  label32 = NULL;
121  }
122  // read coordinates
123  if (fp->Read(&page, sizeof(page)) != sizeof(page)) {
124  return NULL;
125  }
126  if (fp->Read(&left, sizeof(left)) != sizeof(left)) {
127  return NULL;
128  }
129  if (fp->Read(&top, sizeof(top)) != sizeof(top)) {
130  return NULL;
131  }
132  if (fp->Read(&first_char, sizeof(first_char)) != sizeof(first_char)) {
133  return NULL;
134  }
135  if (fp->Read(&last_char, sizeof(last_char)) != sizeof(last_char)) {
136  return NULL;
137  }
138  if (fp->Read(&norm_top, sizeof(norm_top)) != sizeof(norm_top)) {
139  return NULL;
140  }
141  if (fp->Read(&norm_bottom, sizeof(norm_bottom)) != sizeof(norm_bottom)) {
142  return NULL;
143  }
144  if (fp->Read(&norm_aspect_ratio, sizeof(norm_aspect_ratio)) !=
145  sizeof(norm_aspect_ratio)) {
146  return NULL;
147  }
148  // create the object
149  CharSamp *char_samp = new CharSamp();
150  if (char_samp == NULL) {
151  return NULL;
152  }
153  // init
154  char_samp->label32_ = label32;
155  char_samp->page_ = page;
156  char_samp->left_ = left;
157  char_samp->top_ = top;
158  char_samp->first_char_ = first_char;
159  char_samp->last_char_ = last_char;
160  char_samp->norm_top_ = norm_top;
161  char_samp->norm_bottom_ = norm_bottom;
162  char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
163  // load the Bmp8 part
164  if (char_samp->LoadFromCharDumpFile(fp) == false) {
165  delete char_samp;
166  return NULL;
167  }
168  return char_samp;
169 }
170 
171 // Load a Char Samp from a dump file
173  unsigned short left;
174  unsigned short top;
175  unsigned short page;
176  unsigned short first_char;
177  unsigned short last_char;
178  unsigned short norm_top;
179  unsigned short norm_bottom;
180  unsigned short norm_aspect_ratio;
181  unsigned int val32;
182  char_32 *label32;
183 
184  // read and check 32 bit marker
185  if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
186  return NULL;
187  }
188  if (val32 != 0xabd0fefe) {
189  return NULL;
190  }
191  // read label length,
192  if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
193  return NULL;
194  }
195  // the label is not null terminated in the file
196  if (val32 > 0 && val32 < MAX_UINT32) {
197  label32 = new char_32[val32 + 1];
198  if (label32 == NULL) {
199  return NULL;
200  }
201  // read label
202  if (fread(label32, 1, val32 * sizeof(*label32), fp) !=
203  (val32 * sizeof(*label32))) {
204  delete [] label32;
205  return NULL;
206  }
207  // null terminate
208  label32[val32] = 0;
209  } else {
210  label32 = NULL;
211  }
212  // read coordinates
213  if (fread(&page, 1, sizeof(page), fp) != sizeof(page) ||
214  fread(&left, 1, sizeof(left), fp) != sizeof(left) ||
215  fread(&top, 1, sizeof(top), fp) != sizeof(top) ||
216  fread(&first_char, 1, sizeof(first_char), fp) != sizeof(first_char) ||
217  fread(&last_char, 1, sizeof(last_char), fp) != sizeof(last_char) ||
218  fread(&norm_top, 1, sizeof(norm_top), fp) != sizeof(norm_top) ||
219  fread(&norm_bottom, 1, sizeof(norm_bottom), fp) != sizeof(norm_bottom) ||
220  fread(&norm_aspect_ratio, 1, sizeof(norm_aspect_ratio), fp) !=
221  sizeof(norm_aspect_ratio)) {
222  delete [] label32;
223  return NULL;
224  }
225  // create the object
226  CharSamp *char_samp = new CharSamp();
227  if (char_samp == NULL) {
228  delete [] label32;
229  return NULL;
230  }
231  // init
232  char_samp->label32_ = label32;
233  char_samp->page_ = page;
234  char_samp->left_ = left;
235  char_samp->top_ = top;
236  char_samp->first_char_ = first_char;
237  char_samp->last_char_ = last_char;
238  char_samp->norm_top_ = norm_top;
239  char_samp->norm_bottom_ = norm_bottom;
240  char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
241  // load the Bmp8 part
242  if (char_samp->LoadFromCharDumpFile(fp) == false) {
243  delete char_samp; // It owns label32.
244  return NULL;
245  }
246  return char_samp;
247 }
248 
249 // returns a copy of the charsamp that is scaled to the
250 // specified width and height
251 CharSamp *CharSamp::Scale(int wid, int hgt, bool isotropic) {
252  CharSamp *scaled_samp = new CharSamp(wid, hgt);
253  if (scaled_samp == NULL) {
254  return NULL;
255  }
256  if (scaled_samp->ScaleFrom(this, isotropic) == false) {
257  delete scaled_samp;
258  return NULL;
259  }
260  scaled_samp->left_ = left_;
261  scaled_samp->top_ = top_;
262  scaled_samp->page_ = page_;
263  scaled_samp->SetLabel(label32_);
264  scaled_samp->first_char_ = first_char_;
265  scaled_samp->last_char_ = last_char_;
266  scaled_samp->norm_top_ = norm_top_;
267  scaled_samp->norm_bottom_ = norm_bottom_;
268  scaled_samp->norm_aspect_ratio_ = norm_aspect_ratio_;
269  return scaled_samp;
270 }
271 
272 // Load a Char Samp from a dump file
273 CharSamp *CharSamp::FromRawData(int left, int top, int wid, int hgt,
274  unsigned char *data) {
275  // create the object
276  CharSamp *char_samp = new CharSamp(left, top, wid, hgt);
277  if (char_samp == NULL) {
278  return NULL;
279  }
280  if (char_samp->LoadFromRawData(data) == false) {
281  delete char_samp;
282  return NULL;
283  }
284  return char_samp;
285 }
286 
287 // Saves the charsamp to a dump file
288 bool CharSamp::Save2CharDumpFile(FILE *fp) const {
289  unsigned int val32;
290  // write and check 32 bit marker
291  val32 = 0xabd0fefe;
292  if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
293  return false;
294  }
295  // write label length
296  val32 = (label32_ == NULL) ? 0 : LabelLen(label32_);
297  if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
298  return false;
299  }
300  // write label
301  if (label32_ != NULL) {
302  if (fwrite(label32_, 1, val32 * sizeof(*label32_), fp) !=
303  (val32 * sizeof(*label32_))) {
304  return false;
305  }
306  }
307  // write coordinates
308  if (fwrite(&page_, 1, sizeof(page_), fp) != sizeof(page_)) {
309  return false;
310  }
311  if (fwrite(&left_, 1, sizeof(left_), fp) != sizeof(left_)) {
312  return false;
313  }
314  if (fwrite(&top_, 1, sizeof(top_), fp) != sizeof(top_)) {
315  return false;
316  }
317  if (fwrite(&first_char_, 1, sizeof(first_char_), fp) !=
318  sizeof(first_char_)) {
319  return false;
320  }
321  if (fwrite(&last_char_, 1, sizeof(last_char_), fp) != sizeof(last_char_)) {
322  return false;
323  }
324  if (fwrite(&norm_top_, 1, sizeof(norm_top_), fp) != sizeof(norm_top_)) {
325  return false;
326  }
327  if (fwrite(&norm_bottom_, 1, sizeof(norm_bottom_), fp) !=
328  sizeof(norm_bottom_)) {
329  return false;
330  }
331  if (fwrite(&norm_aspect_ratio_, 1, sizeof(norm_aspect_ratio_), fp) !=
332  sizeof(norm_aspect_ratio_)) {
333  return false;
334  }
335  if (SaveBmp2CharDumpFile(fp) == false) {
336  return false;
337  }
338  return true;
339 }
340 
341 // Crop the char samp such that there are no white spaces on any side.
342 // The norm_top_ and norm_bottom_ fields are the character top/bottom
343 // with respect to whatever context the character is being recognized
344 // in (e.g. word bounding box) normalized to a standard size of
345 // 255. Here they default to 0 and 255 (word box boundaries), but
346 // since they are context dependent, they may need to be reset by the
347 // calling function.
349  // get the dimesions of the cropped img
350  int cropped_left = 0;
351  int cropped_top = 0;
352  int cropped_wid = wid_;
353  int cropped_hgt = hgt_;
354  Bmp8::Crop(&cropped_left, &cropped_top,
355  &cropped_wid, &cropped_hgt);
356 
357  if (cropped_wid == 0 || cropped_hgt == 0) {
358  return NULL;
359  }
360  // create the cropped char samp
361  CharSamp *cropped_samp = new CharSamp(left_ + cropped_left,
362  top_ + cropped_top,
363  cropped_wid, cropped_hgt);
364  cropped_samp->SetLabel(label32_);
365  cropped_samp->SetFirstChar(first_char_);
366  cropped_samp->SetLastChar(last_char_);
367  // the following 3 fields may/should be reset by the calling function
368  // using context information, i.e., location of character box
369  // w.r.t. the word bounding box
370  cropped_samp->SetNormAspectRatio(255 *
371  cropped_wid / (cropped_wid + cropped_hgt));
372  cropped_samp->SetNormTop(0);
373  cropped_samp->SetNormBottom(255);
374 
375  // copy the bitmap to the cropped img
376  Copy(cropped_left, cropped_top, cropped_wid, cropped_hgt, cropped_samp);
377  return cropped_samp;
378 }
379 
380 // segment the char samp to connected components
381 // based on contiguity and vertical pixel density histogram
382 ConComp **CharSamp::Segment(int *segment_cnt, bool right_2_left,
383  int max_hist_wnd, int min_con_comp_size) const {
384  // init
385  (*segment_cnt) = 0;
386  int concomp_cnt = 0;
387  int seg_cnt = 0;
388  // find the concomps of the image
389  ConComp **concomp_array = FindConComps(&concomp_cnt, min_con_comp_size);
390  if (concomp_cnt <= 0 || !concomp_array) {
391  if (concomp_array)
392  delete []concomp_array;
393  return NULL;
394  }
395  ConComp **seg_array = NULL;
396  // segment each concomp further using vertical histogram
397  for (int concomp = 0; concomp < concomp_cnt; concomp++) {
398  int concomp_seg_cnt = 0;
399  // segment the concomp
400  ConComp **concomp_seg_array = NULL;
401  ConComp **concomp_alloc_seg =
402  concomp_array[concomp]->Segment(max_hist_wnd, &concomp_seg_cnt);
403  // no segments, add the whole concomp
404  if (concomp_alloc_seg == NULL) {
405  concomp_seg_cnt = 1;
406  concomp_seg_array = concomp_array + concomp;
407  } else {
408  // delete the original concomp, we no longer need it
409  concomp_seg_array = concomp_alloc_seg;
410  delete concomp_array[concomp];
411  }
412  // add the resulting segments
413  for (int seg_idx = 0; seg_idx < concomp_seg_cnt; seg_idx++) {
414  // too small of a segment: ignore
415  if (concomp_seg_array[seg_idx]->Width() < 2 &&
416  concomp_seg_array[seg_idx]->Height() < 2) {
417  delete concomp_seg_array[seg_idx];
418  } else {
419  // add the new segment
420  // extend the segment array
421  if ((seg_cnt % kConCompAllocChunk) == 0) {
422  ConComp **temp_segm_array =
423  new ConComp *[seg_cnt + kConCompAllocChunk];
424  if (temp_segm_array == NULL) {
425  fprintf(stderr, "Cube ERROR (CharSamp::Segment): could not "
426  "allocate additional connected components\n");
427  delete []concomp_seg_array;
428  delete []concomp_array;
429  delete []seg_array;
430  return NULL;
431  }
432  if (seg_cnt > 0) {
433  memcpy(temp_segm_array, seg_array, seg_cnt * sizeof(*seg_array));
434  delete []seg_array;
435  }
436  seg_array = temp_segm_array;
437  }
438  seg_array[seg_cnt++] = concomp_seg_array[seg_idx];
439  }
440  } // segment
441  if (concomp_alloc_seg != NULL) {
442  delete []concomp_alloc_seg;
443  }
444  } // concomp
445  delete []concomp_array;
446 
447  // sort the concomps from Left2Right or Right2Left, based on the reading order
448  if (seg_cnt > 0 && seg_array != NULL) {
449  qsort(seg_array, seg_cnt, sizeof(*seg_array), right_2_left ?
451  }
452  (*segment_cnt) = seg_cnt;
453  return seg_array;
454 }
455 
456 // builds a char samp from a set of connected components
457 CharSamp *CharSamp::FromConComps(ConComp **concomp_array, int strt_concomp,
458  int seg_flags_size, int *seg_flags,
459  bool *left_most, bool *right_most,
460  int word_hgt) {
461  int concomp;
462  int end_concomp;
463  int concomp_cnt = 0;
464  end_concomp = strt_concomp + seg_flags_size;
465  // determine ID range
466  bool once = false;
467  int min_id = -1;
468  int max_id = -1;
469  for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
470  if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
471  if (!once) {
472  min_id = concomp_array[concomp]->ID();
473  max_id = concomp_array[concomp]->ID();
474  once = true;
475  } else {
476  UpdateRange(concomp_array[concomp]->ID(), &min_id, &max_id);
477  }
478  concomp_cnt++;
479  }
480  }
481  if (concomp_cnt < 1 || !once || min_id == -1 || max_id == -1) {
482  return NULL;
483  }
484  // alloc memo for computing leftmost and right most attributes
485  int id_cnt = max_id - min_id + 1;
486  bool *id_exist = new bool[id_cnt];
487  bool *left_most_exist = new bool[id_cnt];
488  bool *right_most_exist = new bool[id_cnt];
489  if (!id_exist || !left_most_exist || !right_most_exist)
490  return NULL;
491  memset(id_exist, 0, id_cnt * sizeof(*id_exist));
492  memset(left_most_exist, 0, id_cnt * sizeof(*left_most_exist));
493  memset(right_most_exist, 0, id_cnt * sizeof(*right_most_exist));
494  // find the dimensions of the charsamp
495  once = false;
496  int left = -1;
497  int right = -1;
498  int top = -1;
499  int bottom = -1;
500  int unq_ids = 0;
501  int unq_left_most = 0;
502  int unq_right_most = 0;
503  for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
504  if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
505  if (!once) {
506  left = concomp_array[concomp]->Left();
507  right = concomp_array[concomp]->Right();
508  top = concomp_array[concomp]->Top();
509  bottom = concomp_array[concomp]->Bottom();
510  once = true;
511  } else {
512  UpdateRange(concomp_array[concomp]->Left(),
513  concomp_array[concomp]->Right(), &left, &right);
514  UpdateRange(concomp_array[concomp]->Top(),
515  concomp_array[concomp]->Bottom(), &top, &bottom);
516  }
517  // count unq ids, unq left most and right mosts ids
518  int concomp_id = concomp_array[concomp]->ID() - min_id;
519  if (!id_exist[concomp_id]) {
520  id_exist[concomp_id] = true;
521  unq_ids++;
522  }
523  if (concomp_array[concomp]->LeftMost()) {
524  if (left_most_exist[concomp_id] == false) {
525  left_most_exist[concomp_id] = true;
526  unq_left_most++;
527  }
528  }
529  if (concomp_array[concomp]->RightMost()) {
530  if (right_most_exist[concomp_id] == false) {
531  right_most_exist[concomp_id] = true;
532  unq_right_most++;
533  }
534  }
535  }
536  }
537  delete []id_exist;
538  delete []left_most_exist;
539  delete []right_most_exist;
540  if (!once || left == -1 || top == -1 || right == -1 || bottom == -1) {
541  return NULL;
542  }
543  (*left_most) = (unq_left_most >= unq_ids);
544  (*right_most) = (unq_right_most >= unq_ids);
545  // create the char sample object
546  CharSamp *samp = new CharSamp(left, top, right - left + 1, bottom - top + 1);
547  if (!samp) {
548  return NULL;
549  }
550 
551  // set the foreground pixels
552  for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
553  if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
554  ConCompPt *pt_ptr = concomp_array[concomp]->Head();
555  while (pt_ptr) {
556  samp->line_buff_[pt_ptr->y() - top][pt_ptr->x() - left] = 0;
557  pt_ptr = pt_ptr->Next();
558  }
559  }
560  }
561  return samp;
562 }
563 
564 // clones the object
566  // create the cropped char samp
567  CharSamp *samp = new CharSamp(left_, top_, wid_, hgt_);
568  samp->SetLabel(label32_);
569  samp->SetFirstChar(first_char_);
570  samp->SetLastChar(last_char_);
571  samp->SetNormTop(norm_top_);
572  samp->SetNormBottom(norm_bottom_);
573  samp->SetNormAspectRatio(norm_aspect_ratio_);
574  // copy the bitmap to the cropped img
575  Copy(0, 0, wid_, hgt_, samp);
576  return samp;
577 }
578 
579 // Load a Char Samp from a dump file
580 CharSamp *CharSamp::FromCharDumpFile(unsigned char **raw_data_ptr) {
581  unsigned int val32;
582  char_32 *label32;
583  unsigned char *raw_data = *raw_data_ptr;
584 
585  // read and check 32 bit marker
586  memcpy(&val32, raw_data, sizeof(val32));
587  raw_data += sizeof(val32);
588  if (val32 != 0xabd0fefe) {
589  return NULL;
590  }
591  // read label length,
592  memcpy(&val32, raw_data, sizeof(val32));
593  raw_data += sizeof(val32);
594  // the label is not null terminated in the file
595  if (val32 > 0 && val32 < MAX_UINT32) {
596  label32 = new char_32[val32 + 1];
597  if (label32 == NULL) {
598  return NULL;
599  }
600  // read label
601  memcpy(label32, raw_data, val32 * sizeof(*label32));
602  raw_data += (val32 * sizeof(*label32));
603  // null terminate
604  label32[val32] = 0;
605  } else {
606  label32 = NULL;
607  }
608 
609  // create the object
610  CharSamp *char_samp = new CharSamp();
611  if (char_samp == NULL) {
612  return NULL;
613  }
614 
615  // read coordinates
616  char_samp->label32_ = label32;
617  memcpy(&char_samp->page_, raw_data, sizeof(char_samp->page_));
618  raw_data += sizeof(char_samp->page_);
619  memcpy(&char_samp->left_, raw_data, sizeof(char_samp->left_));
620  raw_data += sizeof(char_samp->left_);
621  memcpy(&char_samp->top_, raw_data, sizeof(char_samp->top_));
622  raw_data += sizeof(char_samp->top_);
623  memcpy(&char_samp->first_char_, raw_data, sizeof(char_samp->first_char_));
624  raw_data += sizeof(char_samp->first_char_);
625  memcpy(&char_samp->last_char_, raw_data, sizeof(char_samp->last_char_));
626  raw_data += sizeof(char_samp->last_char_);
627  memcpy(&char_samp->norm_top_, raw_data, sizeof(char_samp->norm_top_));
628  raw_data += sizeof(char_samp->norm_top_);
629  memcpy(&char_samp->norm_bottom_, raw_data, sizeof(char_samp->norm_bottom_));
630  raw_data += sizeof(char_samp->norm_bottom_);
631  memcpy(&char_samp->norm_aspect_ratio_, raw_data,
632  sizeof(char_samp->norm_aspect_ratio_));
633  raw_data += sizeof(char_samp->norm_aspect_ratio_);
634 
635  // load the Bmp8 part
636  if (char_samp->LoadFromCharDumpFile(&raw_data) == false) {
637  delete char_samp;
638  return NULL;
639  }
640 
641  (*raw_data_ptr) = raw_data;
642  return char_samp;
643 }
644 
645 // computes the features corresponding to the char sample
646 bool CharSamp::ComputeFeatures(int conv_grid_size, float *features) {
647  // Create a scaled BMP
648  CharSamp *scaled_bmp = Scale(conv_grid_size, conv_grid_size);
649  if (!scaled_bmp) {
650  return false;
651  }
652  // prepare input
653  unsigned char *buff = scaled_bmp->RawData();
654  // bitmap features
655  int input;
656  int bmp_size = conv_grid_size * conv_grid_size;
657  for (input = 0; input < bmp_size; input++) {
658  features[input] = 255.0f - (1.0f * buff[input]);
659  }
660  // word context features
661  features[input++] = FirstChar();
662  features[input++] = LastChar();
663  features[input++] = NormTop();
664  features[input++] = NormBottom();
665  features[input++] = NormAspectRatio();
666  delete scaled_bmp;
667  return true;
668 }
669 } // namespace tesseract
unsigned short Right() const
Definition: char_samp.h:47
bool LoadFromCharDumpFile(CachedFile *fp)
Definition: bmp_8.cpp:137
void Copy(int x, int y, int wid, int hgt, Bmp8 *bmp_dest) const
Definition: bmp_8.cpp:578
unsigned short NormAspectRatio() const
Definition: char_samp.h:53
int Top() const
Definition: con_comp.h:66
static const int kConCompAllocChunk
Definition: bmp_8.h:100
void SetNormBottom(unsigned short norm_bottom)
Definition: char_samp.h:98
unsigned char * RawData() const
Definition: bmp_8.h:51
ConCompPt * Head()
Definition: con_comp.h:64
bool LoadFromRawData(unsigned char *data)
Definition: bmp_8.cpp:504
basic_string< char_32 > string_32
Definition: string_32.h:41
unsigned short Left() const
Definition: char_samp.h:46
static CharSamp * FromCharDumpFile(CachedFile *fp)
Definition: char_samp.cpp:82
static int Left2RightComparer(const void *comp1, const void *comp2)
Definition: con_comp.h:73
static CharSamp * FromConComps(ConComp **concomp_array, int strt_concomp, int seg_flags_size, int *seg_flags, bool *left_most, bool *right_most, int word_hgt)
Definition: char_samp.cpp:457
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:125
unsigned short Bottom() const
Definition: char_samp.h:49
unsigned short Width() const
Definition: bmp_8.h:48
static CharSamp * FromRawData(int left, int top, int wid, int hgt, unsigned char *data)
Definition: char_samp.cpp:273
static int Right2LeftComparer(const void *comp1, const void *comp2)
Definition: con_comp.h:82
ConComp ** FindConComps(int *concomp_cnt, int min_size) const
Definition: bmp_8.cpp:605
void SetLastChar(unsigned short last_char)
Definition: char_samp.h:107
static void UTF8ToUTF32(const char *utf8_str, string_32 *str32)
Definition: cube_utils.cpp:266
void SetFirstChar(unsigned short first_char)
Definition: char_samp.h:104
void Crop(int *xst_src, int *yst_src, int *wid, int *hgt)
Definition: bmp_8.cpp:348
unsigned short hgt_
Definition: bmp_8.h:96
bool SaveBmp2CharDumpFile(FILE *fp) const
Definition: bmp_8.cpp:515
string stringLabel() const
Definition: char_samp.cpp:61
CharSamp * Scale(int wid, int hgt, bool isotropic=true)
Definition: char_samp.cpp:251
int Left() const
Definition: con_comp.h:65
int ID() const
Definition: con_comp.h:94
unsigned short NormBottom() const
Definition: char_samp.h:52
unsigned short wid_
Definition: bmp_8.h:95
unsigned short NormTop() const
Definition: char_samp.h:51
unsigned short LastChar() const
Definition: char_samp.h:55
ConComp ** Segment(int *seg_cnt, bool right_2_left, int max_hist_wnd, int min_con_comp_size) const
Definition: char_samp.cpp:382
unsigned short Top() const
Definition: char_samp.h:48
void SetLabel(char_32 label)
Definition: char_samp.h:68
bool Save2CharDumpFile(FILE *fp) const
Definition: char_samp.cpp:288
unsigned char ** line_buff_
Definition: bmp_8.h:98
CharSamp * Crop()
Definition: char_samp.cpp:348
ConComp ** Segment(int max_hist_wnd, int *concomp_cnt)
Definition: con_comp.cpp:189
static void UTF32ToUTF8(const char_32 *utf32_str, string *str)
Definition: cube_utils.cpp:282
void SetNormAspectRatio(unsigned short norm_aspect_ratio)
Definition: char_samp.h:101
bool ScaleFrom(Bmp8 *bmp, bool isotropic=true)
Definition: bmp_8.cpp:393
bool ComputeFeatures(int conv_grid_size, float *features)
Definition: char_samp.cpp:646
unsigned short Height() const
Definition: bmp_8.h:50
CharSamp * Clone() const
Definition: char_samp.cpp:565
void SetNormTop(unsigned short norm_top)
Definition: char_samp.h:97
signed int char_32
Definition: string_32.h:40
int Bottom() const
Definition: con_comp.h:68
#define NULL
Definition: host.h:144
#define MAX_UINT32
Definition: host.h:123
int LabelLen() const
Definition: char_samp.h:140
int Read(void *read_buff, int bytes)
Definition: cached_file.cpp:82
unsigned short FirstChar() const
Definition: char_samp.h:54
ConCompPt * Next()
Definition: con_comp.h:50
int Right() const
Definition: con_comp.h:67