All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tesseract::CharSet Class Reference

#include <char_set.h>

Public Member Functions

 CharSet ()
 
 ~CharSet ()
 
bool SharedUnicharset ()
 
int ClassID (const char_32 *str) const
 
int ClassID (char_32 ch) const
 
int UnicharID (const char_32 *str) const
 
int UnicharID (char_32 ch) const
 
const char_32 * ClassString (int class_id) const
 
int ClassCount () const
 
UNICHARSET * InternalUnicharset ()
 

Static Public Member Functions

static CharSet * Create (TessdataManager *tessdata_manager, UNICHARSET *tess_unicharset)
 

Detailed Description

Definition at line 42 of file char_set.h.

Constructor & Destructor Documentation

tesseract::CharSet::CharSet ( )

Definition at line 28 of file char_set.cpp.

28  {
29  class_cnt_ = 0;
30  class_strings_ = NULL;
31  unicharset_map_ = NULL;
32  init_ = false;
33 
34  // init hash table
35  memset(hash_bin_size_, 0, sizeof(hash_bin_size_));
36 }
#define NULL
Definition: host.h:144
tesseract::CharSet::~CharSet ( )

Definition at line 38 of file char_set.cpp.

38  {
39  if (class_strings_ != NULL) {
40  for (int cls = 0; cls < class_cnt_; cls++) {
41  if (class_strings_[cls] != NULL) {
42  delete class_strings_[cls];
43  }
44  }
45  delete []class_strings_;
46  class_strings_ = NULL;
47  }
48  delete []unicharset_map_;
49 }
#define NULL
Definition: host.h:144

Member Function Documentation

int tesseract::CharSet::ClassCount ( ) const
inline

Definition at line 111 of file char_set.h.

111 { return class_cnt_; }
int tesseract::CharSet::ClassID ( const char_32 * str) const
inline

Definition at line 54 of file char_set.h.

54  {
55  int hash_val = Hash(str);
56  if (hash_bin_size_[hash_val] == 0)
57  return -1;
58  for (int bin = 0; bin < hash_bin_size_[hash_val]; bin++) {
59  if (class_strings_[hash_bins_[hash_val][bin]]->compare(str) == 0)
60  return hash_bins_[hash_val][bin];
61  }
62  return -1;
63  }
int tesseract::CharSet::ClassID ( char_32  ch) const
inline

Definition at line 65 of file char_set.h.

65  {
66  int hash_val = Hash(ch);
67  if (hash_bin_size_[hash_val] == 0)
68  return -1;
69  for (int bin = 0; bin < hash_bin_size_[hash_val]; bin++) {
70  if ((*class_strings_[hash_bins_[hash_val][bin]])[0] == ch &&
71  class_strings_[hash_bins_[hash_val][bin]]->length() == 1) {
72  return hash_bins_[hash_val][bin];
73  }
74  }
75  return -1;
76  }
const char_32* tesseract::CharSet::ClassString ( int  class_id) const
inline

Definition at line 104 of file char_set.h.

104  {
105  if (class_id < 0 || class_id >= class_cnt_) {
106  return NULL;
107  }
108  return reinterpret_cast<const char_32 *>(class_strings_[class_id]->c_str());
109  }
signed int char_32
Definition: string_32.h:40
#define NULL
Definition: host.h:144
CharSet * tesseract::CharSet::Create ( TessdataManager * tessdata_manager,
UNICHARSET * tess_unicharset 
)
static

Definition at line 54 of file char_set.cpp.

55  {
56  CharSet *char_set = new CharSet();
57  if (char_set == NULL) {
58  return NULL;
59  }
60 
61  // First look for Cube's unicharset; if not there, use tesseract's
62  bool cube_unicharset_exists;
63  if (!(cube_unicharset_exists =
64  tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET)) &&
65  !tessdata_manager->SeekToStart(TESSDATA_UNICHARSET)) {
66  fprintf(stderr, "Cube ERROR (CharSet::Create): could not find "
67  "either cube or tesseract unicharset\n");
68  return NULL;
69  }
70  FILE *charset_fp = tessdata_manager->GetDataFilePtr();
71  if (!charset_fp) {
72  fprintf(stderr, "Cube ERROR (CharSet::Create): could not load "
73  "a unicharset\n");
74  return NULL;
75  }
76 
77  // If we found a cube unicharset separate from tesseract's, load it and
78  // map its unichars to tesseract's; if only one unicharset exists,
79  // just load it.
80  bool loaded;
81  if (cube_unicharset_exists) {
82  char_set->cube_unicharset_.load_from_file(charset_fp);
83  loaded = tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET);
84  loaded = loaded && char_set->LoadSupportedCharList(
85  tessdata_manager->GetDataFilePtr(), tess_unicharset);
86  char_set->unicharset_ = &char_set->cube_unicharset_;
87  } else {
88  loaded = char_set->LoadSupportedCharList(charset_fp, NULL);
89  char_set->unicharset_ = tess_unicharset;
90  }
91  if (!loaded) {
92  delete char_set;
93  return NULL;
94  }
95 
96  char_set->init_ = true;
97  return char_set;
98 }
#define NULL
Definition: host.h:144
UNICHARSET* tesseract::CharSet::InternalUnicharset ( )
inline

Definition at line 121 of file char_set.h.

121 { return unicharset_; }
bool tesseract::CharSet::SharedUnicharset ( )
inline

Definition at line 48 of file char_set.h.

48 { return (unicharset_map_ == NULL); }
#define NULL
Definition: host.h:144
int tesseract::CharSet::UnicharID ( const char_32 * str) const
inline

Definition at line 80 of file char_set.h.

80  {
81  int class_id = ClassID(str);
82  if (class_id == INVALID_UNICHAR_ID)
83  return INVALID_UNICHAR_ID;
84  int unichar_id;
85  if (unicharset_map_)
86  unichar_id = unicharset_map_[class_id];
87  else
88  unichar_id = class_id;
89  return unichar_id;
90  }
int ClassID(const char_32 *str) const
Definition: char_set.h:54
int tesseract::CharSet::UnicharID ( char_32  ch) const
inline

Definition at line 92 of file char_set.h.

92  {
93  int class_id = ClassID(ch);
94  if (class_id == INVALID_UNICHAR_ID)
95  return INVALID_UNICHAR_ID;
96  int unichar_id;
97  if (unicharset_map_)
98  unichar_id = unicharset_map_[class_id];
99  else
100  unichar_id = class_id;
101  return unichar_id;
102  }
int ClassID(char_32 ch) const
Definition: char_set.h:65

The documentation for this class was generated from the following files: