All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
tesseract::Dawg Class Reference (abstract)

#include <dawg.h>

Inheritance diagram for tesseract::Dawg:
tesseract::SquishedDawg tesseract::Trie

Public Member Functions

DawgType type () const
 
const STRING & lang () const
 
PermuterType permuter () const
 
virtual ~Dawg ()
 
bool word_in_dawg (const WERD_CHOICE &word) const
 Returns true if the given word is in the Dawg. More...
 
bool prefix_in_dawg (const WERD_CHOICE &prefix, bool requires_complete) const
 
int check_for_words (const char *filename, const UNICHARSET &unicharset, bool enable_wildcard) const
 
void iterate_words (const UNICHARSET &unicharset, TessCallback1< const WERD_CHOICE * > *cb) const
 
void iterate_words (const UNICHARSET &unicharset, TessCallback1< const char * > *cb) const
 
virtual EDGE_REF edge_char_of (NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const =0
 Returns the edge that corresponds to the letter out of this node. More...
 
virtual void unichar_ids_of (NODE_REF node, NodeChildVector *vec, bool word_end) const =0
 
virtual NODE_REF next_node (EDGE_REF edge_ref) const =0
 
virtual bool end_of_word (EDGE_REF edge_ref) const =0
 
virtual UNICHAR_ID edge_letter (EDGE_REF edge_ref) const =0
 Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF. More...
 
virtual void print_node (NODE_REF node, int max_num_edges) const =0
 
virtual void unichar_id_to_patterns (UNICHAR_ID unichar_id, const UNICHARSET &unicharset, GenericVector< UNICHAR_ID > *vec) const
 
virtual EDGE_REF pattern_loop_edge (EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const
 

Static Public Attributes

static const inT16 kDawgMagicNumber = 42
 Magic number to determine endianness when reading the Dawg from file. More...
 
static const UNICHAR_ID kPatternUnicharID = 0
 

Protected Member Functions

 Dawg ()
 
NODE_REF next_node_from_edge_rec (const EDGE_RECORD &edge_rec) const
 Returns the next node visited by following this edge. More...
 
bool marker_flag_from_edge_rec (const EDGE_RECORD &edge_rec) const
 Returns the marker flag of this edge. More...
 
int direction_from_edge_rec (const EDGE_RECORD &edge_rec) const
 Returns the direction flag of this edge. More...
 
bool end_of_word_from_edge_rec (const EDGE_RECORD &edge_rec) const
 Returns true if this edge marks the end of a word. More...
 
UNICHAR_ID unichar_id_from_edge_rec (const EDGE_RECORD &edge_rec) const
 Returns UNICHAR_ID recorded in this edge. More...
 
void set_next_node_in_edge_rec (EDGE_RECORD *edge_rec, EDGE_REF value)
 Sets the next node link for this edge in the Dawg. More...
 
void set_marker_flag_in_edge_rec (EDGE_RECORD *edge_rec)
 Sets this edge record to be the last one in a sequence of edges. More...
 
int given_greater_than_edge_rec (NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, const EDGE_RECORD &edge_rec) const
 
bool edge_rec_match (NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, NODE_REF other_next_node, bool other_word_end, UNICHAR_ID other_unichar_id) const
 
void init (DawgType type, const STRING &lang, PermuterType perm, int unicharset_size, int debug_level)
 
bool match_words (WERD_CHOICE *word, inT32 index, NODE_REF node, UNICHAR_ID wildcard) const
 
void iterate_words_rec (const WERD_CHOICE &word_so_far, NODE_REF to_explore, TessCallback1< const WERD_CHOICE * > *cb) const
 

Protected Attributes

DawgType type_
 
STRING lang_
 
PermuterType perm_
 Permuter code that should be used if the word is found in this Dawg. More...
 
int unicharset_size_
 
int flag_start_bit_
 
int next_node_start_bit_
 
uinT64 next_node_mask_
 
uinT64 flags_mask_
 
uinT64 letter_mask_
 
int debug_level_
 

Detailed Description

Abstract class (an interface) that declares methods needed by the various tesseract classes to operate on SquishedDawg and Trie objects.

This class initializes all the edge masks (since their usage by SquishedDawg and Trie is identical) and implements simple accessors for each of the fields encoded in an EDGE_RECORD. This class also implements word_in_dawg() and check_for_words() (since they use only the public methods of SquishedDawg and Trie classes that are inherited from the Dawg base class).

Definition at line 118 of file dawg.h.

Constructor & Destructor Documentation

virtual tesseract::Dawg::~Dawg ( )
inlinevirtual

Definition at line 131 of file dawg.h.

131 {};
tesseract::Dawg::Dawg ( )
inlineprotected

Definition at line 197 of file dawg.h.

197 {}

Member Function Documentation

int tesseract::Dawg::check_for_words ( const char *  filename,
const UNICHARSET & unicharset,
bool  enable_wildcard 
) const

Checks the Dawg for the words that are listed in the requested file. Returns the number of words in the given file missing from the Dawg.

Definition at line 74 of file dawg.cpp.

76  {
77  if (filename == NULL) return 0;
78 
79  FILE *word_file;
80  char string [CHARS_PER_LINE];
81  int misses = 0;
82  UNICHAR_ID wildcard = unicharset.unichar_to_id(kWildcard);
83 
84  word_file = open_file (filename, "r");
85 
86  while (fgets (string, CHARS_PER_LINE, word_file) != NULL) {
87  chomp_string(string); // remove newline
88  WERD_CHOICE word(string, unicharset);
89  if (word.length() > 0 &&
90  !word.contains_unichar_id(INVALID_UNICHAR_ID)) {
91  if (!match_words(&word, 0, 0,
92  enable_wildcard ? wildcard : INVALID_UNICHAR_ID)) {
93  tprintf("Missing word: %s\n", string);
94  ++misses;
95  }
96  } else {
97  tprintf("Failed to create a valid word from %s\n", string);
98  }
99  }
100  fclose (word_file);
101  // Make sure the user sees this with fprintf instead of tprintf.
102  if (debug_level_) tprintf("Number of lost words=%d\n", misses);
103  return misses;
104 }
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
#define tprintf(...)
Definition: tprintf.h:31
void chomp_string(char *str)
Definition: helpers.h:75
bool match_words(WERD_CHOICE *word, inT32 index, NODE_REF node, UNICHAR_ID wildcard) const
Definition: dawg.cpp:145
int debug_level_
Definition: dawg.h:304
int UNICHAR_ID
Definition: unichar.h:33
FILE * open_file(const char *filename, const char *mode)
Definition: cutil.cpp:82
#define CHARS_PER_LINE
Definition: cutil.h:57
#define NULL
Definition: host.h:144
int tesseract::Dawg::direction_from_edge_rec ( const EDGE_RECORD & edge_rec ) const
inlineprotected

Returns the direction flag of this edge.

Definition at line 208 of file dawg.h.

208  {
209  return ((edge_rec & (DIRECTION_FLAG << flag_start_bit_))) ?
211  }
#define DIRECTION_FLAG
Definition: dawg.h:88
#define FORWARD_EDGE
Definition: dawg.h:84
#define BACKWARD_EDGE
Definition: dawg.h:85
int flag_start_bit_
Definition: dawg.h:298
virtual EDGE_REF tesseract::Dawg::edge_char_of ( NODE_REF  node,
UNICHAR_ID  unichar_id,
bool  word_end 
) const
pure virtual

Returns the edge that corresponds to the letter out of this node.

Implemented in tesseract::SquishedDawg, and tesseract::Trie.

virtual UNICHAR_ID tesseract::Dawg::edge_letter ( EDGE_REF  edge_ref) const
pure virtual

Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.

Implemented in tesseract::SquishedDawg, and tesseract::Trie.

bool tesseract::Dawg::edge_rec_match ( NODE_REF  next_node,
bool  word_end,
UNICHAR_ID  unichar_id,
NODE_REF  other_next_node,
bool  other_word_end,
UNICHAR_ID  other_unichar_id 
) const
inlineprotected

Returns true if all the values are equal (any value matches next_node if next_node == NO_EDGE, any value matches word_end if word_end is false).

Definition at line 259 of file dawg.h.

264  {
265  return ((unichar_id == other_unichar_id) &&
266  (next_node == NO_EDGE || next_node == other_next_node) &&
267  (!word_end || (word_end == other_word_end)));
268  }
virtual NODE_REF next_node(EDGE_REF edge_ref) const =0
virtual bool tesseract::Dawg::end_of_word ( EDGE_REF  edge_ref) const
pure virtual

Returns true if the edge indicated by the given EDGE_REF marks the end of a word.

Implemented in tesseract::SquishedDawg, and tesseract::Trie.

bool tesseract::Dawg::end_of_word_from_edge_rec ( const EDGE_RECORD & edge_rec ) const
inlineprotected

Returns true if this edge marks the end of a word.

Definition at line 213 of file dawg.h.

213  {
214  return (edge_rec & (WERD_END_FLAG << flag_start_bit_)) != 0;
215  }
#define WERD_END_FLAG
Definition: dawg.h:89
int flag_start_bit_
Definition: dawg.h:298
int tesseract::Dawg::given_greater_than_edge_rec ( NODE_REF  next_node,
bool  word_end,
UNICHAR_ID  unichar_id,
const EDGE_RECORD & edge_rec
) const
inlineprotected

Sequentially compares the given values of unichar ID, next node and word end marker with the values in the given EDGE_RECORD. Returns: 1 if at any step the given input value exceeds that of edge_rec (and all the values already checked are the same); 0 if edge_rec_match() returns true; -1 otherwise.

Definition at line 238 of file dawg.h.

241  {
242  UNICHAR_ID curr_unichar_id = unichar_id_from_edge_rec(edge_rec);
243  NODE_REF curr_next_node = next_node_from_edge_rec(edge_rec);
244  bool curr_word_end = end_of_word_from_edge_rec(edge_rec);
245  if (edge_rec_match(next_node, word_end, unichar_id, curr_next_node,
246  curr_word_end, curr_unichar_id)) return 0;
247  if (unichar_id > curr_unichar_id) return 1;
248  if (unichar_id == curr_unichar_id) {
249  if (next_node > curr_next_node) return 1;
250  if (next_node == curr_next_node) {
251  if (word_end > curr_word_end) return 1;
252  }
253  }
254  return -1;
255  }
bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns true if this edge marks the end of a word.
Definition: dawg.h:213
NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the next node visited by following this edge.
Definition: dawg.h:200
bool edge_rec_match(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, NODE_REF other_next_node, bool other_word_end, UNICHAR_ID other_unichar_id) const
Definition: dawg.h:259
UNICHAR_ID unichar_id_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns UNICHAR_ID recorded in this edge.
Definition: dawg.h:217
int UNICHAR_ID
Definition: unichar.h:33
inT64 NODE_REF
Definition: dawg.h:55
virtual NODE_REF next_node(EDGE_REF edge_ref) const =0
void tesseract::Dawg::init ( DawgType  type,
const STRING & lang,
PermuterType  perm,
int  unicharset_size,
int  debug_level 
)
protected

Sets type_, lang_, perm_, unicharset_size_. Initializes the values of various masks from unicharset_size_.

Definition at line 177 of file dawg.cpp.

178  {
179  type_ = type;
180  lang_ = lang;
181  perm_ = perm;
182  ASSERT_HOST(unicharset_size > 0);
183  unicharset_size_ = unicharset_size;
184  // Set bit masks. We will use the value unicharset_size_ as a null char, so
185  // the actual number of unichars is unicharset_size_ + 1.
186  flag_start_bit_ = ceil(log(unicharset_size_ + 1.0) / log(2.0));
188  letter_mask_ = ~(~0ull << flag_start_bit_);
191 
192  debug_level_ = debug_level;
193 }
int next_node_start_bit_
Definition: dawg.h:299
const STRING & lang() const
Definition: dawg.h:128
#define ASSERT_HOST(x)
Definition: errcode.h:84
uinT64 next_node_mask_
Definition: dawg.h:300
uinT64 letter_mask_
Definition: dawg.h:302
int unicharset_size_
Definition: dawg.h:297
int debug_level_
Definition: dawg.h:304
#define NUM_FLAG_BITS
Definition: dawg.h:91
STRING lang_
Definition: dawg.h:290
int flag_start_bit_
Definition: dawg.h:298
DawgType type() const
Definition: dawg.h:127
DawgType type_
Definition: dawg.h:289
PermuterType perm_
Permuter code that should be used if the word is found in this Dawg.
Definition: dawg.h:292
uinT64 flags_mask_
Definition: dawg.h:301
void tesseract::Dawg::iterate_words ( const UNICHARSET & unicharset,
TessCallback1< const WERD_CHOICE * > *  cb 
) const

Definition at line 106 of file dawg.cpp.

107  {
108  WERD_CHOICE word(&unicharset);
109  iterate_words_rec(word, 0, cb);
110 }
void iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore, TessCallback1< const WERD_CHOICE * > *cb) const
Definition: dawg.cpp:127
void tesseract::Dawg::iterate_words ( const UNICHARSET & unicharset,
TessCallback1< const char * > *  cb 
) const

Definition at line 118 of file dawg.cpp.

119  {
122  WERD_CHOICE word(&unicharset);
123  iterate_words_rec(word, 0, shim);
124  delete shim;
125 }
void iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore, TessCallback1< const WERD_CHOICE * > *cb) const
Definition: dawg.cpp:127
void CallWithUTF8(TessCallback1< const char * > *cb, const WERD_CHOICE *wc)
Definition: dawg.cpp:112
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
void tesseract::Dawg::iterate_words_rec ( const WERD_CHOICE & word_so_far,
NODE_REF  to_explore,
TessCallback1< const WERD_CHOICE * > *  cb 
) const
protected

Definition at line 127 of file dawg.cpp.

129  {
130  NodeChildVector children;
131  this->unichar_ids_of(to_explore, &children, false);
132  for (int i = 0; i < children.size(); i++) {
133  WERD_CHOICE next_word(word_so_far);
134  next_word.append_unichar_id(children[i].unichar_id, 1, 0.0, 0.0);
135  if (this->end_of_word(children[i].edge_ref)) {
136  cb->Run(&next_word);
137  }
138  NODE_REF next = next_node(children[i].edge_ref);
139  if (next != 0) {
140  iterate_words_rec(next_word, next, cb);
141  }
142  }
143 }
virtual void Run(A1)=0
virtual bool end_of_word(EDGE_REF edge_ref) const =0
void iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore, TessCallback1< const WERD_CHOICE * > *cb) const
Definition: dawg.cpp:127
GenericVector< NodeChild > NodeChildVector
Definition: dawg.h:67
virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const =0
inT64 NODE_REF
Definition: dawg.h:55
virtual NODE_REF next_node(EDGE_REF edge_ref) const =0
const STRING& tesseract::Dawg::lang ( ) const
inline

Definition at line 128 of file dawg.h.

128 { return lang_; }
STRING lang_
Definition: dawg.h:290
bool tesseract::Dawg::marker_flag_from_edge_rec ( const EDGE_RECORD & edge_rec ) const
inlineprotected

Returns the marker flag of this edge.

Definition at line 204 of file dawg.h.

204  {
205  return (edge_rec & (MARKER_FLAG << flag_start_bit_)) != 0;
206  }
int flag_start_bit_
Definition: dawg.h:298
#define MARKER_FLAG
Definition: dawg.h:87
bool tesseract::Dawg::match_words ( WERD_CHOICE * word,
inT32  index,
NODE_REF  node,
UNICHAR_ID  wildcard 
) const
protected

Matches all of the words that are represented by this string. If wildcard is set to something other than INVALID_UNICHAR_ID, the *'s in this string are interpreted as wildcards. The WERD_CHOICE param is not passed by const so that wildcard searches can modify it and work without having to copy WERD_CHOICEs.

Definition at line 145 of file dawg.cpp.

146  {
147  EDGE_REF edge;
148  inT32 word_end;
149 
150  if (wildcard != INVALID_UNICHAR_ID && word->unichar_id(index) == wildcard) {
151  bool any_matched = false;
152  NodeChildVector vec;
153  this->unichar_ids_of(node, &vec, false);
154  for (int i = 0; i < vec.size(); ++i) {
155  word->set_unichar_id(vec[i].unichar_id, index);
156  if (match_words(word, index, node, wildcard))
157  any_matched = true;
158  }
159  word->set_unichar_id(wildcard, index);
160  return any_matched;
161  } else {
162  word_end = index == word->length() - 1;
163  edge = edge_char_of(node, word->unichar_id(index), word_end);
164  if (edge != NO_EDGE) { // normal edge in DAWG
165  node = next_node(edge);
166  if (word_end) {
167  if (debug_level_ > 1) word->print("match_words() found: ");
168  return true;
169  } else if (node != 0) {
170  return match_words(word, index+1, node, wildcard);
171  }
172  }
173  }
174  return false;
175 }
void set_unichar_id(UNICHAR_ID unichar_id, int index)
Definition: ratngs.h:356
int length() const
Definition: ratngs.h:300
GenericVector< NodeChild > NodeChildVector
Definition: dawg.h:67
virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const =0
Returns the edge that corresponds to the letter out of this node.
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:312
bool match_words(WERD_CHOICE *word, inT32 index, NODE_REF node, UNICHAR_ID wildcard) const
Definition: dawg.cpp:145
int debug_level_
Definition: dawg.h:304
virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const =0
void print() const
Definition: ratngs.h:563
inT64 EDGE_REF
Definition: dawg.h:54
virtual NODE_REF next_node(EDGE_REF edge_ref) const =0
int inT32
Definition: host.h:102
virtual NODE_REF tesseract::Dawg::next_node ( EDGE_REF  edge_ref) const
pure virtual

Returns the next node visited by following the edge indicated by the given EDGE_REF.

Implemented in tesseract::SquishedDawg, and tesseract::Trie.

NODE_REF tesseract::Dawg::next_node_from_edge_rec ( const EDGE_RECORD & edge_rec ) const
inlineprotected

Returns the next node visited by following this edge.

Definition at line 200 of file dawg.h.

200  {
201  return ((edge_rec & next_node_mask_) >> next_node_start_bit_);
202  }
int next_node_start_bit_
Definition: dawg.h:299
uinT64 next_node_mask_
Definition: dawg.h:300
virtual EDGE_REF tesseract::Dawg::pattern_loop_edge ( EDGE_REF  edge_ref,
UNICHAR_ID  unichar_id,
bool  word_end 
) const
inlinevirtual

Returns the given EDGE_REF if the EDGE_RECORD that it points to has a self loop and the given unichar_id matches the unichar_id stored in the EDGE_RECORD, returns NO_EDGE otherwise.

Reimplemented in tesseract::Trie.

Definition at line 191 of file dawg.h.

192  {
193  return false;
194  }
PermuterType tesseract::Dawg::permuter ( ) const
inline

Definition at line 129 of file dawg.h.

129 { return perm_; }
PermuterType perm_
Permuter code that should be used if the word is found in this Dawg.
Definition: dawg.h:292
bool tesseract::Dawg::prefix_in_dawg ( const WERD_CHOICE & prefix,
bool  requires_complete 
) const

Definition at line 49 of file dawg.cpp.

50  {
51  if (word.length() == 0) return !requires_complete;
52  NODE_REF node = 0;
53  int end_index = word.length() - 1;
54  for (int i = 0; i < end_index; i++) {
55  EDGE_REF edge = edge_char_of(node, word.unichar_id(i), false);
56  if (edge == NO_EDGE) {
57  return false;
58  }
59  if ((node = next_node(edge)) == 0) {
60  // This only happens if all words following this edge terminate --
61  // there are no larger words. See Trie::add_word_to_dawg()
62  return false;
63  }
64  }
65  // Now check the last character.
66  return edge_char_of(node, word.unichar_id(end_index), requires_complete) !=
67  NO_EDGE;
68 }
virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const =0
Returns the edge that corresponds to the letter out of this node.
inT64 EDGE_REF
Definition: dawg.h:54
inT64 NODE_REF
Definition: dawg.h:55
virtual NODE_REF next_node(EDGE_REF edge_ref) const =0
virtual void tesseract::Dawg::print_node ( NODE_REF  node,
int  max_num_edges 
) const
pure virtual

Prints the contents of the node indicated by the given NODE_REF. At most max_num_edges will be printed.

Implemented in tesseract::SquishedDawg, and tesseract::Trie.

void tesseract::Dawg::set_marker_flag_in_edge_rec ( EDGE_RECORD * edge_rec )
inlineprotected

Sets this edge record to be the last one in a sequence of edges.

Definition at line 228 of file dawg.h.

228  {
229  *edge_rec |= (MARKER_FLAG << flag_start_bit_);
230  }
int flag_start_bit_
Definition: dawg.h:298
#define MARKER_FLAG
Definition: dawg.h:87
void tesseract::Dawg::set_next_node_in_edge_rec ( EDGE_RECORD * edge_rec,
EDGE_REF  value 
)
inlineprotected

Sets the next node link for this edge in the Dawg.

Definition at line 222 of file dawg.h.

223  {
224  *edge_rec &= (~next_node_mask_);
225  *edge_rec |= ((value << next_node_start_bit_) & next_node_mask_);
226  }
int next_node_start_bit_
Definition: dawg.h:299
uinT64 next_node_mask_
Definition: dawg.h:300
DawgType tesseract::Dawg::type ( ) const
inline

Definition at line 127 of file dawg.h.

127 { return type_; }
DawgType type_
Definition: dawg.h:289
UNICHAR_ID tesseract::Dawg::unichar_id_from_edge_rec ( const EDGE_RECORD & edge_rec ) const
inlineprotected

Returns UNICHAR_ID recorded in this edge.

Definition at line 217 of file dawg.h.

218  {
219  return ((edge_rec & letter_mask_) >> LETTER_START_BIT);
220  }
#define LETTER_START_BIT
Definition: dawg.h:90
uinT64 letter_mask_
Definition: dawg.h:302
virtual void tesseract::Dawg::unichar_id_to_patterns ( UNICHAR_ID  unichar_id,
const UNICHARSET & unicharset,
GenericVector< UNICHAR_ID > *  vec 
) const
inlinevirtual

Fills vec with unichar ids that represent the character classes of the given unichar_id.

Reimplemented in tesseract::Trie.

Definition at line 184 of file dawg.h.

186  {};
virtual void tesseract::Dawg::unichar_ids_of ( NODE_REF  node,
NodeChildVector * vec,
bool  word_end 
) const
pure virtual

Fills the given NodeChildVector with all the unichar ids (and the corresponding EDGE_REFs) for which there is an edge out of this node.

Implemented in tesseract::SquishedDawg, and tesseract::Trie.

bool tesseract::Dawg::word_in_dawg ( const WERD_CHOICE & word ) const

Returns true if the given word is in the Dawg.

Definition at line 70 of file dawg.cpp.

70  {
71  return prefix_in_dawg(word, true);
72 }
bool prefix_in_dawg(const WERD_CHOICE &prefix, bool requires_complete) const
Definition: dawg.cpp:49

Member Data Documentation

int tesseract::Dawg::debug_level_
protected

Definition at line 304 of file dawg.h.

int tesseract::Dawg::flag_start_bit_
protected

Definition at line 298 of file dawg.h.

uinT64 tesseract::Dawg::flags_mask_
protected

Definition at line 301 of file dawg.h.

const inT16 tesseract::Dawg::kDawgMagicNumber = 42
static

Magic number to determine endianness when reading the Dawg from file.

Definition at line 121 of file dawg.h.

const UNICHAR_ID tesseract::Dawg::kPatternUnicharID = 0
static

A special unichar id that indicates that any appropriate pattern (e.g. dictionary word, 0-9 digit, etc.) can be inserted instead. Used for expressing patterns in punctuation and number Dawgs.

Definition at line 125 of file dawg.h.

STRING tesseract::Dawg::lang_
protected

Definition at line 290 of file dawg.h.

uinT64 tesseract::Dawg::letter_mask_
protected

Definition at line 302 of file dawg.h.

uinT64 tesseract::Dawg::next_node_mask_
protected

Definition at line 300 of file dawg.h.

int tesseract::Dawg::next_node_start_bit_
protected

Definition at line 299 of file dawg.h.

PermuterType tesseract::Dawg::perm_
protected

Permuter code that should be used if the word is found in this Dawg.

Definition at line 292 of file dawg.h.

DawgType tesseract::Dawg::type_
protected

Definition at line 289 of file dawg.h.

int tesseract::Dawg::unicharset_size_
protected

Definition at line 297 of file dawg.h.


The documentation for this class was generated from the following files: