tesseract v5.3.3.20231005
|
#include <dawg.h>
Public Member Functions | |
SquishedDawg (DawgType type, const std::string &lang, PermuterType perm, int debug_level) | |
SquishedDawg (const char *filename, DawgType type, const std::string &lang, PermuterType perm, int debug_level) | |
SquishedDawg (EDGE_ARRAY edges, int num_edges, DawgType type, const std::string &lang, PermuterType perm, int unicharset_size, int debug_level) | |
~SquishedDawg () override | |
bool | Load (TFile *fp) |
int | NumEdges () |
EDGE_REF | edge_char_of (NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const override |
Returns the edge that corresponds to the letter out of this node. More... | |
void | unichar_ids_of (NODE_REF node, NodeChildVector *vec, bool word_end) const override |
NODE_REF | next_node (EDGE_REF edge) const override |
bool | end_of_word (EDGE_REF edge_ref) const override |
UNICHAR_ID | edge_letter (EDGE_REF edge_ref) const override |
Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF. More... | |
void | print_node (NODE_REF node, int max_num_edges) const override |
bool | write_squished_dawg (TFile *file) |
Writes the squished/reduced Dawg to a file. More... | |
bool | write_squished_dawg (const char *filename) |
Public Member Functions inherited from tesseract::Dawg | |
DawgType | type () const |
const std::string & | lang () const |
PermuterType | permuter () const |
virtual | ~Dawg () |
bool | word_in_dawg (const WERD_CHOICE &word) const |
Returns true if the given word is in the Dawg. More... | |
bool | prefix_in_dawg (const WERD_CHOICE &prefix, bool requires_complete) const |
int | check_for_words (const char *filename, const UNICHARSET &unicharset, bool enable_wildcard) const |
void | iterate_words (const UNICHARSET &unicharset, std::function< void(const WERD_CHOICE *)> cb) const |
void | iterate_words (const UNICHARSET &unicharset, const std::function< void(const char *)> &cb) const |
virtual EDGE_REF | edge_char_of (NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const =0 |
Returns the edge that corresponds to the letter out of this node. More... | |
virtual void | unichar_ids_of (NODE_REF node, NodeChildVector *vec, bool word_end) const =0 |
virtual NODE_REF | next_node (EDGE_REF edge_ref) const =0 |
virtual bool | end_of_word (EDGE_REF edge_ref) const =0 |
virtual UNICHAR_ID | edge_letter (EDGE_REF edge_ref) const =0 |
Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF. More... | |
virtual void | print_node (NODE_REF node, int max_num_edges) const =0 |
virtual void | unichar_id_to_patterns (UNICHAR_ID unichar_id, const UNICHARSET &unicharset, std::vector< UNICHAR_ID > *vec) const |
virtual EDGE_REF | pattern_loop_edge (EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const |
Additional Inherited Members | |
Static Public Attributes inherited from tesseract::Dawg | |
static const int16_t | kDawgMagicNumber = 42 |
Magic number to determine endianness when reading the Dawg from file. More... | |
static const UNICHAR_ID | kPatternUnicharID = 0 |
Protected Member Functions inherited from tesseract::Dawg | |
Dawg (DawgType type, const std::string &lang, PermuterType perm, int debug_level) | |
NODE_REF | next_node_from_edge_rec (const EDGE_RECORD &edge_rec) const |
Returns the next node visited by following this edge. More... | |
bool | marker_flag_from_edge_rec (const EDGE_RECORD &edge_rec) const |
Returns the marker flag of this edge. More... | |
int | direction_from_edge_rec (const EDGE_RECORD &edge_rec) const |
Returns the direction flag of this edge. More... | |
bool | end_of_word_from_edge_rec (const EDGE_RECORD &edge_rec) const |
Returns true if this edge marks the end of a word. More... | |
UNICHAR_ID | unichar_id_from_edge_rec (const EDGE_RECORD &edge_rec) const |
Returns UNICHAR_ID recorded in this edge. More... | |
void | set_next_node_in_edge_rec (EDGE_RECORD *edge_rec, EDGE_REF value) |
Sets the next node link for this edge in the Dawg. More... | |
void | set_marker_flag_in_edge_rec (EDGE_RECORD *edge_rec) |
Sets this edge record to be the last one in a sequence of edges. More... | |
int | given_greater_than_edge_rec (NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, const EDGE_RECORD &edge_rec) const |
bool | edge_rec_match (NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, NODE_REF other_next_node, bool other_word_end, UNICHAR_ID other_unichar_id) const |
void | init (int unicharset_size) |
bool | match_words (WERD_CHOICE *word, uint32_t index, NODE_REF node, UNICHAR_ID wildcard) const |
void | iterate_words_rec (const WERD_CHOICE &word_so_far, NODE_REF to_explore, const std::function< void(const WERD_CHOICE *)> &cb) const |
Protected Attributes inherited from tesseract::Dawg | |
std::string | lang_ |
DawgType | type_ |
PermuterType | perm_ |
Permuter code that should be used if the word is found in this Dawg. More... | |
uint64_t | next_node_mask_ = 0 |
uint64_t | flags_mask_ = 0 |
uint64_t | letter_mask_ = 0 |
int | unicharset_size_ |
int | flag_start_bit_ = 0 |
int | next_node_start_bit_ = 0 |
int | debug_level_ |
Concrete class that can operate on a compacted (squished) Dawg (read, search and write to file). This class is read-only in the sense that new words cannot be added to an instance of SquishedDawg. The underlying representation of the nodes and edges in SquishedDawg is stored as a contiguous EDGE_ARRAY (read from file or given as an argument to the constructor).
|
inline |
Definition at line 410 of file dawg.h.
|
inline |
Definition at line 413 of file dawg.h.
|
inline |
|
override |
|
override virtual |
Returns the edge that corresponds to the letter out of this node.
Implements tesseract::Dawg.
Definition at line 198 of file dawg.cpp.
|
inline override virtual |
Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.
Implements tesseract::Dawg.
|
inline override virtual |
Returns true if the edge indicated by the given EDGE_REF marks the end of a word.
Implements tesseract::Dawg.
|
inline |
Returns the next node visited by following the edge indicated by the given EDGE_REF.
Implements tesseract::Dawg.
Definition at line 470 of file dawg.h.
|
inline |
|
override virtual |
Prints the contents of the node indicated by the given NODE_REF. At most max_num_edges will be printed.
Implements tesseract::Dawg.
Definition at line 243 of file dawg.cpp.
|
inline override virtual |
Fills the given NodeChildVector with all the unichar ids (and the corresponding EDGE_REFs) for which there is an edge out of this node.
Implements tesseract::Dawg.
Definition at line 454 of file dawg.h.
|
inline |
Opens the file with the given filename and writes the squished/reduced Dawg to the file.
Definition at line 494 of file dawg.h.
bool tesseract::SquishedDawg::write_squished_dawg | ( | TFile * | file | ) |
Writes the squished/reduced Dawg to a file.
Definition at line 391 of file dawg.cpp.