tesseract  4.00.00dev
UNICHARMAP Class Reference

#include <unicharmap.h>

Public Member Functions

 UNICHARMAP ()
 
 ~UNICHARMAP ()
 
void insert (const char *const unichar_repr, UNICHAR_ID id)
 
UNICHAR_ID unichar_to_id (const char *const unichar_repr, int length) const
 
bool contains (const char *const unichar_repr, int length) const
 
int minmatch (const char *const unichar_repr) const
 
void clear ()
 

Detailed Description

Definition at line 27 of file unicharmap.h.

Constructor & Destructor Documentation

◆ UNICHARMAP()

UNICHARMAP::UNICHARMAP ( )

Definition at line 25 of file unicharmap.cpp.

25  :
26 nodes(0) {
27 }

◆ ~UNICHARMAP()

UNICHARMAP::~UNICHARMAP ( )

Definition at line 29 of file unicharmap.cpp.

29  {
30  if (nodes != 0)
31  delete[] nodes;
32 }

Member Function Documentation

◆ clear()

void UNICHARMAP::clear ( )

Definition at line 118 of file unicharmap.cpp.

118  {
119  if (nodes != 0)
120  {
121  delete[] nodes;
122  nodes = 0;
123  }
124 }

◆ contains()

bool UNICHARMAP::contains ( const char *const  unichar_repr,
int  length 
) const

Definition at line 82 of file unicharmap.cpp.

83  {
84  if (unichar_repr == NULL || *unichar_repr == '\0') return false;
85  if (length <= 0 || length > UNICHAR_LEN) return false;
86  int index = 0;
87  if (index >= length || unichar_repr[index] == '\0') return false;
88  UNICHARMAP_NODE* current_nodes = nodes;
89 
90  while (current_nodes != 0 && index + 1 < length &&
91  unichar_repr[index + 1] != '\0') {
92  current_nodes =
93  current_nodes[static_cast<unsigned char>(unichar_repr[index])].children;
94  ++index;
95  }
96  return current_nodes != 0 &&
97  (index + 1 >= length || unichar_repr[index + 1] == '\0') &&
98  current_nodes[static_cast<unsigned char>(unichar_repr[index])].id >= 0;
99 }
References UNICHAR_LEN (macro, defined at unichar.h:31).

◆ insert()

void UNICHARMAP::insert ( const char *const  unichar_repr,
UNICHAR_ID  id 
)

Definition at line 59 of file unicharmap.cpp.

59  {
60  const char* current_char = unichar_repr;
61  if (*current_char == '\0') return;
62  UNICHARMAP_NODE** current_nodes_pointer = &nodes;
63  do {
64  if (*current_nodes_pointer == 0)
65  *current_nodes_pointer = new UNICHARMAP_NODE[256];
66  if (current_char[1] == '\0') {
67  (*current_nodes_pointer)
68  [static_cast<unsigned char>(*current_char)].id = id;
69  return;
70  }
71  current_nodes_pointer =
72  &((*current_nodes_pointer)
73  [static_cast<unsigned char>(*current_char)].children);
74  ++current_char;
75  } while (true);
76 }

◆ minmatch()

int UNICHARMAP::minmatch ( const char *const  unichar_repr) const

Definition at line 103 of file unicharmap.cpp.

103  {
104  const char* current_char = unichar_repr;
105  if (*current_char == '\0') return 0;
106  UNICHARMAP_NODE* current_nodes = nodes;
107 
108  while (current_nodes != NULL && *current_char != '\0') {
109  if (current_nodes[static_cast<unsigned char>(*current_char)].id >= 0)
110  return current_char + 1 - unichar_repr;
111  current_nodes =
112  current_nodes[static_cast<unsigned char>(*current_char)].children;
113  ++current_char;
114  }
115  return 0;
116 }

◆ unichar_to_id()

UNICHAR_ID UNICHARMAP::unichar_to_id ( const char *const  unichar_repr,
int  length 
) const

Definition at line 37 of file unicharmap.cpp.

38  {
39  UNICHARMAP_NODE* current_nodes = nodes;
40 
41  assert(*unichar_repr != '\0');
42  assert(length > 0 && length <= UNICHAR_LEN);
43 
44  int index = 0;
45  if (index >= length || unichar_repr[index] == '\0') return INVALID_UNICHAR_ID;
46  do {
47  if (index + 1 >= length || unichar_repr[index + 1] == '\0')
48  return current_nodes[static_cast<unsigned char>(unichar_repr[index])].id;
49  current_nodes =
50  current_nodes[static_cast<unsigned char>(unichar_repr[index])].children;
51  ++index;
52  } while (true);
53 }
References UNICHAR_LEN (macro, defined at unichar.h:31).

The documentation for this class was generated from the following files: