#include <ligature_table.h>
Definition at line 38 of file ligature_table.h.
◆ LigatureTable()
tesseract::LigatureTable::LigatureTable() [protected]
◆ AddLigatures()
std::string tesseract::LigatureTable::AddLigatures(const std::string &str, const PangoFontInfo *font) const
Definition at line 153 of file ligature_table.cpp.
153 {
154 std::string result;
155 int len = str.size();
156 int step = 0;
159 step = 0;
161 if (
i + liglen <= len) {
162 std::string lig_cand = str.substr(
i, liglen);
165 tlog(3,
"Considering %s -> %s\n", lig_cand.c_str(), it->second.c_str());
166 if (font) {
167
168 if (!font->CanRenderString(it->second.data(), it->second.length())) {
169 continue;
170 }
171 }
172
173 step = liglen;
174 result += it->second;
175 tlog(2,
"Substituted %s -> %s\n", lig_cand.c_str(), it->second.c_str());
176 break;
177 }
178 }
179 }
180 if (step == 0) {
182 step = 1;
183 }
184 }
185 result += str.substr(
i, len -
i);
186 return result;
187}
LigHash norm_to_lig_table_
◆ Get()
Definition at line 51 of file ligature_table.cpp.
51 {
55 }
57}
static std::unique_ptr< LigatureTable > instance_
◆ Init()
void tesseract::LigatureTable::Init() [protected]
Definition at line 62 of file ligature_table.cpp.
62 {
65
66
67 std::string lig8 = EncodeAsUTF8(lig);
68 icu::UnicodeString unicode_lig8(static_cast<UChar32>(lig));
69 icu::UnicodeString normed8_result;
70 icu::ErrorCode status;
72 std::string normed8;
73 normed8_result.toUTF8String(normed8);
74 int lig_length = lig8.length();
75 int norm_length = normed8.size();
76 if (normed8 != lig8 && lig_length > 1 && norm_length > 1) {
81 }
84 }
87 }
90 }
91 }
92 }
93
99 }
102 }
103
105 }
106 }
107}
static const char * kCustomLigatures[][2]
LigHash lig_to_norm_table_
◆ lig_to_norm_table()
const LigHash & tesseract::LigatureTable::lig_to_norm_table() const [inline]
◆ norm_to_lig_table()
const LigHash & tesseract::LigatureTable::norm_to_lig_table() const [inline]
◆ RemoveCustomLigatures()
std::string tesseract::LigatureTable::RemoveCustomLigatures(const std::string &str) const
Definition at line 128 of file ligature_table.cpp.
128 {
129 std::string result;
130 UNICHAR::const_iterator it_begin =
UNICHAR::begin(str.c_str(), str.length());
131 UNICHAR::const_iterator it_end =
UNICHAR::end(str.c_str(), str.length());
132 char tmp[5];
133 int len;
134 int norm_ind;
135 for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
136 len = it.get_utf8(tmp);
137 tmp[len] = '\0';
138 norm_ind = -1;
142 }
143 }
144 if (norm_ind >= 0) {
146 } else {
147 result += tmp;
148 }
149 }
150 return result;
151}
static const_iterator begin(const char *utf8_str, int byte_length)
static const_iterator end(const char *utf8_str, int byte_length)
◆ RemoveLigatures()
std::string tesseract::LigatureTable::RemoveLigatures(const std::string &str) const
Definition at line 109 of file ligature_table.cpp.
109 {
110 std::string result;
111 UNICHAR::const_iterator it_begin =
UNICHAR::begin(str.c_str(), str.length());
112 UNICHAR::const_iterator it_end =
UNICHAR::end(str.c_str(), str.length());
113 char tmp[5];
114 int len;
115 for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
116 len = it.get_utf8(tmp);
117 tmp[len] = '\0';
120 result += lig_it->second;
121 } else {
122 result += tmp;
123 }
124 }
125 return result;
126}
◆ instance_
std::unique_ptr< LigatureTable > tesseract::LigatureTable::instance_ [static] [protected]
◆ lig_to_norm_table_
LigHash tesseract::LigatureTable::lig_to_norm_table_ [protected]
◆ max_lig_length_
int tesseract::LigatureTable::max_lig_length_ [protected]
◆ max_norm_length_
int tesseract::LigatureTable::max_norm_length_ [protected]
◆ min_lig_length_
int tesseract::LigatureTable::min_lig_length_ [protected]
◆ min_norm_length_
int tesseract::LigatureTable::min_norm_length_ [protected]
◆ norm_to_lig_table_
LigHash tesseract::LigatureTable::norm_to_lig_table_ [protected]
The documentation for this class was generated from the following files:
- /media/home/debian/src/github/tesseract-ocr/tesseract/src/training/pango/ligature_table.h
- /media/home/debian/src/github/tesseract-ocr/tesseract/src/training/pango/ligature_table.cpp