Definition at line 298 of file paragraphs.cpp.
◆ UnicodeSpanSkipper()
tesseract::UnicodeSpanSkipper::UnicodeSpanSkipper(const UNICHARSET *unicharset, const WERD_CHOICE *word) [inline]
Definition at line 300 of file paragraphs.cpp.
301 : u_(unicharset), word_(word), wordlen_(word->length()) {
302 }
◆ SkipAlpha()
unsigned tesseract::UnicodeSpanSkipper::SkipAlpha(unsigned pos)
Definition at line 346 of file paragraphs.cpp.
346 {
347 while (pos < wordlen_ && u_->get_isalpha(word_->unichar_id(pos))) {
348 pos++;
349 }
350 return pos;
351}
UNICHAR_ID unichar_id(unsigned index) const
◆ SkipDigits()
unsigned tesseract::UnicodeSpanSkipper::SkipDigits(unsigned pos)
Definition at line 326 of file paragraphs.cpp.
326 {
327 while (pos < wordlen_ &&
328 (u_->get_isdigit(word_->unichar_id(pos)) || IsDigitLike(UnicodeFor(u_, word_, pos)))) {
329 pos++;
330 }
331 return pos;
332}
bool get_isdigit(UNICHAR_ID unichar_id) const
◆ SkipPunc()
unsigned tesseract::UnicodeSpanSkipper::SkipPunc(unsigned pos)
Definition at line 319 of file paragraphs.cpp.
319 {
320 while (pos < wordlen_ && u_->get_ispunctuation(word_->unichar_id(pos))) {
321 pos++;
322 }
323 return pos;
324}
◆ SkipRomans()
unsigned tesseract::UnicodeSpanSkipper::SkipRomans(unsigned pos)
Definition at line 334 of file paragraphs.cpp.
334 {
335 const char *kRomans = "ivxlmdIVXLMD";
336 while (pos < wordlen_) {
337 int ch = UnicodeFor(u_, word_, pos);
338 if (ch >= 0xF0 || strchr(kRomans, ch) == nullptr) {
339 break;
340 }
341 pos++;
342 }
343 return pos;
344}
The documentation for this class was generated from the following file:
- /media/home/debian/src/github/tesseract-ocr/tesseract/src/ccmain/paragraphs.cpp