tesseract v5.3.3.20231005
tesseract::UnicodeSpanSkipper Class Reference

Public Member Functions

 UnicodeSpanSkipper (const UNICHARSET *unicharset, const WERD_CHOICE *word)
 
unsigned SkipPunc (unsigned pos)
 
unsigned SkipDigits (unsigned pos)
 
unsigned SkipRomans (unsigned pos)
 
unsigned SkipAlpha (unsigned pos)
 

Detailed Description

Definition at line 298 of file paragraphs.cpp.

Constructor & Destructor Documentation

◆ UnicodeSpanSkipper()

tesseract::UnicodeSpanSkipper::UnicodeSpanSkipper ( const UNICHARSET * unicharset,
const WERD_CHOICE * word 
)
inline

Definition at line 300 of file paragraphs.cpp.

301 : u_(unicharset), word_(word), wordlen_(word->length()) {
302 }

Member Function Documentation

◆ SkipAlpha()

unsigned tesseract::UnicodeSpanSkipper::SkipAlpha ( unsigned  pos)

Definition at line 346 of file paragraphs.cpp.

346 {
347 while (pos < wordlen_ && u_->get_isalpha(word_->unichar_id(pos))) {
348 pos++;
349 }
350 return pos;
351 }
UNICHAR_ID unichar_id(unsigned index) const
Definition: ratngs.h:299

◆ SkipDigits()

unsigned tesseract::UnicodeSpanSkipper::SkipDigits ( unsigned  pos)

Definition at line 326 of file paragraphs.cpp.

326 {
327 while (pos < wordlen_ &&
328 (u_->get_isdigit(word_->unichar_id(pos)) || IsDigitLike(UnicodeFor(u_, word_, pos)))) {
329 pos++;
330 }
331 return pos;
332 }
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:524

◆ SkipPunc()

unsigned tesseract::UnicodeSpanSkipper::SkipPunc ( unsigned  pos)

Definition at line 319 of file paragraphs.cpp.

319 {
320 while (pos < wordlen_ && u_->get_ispunctuation(word_->unichar_id(pos))) {
321 pos++;
322 }
323 return pos;
324 }

◆ SkipRomans()

unsigned tesseract::UnicodeSpanSkipper::SkipRomans ( unsigned  pos)

Definition at line 334 of file paragraphs.cpp.

334 {
335 const char *kRomans = "ivxlmdIVXLMD";
336 while (pos < wordlen_) {
337 int ch = UnicodeFor(u_, word_, pos);
338 if (ch >= 0xF0 || strchr(kRomans, ch) == nullptr) {
339 break;
340 }
341 pos++;
342 }
343 return pos;
344 }

The documentation for this class was generated from the following file: