tesseract v5.3.3.20231005
UnicodeText::const_iterator Class Reference

#include <unicodetext.h>

Public Types

typedef std::bidirectional_iterator_tag iterator_category
 
typedef char32 value_type
 
typedef ptrdiff_t difference_type
 
typedef void pointer
 
typedef const char32 reference
 

Public Member Functions

 const_iterator ()
 
 const_iterator (const const_iterator &other)
 
const_iterator & operator= (const const_iterator &other)
 
char32 operator* () const
 
const_iterator & operator++ ()
 
const_iterator operator++ (int)
 
const_iterator & operator-- ()
 
const_iterator operator-- (int)
 
int get_utf8 (char *buf) const
 
string get_utf8_string () const
 
int utf8_length () const
 
const char * utf8_data () const
 
string DebugString () const
 

Friends

class UnicodeText
 
class UnicodeTextUtils
 
class UTF8StateTableProperty
 
bool operator== (const CI &lhs, const CI &rhs)
 
bool operator!= (const CI &lhs, const CI &rhs)
 
bool operator< (const CI &lhs, const CI &rhs)
 
bool operator> (const CI &lhs, const CI &rhs)
 
bool operator<= (const CI &lhs, const CI &rhs)
 
bool operator>= (const CI &lhs, const CI &rhs)
 
difference_type distance (const CI &first, const CI &last)
 

Detailed Description

Definition at line 181 of file unicodetext.h.

Member Typedef Documentation

◆ difference_type

Definition at line 187 of file unicodetext.h.

◆ iterator_category

typedef std::bidirectional_iterator_tag UnicodeText::const_iterator::iterator_category

Definition at line 185 of file unicodetext.h.

◆ pointer

Definition at line 188 of file unicodetext.h.

◆ reference

Definition at line 189 of file unicodetext.h.

◆ value_type

Definition at line 186 of file unicodetext.h.

Constructor & Destructor Documentation

◆ const_iterator() [1/2]

UnicodeText::const_iterator::const_iterator ( )

Definition at line 398 of file unicodetext.cc.

398 : it_(nullptr) {}

◆ const_iterator() [2/2]

UnicodeText::const_iterator::const_iterator ( const const_iterator & other)

Definition at line 400 of file unicodetext.cc.

400 : it_(other.it_) {}

Member Function Documentation

◆ DebugString()

string UnicodeText::const_iterator::DebugString ( ) const

◆ get_utf8()

int UnicodeText::const_iterator::get_utf8 ( char *  buf) const

Definition at line 454 of file unicodetext.cc.

454 {
455 utf8_output[0] = it_[0];
456 if ((it_[0] & 0xff) < 0x80)
457 return 1;
458 utf8_output[1] = it_[1];
459 if ((it_[0] & 0xff) < 0xE0)
460 return 2;
461 utf8_output[2] = it_[2];
462 if ((it_[0] & 0xff) < 0xF0)
463 return 3;
464 utf8_output[3] = it_[3];
465 return 4;
466}

◆ get_utf8_string()

string UnicodeText::const_iterator::get_utf8_string ( ) const

Definition at line 468 of file unicodetext.cc.

468 {
469 return string(utf8_data(), utf8_length());
470}
const char * utf8_data() const
Definition: unicodetext.h:244

◆ operator*()

char32 UnicodeText::const_iterator::operator* ( ) const

Definition at line 420 of file unicodetext.cc.

420 {
421 // (We could call chartorune here, but that does some
422 // error-checking, and we're guaranteed that our data is valid
423 // UTF-8. Also, we expect this routine to be called very often. So
424 // for speed, we do the calculation ourselves.)
425
426 // Convert from UTF-8
427 unsigned char byte1 = it_[0];
428 if (byte1 < 0x80)
429 return byte1;
430
431 unsigned char byte2 = it_[1];
432 if (byte1 < 0xE0)
433 return ((byte1 & 0x1F) << 6) | (byte2 & 0x3F);
434
435 unsigned char byte3 = it_[2];
436 if (byte1 < 0xF0)
437 return ((byte1 & 0x0F) << 12) | ((byte2 & 0x3F) << 6) | (byte3 & 0x3F);
438
439 unsigned char byte4 = it_[3];
440 return ((byte1 & 0x07) << 18) | ((byte2 & 0x3F) << 12) | ((byte3 & 0x3F) << 6) | (byte4 & 0x3F);
441}

◆ operator++() [1/2]

UnicodeText::const_iterator & UnicodeText::const_iterator::operator++ ( )

Definition at line 443 of file unicodetext.cc.

443 {
444 it_ += UniLib::OneCharLen(it_);
445 return *this;
446}
int OneCharLen(const char *src)

◆ operator++() [2/2]

const_iterator UnicodeText::const_iterator::operator++ ( int  )
inline

Definition at line 201 of file unicodetext.h.

201 { // (iter++)
202 const_iterator result(*this);
203 ++*this;
204 return result;
205 }

◆ operator--() [1/2]

UnicodeText::const_iterator & UnicodeText::const_iterator::operator-- ( )

Definition at line 448 of file unicodetext.cc.

448 {
449 while (UniLib::IsTrailByte(*--it_))
450 ;
451 return *this;
452}
bool IsTrailByte(char x)

◆ operator--() [2/2]

const_iterator UnicodeText::const_iterator::operator-- ( int  )
inline

Definition at line 208 of file unicodetext.h.

208 { // (iter--)
209 const_iterator result(*this);
210 --*this;
211 return result;
212 }

◆ operator=()

UnicodeText::const_iterator & UnicodeText::const_iterator::operator= ( const const_iterator & other)

Definition at line 402 of file unicodetext.cc.

402 {
403 if (&other != this)
404 it_ = other.it_;
405 return *this;
406}

◆ utf8_data()

const char * UnicodeText::const_iterator::utf8_data ( ) const
inline

Definition at line 244 of file unicodetext.h.

244 {
245 return it_;
246 }

◆ utf8_length()

int UnicodeText::const_iterator::utf8_length ( ) const

Definition at line 472 of file unicodetext.cc.

472 {
473 if ((it_[0] & 0xff) < 0x80) {
474 return 1;
475 } else if ((it_[0] & 0xff) < 0xE0) {
476 return 2;
477 } else if ((it_[0] & 0xff) < 0xF0) {
478 return 3;
479 } else {
480 return 4;
481 }
482}

Friends And Related Function Documentation

◆ distance

difference_type distance ( const CI & first,
const CI & last
)
friend

Definition at line 44 of file unicodetext.cc.

45 {
46 return CodepointDistance(first.it_, last.it_);
47}
LIST last(LIST var_list)
Definition: oldlist.cpp:153

◆ operator!=

bool operator!= ( const CI & lhs,
const CI & rhs
)
friend

Definition at line 218 of file unicodetext.h.

218 {
219 return !(lhs == rhs);
220 }

◆ operator<

bool operator< ( const CI & lhs,
const CI & rhs
)
friend

Definition at line 416 of file unicodetext.cc.

416 {
417 return lhs.it_ < rhs.it_;
418}

◆ operator<=

bool operator<= ( const CI & lhs,
const CI & rhs
)
friend

Definition at line 225 of file unicodetext.h.

225 {
226 return !(rhs < lhs);
227 }

◆ operator==

bool operator== ( const CI & lhs,
const CI & rhs
)
friend

Definition at line 215 of file unicodetext.h.

215 {
216 return lhs.it_ == rhs.it_;
217 }

◆ operator>

bool operator> ( const CI & lhs,
const CI & rhs
)
friend

Definition at line 222 of file unicodetext.h.

222 {
223 return rhs < lhs;
224 }

◆ operator>=

bool operator>= ( const CI & lhs,
const CI & rhs
)
friend

Definition at line 228 of file unicodetext.h.

228 {
229 return !(lhs < rhs);
230 }

◆ UnicodeText

friend class UnicodeText
friend

Definition at line 251 of file unicodetext.h.

◆ UnicodeTextUtils

friend class UnicodeTextUtils
friend

Definition at line 252 of file unicodetext.h.

◆ UTF8StateTableProperty

friend class UTF8StateTableProperty
friend

Definition at line 253 of file unicodetext.h.


The documentation for this class was generated from the following files: