#include <unicodetext.h>
Definition at line 181 of file unicodetext.h.
◆ difference_type
◆ iterator_category
◆ pointer
◆ reference
◆ value_type
◆ const_iterator() [1/2]
UnicodeText::const_iterator::const_iterator()
◆ const_iterator() [2/2]
UnicodeText::const_iterator::const_iterator(const const_iterator & other)
◆ DebugString()
string UnicodeText::const_iterator::DebugString() const
◆ get_utf8()
int UnicodeText::const_iterator::get_utf8(char * buf) const
Definition at line 454 of file unicodetext.cc.
454 {
455 utf8_output[0] = it_[0];
456 if ((it_[0] & 0xff) < 0x80)
457 return 1;
458 utf8_output[1] = it_[1];
459 if ((it_[0] & 0xff) < 0xE0)
460 return 2;
461 utf8_output[2] = it_[2];
462 if ((it_[0] & 0xff) < 0xF0)
463 return 3;
464 utf8_output[3] = it_[3];
465 return 4;
466}
◆ get_utf8_string()
string UnicodeText::const_iterator::get_utf8_string() const
Definition at line 468 of file unicodetext.cc.
468 {
470}
const char * utf8_data() const
◆ operator*()
char32 UnicodeText::const_iterator::operator*() const
Definition at line 420 of file unicodetext.cc.
420 {
421
422
423
424
425
426
427 unsigned char byte1 = it_[0];
428 if (byte1 < 0x80)
429 return byte1;
430
431 unsigned char byte2 = it_[1];
432 if (byte1 < 0xE0)
433 return ((byte1 & 0x1F) << 6) | (byte2 & 0x3F);
434
435 unsigned char byte3 = it_[2];
436 if (byte1 < 0xF0)
437 return ((byte1 & 0x0F) << 12) | ((byte2 & 0x3F) << 6) | (byte3 & 0x3F);
438
439 unsigned char byte4 = it_[3];
440 return ((byte1 & 0x07) << 18) | ((byte2 & 0x3F) << 12) | ((byte3 & 0x3F) << 6) | (byte4 & 0x3F);
441}
◆ operator++() [1/2]
Definition at line 443 of file unicodetext.cc.
443 {
445 return *this;
446}
int OneCharLen(const char *src)
◆ operator++() [2/2]
Definition at line 201 of file unicodetext.h.
201 {
203 ++*this;
204 return result;
205 }
◆ operator--() [1/2]
◆ operator--() [2/2]
Definition at line 208 of file unicodetext.h.
208 {
210 --*this;
211 return result;
212 }
◆ operator=()
Definition at line 402 of file unicodetext.cc.
402 {
403 if (&other != this)
404 it_ = other.it_;
405 return *this;
406}
◆ utf8_data()
const char * UnicodeText::const_iterator::utf8_data() const [inline]
◆ utf8_length()
int UnicodeText::const_iterator::utf8_length() const
Definition at line 472 of file unicodetext.cc.
472 {
473 if ((it_[0] & 0xff) < 0x80) {
474 return 1;
475 } else if ((it_[0] & 0xff) < 0xE0) {
476 return 2;
477 } else if ((it_[0] & 0xff) < 0xF0) {
478 return 3;
479 } else {
480 return 4;
481 }
482}
◆ distance
Definition at line 44 of file unicodetext.cc.
45 {
46 return CodepointDistance(first.it_, last.it_);
47}
◆ operator!=
bool operator!=(const CI & lhs, const CI & rhs) [friend]
Definition at line 218 of file unicodetext.h.
218 {
219 return !(lhs == rhs);
220 }
◆ operator<
bool operator<(const CI & lhs, const CI & rhs) [friend]
Definition at line 416 of file unicodetext.cc.
416 {
417 return lhs.it_ < rhs.it_;
418}
◆ operator<=
bool operator<=(const CI & lhs, const CI & rhs) [friend]
Definition at line 225 of file unicodetext.h.
225 {
226 return !(rhs < lhs);
227 }
◆ operator==
bool operator==(const CI & lhs, const CI & rhs) [friend]
Definition at line 215 of file unicodetext.h.
215 {
216 return lhs.it_ == rhs.it_;
217 }
◆ operator>
bool operator>(const CI & lhs, const CI & rhs) [friend]
◆ operator>=
bool operator>=(const CI & lhs, const CI & rhs) [friend]
Definition at line 228 of file unicodetext.h.
228 {
229 return !(lhs < rhs);
230 }
◆ UnicodeText
◆ UnicodeTextUtils
friend class UnicodeTextUtils [friend]
◆ UTF8StateTableProperty
friend class UTF8StateTableProperty [friend]
The documentation for this class was generated from the following files:
- /media/home/debian/src/github/tesseract-ocr/tesseract/unittest/util/utf8/unicodetext.h
- /media/home/debian/src/github/tesseract-ocr/tesseract/unittest/util/utf8/unicodetext.cc