tesseract v5.3.3.20231005
tesseract::UNICHAR::const_iterator Class Reference

#include <unichar.h>

Public Member Functions

const_iterator & operator++ ()
 
int operator* () const
 
int get_utf8 (char *buf) const
 
int utf8_len () const
 
bool is_legal () const
 
const char * utf8_data () const
 

Friends

class UNICHAR
 
bool operator== (const CI &lhs, const CI &rhs)
 
bool operator!= (const CI &lhs, const CI &rhs)
 

Detailed Description

Definition at line 105 of file unichar.h.

Member Function Documentation

◆ get_utf8()

int tesseract::UNICHAR::const_iterator::get_utf8 ( char * buf ) const

Definition at line 183 of file unichar.cpp.

183 {
184 ASSERT_HOST(it_ != nullptr);
185 const int len = utf8_step(it_);
186 if (len == 0) {
187 tprintf("WARNING: Illegal UTF8 encountered\n");
188 utf8_output[0] = ' ';
189 return 1;
190 }
191 strncpy(utf8_output, it_, len);
192 return len;
193}
#define ASSERT_HOST(x)
Definition: errcode.h:54
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
static int utf8_step(const char *utf8_str)
Definition: unichar.cpp:143

◆ is_legal()

bool tesseract::UNICHAR::const_iterator::is_legal ( ) const

Definition at line 205 of file unichar.cpp.

205 {
206 return utf8_step(it_) > 0;
207}

◆ operator*()

int tesseract::UNICHAR::const_iterator::operator* ( ) const

Definition at line 172 of file unichar.cpp.

172 {
173 ASSERT_HOST(it_ != nullptr);
174 const int len = utf8_step(it_);
175 if (len == 0) {
176 tprintf("WARNING: Illegal UTF8 encountered\n");
177 return ' ';
178 }
179 UNICHAR uch(it_, len);
180 return uch.first_uni();
181}

◆ operator++()

UNICHAR::const_iterator & tesseract::UNICHAR::const_iterator::operator++ ( )

Definition at line 158 of file unichar.cpp.

158 {
159 ASSERT_HOST(it_ != nullptr);
160 int step = utf8_step(it_);
161 if (step == 0) {
162 tprintf("ERROR: Illegal UTF8 encountered.\n");
163 for (int i = 0; i < 5 && it_[i] != '\0'; ++i) {
164 tprintf("Index %d char = 0x%x\n", i, it_[i]);
165 }
166 step = 1;
167 }
168 it_ += step;
169 return *this;
170}

◆ utf8_data()

const char * tesseract::UNICHAR::const_iterator::utf8_data ( ) const
inline

Definition at line 133 of file unichar.h.

133 {
134 return it_;
135 }

◆ utf8_len()

int tesseract::UNICHAR::const_iterator::utf8_len ( ) const

Definition at line 195 of file unichar.cpp.

195 {
196 ASSERT_HOST(it_ != nullptr);
197 const int len = utf8_step(it_);
198 if (len == 0) {
199 tprintf("WARNING: Illegal UTF8 encountered\n");
200 return 1;
201 }
202 return len;
203}

Friends And Related Function Documentation

◆ operator!=

bool operator!= ( const CI & lhs,
const CI & rhs
)
friend

Definition at line 141 of file unichar.h.

141 {
142 return !(lhs == rhs);
143 }

◆ operator==

bool operator== ( const CI & lhs,
const CI & rhs
)
friend

Definition at line 138 of file unichar.h.

138 {
139 return lhs.it_ == rhs.it_;
140 }

◆ UNICHAR

friend class UNICHAR
friend

Definition at line 146 of file unichar.h.


The documentation for this class was generated from the following files: