tesseract v5.3.3.20231005
unicodetext.h File Reference
#include <stddef.h>
#include <iterator>
#include <string>
#include <utility>
#include "syntaxnet/base.h"

Go to the source code of this file.

Classes

class  UnicodeText
 
class  UnicodeText::const_iterator
 
class  UnicodeText::const_reverse_iterator
 

Macros

#define arraysize(array)   (sizeof(ArraySizeHelper(array)))
 

Typedefs

typedef pair< UnicodeText::const_iterator, UnicodeText::const_iterator > UnicodeTextRange
 

Functions

bool operator== (const UnicodeText &lhs, const UnicodeText &rhs)
 
bool operator!= (const UnicodeText &lhs, const UnicodeText &rhs)
 
bool UnicodeTextRangeIsEmpty (const UnicodeTextRange &r)
 
UnicodeText MakeUnicodeTextAcceptingOwnership (char *utf8_buffer, int byte_length, int byte_capacity)
 
UnicodeText MakeUnicodeTextWithoutAcceptingOwnership (const char *utf8_buffer, int byte_length)
 
UnicodeText UTF8ToUnicodeText (const char *utf8_buf, int len, bool do_copy)
 
UnicodeText UTF8ToUnicodeText (const string &utf_string, bool do_copy)
 
UnicodeText UTF8ToUnicodeText (const char *utf8_buf, int len)
 
UnicodeText UTF8ToUnicodeText (const string &utf8_string)
 
string UnicodeTextToUTF8 (const UnicodeText &t)
 
template<typename T , size_t N>
char(& ArraySizeHelper (T(&array)[N]))[N]
 
string CodepointString (const UnicodeText &t)
 

Macro Definition Documentation

◆ arraysize

#define arraysize (   array)    (sizeof(ArraySizeHelper(array)))

Definition at line 483 of file unicodetext.h.

Typedef Documentation

◆ UnicodeTextRange

typedef pair< UnicodeText::const_iterator, UnicodeText::const_iterator > UnicodeTextRange

Function Documentation

◆ ArraySizeHelper()

template<typename T , size_t N>
char(& ArraySizeHelper ( T(&array)[N] ))[N]

◆ CodepointString()

string CodepointString ( const UnicodeText &  t)

◆ MakeUnicodeTextAcceptingOwnership()

UnicodeText MakeUnicodeTextAcceptingOwnership ( char *  utf8_buffer,
int  byte_length,
int  byte_capacity 
)
inline

Definition at line 420 of file unicodetext.h.

421 {
422 return UnicodeText().TakeOwnershipOfUTF8(utf8_buffer, byte_length, byte_capacity);
423}
UnicodeText & TakeOwnershipOfUTF8(char *utf8_buffer, int byte_length, int byte_capacity)
Definition: unicodetext.cc:237

◆ MakeUnicodeTextWithoutAcceptingOwnership()

UnicodeText MakeUnicodeTextWithoutAcceptingOwnership ( const char *  utf8_buffer,
int  byte_length 
)
inline

Definition at line 429 of file unicodetext.h.

430 {
431 return UnicodeText().PointToUTF8(utf8_buffer, byte_length);
432}
UnicodeText & PointToUTF8(const char *utf8_buffer, int byte_length)
Definition: unicodetext.cc:254

◆ operator!=()

bool operator!= ( const UnicodeText &  lhs,
const UnicodeText &  rhs 
)
inline

Definition at line 397 of file unicodetext.h.

397 {
398 return !(lhs == rhs);
399}

◆ operator==()

bool operator== ( const UnicodeText &  lhs,
const UnicodeText &  rhs 
)

Definition at line 377 of file unicodetext.cc.

377 {
378 if (&lhs == &rhs)
379 return true;
380 if (lhs.repr_.size_ != rhs.repr_.size_)
381 return false;
382 return memcmp(lhs.repr_.data_, rhs.repr_.data_, lhs.repr_.size_) == 0;
383}

◆ UnicodeTextRangeIsEmpty()

bool UnicodeTextRangeIsEmpty ( const UnicodeTextRange &  r)
inline

Definition at line 405 of file unicodetext.h.

405 {
406 return r.first == r.second;
407}

◆ UnicodeTextToUTF8()

string UnicodeTextToUTF8 ( const UnicodeText &  t)
inline

Definition at line 474 of file unicodetext.h.

474 {
475 return string(t.utf8_data(), t.utf8_length());
476}
int utf8_length() const
Definition: unicodetext.h:308
const char * utf8_data() const
Definition: unicodetext.h:305

◆ UTF8ToUnicodeText() [1/4]

UnicodeText UTF8ToUnicodeText ( const char *  utf8_buf,
int  len 
)
inline

Definition at line 465 of file unicodetext.h.

465 {
466 return UTF8ToUnicodeText(utf8_buf, len, true);
467}
UnicodeText UTF8ToUnicodeText(const char *utf8_buf, int len, bool do_copy)
Definition: unicodetext.h:451

◆ UTF8ToUnicodeText() [2/4]

UnicodeText UTF8ToUnicodeText ( const char *  utf8_buf,
int  len,
bool  do_copy 
)
inline

Definition at line 451 of file unicodetext.h.

451 {
452 UnicodeText t;
453 if (do_copy) {
454 t.CopyUTF8(utf8_buf, len);
455 } else {
456 t.PointToUTF8(utf8_buf, len);
457 }
458 return t;
459}
UnicodeText & CopyUTF8(const char *utf8_buffer, int byte_length)
Definition: unicodetext.cc:221

◆ UTF8ToUnicodeText() [3/4]

UnicodeText UTF8ToUnicodeText ( const string &  utf8_string)
inline

Definition at line 468 of file unicodetext.h.

468 {
469 return UTF8ToUnicodeText(utf8_string, true);
470}

◆ UTF8ToUnicodeText() [4/4]

UnicodeText UTF8ToUnicodeText ( const string &  utf_string,
bool  do_copy 
)
inline

Definition at line 461 of file unicodetext.h.

461 {
462 return UTF8ToUnicodeText(utf_string.data(), utf_string.size(), do_copy);
463}