tesseract-ocr.github.io/5.3.3/a01607_source.html

#ifndef UTIL_UTF8_PUBLIC_UNICODETEXT_H_

#define UTIL_UTF8_PUBLIC_UNICODETEXT_H_


#include <stddef.h> // for NULL, ptrdiff_t

#include <iterator> // for bidirectional_iterator_tag, etc

#include <string>   // for string

#include <utility>  // for pair


#include "syntaxnet/base.h"


// ***************************** UnicodeText **************************

//

// A UnicodeText object is a container for a sequence of Unicode

// codepoint values. It has default and copy constructors and an assignment operator.

// Data can be appended to it from another UnicodeText, from

// iterators, or from a single codepoint.

//

// The internal representation of the text is UTF-8. Since UTF-8 is a

// variable-width format, UnicodeText does not provide random access

// to the text, and changes to the text are permitted only at the end.

//

// The UnicodeText class defines a const_iterator. The dereferencing

// operator (*) returns a codepoint (char32). The iterator is a

// bidirectional, read-only iterator. It becomes invalid if the text

// is changed.

//

// There are methods for appending and retrieving UTF-8 data directly.

// The 'utf8_data' method returns a const char* that contains the

// UTF-8-encoded version of the text; 'utf8_length' returns the number

// of bytes in the UTF-8 data. An iterator's 'get_utf8' method stores up to

// 4 bytes of UTF-8 data in a char array and returns the number of

// bytes that it stored.

//

// Codepoints are integers in the range [0, 0xD7FF] or [0xE000,

// 0x10FFFF], but UnicodeText has the additional restriction that it

// can contain only those characters that are valid for interchange on

// the Web. This excludes all of the control codes except for carriage

// return, line feed, and horizontal tab.  It also excludes

// non-characters, but codepoints that are in the Private Use regions

// are allowed, as are codepoints that are unassigned. (See the

// Unicode reference for details.) The function UniLib::IsInterchangeValid

// can be used as a test for this property.

//

// UnicodeTexts are safe. Every method that constructs or modifies a

// UnicodeText tests for interchange-validity, and will substitute a

// space for the invalid data. Such cases are reported via

// LOG(WARNING).

//

// MEMORY MANAGEMENT: copy, take ownership, or point to

//

// A UnicodeText is either an "owner", meaning that it owns the memory

// for the data buffer and will free it when the UnicodeText is

// destroyed, or it is an "alias", meaning that it does not.

//

// There are three methods for storing UTF-8 data in a UnicodeText:

//

// CopyUTF8(buffer, len) copies buffer.

//

// TakeOwnershipOfUTF8(buffer, size, capacity) takes ownership of buffer.

//

// PointToUTF8(buffer, size) creates an alias pointing to buffer.

//

// All three methods perform a validity check on the buffer. There are

// private, "unsafe" versions of these functions that bypass the

// validity check. They are used internally and by friend-functions

// that are handling UTF-8 data that has already been validated.

//

// The purpose of an alias is to avoid making an unnecessary copy of a

// UTF-8 buffer while still providing access to the Unicode values

// within that text through iterators or the fast scanners that are

// based on UTF-8 state tables. The lifetime of an alias must not

// exceed the lifetime of the buffer from which it was constructed.

//

// The semantics of an alias might be described as "copy on write or

// repair." The source data is never modified. If push_back() or

// append() is called on an alias, a copy of the data will be created,

// and the UnicodeText will become an owner. If clear() is called on

// an alias, it becomes an (empty) owner.

//

// The copy constructor and the assignment operator produce an owner.

// That is, after direct initialization ("UnicodeText x(y);") or copy

// initialization ("UnicodeText x = y;") x will be an owner, even if y

// was an alias. The assignment operator ("x = y;") also produces an

// owner unless x and y are the same object and y is an alias.

//

// Aliases should be used with care. If the source from which an alias

// was created is freed, or if the contents are changed, while the

// alias is still in use, fatal errors could result. But it can be

// quite useful to have a UnicodeText "window" through which to see a

// UTF-8 buffer without having to pay the price of making a copy.

//

// UTILITIES

//

// The interfaces in util/utf8/public/textutils.h provide higher-level

// utilities for dealing with UnicodeTexts, including routines for

// creating UnicodeTexts (both owners and aliases) from UTF-8 buffers or

// strings, creating strings from UnicodeTexts, normalizing text for

// efficient matching or display, and others.


class UnicodeText {

public:

  class const_iterator;


  typedef char32 value_type;


  // Constructors. These always produce owners.

  UnicodeText();                       // Create an empty text.

  UnicodeText(const UnicodeText &src); // copy constructor

  // Construct a substring (copies the data).

  UnicodeText(const const_iterator &first, const const_iterator &last);


  // Assignment operator. This copies the data and produces an owner

  // unless this == &src, e.g., "x = x;", which is a no-op.

  UnicodeText &operator=(const UnicodeText &src);


  // x.Copy(y) copies the data from y into x.

  UnicodeText &Copy(const UnicodeText &src);

  inline UnicodeText &assign(const UnicodeText &src) {

    return Copy(src);

  }


  // x.PointTo(y) changes x so that it points to y's data.

  // It does not copy y or take ownership of y's data.

  UnicodeText &PointTo(const UnicodeText &src);

  UnicodeText &PointTo(const const_iterator &first, const const_iterator &last);


  ~UnicodeText();


  void clear(); // Clear text.

  bool empty() const {

    return repr_.size_ == 0;

  } // Test if text is empty.


  // Add a codepoint to the end of the text.

  // If the codepoint is not interchange-valid, add a space instead

  // and log a warning.

  void push_back(char32 codepoint);


  // Generic appending operation.

  // iterator_traits<ForwardIterator>::value_type must be implicitly

  // convertible to char32. Typical uses of this method might include:

  //     char32 chars[] = {0x1, 0x2, ...};

  //     vector<char32> more_chars = ...;

  //     utext.append(chars, chars+arraysize(chars));

  //     utext.append(more_chars.begin(), more_chars.end());

  template <typename ForwardIterator>

  UnicodeText &append(ForwardIterator first, const ForwardIterator last) {

    while (first != last) {

      push_back(*first++);

    }

    return *this;

  }


  // A specialization of the generic append() method.

  UnicodeText &append(const const_iterator &first, const const_iterator &last);


  // An optimization of append(source.begin(), source.end()).

  UnicodeText &append(const UnicodeText &source);


  int size() const; // the number of Unicode characters (codepoints)


  friend bool operator==(const UnicodeText &lhs, const UnicodeText &rhs);

  friend bool operator!=(const UnicodeText &lhs, const UnicodeText &rhs);


  class const_iterator {

    typedef const_iterator CI;


  public:

    typedef std::bidirectional_iterator_tag iterator_category;

    typedef char32 value_type;

    typedef ptrdiff_t difference_type;

    typedef void pointer;           // (Not needed.)

    typedef const char32 reference; // (Needed for const_reverse_iterator)


    // Iterators are default-constructible.

    const_iterator();


    // It's safe to make multiple passes over a UnicodeText.

    const_iterator(const const_iterator &other);

    const_iterator &operator=(const const_iterator &other);


    char32 operator*() const; // Dereference


    const_iterator &operator++();    // Advance (++iter)

    const_iterator operator++(int) { // (iter++)

      const_iterator result(*this);

      ++*this;

      return result;

    }


    const_iterator &operator--();    // Retreat (--iter)

    const_iterator operator--(int) { // (iter--)

      const_iterator result(*this);

      --*this;

      return result;

    }


    // We love relational operators.

    friend bool operator==(const CI &lhs, const CI &rhs) {

      return lhs.it_ == rhs.it_;

    }

    friend bool operator!=(const CI &lhs, const CI &rhs) {

      return !(lhs == rhs);

    }

    friend bool operator<(const CI &lhs, const CI &rhs);

    friend bool operator>(const CI &lhs, const CI &rhs) {

      return rhs < lhs;

    }

    friend bool operator<=(const CI &lhs, const CI &rhs) {

      return !(rhs < lhs);

    }

    friend bool operator>=(const CI &lhs, const CI &rhs) {

      return !(lhs < rhs);

    }


    friend difference_type distance(const CI &first, const CI &last);


    // UTF-8-specific methods

    // Store the UTF-8 encoding of the current codepoint into buf,

    // which must be at least 4 bytes long. Return the number of

    // bytes written.

    int get_utf8(char *buf) const;

    // Return the UTF-8 character that the iterator points to.

    string get_utf8_string() const;

    // Return the byte length of the UTF-8 character the iterator points to.

    int utf8_length() const;

    // Return the iterator's pointer into the UTF-8 data.

    const char *utf8_data() const {

      return it_;

    }


    string DebugString() const;


  private:

    friend class UnicodeText;

    friend class UnicodeTextUtils;

    friend class UTF8StateTableProperty;

    explicit const_iterator(const char *it) : it_(it) {}


    const char *it_;

  };


  const_iterator begin() const;

  const_iterator end() const;


  class const_reverse_iterator : public std::reverse_iterator<const_iterator> {

  public:

    explicit const_reverse_iterator(const_iterator it)

        : std::reverse_iterator<const_iterator>(it) {}

    const char *utf8_data() const {

      const_iterator tmp_it = base();

      return (--tmp_it).utf8_data();

    }

    int get_utf8(char *buf) const {

      const_iterator tmp_it = base();

      return (--tmp_it).get_utf8(buf);

    }

    string get_utf8_string() const {

      const_iterator tmp_it = base();

      return (--tmp_it).get_utf8_string();

    }

    int utf8_length() const {

      const_iterator tmp_it = base();

      return (--tmp_it).utf8_length();

    }

  };

  const_reverse_iterator rbegin() const {

    return const_reverse_iterator(end());

  }

  const_reverse_iterator rend() const {

    return const_reverse_iterator(begin());

  }


  // Substring searching.  Returns the beginning of the first

  // occurrence of "look", or end() if not found.

  const_iterator find(const UnicodeText &look, const_iterator start_pos) const;

  // Equivalent to find(look, begin())

  const_iterator find(const UnicodeText &look) const;


  // Returns whether this contains the character U+FFFD.  This can

  // occur, for example, if the input to Encodings::Decode() had byte

  // sequences that were invalid in the source encoding.

  bool HasReplacementChar() const;


  // UTF-8-specific methods

  //

  // Return the data, length, and capacity of UTF-8-encoded version of

  // the text. Length and capacity are measured in bytes.

  const char *utf8_data() const {

    return repr_.data_;

  }

  int utf8_length() const {

    return repr_.size_;

  }

  int utf8_capacity() const {

    return repr_.capacity_;

  }


  // Return the UTF-8 data as a string.

  static string UTF8Substring(const const_iterator &first, const const_iterator &last);


  // There are three methods for initializing a UnicodeText from UTF-8

  // data. They vary in details of memory management. In all cases,

  // the data is tested for interchange-validity. If it is not

  // interchange-valid, a LOG(WARNING) is issued, and each

  // structurally invalid byte and each interchange-invalid codepoint

  // is replaced with a space.


  // x.CopyUTF8(buf, len) copies buf into x.

  UnicodeText &CopyUTF8(const char *utf8_buffer, int byte_length);


  // x.TakeOwnershipOfUTF8(buf, len, capacity). x takes ownership of

  // buf. buf is not copied.

  UnicodeText &TakeOwnershipOfUTF8(char *utf8_buffer, int byte_length, int byte_capacity);


  // x.PointToUTF8(buf,len) changes x so that it points to buf

  // ("becomes an alias"). It does not take ownership or copy buf.

  // If the buffer is not valid, this has the same effect as

  // CopyUTF8(utf8_buffer, byte_length).

  UnicodeText &PointToUTF8(const char *utf8_buffer, int byte_length);


  // Occasionally it is necessary to use functions that operate on the

  // pointer returned by utf8_data(). MakeIterator(p) provides a way

  // to get back to the UnicodeText level. It uses CHECK to ensure

  // that p is a pointer within this object's UTF-8 data, and that it

  // points to the beginning of a character.

  const_iterator MakeIterator(const char *p) const;


  string DebugString() const;


private:

  friend class const_iterator;

  friend class UnicodeTextUtils;


  class Repr { // A byte-string.

  public:

    char *data_;

    int size_;

    int capacity_;

    bool ours_; // Do we own data_?


    Repr() : data_(nullptr), size_(0), capacity_(0), ours_(true) {}

    ~Repr() {

      if (ours_)

        delete[] data_;

    }


    void clear();

    void reserve(int capacity);

    void resize(int size);


    void append(const char *bytes, int byte_length);

    void Copy(const char *data, int size);

    void TakeOwnershipOf(char *data, int size, int capacity);

    void PointTo(const char *data, int size);


    string DebugString() const;


  private:

    Repr &operator=(const Repr &);

    Repr(const Repr &other);

  };


  Repr repr_;


  // UTF-8-specific private methods.

  // These routines do not perform a validity check when compiled

  // in opt mode.

  // It is an error to call these methods with UTF-8 data that

  // is not interchange-valid.

  //

  UnicodeText &UnsafeCopyUTF8(const char *utf8_buffer, int byte_length);

  UnicodeText &UnsafeTakeOwnershipOfUTF8(char *utf8_buffer, int byte_length, int byte_capacity);

  UnicodeText &UnsafePointToUTF8(const char *utf8_buffer, int byte_length);

  UnicodeText &UnsafeAppendUTF8(const char *utf8_buffer, int byte_length);

  const_iterator UnsafeFind(const UnicodeText &look, const_iterator start_pos) const;

};


// Equality of two texts; declared here only.
bool operator==(const UnicodeText &lhs, const UnicodeText &rhs);

// Inequality is the negation of operator==.
inline bool operator!=(const UnicodeText &lhs, const UnicodeText &rhs) {
  const bool same_text = (lhs == rhs);
  return !same_text;
}


// UnicodeTextRange is a pair of iterators, useful for specifying text

// segments. If the iterators are ==, the segment is empty.

typedef pair<UnicodeText::const_iterator, UnicodeText::const_iterator> UnicodeTextRange;


inline bool UnicodeTextRangeIsEmpty(const UnicodeTextRange &r) {

  return r.first == r.second;

}


// *************************** Utilities *************************


// A factory function for creating a UnicodeText from a buffer of
// UTF-8 data. The new UnicodeText takes ownership of the buffer. (It
// is an "owner.")
//
// Each byte that is structurally invalid will be replaced with a
// space. Each codepoint that is interchange-invalid will also be
// replaced with a space, even if the codepoint was represented with a
// multibyte sequence in the UTF-8 data.
//
// Parameters: utf8_buffer is the UTF-8 data, byte_length its size in
// bytes, and byte_capacity the capacity of the buffer in bytes.
//
// NOTE(review): TakeOwnershipOfUTF8 returns a reference to the temporary
// UnicodeText, so the value returned here is produced by the copy
// constructor (a copy of the data) and the temporary that took ownership
// of utf8_buffer is then destroyed. Presumably that releases utf8_buffer
// with delete[] (see ~Repr), which would require the buffer to come from
// new[] -- confirm against the implementation.
//
inline UnicodeText MakeUnicodeTextAcceptingOwnership(char *utf8_buffer, int byte_length,

                                                     int byte_capacity) {

  return UnicodeText().TakeOwnershipOfUTF8(utf8_buffer, byte_length, byte_capacity);

}


// A factory function for creating a UnicodeText from a buffer of
// UTF-8 data. The new UnicodeText does not take ownership of the
// buffer. (It is an "alias.")
//
// Parameters: utf8_buffer is the UTF-8 data and byte_length its size in
// bytes.
//
// NOTE(review): PointToUTF8 returns a reference to the temporary
// UnicodeText, so the value returned here is produced by the copy
// constructor, which this header documents as always producing an owner.
// The result may therefore be an owning copy rather than an alias --
// confirm before relying on alias semantics.
//
inline UnicodeText MakeUnicodeTextWithoutAcceptingOwnership(const char *utf8_buffer,

                                                            int byte_length) {

  return UnicodeText().PointToUTF8(utf8_buffer, byte_length);

}


// Builds a UnicodeText from a UTF-8 buffer of len bytes.
//
// do_copy == true: the bytes are copied with CopyUTF8. The copy belongs
// to the resulting UnicodeText (an "owner") and is freed when that
// object is destroyed.
//
// do_copy == false: the bytes are referenced with PointToUTF8 and no
// copy is made. The resulting UnicodeText (an "alias") does NOT own the
// buffer, so it must not outlive the buffer. This matches
// MakeUnicodeTextWithoutAcceptingOwnership.
//
// Input that is not valid UTF-8 is copied in either case (as if do_copy
// were true) and coerced to valid UTF-8 by replacing each invalid byte
// with a space.
//
// NOTE(review): the class comments say the copy constructor always
// produces an owner, so the do_copy == false result stays an alias only
// if returning the local does not go through the copy constructor --
// confirm.
//
inline UnicodeText UTF8ToUnicodeText(const char *utf8_buf, int len, bool do_copy) {
  UnicodeText text;
  if (!do_copy) {
    text.PointToUTF8(utf8_buf, len);
  } else {
    text.CopyUTF8(utf8_buf, len);
  }
  return text;
}


// String overload: forwards the string's bytes and byte count to the
// (buffer, length, do_copy) overload.
inline UnicodeText UTF8ToUnicodeText(const string &utf_string, bool do_copy) {
  const char *const bytes = utf_string.data();
  const int byte_count = static_cast<int>(utf_string.size());
  return UTF8ToUnicodeText(bytes, byte_count, do_copy);
}


// Buffer overload without a do_copy argument: always requests a copy.
inline UnicodeText UTF8ToUnicodeText(const char *utf8_buf, int len) {
  const bool copy_bytes = true;
  return UTF8ToUnicodeText(utf8_buf, len, copy_bytes);
}

// String overload without a do_copy argument: always requests a copy.
inline UnicodeText UTF8ToUnicodeText(const string &utf8_string) {
  const bool copy_bytes = true;
  return UTF8ToUnicodeText(utf8_string, copy_bytes);
}


// Returns a string holding the UTF-8 encoding of every Unicode character
// in t, built from t's UTF-8 data pointer and byte length.
inline string UnicodeTextToUTF8(const UnicodeText &t) {
  const char *const bytes = t.utf8_data();
  const int byte_count = t.utf8_length();
  return string(bytes, byte_count);
}


// Declaration-only helper behind arraysize. Given a reference to an array
// of N elements, its return type is a reference to a char array of N
// elements. It is never defined because only the type is used.
template <typename T, size_t N>
auto ArraySizeHelper(T (&array)[N]) -> char (&)[N];

// arraysize(a) is the element count of array a: sizeof of the helper's
// char[N] result is N.
#define arraysize(array) (sizeof(ArraySizeHelper(array)))


// Debugging aid. Returns the codepoints of t as a string of integers
// written in uppercase hex (%X), each integer followed by a space,
// e.g. "61 62 6A 3005 ". Only the declaration appears in this header.
string CodepointString(const UnicodeText &t);


#endif // UTIL_UTF8_PUBLIC_UNICODETEXT_H_

char32
signed int char32
Definition: pango_font_info.h:36

ArraySizeHelper
char(& ArraySizeHelper(T(&array)[N]))[N]

UTF8ToUnicodeText
UnicodeText UTF8ToUnicodeText(const char *utf8_buf, int len, bool do_copy)
Definition: unicodetext.h:451

UnicodeTextRange
pair< UnicodeText::const_iterator, UnicodeText::const_iterator > UnicodeTextRange
Definition: unicodetext.h:403

CodepointString
string CodepointString(const UnicodeText &t)

MakeUnicodeTextAcceptingOwnership
UnicodeText MakeUnicodeTextAcceptingOwnership(char *utf8_buffer, int byte_length, int byte_capacity)
Definition: unicodetext.h:420

MakeUnicodeTextWithoutAcceptingOwnership
UnicodeText MakeUnicodeTextWithoutAcceptingOwnership(const char *utf8_buffer, int byte_length)
Definition: unicodetext.h:429

operator==
bool operator==(const UnicodeText &lhs, const UnicodeText &rhs)
Definition: unicodetext.cc:377

UnicodeTextToUTF8
string UnicodeTextToUTF8(const UnicodeText &t)
Definition: unicodetext.h:474

UnicodeTextRangeIsEmpty
bool UnicodeTextRangeIsEmpty(const UnicodeTextRange &r)
Definition: unicodetext.h:405

operator!=
bool operator!=(const UnicodeText &lhs, const UnicodeText &rhs)
Definition: unicodetext.h:397

p
const char * p
Definition: gmock-matchers_test.cc:4030

base.h

std
STL namespace.

tesseract::last
LIST last(LIST var_list)
Definition: oldlist.cpp:153

UnicodeText
Definition: unicodetext.h:116

UnicodeText::UTF8Substring
static string UTF8Substring(const const_iterator &first, const const_iterator &last)
Definition: unicodetext.cc:202

UnicodeText::~UnicodeText
~UnicodeText()
Definition: unicodetext.cc:355

UnicodeText::push_back
void push_back(char32 codepoint)
Definition: unicodetext.cc:357

UnicodeText::MakeIterator
const_iterator MakeIterator(const char *p) const
Definition: unicodetext.cc:484

UnicodeText::CopyUTF8
UnicodeText & CopyUTF8(const char *utf8_buffer, int byte_length)
Definition: unicodetext.cc:221

UnicodeText::find
const_iterator find(const UnicodeText &look, const_iterator start_pos) const
Definition: unicodetext.cc:301

UnicodeText::value_type
char32 value_type
Definition: unicodetext.h:120

UnicodeText::assign
UnicodeText & assign(const UnicodeText &src)
Definition: unicodetext.h:134

UnicodeText::append
UnicodeText & append(ForwardIterator first, const ForwardIterator last)
Definition: unicodetext.h:163

UnicodeText::UnicodeTextUtils
friend class UnicodeTextUtils
Definition: unicodetext.h:349

UnicodeText::Copy
UnicodeText & Copy(const UnicodeText &src)
Definition: unicodetext.cc:216

UnicodeText::rend
const_reverse_iterator rend() const
Definition: unicodetext.h:286

UnicodeText::PointTo
UnicodeText & PointTo(const UnicodeText &src)
Definition: unicodetext.cc:270

UnicodeText::empty
bool empty() const
Definition: unicodetext.h:146

UnicodeText::DebugString
string DebugString() const

UnicodeText::utf8_capacity
int utf8_capacity() const
Definition: unicodetext.h:311

UnicodeText::operator==
friend bool operator==(const UnicodeText &lhs, const UnicodeText &rhs)
Definition: unicodetext.cc:377

UnicodeText::end
const_iterator end() const
Definition: unicodetext.cc:412

UnicodeText::const_iterator
friend class const_iterator
Definition: unicodetext.h:348

UnicodeText::operator=
UnicodeText & operator=(const UnicodeText &src)
Definition: unicodetext.cc:209

UnicodeText::PointToUTF8
UnicodeText & PointToUTF8(const char *utf8_buffer, int byte_length)
Definition: unicodetext.cc:254

UnicodeText::TakeOwnershipOfUTF8
UnicodeText & TakeOwnershipOfUTF8(char *utf8_buffer, int byte_length, int byte_capacity)
Definition: unicodetext.cc:237

UnicodeText::utf8_length
int utf8_length() const
Definition: unicodetext.h:308

UnicodeText::size
int size() const
Definition: unicodetext.cc:373

UnicodeText::HasReplacementChar
bool HasReplacementChar() const

UnicodeText::begin
const_iterator begin() const
Definition: unicodetext.cc:408

UnicodeText::operator!=
friend bool operator!=(const UnicodeText &lhs, const UnicodeText &rhs)
Definition: unicodetext.h:397

UnicodeText::utf8_data
const char * utf8_data() const
Definition: unicodetext.h:305

UnicodeText::rbegin
const_reverse_iterator rbegin() const
Definition: unicodetext.h:283

UnicodeText::UnicodeText
UnicodeText()
Definition: unicodetext.cc:188

UnicodeText::clear
void clear()
Definition: unicodetext.cc:350

UnicodeText::const_iterator
Definition: unicodetext.h:181

UnicodeText::const_iterator::operator>
friend bool operator>(const CI &lhs, const CI &rhs)
Definition: unicodetext.h:222

UnicodeText::const_iterator::operator<=
friend bool operator<=(const CI &lhs, const CI &rhs)
Definition: unicodetext.h:225

UnicodeText::const_iterator::DebugString
string DebugString() const

UnicodeText::const_iterator::const_iterator
const_iterator()
Definition: unicodetext.cc:398

UnicodeText::const_iterator::operator++
const_iterator operator++(int)
Definition: unicodetext.h:201

UnicodeText::const_iterator::UTF8StateTableProperty
friend class UTF8StateTableProperty
Definition: unicodetext.h:253

UnicodeText::const_iterator::operator--
const_iterator operator--(int)
Definition: unicodetext.h:208

UnicodeText::const_iterator::distance
friend difference_type distance(const CI &first, const CI &last)
Definition: unicodetext.cc:44

UnicodeText::const_iterator::UnicodeTextUtils
friend class UnicodeTextUtils
Definition: unicodetext.h:252

UnicodeText::const_iterator::iterator_category
std::bidirectional_iterator_tag iterator_category
Definition: unicodetext.h:185

UnicodeText::const_iterator::operator++
const_iterator & operator++()
Definition: unicodetext.cc:443

UnicodeText::const_iterator::operator--
const_iterator & operator--()
Definition: unicodetext.cc:448

UnicodeText::const_iterator::reference
const char32 reference
Definition: unicodetext.h:189

UnicodeText::const_iterator::difference_type
ptrdiff_t difference_type
Definition: unicodetext.h:187

UnicodeText::const_iterator::get_utf8
int get_utf8(char *buf) const
Definition: unicodetext.cc:454

UnicodeText::const_iterator::operator=
const_iterator & operator=(const const_iterator &other)
Definition: unicodetext.cc:402

UnicodeText::const_iterator::operator>=
friend bool operator>=(const CI &lhs, const CI &rhs)
Definition: unicodetext.h:228

UnicodeText::const_iterator::pointer
void pointer
Definition: unicodetext.h:188

UnicodeText::const_iterator::operator*
char32 operator*() const
Definition: unicodetext.cc:420

UnicodeText::const_iterator::utf8_length
int utf8_length() const
Definition: unicodetext.cc:472

UnicodeText::const_iterator::get_utf8_string
string get_utf8_string() const
Definition: unicodetext.cc:468

UnicodeText::const_iterator::value_type
char32 value_type
Definition: unicodetext.h:186

UnicodeText::const_iterator::utf8_data
const char * utf8_data() const
Definition: unicodetext.h:244

UnicodeText::const_iterator::operator!=
friend bool operator!=(const CI &lhs, const CI &rhs)
Definition: unicodetext.h:218

UnicodeText::const_iterator::operator<
friend bool operator<(const CI &lhs, const CI &rhs)
Definition: unicodetext.cc:416

UnicodeText::const_iterator::operator==
friend bool operator==(const CI &lhs, const CI &rhs)
Definition: unicodetext.h:215

UnicodeText::const_reverse_iterator
Definition: unicodetext.h:262

UnicodeText::const_reverse_iterator::utf8_data
const char * utf8_data() const
Definition: unicodetext.h:266

UnicodeText::const_reverse_iterator::const_reverse_iterator
const_reverse_iterator(const_iterator it)
Definition: unicodetext.h:264

UnicodeText::const_reverse_iterator::get_utf8_string
string get_utf8_string() const
Definition: unicodetext.h:274

UnicodeText::const_reverse_iterator::utf8_length
int utf8_length() const
Definition: unicodetext.h:278

UnicodeText::const_reverse_iterator::get_utf8
int get_utf8(char *buf) const
Definition: unicodetext.h:270