Functions
bool	IsInterchangeValid (char32 c)

int	SpanInterchangeValid (const char *begin, int byte_length)

int	SpanInterchangeValid (const std::string &src)

bool	IsInterchangeValid (const char *src, int byte_length)

bool	IsInterchangeValid (const std::string &src)

bool	IsValidCodepoint (char32 c)

int	OneCharLen (const char *src)

bool	IsTrailByte (char x)

Detailed Description

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

 http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Function Documentation

◆ IsInterchangeValid() [1/3]

bool UniLib::IsInterchangeValid ( char32 c )

Definition at line 33 of file unilib.cc.

                                  {
  // Returns false when c falls in any of the sets below, true otherwise:
  //   0x00-0x08, 0x0B, 0x0E-0x1F : C0 control codes; 0x09, 0x0A, 0x0C and
  //                                0x0D are deliberately left out of the
  //                                rejected set and therefore accepted
  //   0x7F-0x9F                  : DEL and the C1 control codes
  //   0xD800-0xDFFF              : the UTF-16 surrogate range
  //   0xFDD0-0xFDEF              : the Unicode noncharacter block
  //   (c & 0xFFFE) == 0xFFFE     : low 16 bits are 0xFFFE or 0xFFFF, whatever
  //                                the higher bits are
  // NOTE(review): this expression has no upper-bound test (e.g. against
  // 0x10FFFF), so range checking has to happen elsewhere if it is needed.
  return !((c >= 0x00 && c <= 0x08) || c == 0x0B || (c >= 0x0E && c <= 0x1F) ||
           (c >= 0x7F && c <= 0x9F) || (c >= 0xD800 && c <= 0xDFFF) ||
           (c >= 0xFDD0 && c <= 0xFDEF) || (c & 0xFFFE) == 0xFFFE);
}

◆ IsInterchangeValid() [2/3]

bool UniLib::IsInterchangeValid	(	const char *	src,
		int	byte_length
	)

inline

Definition at line 54 of file unilib.h.

                                                                 {
  // The buffer is valid as a whole exactly when the valid prefix measured by
  // SpanInterchangeValid() covers all byte_length bytes.
  return (byte_length == SpanInterchangeValid(src, byte_length));
}

◆ IsInterchangeValid() [3/3]

bool UniLib::IsInterchangeValid ( const std::string & src )

inline

Definition at line 57 of file unilib.h.

                                                     {
  // Convenience overload: forwards the string's buffer and size to the
  // (pointer, length) overload. src.size() is converted to that overload's
  // int byte_length parameter.
  return IsInterchangeValid(src.data(), src.size());
}

◆ IsTrailByte()

bool UniLib::IsTrailByte ( char x )

inline

Definition at line 58 of file unilib_utf8_utils.h.

                                {
  // Reports whether x is a UTF-8 trail (continuation) byte.
  // Reference form of the test:
  // return (x & 0xC0) == 0x80;
  // Since trail bytes are always in [0x80, 0xBF], we can optimize:
  // viewed as a signed char on a two's-complement target, 0x80..0xBF become
  // -128..-65, which are exactly the values below -0x40 (-64); 0xC0..0xFF
  // become -64..-1 and 0x00..0x7F stay non-negative, so one signed comparison
  // is enough.
  return static_cast<signed char>(x) < -0x40;
}

◆ IsValidCodepoint()

bool UniLib::IsValidCodepoint ( char32 c )

inline

Definition at line 31 of file unilib_utf8_utils.h.

                                       {
  // Accepts the two ranges [0, 0xD7FF] and [0xE000, 0x10FFFF]; the gap
  // 0xD800-0xDFFF between them (the surrogate range) is rejected, as is
  // anything above 0x10FFFF.
  // The uint32_t cast in the first test makes a negative c compare as a very
  // large value and fail it (assumes char32 may be a signed type — its
  // declaration is not visible here; TODO confirm).
  return (static_cast<uint32_t>(c) < 0xD800) || (c >= 0xE000 && c <= 0x10FFFF);
}

◆ OneCharLen()

int UniLib::OneCharLen ( const char * src )

inline

Definition at line 53 of file unilib_utf8_utils.h.

                                       {
  // Looks up a length from the high nibble of the first byte only; no other
  // byte is read and nothing is validated.
  // (*src & 0xFF) >> 4 yields 0..15, which indexes the 16-entry table encoded
  // in the string literal:
  //   nibble 0x0-0xB -> 1   (this includes 0x8-0xB, i.e. bytes 0x80-0xBF)
  //   nibble 0xC-0xD -> 2
  //   nibble 0xE     -> 3
  //   nibble 0xF     -> 4
  return "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\3\4"[(*src & 0xFF) >> 4];
}

◆ SpanInterchangeValid() [1/2]

int UniLib::SpanInterchangeValid	(	const char *	begin,
		int	byte_length
	)

Definition at line 39 of file unilib.cc.

                                                             {
  // Returns the number of leading bytes of [begin, begin + byte_length) that
  // decode into characters accepted by IsInterchangeValid(). The scan stops at
  // the first decoding error or rejected character, so the result is the byte
  // offset of that character (or byte_length if none is found).
  char32 rune;
  const char *p = begin;
  const char *end = begin + byte_length;
  while (p < end) {
    // Decode one character from the remaining bytes; the decoder reports how
    // many bytes it used.
    int bytes_consumed = charntorune(&rune, p, end - p);
    // We want to accept Runeerror == U+FFFD as a valid char, but it is used
    // by charntorune to indicate error. Luckily, the real codepoint is size 3
    // while errors return bytes_consumed <= 1.
    if ((rune == Runeerror && bytes_consumed <= 1) || !IsInterchangeValid(rune)) {
      break; // Found the first invalid position; p is left pointing at it.
    }
    p += bytes_consumed;
  }
  // Length of the valid prefix, in bytes.
  return p - begin;
}

◆ SpanInterchangeValid() [2/2]

int UniLib::SpanInterchangeValid ( const std::string & src )

inline

Definition at line 46 of file unilib.h.

                                                      {
  // Convenience overload: forwards the string's buffer and size to the
  // (pointer, length) overload and returns its result unchanged.
  return SpanInterchangeValid(src.data(), src.size());
}

Functions

Detailed Description

Function Documentation

◆ IsInterchangeValid() [1/3]

◆ IsInterchangeValid() [2/3]

◆ IsInterchangeValid() [3/3]

◆ IsTrailByte()

◆ IsValidCodepoint()

◆ OneCharLen()

◆ SpanInterchangeValid() [1/2]

◆ SpanInterchangeValid() [2/2]