37static const char kSeparator =
'|';
39static const char kNaturalFlag =
'n';
41static const int ISALPHA_MASK = 0x1;
42static const int ISLOWER_MASK = 0x2;
43static const int ISUPPER_MASK = 0x4;
44static const int ISDIGIT_MASK = 0x8;
45static const int ISPUNCTUATION_MASK = 0x10;
50static const int kMeanlineThreshold = 220;
72const char *UNICHARSET::kCleanupMaps[][2] = {
80 " ",
"Joined",
"|Broken|0|1"};
82const char *UNICHARSET::null_script =
"NULL";
84UNICHARSET::UNICHAR_PROPERTIES::UNICHAR_PROPERTIES() {
89void UNICHARSET::UNICHAR_PROPERTIES::Init() {
94 ispunctuation =
false;
108void UNICHARSET::UNICHAR_PROPERTIES::SetRangesOpen() {
110 max_bottom = UINT8_MAX;
122void UNICHARSET::UNICHAR_PROPERTIES::SetRangesEmpty() {
123 min_bottom = UINT8_MAX;
137bool UNICHARSET::UNICHAR_PROPERTIES::AnyRangeEmpty()
const {
138 return width == 0.0f || advance == 0.0f;
142void UNICHARSET::UNICHAR_PROPERTIES::ExpandRangesFrom(
143 const UNICHAR_PROPERTIES &src) {
144 UpdateRange(src.min_bottom, &min_bottom, &max_bottom);
145 UpdateRange(src.max_bottom, &min_bottom, &max_bottom);
148 if (src.width_sd > width_sd) {
150 width_sd = src.width_sd;
152 if (src.bearing_sd > bearing_sd) {
153 bearing = src.bearing;
154 bearing_sd = src.bearing_sd;
156 if (src.advance_sd > advance_sd) {
157 advance = src.advance;
158 advance_sd = src.advance_sd;
163void UNICHARSET::UNICHAR_PROPERTIES::CopyFrom(
const UNICHAR_PROPERTIES &src) {
165 CHAR_FRAGMENT *saved_fragment = fragment;
167 fragment = saved_fragment;
171 : ids(), script_table(nullptr), script_table_size_used(0) {
187 std::string cleaned =
188 old_style_included_ ? unichar_repr :
CleanupString(unichar_repr);
189 return ids.
contains(cleaned.data(), cleaned.size())
191 : INVALID_UNICHAR_ID;
197 std::string cleaned(unichar_repr, length);
198 if (!old_style_included_) {
201 return ids.
contains(cleaned.data(), cleaned.size())
203 : INVALID_UNICHAR_ID;
212 std::vector<UNICHAR_ID> encoding;
213 std::vector<char> lengths;
215 if (encoding.empty() || encoding[0] == INVALID_UNICHAR_ID) {
225 unsigned *first_bad_position)
const {
226 std::vector<UNICHAR_ID> encoding;
227 return encode_string(str,
true, &encoding,
nullptr, first_bad_position);
240 std::vector<UNICHAR_ID> *encoding,
241 std::vector<char> *lengths,
242 unsigned *encoded_length)
const {
243 std::vector<UNICHAR_ID> working_encoding;
244 std::vector<char> working_lengths;
245 std::vector<char> best_lengths;
247 auto str_length = strlen(str);
248 unsigned str_pos = 0;
250 while (str_pos < str_length) {
251 encode_string(str, str_pos, str_length, &working_encoding, &working_lengths,
252 &str_pos, encoding, &best_lengths);
253 if (str_pos < str_length) {
256 if (give_up_on_failure) {
263 encoding->push_back(INVALID_UNICHAR_ID);
264 best_lengths.push_back(
step);
266 working_encoding = *encoding;
267 working_lengths = best_lengths;
270 if (lengths !=
nullptr) {
271 *lengths = best_lengths;
273 if (encoded_length !=
nullptr) {
274 *encoded_length = str_pos;
280 if (
id == INVALID_UNICHAR_ID) {
281 return INVALID_UNICHAR;
284 return unichars[id].representation;
288 if (
id == INVALID_UNICHAR_ID) {
289 return INVALID_UNICHAR;
302 return unichars[id].representation;
308 std::string result = str;
312 for (
int i = 0; str[
i] !=
'\0';
i +=
step) {
313 char hex[
sizeof(int) * 2 + 1];
317 snprintf(hex,
sizeof(hex),
"%x", str[
i]);
320 snprintf(hex,
sizeof(hex),
"%x",
ch.first_uni());
332 if (
id == INVALID_UNICHAR_ID) {
365 unichars[unichar_id].properties.normed_ids.clear();
367 unichars[unichar_id].properties.normed_ids.push_back(
UNICHAR_SPACE);
368 }
else if (!
encode_string(unichars[unichar_id].properties.normed.c_str(),
369 true, &unichars[unichar_id].properties.normed_ids,
371 unichars[unichar_id].properties.normed_ids.clear();
372 unichars[unichar_id].properties.normed_ids.push_back(unichar_id);
382 return (uni >= 0xE000 && uni <= 0xF8FF);
387 for (
auto &uc : unichars) {
388 uc.properties.SetRangesEmpty();
397 for (
unsigned ch = start_index;
ch < unichars.size(); ++
ch) {
399 UNICHAR_PROPERTIES properties;
400 if (src.GetStrProperties(utf8, &properties)) {
404 const char *other_case = src.
id_to_unichar(properties.other_case);
408 properties.other_case =
ch;
410 const char *mirror_str = src.
id_to_unichar(properties.mirror);
414 properties.mirror =
ch;
416 unichars[
ch].properties.CopyFrom(properties);
426 for (
unsigned ch = 0;
ch < unichars.size(); ++
ch) {
428 UNICHAR_PROPERTIES properties;
429 if (src.GetStrProperties(utf8, &properties)) {
431 unichars[
ch].properties.ExpandRangesFrom(properties);
440 for (
unsigned ch = 0;
ch < src.unichars.size(); ++
ch) {
441 const UNICHAR_PROPERTIES &src_props = src.unichars[
ch].properties;
444 unichars[
ch].properties.ExpandRangesFrom(src_props);
455 int initial_used = unichars.size();
456 for (
unsigned ch = 0;
ch < src.unichars.size(); ++
ch) {
457 const UNICHAR_PROPERTIES &src_props = src.unichars[
ch].properties;
459 int id = unichars.size();
463 unichars[id].properties.ExpandRangesFrom(src_props);
466 unichars[id].properties.SetRangesEmpty();
477 int overlap = std::min(unichars[id1].properties.max_top,
478 unichars[id2].properties.max_top) -
479 std::max(unichars[id1].properties.min_top,
480 unichars[id2].properties.min_top);
493 std::vector<UNICHAR_ID> *encoding,
494 std::vector<char> *lengths,
495 unsigned *best_total_length,
496 std::vector<UNICHAR_ID> *best_encoding,
497 std::vector<char> *best_lengths)
const {
498 if (str_index >
static_cast<int>(*best_total_length)) {
500 *best_total_length = str_index;
501 *best_encoding = *encoding;
502 if (best_lengths !=
nullptr) {
503 *best_lengths = *lengths;
506 if (str_index == str_length) {
509 int encoding_index = encoding->size();
511 int length = ids.
minmatch(str + str_index);
512 if (length == 0 || str_index + length > str_length) {
516 if (ids.
contains(str + str_index, length)) {
519 encoding->push_back(
id);
520 lengths->push_back(length);
521 encode_string(str, str_index + length, str_length, encoding, lengths,
522 best_total_length, best_encoding, best_lengths);
523 if (
static_cast<int>(*best_total_length) == str_length) {
527 encoding->resize(encoding_index);
528 lengths->resize(encoding_index);
535 }
while (length <=
UNICHAR_LEN && str_index + length <= str_length);
543bool UNICHARSET::GetStrProperties(
const char *utf8_str,
544 UNICHAR_PROPERTIES *props)
const {
546 props->SetRangesEmpty();
547 int total_unicodes = 0;
548 std::vector<UNICHAR_ID> encoding;
549 if (!
encode_string(utf8_str,
true, &encoding,
nullptr,
nullptr)) {
552 for (
auto it : encoding) {
554 const UNICHAR_PROPERTIES &src_props = unichars[id].properties;
556 if (src_props.isalpha) {
557 props->isalpha =
true;
559 if (src_props.islower) {
560 props->islower =
true;
562 if (src_props.isupper) {
563 props->isupper =
true;
565 if (src_props.isdigit) {
566 props->isdigit =
true;
568 if (src_props.ispunctuation) {
569 props->ispunctuation =
true;
571 if (src_props.isngram) {
572 props->isngram =
true;
574 if (src_props.enabled) {
575 props->enabled =
true;
578 UpdateRange(src_props.min_bottom, &props->min_bottom, &props->max_bottom);
579 UpdateRange(src_props.max_bottom, &props->min_bottom, &props->max_bottom);
580 UpdateRange(src_props.min_top, &props->min_top, &props->max_top);
581 UpdateRange(src_props.max_top, &props->min_top, &props->max_top);
582 float bearing = props->advance + src_props.bearing;
583 if (total_unicodes == 0 || bearing < props->bearing) {
584 props->bearing = bearing;
585 props->bearing_sd = props->advance_sd + src_props.bearing_sd;
587 props->advance += src_props.advance;
588 props->advance_sd += src_props.advance_sd;
590 props->width = src_props.width;
591 props->width_sd = src_props.width_sd;
594 if (total_unicodes == 0) {
595 props->script_id = src_props.script_id;
596 props->other_case = src_props.other_case;
597 props->mirror = src_props.mirror;
598 props->direction = src_props.direction;
602 props->normed += src_props.normed;
605 if (total_unicodes > 1) {
607 props->width = props->advance - props->bearing;
608 props->width_sd = props->advance_sd + props->bearing_sd;
610 return total_unicodes > 0;
616 unsigned int properties = 0;
618 properties |= ISALPHA_MASK;
621 properties |= ISLOWER_MASK;
624 properties |= ISUPPER_MASK;
627 properties |= ISDIGIT_MASK;
630 properties |= ISPUNCTUATION_MASK;
657 old_style_included_ =
true;
659 std::string cleaned =
660 old_style_included_ ? unichar_repr :
CleanupString(unichar_repr);
661 if (!cleaned.empty() && !ids.
contains(cleaned.data(), cleaned.size())) {
662 const char *str = cleaned.c_str();
663 std::vector<int> encoding;
664 if (!old_style_included_ &&
668 unichars.emplace_back();
669 auto &u = unichars.back();
673 fprintf(stderr,
"Utf8 buffer too big, size>%d for %s\n",
UNICHAR_LEN,
677 u.representation[index++] = *str++;
678 }
while (*str !=
'\0');
679 u.representation[index] =
'\0';
680 this->
set_script(unichars.size() - 1, null_script);
686 u.properties.fragment = frag;
690 u.properties.enabled =
true;
691 ids.
insert(u.representation, unichars.size() - 1);
696 std::string cleaned =
697 old_style_included_ ? unichar_repr :
CleanupString(unichar_repr);
698 return ids.
contains(cleaned.data(), cleaned.size());
706 std::string cleaned(unichar_repr, length);
707 if (!old_style_included_) {
710 return ids.
contains(cleaned.data(), cleaned.size());
714 const char *
const unichar_repr)
const {
715 return strcmp(this->
id_to_unichar(unichar_id), unichar_repr) == 0;
719 const int kFileBufSize = 1024;
720 char buffer[kFileBufSize + 1];
721 snprintf(buffer, kFileBufSize,
"%zu\n", this->
size());
723 for (
unsigned id = 0;
id < this->
size(); ++id) {
724 int min_bottom, max_bottom, min_top, max_top;
726 float width, width_sd;
728 float bearing, bearing_sd;
730 float advance, advance_sd;
734 snprintf(buffer, kFileBufSize,
"%s %x %s %d\n",
"NULL", properties,
739 std::ostringstream stream;
740 stream.imbue(std::locale::classic());
741 stream << this->
id_to_unichar(
id) <<
' ' << properties <<
' '
742 << min_bottom <<
',' << max_bottom <<
',' << min_top <<
','
743 << max_top <<
',' << width <<
',' << width_sd <<
',' << bearing
744 <<
',' << bearing_sd <<
',' << advance <<
',' << advance_sd <<
' '
750 str += stream.str().c_str();
760 return ::fgets(dst, size, fp_);
769 using namespace std::placeholders;
770 std::function<
char *(
char *, int)> fgets_cb =
772 bool success = load_via_fgets(fgets_cb, skip_fragments);
777 using namespace std::placeholders;
778 std::function<
char *(
char *, int)> fgets_cb =
780 bool success = load_via_fgets(fgets_cb, skip_fragments);
784bool UNICHARSET::load_via_fgets(
785 const std::function<
char *(
char *,
int)> &fgets_cb,
bool skip_fragments) {
790 if (fgets_cb(buffer,
sizeof(buffer)) ==
nullptr ||
791 sscanf(buffer,
"%d", &unicharset_size) != 1) {
794 for (
UNICHAR_ID id = 0;
id < unicharset_size; ++id) {
796 unsigned int properties;
799 strncpy(script, null_script,
sizeof(script) - 1);
801 int max_bottom = UINT8_MAX;
803 int max_top = UINT8_MAX;
805 float width_sd = 0.0f;
806 float bearing = 0.0f;
807 float bearing_sd = 0.0f;
808 float advance = 0.0f;
809 float advance_sd = 0.0f;
815 if (fgets_cb(buffer,
sizeof(buffer)) ==
nullptr) {
820 std::istringstream stream(buffer);
821 stream.imbue(std::locale::classic());
824 stream >> std::setw(255) >> unichar >> std::hex >> properties >> std::dec;
827 fprintf(stderr,
"%s:%u failed\n", __FILE__, __LINE__);
830 auto position = stream.tellg();
831 stream.seekg(position);
832 char c1, c2, c3, c4, c5, c6, c7, c8, c9;
833 stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >>
834 max_top >> c4 >> width >> c5 >> width_sd >> c6 >> bearing >> c7 >>
835 bearing_sd >> c8 >> advance >> c9 >> advance_sd >> std::setw(63) >>
836 script >> other_case >> direction >> mirror >> std::setw(63) >> normed;
837 if (stream.fail() || c1 !=
',' || c2 !=
',' || c3 !=
',' || c4 !=
',' ||
838 c5 !=
',' || c6 !=
',' || c7 !=
',' || c8 !=
',' || c9 !=
',') {
840 stream.seekg(position);
841 stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >>
842 max_top >> c4 >> width >> c5 >> width_sd >> c6 >> bearing >> c7 >>
843 bearing_sd >> c8 >> advance >> c9 >> advance_sd >> std::setw(63) >>
844 script >> other_case >> direction >> mirror;
845 if (stream.fail() || c1 !=
',' || c2 !=
',' || c3 !=
',' || c4 !=
',' ||
846 c5 !=
',' || c6 !=
',' || c7 !=
',' || c8 !=
',' || c9 !=
',') {
848 stream.seekg(position);
849 stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >>
850 max_top >> std::setw(63) >> script >> other_case >> direction >>
852 if (stream.fail() || c1 !=
',' || c2 !=
',' || c3 !=
',') {
854 stream.seekg(position);
855 stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >>
856 max_top >> std::setw(63) >> script >> other_case;
857 if (stream.fail() || c1 !=
',' || c2 !=
',' || c3 !=
',') {
859 stream.seekg(position);
860 stream >> std::setw(63) >> script >> other_case;
863 stream.seekg(position);
864 stream >> std::setw(63) >> script;
872 CHAR_FRAGMENT *frag =
nullptr;
874 int num_pieces = frag->get_total();
877 if (num_pieces > 1) {
882 if (strcmp(unichar,
"NULL") == 0) {
895 this->unichars[id].properties.enabled =
true;
896 this->
set_top_bottom(
id, min_bottom, max_bottom, min_top, max_top);
901 this->
set_other_case(
id, (other_case < unicharset_size) ? other_case :
id);
902 this->
set_mirror(
id, (mirror < unicharset_size) ? mirror :
id);
903 this->
set_normed(
id, normed[0] !=
'\0' ? normed : unichar);
915 int net_case_alphas = 0;
916 int x_height_alphas = 0;
917 int cap_height_alphas = 0;
918 top_bottom_set_ =
false;
919 for (
unsigned id = 0;
id < unichars.size(); ++id) {
921 int max_bottom = UINT8_MAX;
923 int max_top = UINT8_MAX;
926 top_bottom_set_ =
true;
934 if (min_top < kMeanlineThreshold && max_top < kMeanlineThreshold) {
936 }
else if (min_top > kMeanlineThreshold && max_top > kMeanlineThreshold) {
943 script_has_upper_lower_ = net_case_alphas > 0;
944 script_has_xheight_ =
945 script_has_upper_lower_ ||
963 int *script_counts =
new int[script_table_size_used];
964 memset(script_counts, 0,
sizeof(*script_counts) * script_table_size_used);
965 for (
unsigned id = 0;
id < unichars.size(); ++id) {
971 for (
int s = 1; s < script_table_size_used; ++s) {
972 if (script_counts[s] > script_counts[default_sid_] && s != common_sid_) {
976 delete[] script_counts;
986 for (
unsigned id = 0;
id < unichars.size(); ++id) {
997 return rtl_count > ltr_count;
1005 const char *whitelist,
1006 const char *unblacklist) {
1007 bool def_enabled = whitelist ==
nullptr || whitelist[0] ==
'\0';
1009 for (
auto &uc : unichars) {
1010 uc.properties.enabled = def_enabled;
1014 std::vector<UNICHAR_ID> encoding;
1015 encode_string(whitelist,
false, &encoding,
nullptr,
nullptr);
1016 for (
auto it : encoding) {
1017 if (it != INVALID_UNICHAR_ID) {
1018 unichars[it].properties.enabled =
true;
1022 if (blacklist !=
nullptr && blacklist[0] !=
'\0') {
1024 std::vector<UNICHAR_ID> encoding;
1025 encode_string(blacklist,
false, &encoding,
nullptr,
nullptr);
1026 for (
auto it : encoding) {
1027 if (it != INVALID_UNICHAR_ID) {
1028 unichars[it].properties.enabled =
false;
1032 if (unblacklist !=
nullptr && unblacklist[0] !=
'\0') {
1034 std::vector<UNICHAR_ID> encoding;
1035 encode_string(unblacklist,
false, &encoding,
nullptr,
nullptr);
1036 for (
auto it : encoding) {
1037 if (it != INVALID_UNICHAR_ID) {
1038 unichars[it].properties.enabled =
true;
1051 for (
unsigned id = start_id;
id < unichars.size(); ++id) {
1054 for (
size_t u = 1; u < unicodes.size(); ++u) {
1055 if (unicodes[u - 1] == unicodes[u]) {
1064 for (
int i = 0;
i < script_table_size_used; ++
i) {
1065 if (strcmp(script, script_table[
i]) == 0) {
1069 if (script_table_size_reserved == 0) {
1070 script_table_size_reserved = 8;
1071 script_table =
new char *[script_table_size_reserved];
1072 }
else if (script_table_size_used >= script_table_size_reserved) {
1073 assert(script_table_size_used == script_table_size_reserved);
1074 script_table_size_reserved += script_table_size_reserved;
1075 char **new_script_table =
new char *[script_table_size_reserved];
1076 memcpy(new_script_table, script_table,
1077 script_table_size_used *
sizeof(
char *));
1078 delete[] script_table;
1079 script_table = new_script_table;
1081 script_table[script_table_size_used] =
new char[strlen(script) + 1];
1082 strcpy(script_table[script_table_size_used], script);
1083 return script_table_size_used++;
1091 return std::string(unichar);
1094 result += kSeparator;
1097 snprintf(buffer,
kMaxLen,
"%c%d%c%d", kSeparator, pos,
1098 natural ? kNaturalFlag : kSeparator, total);
1104 const char *ptr = string;
1105 int len = strlen(
string);
1106 if (len <
kMinLen || *ptr != kSeparator) {
1111 while ((ptr + step) < (
string + len) && *(ptr + step) != kSeparator) {
1118 strncpy(unichar, ptr, step);
1119 unichar[step] =
'\0';
1123 bool natural =
false;
1124 char *end_ptr =
nullptr;
1125 for (
int i = 0;
i < 2;
i++) {
1126 if (ptr >
string + len || *ptr != kSeparator) {
1127 if (
i == 1 && *ptr == kNaturalFlag) {
1134 i == 0 ? pos =
static_cast<int>(strtol(ptr, &end_ptr, 10))
1135 : total =
static_cast<int>(strtol(ptr, &end_ptr, 10));
1138 if (ptr !=
string + len) {
1142 fragment->set_all(unichar, pos, total, natural);
1147 for (
int i = 0;
i < script_table_size_used; ++
i) {
1148 if (strcmp(script_name, script_table[
i]) == 0) {
1160 result.reserve(length);
1162 while ((
ch = *utf8_str) !=
'\0' && length-- > 0) {
1165 while ((key = kCleanupMaps[key_index][0]) !=
nullptr) {
1167 while (key[match] !=
'\0' && key[match] == utf8_str[match]) {
1170 if (key[match] ==
'\0') {
1176 if (key ==
nullptr) {
1177 result.push_back(
ch);
1180 result.append(kCleanupMaps[key_index][1]);
@ SPECIAL_UNICHAR_CODES_COUNT
const double kMinCapHeightFraction
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
const double kMinXHeightFraction
static std::vector< char32 > UTF8ToUTF32(const char *utf8_str)
static int utf8_step(const char *utf8_str)
char * FGets(char *buffer, int buffer_size)
bool contains(const char *const unichar_repr, int length) const
void insert(const char *const unichar_repr, UNICHAR_ID id)
UNICHAR_ID unichar_to_id(const char *const unichar_repr, int length) const
int minmatch(const char *const unichar_repr) const
LocalFilePointer(FILE *stream)
char * fgets(char *dst, int size)
static CHAR_FRAGMENT * parse_from_string(const char *str)
static std::string to_string(const char *unichar, int pos, int total, bool natural)
std::string to_string() const
const char * get_unichar() const
bool get_isprivate(UNICHAR_ID unichar_id) const
bool encode_string(const char *str, bool give_up_on_failure, std::vector< UNICHAR_ID > *encoding, std::vector< char > *lengths, unsigned *encoded_length) const
void set_mirror(UNICHAR_ID unichar_id, UNICHAR_ID mirror)
void set_script(UNICHAR_ID unichar_id, const char *value)
const char * get_script_from_script_id(int id) const
int get_script(UNICHAR_ID unichar_id) const
void unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style)
static std::string debug_utf8_str(const char *str)
bool SizesDistinct(UNICHAR_ID id1, UNICHAR_ID id2) const
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
bool get_isalpha(UNICHAR_ID unichar_id) const
bool has_special_codes() const
Direction get_direction(UNICHAR_ID unichar_id) const
void set_isupper(UNICHAR_ID unichar_id, bool value)
void set_normed(UNICHAR_ID unichar_id, const char *normed)
unsigned int get_properties(UNICHAR_ID unichar_id) const
void get_advance_stats(UNICHAR_ID unichar_id, float *advance, float *advance_sd) const
bool get_islower(UNICHAR_ID unichar_id) const
void set_width_stats(UNICHAR_ID unichar_id, float width, float width_sd)
bool encodable_string(const char *str, unsigned *first_bad_position) const
void set_top_bottom(UNICHAR_ID unichar_id, int min_bottom, int max_bottom, int min_top, int max_top)
bool load_from_file(const char *const filename, bool skip_fragments)
void set_black_and_whitelist(const char *blacklist, const char *whitelist, const char *unblacklist)
void set_direction(UNICHAR_ID unichar_id, UNICHARSET::Direction value)
void ExpandRangesFromOther(const UNICHARSET &src)
void set_ispunctuation(UNICHAR_ID unichar_id, bool value)
int get_script_id_from_name(const char *script_name) const
bool AnyRepeatedUnicodes() const
const char * id_to_unichar(UNICHAR_ID id) const
bool contains_unichar(const char *const unichar_repr) const
bool major_right_to_left() const
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
void CopyFrom(const UNICHARSET &src)
void get_bearing_stats(UNICHAR_ID unichar_id, float *bearing, float *bearing_sd) const
UNICHAR_ID get_other_case(UNICHAR_ID unichar_id) const
char get_chartype(UNICHAR_ID unichar_id) const
void set_isalpha(UNICHAR_ID unichar_id, bool value)
UNICHAR_ID get_mirror(UNICHAR_ID unichar_id) const
int step(const char *str) const
int add_script(const char *script)
bool get_isupper(UNICHAR_ID unichar_id) const
void unichar_insert_backwards_compatible(const char *const unichar_repr)
void set_other_case(UNICHAR_ID unichar_id, UNICHAR_ID other_case)
bool get_isdigit(UNICHAR_ID unichar_id) const
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
static const char * kCustomLigatures[][2]
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
void set_islower(UNICHAR_ID unichar_id, bool value)
bool get_ispunctuation(UNICHAR_ID unichar_id) const
void AppendOtherUnicharset(const UNICHARSET &src)
bool save_to_string(std::string &str) const
static const char * kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT]
void set_advance_stats(UNICHAR_ID unichar_id, float advance, float advance_sd)
std::string debug_str(UNICHAR_ID id) const
void get_width_stats(UNICHAR_ID unichar_id, float *width, float *width_sd) const
void set_isdigit(UNICHAR_ID unichar_id, bool value)
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
static std::string CleanupString(const char *utf8_str)
void set_normed_ids(UNICHAR_ID unichar_id)
void PartialSetPropertiesFromOther(int start_index, const UNICHARSET &src)
void set_isngram(UNICHAR_ID unichar_id, bool value)
void set_bearing_stats(UNICHAR_ID unichar_id, float bearing, float bearing_sd)
const char * id_to_unichar_ext(UNICHAR_ID id) const