// Delimiter character of the textual fragment encoding handled later in this
// file: it is appended to the result string, emitted by the "%c%d%c%d"
// snprintf, and required as the first character when such a string is parsed.
37 static const char kSeparator =
'|';
// Emitted in place of the second kSeparator when the `natural` flag is true,
// and accepted at that same position (second field) by the parsing loop.
39 static const char kNaturalFlag =
'n';
// Single-bit flags (0x1 .. 0x10) that are OR-ed into one unsigned `properties`
// word further down in this file. The conditions guarding each OR are not
// visible in this view; presumably they are the matching is* character
// properties -- confirm.
41 static const int ISALPHA_MASK = 0x1;
42 static const int ISLOWER_MASK = 0x2;
43 static const int ISUPPER_MASK = 0x4;
44 static const int ISDIGIT_MASK = 0x8;
45 static const int ISPUNCTUATION_MASK = 0x10;
// Value that min_top and max_top are compared against inside the loop over
// all used ids: one test checks that both are below it, the other that both
// are above it. NOTE(review): presumably this separates x-height characters
// from cap-height ones (see x_height_alphas / cap_height_alphas) -- confirm.
50 static const int kMeanlineThreshold = 220;
73 const char* UNICHARSET::kCleanupMaps[][2] = {
86 UNICHARSET::UNICHAR_PROPERTIES::UNICHAR_PROPERTIES() {
91 void UNICHARSET::UNICHAR_PROPERTIES::Init() {
96 ispunctuation =
false;
110 void UNICHARSET::UNICHAR_PROPERTIES::SetRangesOpen() {
124 void UNICHARSET::UNICHAR_PROPERTIES::SetRangesEmpty() {
// Returns true when either `width` or `advance` is exactly 0.0f.
// NOTE(review): presumably 0.0f is the "empty" marker written by
// SetRangesEmpty(), whose body is not visible here -- confirm.
139 bool UNICHARSET::UNICHAR_PROPERTIES::AnyRangeEmpty()
const {
140 return width == 0.0f || advance == 0.0f;
// Merges the range/statistic fields of `src` into this object.
//  - min_bottom/max_bottom: src.min_bottom and src.max_bottom are each passed
//    to UpdateRange together with pointers to this object's bottom bounds.
//  - width_sd, bearing(+_sd), advance(+_sd): src's values replace the stored
//    ones only when src's *_sd field is strictly larger than the stored *_sd.
// Several original lines (e.g. 148-149, 151) are missing from this view, so
// this list covers only what is visible.
144 void UNICHARSET::UNICHAR_PROPERTIES::ExpandRangesFrom(
145 const UNICHAR_PROPERTIES& src) {
// UpdateRange is defined elsewhere; presumably it widens the
// [lower, upper] pair so that it includes the given value -- confirm.
146 UpdateRange(src.min_bottom, &min_bottom, &max_bottom);
147 UpdateRange(src.max_bottom, &min_bottom, &max_bottom);
// Keep whichever side reports the larger width_sd.
150 if (src.width_sd > width_sd) {
152 width_sd = src.width_sd;
// Bearing and its sd are taken from src as a pair.
154 if (src.bearing_sd > bearing_sd) {
155 bearing = src.bearing;
156 bearing_sd = src.bearing_sd;
// Advance and its sd are taken from src as a pair.
158 if (src.advance_sd > advance_sd) {
159 advance = src.advance;
160 advance_sd = src.advance_sd;
165 void UNICHARSET::UNICHAR_PROPERTIES::CopyFrom(
const UNICHAR_PROPERTIES& src) {
169 fragment = saved_fragment;
178 script_table_size_used(0),
179 null_script(
"NULL") {
193 if (unichars_number > size_reserved) {
194 UNICHAR_SLOT* unichars_new =
new UNICHAR_SLOT[unichars_number];
195 for (
int i = 0; i < size_used; ++i)
196 unichars_new[i] = unichars[i];
197 for (
int j = size_used; j < unichars_number; ++j) {
198 unichars_new[j].properties.script_id =
add_script(null_script);
201 unichars = unichars_new;
202 size_reserved = unichars_number;
209 old_style_included_ ? unichar_repr :
CleanupString(unichar_repr);
210 return ids.
contains(cleaned.data(), cleaned.size())
212 : INVALID_UNICHAR_ID;
218 string cleaned(unichar_repr, length);
219 if (!old_style_included_) cleaned =
CleanupString(unichar_repr, length);
220 return ids.
contains(cleaned.data(), cleaned.size())
222 : INVALID_UNICHAR_ID;
234 if (encoding.
empty() || encoding[0] == INVALID_UNICHAR_ID)
return 0;
242 int *first_bad_position)
const {
244 return encode_string(str,
true, &encoding, NULL, first_bad_position);
259 int* encoded_length)
const {
264 int str_length = strlen(str);
267 while (str_pos < str_length) {
268 encode_string(str, str_pos, str_length, &working_encoding, &working_lengths,
269 &str_pos, encoding, &best_lengths);
270 if (str_pos < str_length) {
273 if (give_up_on_failure)
break;
274 int step = UNICHAR::utf8_step(str + str_pos);
275 if (step == 0) step = 1;
279 working_encoding = *encoding;
280 working_lengths = best_lengths;
283 if (lengths != NULL) *lengths = best_lengths;
284 if (encoded_length != NULL) *encoded_length = str_pos;
289 if (
id == INVALID_UNICHAR_ID) {
290 return INVALID_UNICHAR;
293 return unichars[id].representation;
297 if (
id == INVALID_UNICHAR_ID) {
298 return INVALID_UNICHAR;
311 return unichars[id].representation;
321 for (
int i = 0; str[i] !=
'\0'; i +=
step) {
322 char hex[
sizeof(int) * 2 + 1];
323 step = UNICHAR::utf8_step(str + i);
326 sprintf(hex,
"%x", str[i]);
371 unichars[unichar_id].properties.normed_ids.truncate(0);
373 unichars[unichar_id].properties.normed_ids.push_back(
UNICHAR_SPACE);
374 }
else if (!
encode_string(unichars[unichar_id].properties.normed.string(),
375 true, &unichars[unichar_id].properties.normed_ids,
377 unichars[unichar_id].properties.normed_ids.truncate(0);
378 unichars[unichar_id].properties.normed_ids.push_back(unichar_id);
388 return (uni >= 0xE000 && uni <= 0xF8FF);
394 for (
int id = 0;
id < size_used; ++id) {
395 unichars[id].properties.SetRangesEmpty();
404 for (
int ch = start_index; ch < size_used; ++ch) {
406 UNICHAR_PROPERTIES properties;
407 if (src.GetStrProperties(utf8, &properties)) {
411 const char* other_case = src.
id_to_unichar(properties.other_case);
415 properties.other_case = ch;
417 const char* mirror_str = src.
id_to_unichar(properties.mirror);
421 properties.mirror = ch;
423 unichars[ch].properties.CopyFrom(properties);
433 for (
int ch = 0; ch < size_used; ++ch) {
435 UNICHAR_PROPERTIES properties;
436 if (src.GetStrProperties(utf8, &properties)) {
438 unichars[ch].properties.ExpandRangesFrom(properties);
447 for (
int ch = 0; ch < src.size_used; ++ch) {
448 const UNICHAR_PROPERTIES& src_props = src.unichars[ch].properties;
451 unichars[ch].properties.ExpandRangesFrom(src_props);
462 int initial_used = size_used;
463 for (
int ch = 0; ch < src.size_used; ++ch) {
464 const UNICHAR_PROPERTIES& src_props = src.unichars[ch].properties;
470 unichars[id].properties.ExpandRangesFrom(src_props);
473 unichars[id].properties.SetRangesEmpty();
484 int overlap =
MIN(unichars[id1].properties.max_top,
485 unichars[id2].properties.max_top) -
486 MAX(unichars[id1].properties.min_top,
487 unichars[id2].properties.min_top);
502 int* best_total_length,
505 if (str_index > *best_total_length) {
507 *best_total_length = str_index;
508 *best_encoding = *encoding;
509 if (best_lengths != NULL)
510 *best_lengths = *lengths;
512 if (str_index == str_length)
return;
513 int encoding_index = encoding->
size();
515 int length = ids.
minmatch(str + str_index);
516 if (length == 0 || str_index + length > str_length)
return;
518 if (ids.
contains(str + str_index, length)) {
523 encode_string(str, str_index + length, str_length, encoding, lengths,
524 best_total_length, best_encoding, best_lengths);
525 if (*best_total_length == str_length)
531 int step = UNICHAR::utf8_step(str + str_index + length);
532 if (step == 0) step = 1;
534 }
while (length <=
UNICHAR_LEN && str_index + length <= str_length);
// Fills *props with properties combined from every unichar id in `encoding`
// and returns true when total_unicodes ends up greater than zero.
//   utf8_str: input string. `encoding` is declared on lines missing from this
//             view; presumably it is the encoding of utf8_str -- confirm.
//   props:    output; its ranges are reset via SetRangesEmpty() first.
// The increment of total_unicodes is also on a line not visible here.
542 bool UNICHARSET::GetStrProperties(
const char* utf8_str,
543 UNICHAR_PROPERTIES* props)
const {
545 props->SetRangesEmpty();
546 int total_unicodes = 0;
550 for (
int i = 0; i < encoding.
size(); ++i) {
551 int id = encoding[i];
552 const UNICHAR_PROPERTIES& src_props = unichars[id].properties;
// Boolean flags are sticky: a flag is set on the result if any component has
// it set; nothing in the visible code clears one.
554 if (src_props.isalpha) props->isalpha =
true;
555 if (src_props.islower) props->islower =
true;
556 if (src_props.isupper) props->isupper =
true;
557 if (src_props.isdigit) props->isdigit =
true;
558 if (src_props.ispunctuation) props->ispunctuation =
true;
559 if (src_props.isngram) props->isngram =
true;
560 if (src_props.enabled) props->enabled =
true;
// Feed both ends of the component's bottom and top ranges into the result's
// corresponding bound pairs.
562 UpdateRange(src_props.min_bottom, &props->min_bottom, &props->max_bottom);
563 UpdateRange(src_props.max_bottom, &props->min_bottom, &props->max_bottom);
564 UpdateRange(src_props.min_top, &props->min_top, &props->max_top);
565 UpdateRange(src_props.max_top, &props->min_top, &props->max_top);
// Candidate bearing = advance accumulated so far + this component's bearing.
// It is adopted for the first component, or whenever it is smaller than the
// bearing currently stored.
566 float bearing = props->advance + src_props.bearing;
567 if (total_unicodes == 0 || bearing < props->bearing) {
568 props->bearing = bearing;
569 props->bearing_sd = props->advance_sd + src_props.bearing_sd;
// Advance and its sd accumulate by addition across components.
571 props->advance += src_props.advance;
572 props->advance_sd += src_props.advance_sd;
// Width is simply overwritten with the current component's values here; for
// more than one component it is recomputed after the loop.
574 props->width = src_props.width;
575 props->width_sd = src_props.width_sd;
// script_id, other_case, mirror and direction come from the first component.
578 if (total_unicodes == 0) {
579 props->script_id = src_props.script_id;
580 props->other_case = src_props.other_case;
581 props->mirror = src_props.mirror;
582 props->direction = src_props.direction;
// The normed string is the concatenation of the components' normed strings.
586 props->normed += src_props.normed;
// With several components, derive width from the accumulated advance and the
// chosen bearing, and sum their sds.
589 if (total_unicodes > 1) {
591 props->width = props->advance - props->bearing;
592 props->width_sd = props->advance_sd + props->bearing_sd;
594 return total_unicodes > 0;
600 unsigned int properties = 0;
602 properties |= ISALPHA_MASK;
604 properties |= ISLOWER_MASK;
606 properties |= ISUPPER_MASK;
608 properties |= ISDIGIT_MASK;
610 properties |= ISPUNCTUATION_MASK;
627 old_style_included_ ? unichar_repr :
CleanupString(unichar_repr);
628 if (!cleaned.empty() && !ids.
contains(cleaned.data(), cleaned.size())) {
629 const char* str = cleaned.c_str();
631 if (!old_style_included_ &&
634 if (size_used == size_reserved) {
643 fprintf(stderr,
"Utf8 buffer too big, size>%d for %s\n",
UNICHAR_LEN,
647 unichars[size_used].representation[index++] = *str++;
648 }
while (*str !=
'\0');
649 unichars[size_used].representation[index] =
'\0';
657 this->unichars[size_used].properties.fragment = frag;
659 this->unichars[size_used].properties.script_id =
662 this->unichars[size_used].properties.enabled =
true;
663 ids.
insert(unichars[size_used].representation, size_used);
670 old_style_included_ ? unichar_repr :
CleanupString(unichar_repr);
671 return ids.
contains(cleaned.data(), cleaned.size());
679 string cleaned(unichar_repr, length);
680 if (!old_style_included_) cleaned =
CleanupString(unichar_repr, length);
681 return ids.
contains(cleaned.data(), cleaned.size());
685 const char*
const unichar_repr)
const {
686 return strcmp(this->
id_to_unichar(unichar_id), unichar_repr) == 0;
690 const int kFileBufSize = 1024;
691 char buffer[kFileBufSize + 1];
692 snprintf(buffer, kFileBufSize,
"%d\n", this->
size());
695 int min_bottom, max_bottom, min_top, max_top;
697 float width, width_sd;
699 float bearing, bearing_sd;
701 float advance, advance_sd;
705 snprintf(buffer, kFileBufSize,
"%s %x %s %d\n",
"NULL", properties,
709 snprintf(buffer, kFileBufSize,
710 "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %s %d %d %d %s\t# %s\n",
712 min_bottom, max_bottom, min_top, max_top, width, width_sd,
713 bearing, bearing_sd, advance, advance_sd,
728 : memory_(memory), fgets_ptr_(memory), mem_size_(mem_size) { }
731 const char *src_end = memory_ + mem_size_;
732 char *dst_end = orig_dst + size - 1;
734 return fgets_ptr_ < src_end ? orig_dst : NULL;
737 char *dst = orig_dst;
739 while (fgets_ptr_ < src_end && dst < dst_end && ch !=
'\n') {
740 ch = *dst++ = *fgets_ptr_++;
743 return (dst == orig_dst) ? NULL : orig_dst;
748 const char *fgets_ptr_;
753 const char *memory,
int mem_size,
bool skip_fragments) {
757 bool success = load_via_fgets(fgets_cb, skip_fragments);
766 return ::fgets(dst, size, fp_);
776 bool success = load_via_fgets(fgets_cb, skip_fragments);
784 bool success = load_via_fgets(fgets_cb, skip_fragments);
// Loads the unicharset line by line through the fgets-style callback
// `fgets_cb`. The first line supplies the entry count; each following line
// describes one unichar id. Many original lines (declarations, error paths,
// some setter calls) are missing from this view, so only the visible steps
// are described. `skip_fragments` is not used on any visible line.
789 bool UNICHARSET::load_via_fgets(
791 bool skip_fragments) {
// Header line: must be readable and parse as a single integer count.
796 if (fgets_cb->
Run(buffer,
sizeof(buffer)) == NULL ||
797 sscanf(buffer,
"%d", &unicharset_size) != 1) {
800 this->
reserve(unicharset_size);
801 for (
UNICHAR_ID id = 0;
id < unicharset_size; ++id) {
803 unsigned int properties;
// Defaults that stay in effect when a shorter line format omits the field.
806 strcpy(script, null_script);
812 float width_sd = 0.0f;
813 float bearing = 0.0f;
814 float bearing_sd = 0.0f;
815 float advance = 0.0f;
816 float advance_sd = 0.0f;
// Read one entry and try the line formats from richest to poorest; `v` keeps
// the field count of the last sscanf attempted. Accepted counts, in order:
// 17, 16, 10, 8, 4, 3, 2. The entry is rejected only if every attempt fails.
// NOTE(review): `unichar` is scanned with an unbounded %s while script and
// normed are capped at 63 characters -- confirm the unichar buffer is at
// least as large as `buffer`.
824 if (fgets_cb->
Run(buffer, sizeof (buffer)) == NULL ||
// 17 fields: box ranges, width/bearing/advance stats, script, other_case,
// direction, mirror and normed form.
826 "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d %63s",
827 unichar, &properties,
828 &min_bottom, &max_bottom, &min_top, &max_top,
829 &width, &width_sd, &bearing, &bearing_sd,
830 &advance, &advance_sd, script, &other_case,
831 &direction, &mirror, normed)) != 17 &&
// 16 fields: same as above without the trailing normed form.
833 "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d",
834 unichar, &properties,
835 &min_bottom, &max_bottom, &min_top, &max_top,
836 &width, &width_sd, &bearing, &bearing_sd,
837 &advance, &advance_sd, script, &other_case,
838 &direction, &mirror)) != 16 &&
// 10 fields: no width/bearing/advance statistics.
839 (v = sscanf(buffer,
"%s %x %d,%d,%d,%d %63s %d %d %d",
840 unichar, &properties,
841 &min_bottom, &max_bottom, &min_top, &max_top,
842 script, &other_case, &direction, &mirror)) != 10 &&
// 8 fields: no direction or mirror.
843 (v = sscanf(buffer,
"%s %x %d,%d,%d,%d %63s %d", unichar, &properties,
844 &min_bottom, &max_bottom, &min_top, &max_top,
845 script, &other_case)) != 8 &&
// 4 fields: unichar, properties, script, other_case.
846 (v = sscanf(buffer,
"%s %x %63s %d", unichar, &properties,
847 script, &other_case)) != 4 &&
// 3 fields: unichar, properties, script.
848 (v = sscanf(buffer,
"%s %x %63s",
849 unichar, &properties, script)) != 3 &&
// 2 fields: unichar and properties only.
850 (v = sscanf(buffer,
"%s %x", unichar, &properties)) != 2)) {
// The literal text "NULL" is special-cased; the action taken is on a line
// outside this view.
864 if (strcmp(unichar,
"NULL") == 0)
// Every loaded entry starts out enabled.
876 this->unichars[id].properties.enabled =
true;
877 this->
set_top_bottom(
id, min_bottom, max_bottom, min_top, max_top);
881 this->
set_direction(
id, static_cast<UNICHARSET::Direction>(direction));
// other_case is used only if the format supplied it (v > 3) and it is below
// unicharset_size; otherwise the entry refers to itself.
883 id, (v > 3 && other_case < unicharset_size) ? other_case :
id);
// Same fallback-to-self rule for mirror, which needs v > 8.
884 this->
set_mirror(
id, (v > 8 && mirror < unicharset_size) ? mirror :
id);
// Only the 17-field format carries a normed string; otherwise the unichar
// text itself is used.
885 this->
set_normed(
id, (v>16) ? normed : unichar);
897 int net_case_alphas = 0;
898 int x_height_alphas = 0;
899 int cap_height_alphas = 0;
900 top_bottom_set_ =
false;
901 for (
UNICHAR_ID id = 0;
id < size_used; ++id) {
908 top_bottom_set_ =
true;
914 if (min_top < kMeanlineThreshold && max_top < kMeanlineThreshold)
916 else if (min_top > kMeanlineThreshold && max_top > kMeanlineThreshold)
922 script_has_upper_lower_ = net_case_alphas > 0;
923 script_has_xheight_ = script_has_upper_lower_ ||
941 int* script_counts =
new int[script_table_size_used];
942 memset(script_counts, 0,
sizeof(*script_counts) * script_table_size_used);
943 for (
int id = 0;
id < size_used; ++id) {
949 for (
int s = 1; s < script_table_size_used; ++s) {
950 if (script_counts[s] > script_counts[default_sid_] && s != common_sid_)
953 delete [] script_counts;
963 for (
int id = 0;
id < size_used; ++id) {
970 return rtl_count > ltr_count;
978 const char* whitelist,
979 const char* unblacklist) {
980 bool def_enabled = whitelist == NULL || whitelist[0] ==
'\0';
982 for (
int ch = 0; ch < size_used; ++ch)
983 unichars[ch].properties.enabled = def_enabled;
988 for (
int i = 0; i < encoding.
size(); ++i) {
989 if (encoding[i] != INVALID_UNICHAR_ID)
990 unichars[encoding[i]].properties.enabled =
true;
993 if (blacklist != NULL && blacklist[0] !=
'\0') {
997 for (
int i = 0; i < encoding.
size(); ++i) {
998 if (encoding[i] != INVALID_UNICHAR_ID)
999 unichars[encoding[i]].properties.enabled =
false;
1002 if (unblacklist != NULL && unblacklist[0] !=
'\0') {
1006 for (
int i = 0; i < encoding.
size(); ++i) {
1007 if (encoding[i] != INVALID_UNICHAR_ID)
1008 unichars[encoding[i]].properties.enabled =
true;
1018 for (
int id = start_id;
id < size_used; ++id) {
1021 for (
int u = 1; u < unicodes.size(); ++u) {
1022 if (unicodes[u - 1] == unicodes[u])
return true;
// Body fragment of a function taking `script` (presumably add_script; its
// header line is not in this view). It looks the name up in script_table,
// grows the table when full, stores a heap copy of the name in the next free
// slot and returns that slot's index.
// First, scan the used entries for an exact name match; the action taken on
// a match is on a line not visible here.
1029 for (
int i = 0; i < script_table_size_used; ++i) {
1030 if (strcmp(script, script_table[i]) == 0)
// First allocation: start with room for 8 entries.
1033 if (script_table_size_reserved == 0) {
1034 script_table_size_reserved = 8;
1035 script_table =
new char*[script_table_size_reserved];
1036 }
else if (script_table_size_used >= script_table_size_reserved) {
// Table is full: double the capacity, copy the existing pointers across and
// release the old pointer array (the strings themselves are kept).
1037 assert(script_table_size_used == script_table_size_reserved);
1038 script_table_size_reserved += script_table_size_reserved;
1039 char** new_script_table =
new char*[script_table_size_reserved];
1040 memcpy(new_script_table, script_table,
1041 script_table_size_used *
sizeof(
char*));
1042 delete[] script_table;
1043 script_table = new_script_table;
// Store a private copy of the name (+1 for the terminating NUL).
1045 script_table[script_table_size_used] =
new char[strlen(script) + 1];
1046 strcpy(script_table[script_table_size_used], script);
// Post-increment: returns the index just filled, then bumps the used count.
1047 return script_table_size_used++;
1054 if (total == 1)
return STRING(unichar);
1056 result += kSeparator;
1058 char buffer[kMaxLen];
1059 snprintf(buffer, kMaxLen,
"%c%d%c%d", kSeparator, pos,
1060 natural ? kNaturalFlag : kSeparator, total);
1066 const char *ptr = string;
1067 int len = strlen(
string);
1068 if (len < kMinLen || *ptr != kSeparator) {
1073 while ((ptr + step) < (
string + len) && *(ptr + step) != kSeparator) {
1074 step += UNICHAR::utf8_step(ptr + step);
1080 strncpy(unichar, ptr, step);
1081 unichar[
step] =
'\0';
1085 bool natural =
false;
1086 char *end_ptr = NULL;
1087 for (
int i = 0; i < 2; i++) {
1088 if (ptr >
string + len || *ptr != kSeparator) {
1089 if (i == 1 && *ptr == kNaturalFlag)
1095 i == 0 ? pos =
static_cast<int>(strtol(ptr, &end_ptr, 10))
1096 : total = static_cast<int>(strtol(ptr, &end_ptr, 10));
1099 if (ptr !=
string + len) {
1103 fragment->
set_all(unichar, pos, total, natural);
1108 for (
int i = 0; i < script_table_size_used; ++i) {
1109 if (strcmp(script_name, script_table[i]) == 0)
1120 result.reserve(length);
1122 while ((ch = *utf8_str) !=
'\0' && --length >= 0) {
1125 while ((key = kCleanupMaps[key_index][0]) !=
nullptr) {
1127 while (key[match] !=
'\0' && key[match] == utf8_str[match]) ++match;
1128 if (key[match] ==
'\0') {
1134 if (key ==
nullptr) {
1135 result.push_back(ch);
1138 result.append(kCleanupMaps[key_index][1]);
int get_script_id_from_name(const char *script_name) const
const double kMinCapHeightFraction
void set_ispunctuation(UNICHAR_ID unichar_id, bool value)
static TESS_API const char * kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT]
const char * id_to_unichar_ext(UNICHAR_ID id) const
void set_bearing_stats(UNICHAR_ID unichar_id, float bearing, float bearing_sd)
void set_islower(UNICHAR_ID unichar_id, bool value)
static CHAR_FRAGMENT * parse_from_string(const char *str)
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
void set_normed(UNICHAR_ID unichar_id, const char *normed)
bool get_ispunctuation(UNICHAR_ID unichar_id) const
void set_direction(UNICHAR_ID unichar_id, UNICHARSET::Direction value)
bool encode_string(const char *str, bool give_up_on_failure, GenericVector< UNICHAR_ID > *encoding, GenericVector< char > *lengths, int *encoded_length) const
char * FGets(char *buffer, int buffer_size)
bool AnyRepeatedUnicodes() const
void set_isdigit(UNICHAR_ID unichar_id, bool value)
UNICHAR_ID unichar_to_id(const char *const unichar_repr, int length) const
unsigned int get_properties(UNICHAR_ID unichar_id) const
bool get_isdigit(UNICHAR_ID unichar_id) const
void set_advance_stats(UNICHAR_ID unichar_id, float advance, float advance_sd)
void ExpandRangesFromOther(const UNICHARSET &src)
bool get_isalpha(UNICHAR_ID unichar_id) const
const char * get_script_from_script_id(int id) const
void AppendOtherUnicharset(const UNICHARSET &src)
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
LocalFilePointer(FILE *stream)
const double kMinXHeightFraction
bool get_isprivate(UNICHAR_ID unichar_id) const
void get_advance_stats(UNICHAR_ID unichar_id, float *advance, float *advance_sd) const
static string CleanupString(const char *utf8_str)
void set_isngram(UNICHAR_ID unichar_id, bool value)
int direction(EDGEPT *point)
char get_chartype(UNICHAR_ID unichar_id) const
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
void insert(const char *const unichar_repr, UNICHAR_ID id)
bool contains_unichar(const char *const unichar_repr) const
UNICHAR_ID get_other_case(UNICHAR_ID unichar_id) const
bool has_special_codes() const
static STRING to_string(const char *unichar, int pos, int total, bool natural)
bool load_from_inmemory_file(const char *const memory, int mem_size, bool skip_fragments)
void set_isupper(UNICHAR_ID unichar_id, bool value)
void unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style)
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Direction get_direction(UNICHAR_ID unichar_id) const
bool get_islower(UNICHAR_ID unichar_id) const
int add_script(const char *script)
char * fgets(char *dst, int size)
void set_all(const char *unichar, int pos, int total, bool natural)
void get_width_stats(UNICHAR_ID unichar_id, float *width, float *width_sd) const
bool encodable_string(const char *str, int *first_bad_position) const
void set_mirror(UNICHAR_ID unichar_id, UNICHAR_ID mirror)
void set_normed_ids(UNICHAR_ID unichar_id)
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
InMemoryFilePointer(const char *memory, int mem_size)
bool major_right_to_left() const
void unichar_insert_backwards_compatible(const char *const unichar_repr)
static STRING debug_utf8_str(const char *str)
bool get_isupper(UNICHAR_ID unichar_id) const
void CopyFrom(const UNICHARSET &src)
STRING debug_str(UNICHAR_ID id) const
bool SizesDistinct(UNICHAR_ID id1, UNICHAR_ID id2) const
bool save_to_string(STRING *str) const
const char * get_unichar() const
bool load_from_file(const char *const filename, bool skip_fragments)
static TESS_API const char * kCustomLigatures[][2]
void set_script(UNICHAR_ID unichar_id, const char *value)
void set_black_and_whitelist(const char *blacklist, const char *whitelist, const char *unblacklist)
bool contains(const char *const unichar_repr, int length) const
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
void set_isalpha(UNICHAR_ID unichar_id, bool value)
void set_other_case(UNICHAR_ID unichar_id, UNICHAR_ID other_case)
void reserve(int unichars_number)
int minmatch(const char *const unichar_repr) const
int step(const char *str) const
char * fgets(char *orig_dst, int size)
void PartialSetPropertiesFromOther(int start_index, const UNICHARSET &src)
void set_top_bottom(UNICHAR_ID unichar_id, int min_bottom, int max_bottom, int min_top, int max_top)
UNICHAR_ID get_mirror(UNICHAR_ID unichar_id) const
void get_bearing_stats(UNICHAR_ID unichar_id, float *bearing, float *bearing_sd) const
int get_script(UNICHAR_ID unichar_id) const
const char * id_to_unichar(UNICHAR_ID id) const
void set_width_stats(UNICHAR_ID unichar_id, float width, float width_sd)