105 if (radical_stroke_table !=
nullptr &&
106 !DecodeRadicalTable(radical_stroke_table, &radical_map))
121 int hangul_offset = unicharset.
size();
127 int han_offset = hangul_offset + kTotalJamos;
128 int max_num_strokes = -1;
129 for (
int u = 0; u <= unicharset.
size(); ++u) {
132 if (u == unicharset.
size() && u != null_id)
break;
135 std::vector<char32> unicodes;
137 if (u < unicharset.
size())
139 if (u < unicharset.
size() &&
142 int unicode = unicodes[0];
143 int leading, vowel, trailing;
144 auto it = radical_map.find(unicode);
145 if (it != radical_map.end()) {
147 int num_radicals = it->second->size();
148 for (
int c = 0; c < num_radicals; ++c) {
149 code.Set(c, han_offset + (*it->second)[c]);
151 int pre_hash = RadicalPreHash(*it->second);
152 int num_samples = radical_counts[pre_hash]++;
154 code.Set(num_radicals, han_offset + num_samples +
kRadicalRadix);
158 code.Set3(leading + hangul_offset, vowel +
kLCount + hangul_offset,
163 if (code.length() == 0) {
173 for (
int i = 0; i < unicodes.size(); ++i) {
174 int position = code.length();
176 tprintf(
"Unichar %d=%s is too long to encode!!\n", u,
180 int uni = unicodes[i];
181 UNICHAR unichar(uni);
182 char* utf8 = unichar.utf8_str();
187 if (direct_set.
size() >
190 tprintf(
"Code space expanded from original unicharset!!\n");
196 encoder_.push_back(code);
206 for (
int u = 0; u < unicharset.
size(); ++u) {
207 RecodedCharID* code = &encoder_[u];
208 if (code->length() <= i)
continue;
209 max_offset = std::max(max_offset, (*code)(i)-han_offset);
210 code->Set(i, (*code)(i) + code_offset);
212 if (max_offset == 0)
break;
213 code_offset += max_offset + 1;
215 DefragmentCodeValues(null_id >= 0 ? 1 : -1);
static bool DecomposeHangul(int unicode, int *leading, int *vowel, int *trailing)
std::unordered_map< int, std::unique_ptr< std::vector< int > > > RSMap
static const int kMaxCodeLen
static string CleanupString(const char *utf8_str)
bool contains_unichar(const char *const unichar_repr) const
bool has_special_codes() const
void unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style)
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
static std::vector< char32 > UTF8ToUTF32(const char *utf8_str)
std::unordered_map< int, int > RSCounts
const char * id_to_unichar(UNICHAR_ID id) const