110 {
112 if (radical_stroke_table != nullptr && !DecodeRadicalTable(*radical_stroke_table, &radical_map)) {
113 return false;
114 }
115 encoder_.clear();
116 UNICHARSET direct_set;
117
118 direct_set.clear();
119
121
122 if (null_id >= 0) {
123 direct_set.unichar_insert(kNullChar);
124 }
126
127
128 int hangul_offset = unicharset.size();
129
131
132
133
134 int han_offset = hangul_offset + kTotalJamos;
135 for (unsigned u = 0; u <= unicharset.size(); ++u) {
136
137
138 if (u == unicharset.size() && static_cast<int>(u) != null_id) {
139 break;
140 }
141 RecodedCharID code;
142
143 std::vector<char32> unicodes;
144 std::string cleaned;
145 if (u < unicharset.size()) {
147 }
149
150 int unicode = unicodes[0];
151 int leading, vowel, trailing;
152 auto it = radical_map.find(unicode);
153 if (it != radical_map.end()) {
154
155 int num_radicals = it->second->size();
156 for (int c = 0; c < num_radicals; ++c) {
157 code.Set(c, han_offset + (*it->second)[c]);
158 }
159 int pre_hash = RadicalPreHash(*it->second);
160 int num_samples = radical_counts[pre_hash]++;
161 if (num_samples > 0) {
162 code.Set(num_radicals, han_offset + num_samples +
kRadicalRadix);
163 }
165
166
167 code.Set3(leading + hangul_offset, vowel +
kLCount + hangul_offset,
169 }
170 }
171
172 if (code.empty()) {
173
175 code.Set(0, 0);
176 } else if (static_cast<int>(u) == null_id ||
178 code.Set(0, direct_set.unichar_to_id(kNullChar));
179 } else {
180
181
182 for (int uni : unicodes) {
183 int position = code.length();
185 tprintf(
"Unichar %d=%s is too long to encode!!\n", u, unicharset.id_to_unichar(u));
186 return false;
187 }
188 UNICHAR unichar(uni);
189 char *utf8 = unichar.utf8_str();
190 if (!direct_set.contains_unichar(utf8)) {
191 direct_set.unichar_insert(utf8);
192 }
193 code.Set(position, direct_set.unichar_to_id(utf8));
194 delete[] utf8;
195 if (direct_set.size() > unicharset.size() + !unicharset.has_special_codes()) {
196
197 tprintf(
"Code space expanded from original unicharset!!\n");
198 return false;
199 }
200 }
201 }
202 }
203 encoder_.push_back(code);
204 }
205
206
207 int code_offset = 0;
209 int max_offset = 0;
210 for (unsigned u = 0; u < unicharset.size(); ++u) {
211 RecodedCharID *code = &encoder_[u];
212 if (code->length() <=
i) {
213 continue;
214 }
215 max_offset = std::max(max_offset, (*code)(
i)-han_offset);
216 code->Set(
i, (*code)(
i) + code_offset);
217 }
218 if (max_offset == 0) {
219 break;
220 }
221 code_offset += max_offset + 1;
222 }
223 DefragmentCodeValues(null_id >= 0 ? 1 : -1);
224 SetupDecoder();
225 return true;
226}
std::unordered_map< int, std::unique_ptr< std::vector< int > > > RSMap
void tprintf(const char *format,...)
std::unordered_map< int, int > RSCounts
@ SPECIAL_UNICHAR_CODES_COUNT
static std::vector< char32 > UTF8ToUTF32(const char *utf8_str)
static const int kMaxCodeLen
static bool DecomposeHangul(int unicode, int *leading, int *vowel, int *trailing)
static std::string CleanupString(const char *utf8_str)