116 {
118 beam_search.Decode(
output, 3.5, -0.125, -25.0,
nullptr);
119
120
121 std::vector<int> labels, xcoords;
122 beam_search.ExtractBestPathAsLabels(&labels, &xcoords);
123 LOG(
INFO) <<
"Labels size = " << labels.size() <<
" coords " << xcoords.size() <<
"\n";
124
125 std::string decoded;
126 int end = 1;
127 for (unsigned start = 0; start < labels.size(); start = end) {
128 RecodedCharID code;
129 unsigned index = start;
130 int uni_id = INVALID_UNICHAR_ID;
131 do {
132 code.Set(code.length(), labels[index++]);
136 EXPECT_NE(INVALID_UNICHAR_ID, uni_id) <<
"index=" << index <<
"/" << labels.size();
137
138
139
142 }
143 end = index;
144 }
146
147
148 std::vector<int> unichar_ids;
149 std::vector<float> certainties, ratings;
150 beam_search.ExtractBestPathAsUnicharIds(
false, &
ccutil_.
unicharset, &unichar_ids, &certainties,
151 &ratings, &xcoords);
152 std::string u_decoded;
153 float total_rating = 0.0f;
154 for (unsigned u = 0; u < unichar_ids.size(); ++u) {
155
156
157
158 if (u_decoded.size() < truth_utf8.size()) {
160 total_rating += ratings[u];
161 LOG(
INFO) << u <<
":u_id=" << unichar_ids[u] <<
"=" << str <<
", c="
162 << certainties[u] << ", r=" << ratings[u] << "r_sum="
163 << total_rating << " @" << xcoords[u] << "\n";
164 if (str[0] == ' ') {
165 total_rating = 0.0f;
166 }
167 u_decoded += str;
168 }
169 }
171
172
173 TBOX line_box(0, 0, 100, 10);
174 for (
int i = 0;
i < 2; ++
i) {
175 beam_search.ExtractBestPathAsWords(line_box, 1.0f,
false, &
ccutil_.
unicharset, words);
176 std::string w_decoded;
177 for (int w = 0; w < words->size(); ++w) {
178 const WERD_RES *word = (*words)[w];
179 if (w_decoded.size() < truth_utf8.size()) {
180 if (!w_decoded.empty() && word->word->space()) {
181 w_decoded += " ";
182 }
183 w_decoded += word->best_choice->unichar_string().c_str();
184 }
185 LOG(
INFO) <<
"Word:" << w <<
" = " << word->best_choice->unichar_string()
186 << ", c=" << word->best_choice->certainty() << ", r=" << word->best_choice->rating()
187 << ", perm=" << word->best_choice->permuter() << "\n";
188 }
189 std::string w_trunc(w_decoded.data(), truth_utf8.size());
190 if (truth_utf8 != w_trunc) {
194 w_trunc.assign(w_decoded.data(), truth_utf8.size());
195 }
197 }
198 }
#define EXPECT_EQ(val1, val2)
bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
bool IsValidFirstCode(int code) const
int DecodeUnichar(const RecodedCharID &code) const
const char * id_to_unichar(UNICHAR_ID id) const