tesseract v5.3.3.20231005
tesseract::RecodeBeamSearch Class Reference

#include <recodebeam.h>

Public Member Functions

 RecodeBeamSearch (const UnicharCompress &recoder, int null_char, bool simple_text, Dict *dict)
 
 ~RecodeBeamSearch ()
 
void Decode (const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
 
void Decode (const GENERIC_2D_ARRAY< float > &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset)
 
void DecodeSecondaryBeams (const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
 
void ExtractBestPathAsLabels (std::vector< int > *labels, std::vector< int > *xcoords) const
 
void ExtractBestPathAsUnicharIds (bool debug, const UNICHARSET *unicharset, std::vector< int > *unichar_ids, std::vector< float > *certs, std::vector< float > *ratings, std::vector< int > *xcoords) const
 
void ExtractBestPathAsWords (const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
 
void DebugBeams (const UNICHARSET &unicharset) const
 
void extractSymbolChoices (const UNICHARSET *unicharset)
 
void PrintBeam2 (bool uids, int num_outputs, const UNICHARSET *charset, bool secondary) const
 
void segmentTimestepsByCharacters ()
 
std::vector< std::vector< std::pair< const char *, float > > > combineSegmentedTimesteps (std::vector< std::vector< std::vector< std::pair< const char *, float > > > > *segmentedTimesteps)
 

Static Public Member Functions

static int LengthFromBeamsIndex (int index)
 
static NodeContinuation ContinuationFromBeamsIndex (int index)
 
static bool IsDawgFromBeamsIndex (int index)
 
static int BeamIndex (bool is_dawg, NodeContinuation cont, int length)
 

Public Attributes

std::vector< std::vector< std::pair< const char *, float > > > timesteps
 
std::vector< std::vector< std::vector< std::pair< const char *, float > > > > segmentedTimesteps
 
std::vector< std::vector< std::pair< const char *, float > > > ctc_choices
 
std::vector< std::unordered_set< int > > excludedUnichars
 
std::vector< int > character_boundaries_
 

Static Public Attributes

static constexpr float kMinCertainty = -20.0f
 
static const int kNumLengths = RecodedCharID::kMaxCodeLen + 1
 
static const int kNumBeams = 2 * NC_COUNT * kNumLengths
 

Detailed Description

Definition at line 181 of file recodebeam.h.

Constructor & Destructor Documentation

◆ RecodeBeamSearch()

tesseract::RecodeBeamSearch::RecodeBeamSearch ( const UnicharCompress recoder,
int  null_char,
bool  simple_text,
Dict dict 
)

Definition at line 58 of file recodebeam.cpp.

60 : recoder_(recoder),
61 beam_size_(0),
62 top_code_(-1),
63 second_code_(-1),
64 dict_(dict),
65 space_delimited_(true),
66 is_simple_text_(simple_text),
67 null_char_(null_char) {
68 if (dict_ != nullptr && !dict_->IsSpaceDelimitedLang()) {
69 space_delimited_ = false;
70 }
71}
bool IsSpaceDelimitedLang() const
Returns true if the language is space-delimited (not CJ, or T).
Definition: dict.cpp:912

◆ ~RecodeBeamSearch()

tesseract::RecodeBeamSearch::~RecodeBeamSearch ( )

Definition at line 73 of file recodebeam.cpp.

73 {
74 for (auto data : beam_) {
75 delete data;
76 }
77 for (auto data : secondary_beam_) {
78 delete data;
79 }
80}

Member Function Documentation

◆ BeamIndex()

static int tesseract::RecodeBeamSearch::BeamIndex ( bool  is_dawg,
NodeContinuation  cont,
int  length 
)
inlinestatic

Definition at line 260 of file recodebeam.h.

260 {
261 return (is_dawg * NC_COUNT + cont) * kNumLengths + length;
262 }
static const int kNumLengths
Definition: recodebeam.h:245

◆ combineSegmentedTimesteps()

std::vector< std::vector< std::pair< const char *, float > > > tesseract::RecodeBeamSearch::combineSegmentedTimesteps ( std::vector< std::vector< std::vector< std::pair< const char *, float > > > > *  segmentedTimesteps)

Definition at line 175 of file recodebeam.cpp.

177 {
178 std::vector<std::vector<std::pair<const char *, float>>> combined_timesteps;
179 for (auto &segmentedTimestep : *segmentedTimesteps) {
180 for (auto &j : segmentedTimestep) {
181 combined_timesteps.push_back(j);
182 }
183 }
184 return combined_timesteps;
185}
std::vector< std::vector< std::vector< std::pair< const char *, float > > > > segmentedTimesteps
Definition: recodebeam.h:232

◆ ContinuationFromBeamsIndex()

static NodeContinuation tesseract::RecodeBeamSearch::ContinuationFromBeamsIndex ( int  index)
inlinestatic

Definition at line 253 of file recodebeam.h.

253 {
254 return static_cast<NodeContinuation>((index / kNumLengths) % NC_COUNT);
255 }
NodeContinuation
Definition: recodebeam.h:72

◆ DebugBeams()

void tesseract::RecodeBeamSearch::DebugBeams ( const UNICHARSET unicharset) const

Definition at line 516 of file recodebeam.cpp.

516 {
517 for (int p = 0; p < beam_size_; ++p) {
518 for (int d = 0; d < 2; ++d) {
519 for (int c = 0; c < NC_COUNT; ++c) {
520 auto cont = static_cast<NodeContinuation>(c);
521 int index = BeamIndex(d, cont, 0);
522 if (beam_[p]->beams_[index].empty()) {
523 continue;
524 }
525 // Print all the best scoring nodes for each unichar found.
526 tprintf("Position %d: %s+%s beam\n", p, d ? "Dict" : "Non-Dict",
527 kNodeContNames[c]);
528 DebugBeamPos(unicharset, beam_[p]->beams_[index]);
529 }
530 }
531 }
532}
const char * p
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
static int BeamIndex(bool is_dawg, NodeContinuation cont, int length)
Definition: recodebeam.h:260

◆ Decode() [1/2]

void tesseract::RecodeBeamSearch::Decode ( const GENERIC_2D_ARRAY< float > &  output,
double  dict_ratio,
double  cert_offset,
double  worst_dict_cert,
const UNICHARSET charset 
)

Definition at line 100 of file recodebeam.cpp.

103 {
104 beam_size_ = 0;
105 int width = output.dim1();
106 for (int t = 0; t < width; ++t) {
107 ComputeTopN(output[t], output.dim2(), kBeamWidths[0]);
108 DecodeStep(output[t], t, dict_ratio, cert_offset, worst_dict_cert, charset);
109 }
110}

◆ Decode() [2/2]

void tesseract::RecodeBeamSearch::Decode ( const NetworkIO output,
double  dict_ratio,
double  cert_offset,
double  worst_dict_cert,
const UNICHARSET charset,
int  lstm_choice_mode = 0 
)

Definition at line 83 of file recodebeam.cpp.

85 {
86 beam_size_ = 0;
87 int width = output.Width();
88 if (lstm_choice_mode) {
89 timesteps.clear();
90 }
91 for (int t = 0; t < width; ++t) {
92 ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]);
93 DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert,
94 charset);
95 if (lstm_choice_mode) {
96 SaveMostCertainChoices(output.f(t), output.NumFeatures(), charset, t);
97 }
98 }
99}
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: recodebeam.h:231

◆ DecodeSecondaryBeams()

void tesseract::RecodeBeamSearch::DecodeSecondaryBeams ( const NetworkIO output,
double  dict_ratio,
double  cert_offset,
double  worst_dict_cert,
const UNICHARSET charset,
int  lstm_choice_mode = 0 
)

Definition at line 112 of file recodebeam.cpp.

114 {
115 for (auto data : secondary_beam_) {
116 delete data;
117 }
118 secondary_beam_.clear();
119 if (character_boundaries_.size() < 2) {
120 return;
121 }
122 int width = output.Width();
123 unsigned bucketNumber = 0;
124 for (int t = 0; t < width; ++t) {
125 while ((bucketNumber + 1) < character_boundaries_.size() &&
126 t >= character_boundaries_[bucketNumber + 1]) {
127 ++bucketNumber;
128 }
129 ComputeSecTopN(&(excludedUnichars)[bucketNumber], output.f(t),
130 output.NumFeatures(), kBeamWidths[0]);
131 DecodeSecondaryStep(output.f(t), t, dict_ratio, cert_offset,
132 worst_dict_cert, charset);
133 }
134}
std::vector< int > character_boundaries_
Definition: recodebeam.h:238
std::vector< std::unordered_set< int > > excludedUnichars
Definition: recodebeam.h:236

◆ ExtractBestPathAsLabels()

void tesseract::RecodeBeamSearch::ExtractBestPathAsLabels ( std::vector< int > *  labels,
std::vector< int > *  xcoords 
) const

Definition at line 201 of file recodebeam.cpp.

202 {
203 labels->clear();
204 xcoords->clear();
205 std::vector<const RecodeNode *> best_nodes;
206 ExtractBestPaths(&best_nodes, nullptr);
207 // Now just run CTC on the best nodes.
208 int t = 0;
209 int width = best_nodes.size();
210 while (t < width) {
211 int label = best_nodes[t]->code;
212 if (label != null_char_) {
213 labels->push_back(label);
214 xcoords->push_back(t);
215 }
216 while (++t < width && !is_simple_text_ && best_nodes[t]->code == label) {
217 }
218 }
219 xcoords->push_back(width);
220}

◆ ExtractBestPathAsUnicharIds()

void tesseract::RecodeBeamSearch::ExtractBestPathAsUnicharIds ( bool  debug,
const UNICHARSET unicharset,
std::vector< int > *  unichar_ids,
std::vector< float > *  certs,
std::vector< float > *  ratings,
std::vector< int > *  xcoords 
) const

Definition at line 224 of file recodebeam.cpp.

227 {
228 std::vector<const RecodeNode *> best_nodes;
229 ExtractBestPaths(&best_nodes, nullptr);
230 ExtractPathAsUnicharIds(best_nodes, unichar_ids, certs, ratings, xcoords);
231 if (debug) {
232 DebugPath(unicharset, best_nodes);
233 DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings,
234 *xcoords);
235 }
236}

◆ ExtractBestPathAsWords()

void tesseract::RecodeBeamSearch::ExtractBestPathAsWords ( const TBOX line_box,
float  scale_factor,
bool  debug,
const UNICHARSET unicharset,
PointerVector< WERD_RES > *  words,
int  lstm_choice_mode = 0 
)

Definition at line 239 of file recodebeam.cpp.

243 {
244 words->truncate(0);
245 std::vector<int> unichar_ids;
246 std::vector<float> certs;
247 std::vector<float> ratings;
248 std::vector<int> xcoords;
249 std::vector<const RecodeNode *> best_nodes;
250 std::vector<const RecodeNode *> second_nodes;
251 character_boundaries_.clear();
252 ExtractBestPaths(&best_nodes, &second_nodes);
253 if (debug) {
254 DebugPath(unicharset, best_nodes);
255 ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings,
256 &xcoords);
257 tprintf("\nSecond choice path:\n");
258 DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings,
259 xcoords);
260 }
261 // If lstm choice mode is required in granularity level 2, it stores the x
262 // Coordinates of every chosen character, to match the alternative choices to
263 // it.
264 ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings, &xcoords,
266 int num_ids = unichar_ids.size();
267 if (debug) {
268 DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings,
269 xcoords);
270 }
271 // Convert labels to unichar-ids.
272 int word_end = 0;
273 float prev_space_cert = 0.0f;
274 for (int word_start = 0; word_start < num_ids; word_start = word_end) {
275 for (word_end = word_start + 1; word_end < num_ids; ++word_end) {
276 // A word is terminated when a space character or start_of_word flag is
277 // hit. We also want to force a separate word for every non
278 // space-delimited character when not in a dictionary context.
279 if (unichar_ids[word_end] == UNICHAR_SPACE) {
280 break;
281 }
282 int index = xcoords[word_end];
283 if (best_nodes[index]->start_of_word) {
284 break;
285 }
286 if (best_nodes[index]->permuter == TOP_CHOICE_PERM &&
287 (!unicharset->IsSpaceDelimited(unichar_ids[word_end]) ||
288 !unicharset->IsSpaceDelimited(unichar_ids[word_end - 1]))) {
289 break;
290 }
291 }
292 float space_cert = 0.0f;
293 if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE) {
294 space_cert = certs[word_end];
295 }
296 bool leading_space =
297 word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE;
298 // Create a WERD_RES for the output word.
299 WERD_RES *word_res =
300 InitializeWord(leading_space, line_box, word_start, word_end,
301 std::min(space_cert, prev_space_cert), unicharset,
302 xcoords, scale_factor);
303 for (int i = word_start; i < word_end; ++i) {
304 auto *choices = new BLOB_CHOICE_LIST;
305 BLOB_CHOICE_IT bc_it(choices);
306 auto *choice = new BLOB_CHOICE(unichar_ids[i], ratings[i], certs[i], -1,
307 1.0f, static_cast<float>(INT16_MAX), 0.0f,
309 int col = i - word_start;
310 choice->set_matrix_cell(col, col);
311 bc_it.add_after_then_move(choice);
312 word_res->ratings->put(col, col, choices);
313 }
314 int index = xcoords[word_end - 1];
315 word_res->FakeWordFromRatings(best_nodes[index]->permuter);
316 words->push_back(word_res);
317 prev_space_cert = space_cert;
318 if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE) {
319 ++word_end;
320 }
321 }
322}
@ UNICHAR_SPACE
Definition: unicharset.h:36
@ BCC_STATIC_CLASSIFIER
Definition: ratngs.h:49
@ TOP_CHOICE_PERM
Definition: ratngs.h:238

◆ extractSymbolChoices()

void tesseract::RecodeBeamSearch::extractSymbolChoices ( const UNICHARSET unicharset)

Definition at line 409 of file recodebeam.cpp.

409 {
410 if (character_boundaries_.size() < 2) {
411 return;
412 }
413 // For the first iteration the original beam is analyzed. After that a
414 // new beam is calculated based on the results from the original beam.
415 std::vector<RecodeBeam *> &currentBeam =
416 secondary_beam_.empty() ? beam_ : secondary_beam_;
418 for (unsigned j = 1; j < character_boundaries_.size(); ++j) {
419 std::vector<int> unichar_ids;
420 std::vector<float> certs;
421 std::vector<float> ratings;
422 std::vector<int> xcoords;
423 int backpath = character_boundaries_[j] - character_boundaries_[j - 1];
424 std::vector<tesseract::RecodePair> &heaps =
425 currentBeam.at(character_boundaries_[j] - 1)->beams_->heap();
426 std::vector<const RecodeNode *> best_nodes;
427 std::vector<const RecodeNode *> best;
428 // Scan the segmented node chain for valid unichar ids.
429 for (auto &&entry : heaps) {
430 bool validChar = false;
431 int backcounter = 0;
432 const RecodeNode *node = &entry.data();
433 while (node != nullptr && backcounter < backpath) {
434 if (node->code != null_char_ &&
435 node->unichar_id != INVALID_UNICHAR_ID) {
436 validChar = true;
437 break;
438 }
439 node = node->prev;
440 ++backcounter;
441 }
442 if (validChar) {
443 best.push_back(&entry.data());
444 }
445 }
446 // find the best rated segmented node chain and extract the unichar id.
447 if (!best.empty()) {
448 std::sort(best.begin(), best.end(), greater_than());
449 ExtractPath(best[0], &best_nodes, backpath);
450 ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
451 &xcoords);
452 }
453 if (!unichar_ids.empty()) {
454 int bestPos = 0;
455 for (unsigned i = 1; i < unichar_ids.size(); ++i) {
456 if (ratings[i] < ratings[bestPos]) {
457 bestPos = i;
458 }
459 }
460#if 0 // TODO: bestCode is currently unused (see commit 2dd5d0d60).
461 int bestCode = -10;
462 for (auto &node : best_nodes) {
463 if (node->unichar_id == unichar_ids[bestPos]) {
464 bestCode = node->code;
465 }
466 }
467#endif
468 // Exclude the best choice for the followup decoding.
469 std::unordered_set<int> excludeCodeList;
470 for (auto &best_node : best_nodes) {
471 if (best_node->code != null_char_) {
472 excludeCodeList.insert(best_node->code);
473 }
474 }
475 if (j - 1 < excludedUnichars.size()) {
476 for (auto elem : excludeCodeList) {
477 excludedUnichars[j - 1].insert(elem);
478 }
479 } else {
480 excludedUnichars.push_back(excludeCodeList);
481 }
482 // Save the best choice for the choice iterator.
483 if (j - 1 < ctc_choices.size()) {
484 int id = unichar_ids[bestPos];
485 const char *result = unicharset->id_to_unichar_ext(id);
486 float rating = ratings[bestPos];
487 ctc_choices[j - 1].push_back(
488 std::pair<const char *, float>(result, rating));
489 } else {
490 std::vector<std::pair<const char *, float>> choice;
491 int id = unichar_ids[bestPos];
492 const char *result = unicharset->id_to_unichar_ext(id);
493 float rating = ratings[bestPos];
494 choice.emplace_back(result, rating);
495 ctc_choices.push_back(choice);
496 }
497 // fill the blank spot with an empty array
498 } else {
499 if (j - 1 >= excludedUnichars.size()) {
500 std::unordered_set<int> excludeCodeList;
501 excludedUnichars.push_back(excludeCodeList);
502 }
503 if (j - 1 >= ctc_choices.size()) {
504 std::vector<std::pair<const char *, float>> choice;
505 ctc_choices.push_back(choice);
506 }
507 }
508 }
509 for (auto data : secondary_beam_) {
510 delete data;
511 }
512 secondary_beam_.clear();
513}
std::vector< std::vector< std::pair< const char *, float > > > ctc_choices
Definition: recodebeam.h:234

◆ IsDawgFromBeamsIndex()

static bool tesseract::RecodeBeamSearch::IsDawgFromBeamsIndex ( int  index)
inlinestatic

Definition at line 256 of file recodebeam.h.

256 {
257 return index / (kNumLengths * NC_COUNT) > 0;
258 }

◆ LengthFromBeamsIndex()

static int tesseract::RecodeBeamSearch::LengthFromBeamsIndex ( int  index)
inlinestatic

Definition at line 250 of file recodebeam.h.

250 {
251 return index % kNumLengths;
252 }

◆ PrintBeam2()

void tesseract::RecodeBeamSearch::PrintBeam2 ( bool  uids,
int  num_outputs,
const UNICHARSET charset,
bool  secondary 
) const

Definition at line 330 of file recodebeam.cpp.

332 {
333 std::vector<std::vector<const RecodeNode *>> topology;
334 std::unordered_set<const RecodeNode *> visited;
335 const std::vector<RecodeBeam *> &beam = !secondary ? beam_ : secondary_beam_;
336 // create the topology
337 for (int step = beam.size() - 1; step >= 0; --step) {
338 std::vector<const RecodeNode *> layer;
339 topology.push_back(layer);
340 }
341 // fill the topology with depths first
342 for (int step = beam.size() - 1; step >= 0; --step) {
343 std::vector<tesseract::RecodePair> &heaps = beam.at(step)->beams_->heap();
344 for (auto &&node : heaps) {
345 int backtracker = 0;
346 const RecodeNode *curr = &node.data();
347 while (curr != nullptr && !visited.count(curr)) {
348 visited.insert(curr);
349 topology[step - backtracker].push_back(curr);
350 curr = curr->prev;
351 ++backtracker;
352 }
353 }
354 }
355 int ct = 0;
356 unsigned cb = 1;
357 for (const std::vector<const RecodeNode *> &layer : topology) {
358 if (cb >= character_boundaries_.size()) {
359 break;
360 }
361 if (ct == character_boundaries_[cb]) {
362 tprintf("***\n");
363 ++cb;
364 }
365 for (const RecodeNode *node : layer) {
366 const char *code;
367 int intCode;
368 if (node->unichar_id != INVALID_UNICHAR_ID) {
369 code = charset->id_to_unichar(node->unichar_id);
370 intCode = node->unichar_id;
371 } else if (node->code == null_char_) {
372 intCode = 0;
373 code = " ";
374 } else {
375 intCode = 666;
376 code = "*";
377 }
378 int intPrevCode = 0;
379 const char *prevCode;
380 float prevScore = 0;
381 if (node->prev != nullptr) {
382 prevScore = node->prev->score;
383 if (node->prev->unichar_id != INVALID_UNICHAR_ID) {
384 prevCode = charset->id_to_unichar(node->prev->unichar_id);
385 intPrevCode = node->prev->unichar_id;
386 } else if (node->code == null_char_) {
387 intPrevCode = 0;
388 prevCode = " ";
389 } else {
390 prevCode = "*";
391 intPrevCode = 666;
392 }
393 } else {
394 prevCode = " ";
395 }
396 if (uids) {
397 tprintf("%x(|)%f(>)%x(|)%f\n", intPrevCode, prevScore, intCode,
398 node->score);
399 } else {
400 tprintf("%s(|)%f(>)%s(|)%f\n", prevCode, prevScore, code, node->score);
401 }
402 }
403 tprintf("-\n");
404 ++ct;
405 }
406 tprintf("***\n");
407}

◆ segmentTimestepsByCharacters()

void tesseract::RecodeBeamSearch::segmentTimestepsByCharacters ( )

Definition at line 164 of file recodebeam.cpp.

164 {
165 for (unsigned i = 1; i < character_boundaries_.size(); ++i) {
166 std::vector<std::vector<std::pair<const char *, float>>> segment;
167 for (int j = character_boundaries_[i - 1]; j < character_boundaries_[i];
168 ++j) {
169 segment.push_back(timesteps[j]);
170 }
171 segmentedTimesteps.push_back(segment);
172 }
173}

Member Data Documentation

◆ character_boundaries_

std::vector<int> tesseract::RecodeBeamSearch::character_boundaries_

Definition at line 238 of file recodebeam.h.

◆ ctc_choices

std::vector<std::vector<std::pair<const char *, float> > > tesseract::RecodeBeamSearch::ctc_choices

Definition at line 234 of file recodebeam.h.

◆ excludedUnichars

std::vector<std::unordered_set<int> > tesseract::RecodeBeamSearch::excludedUnichars

Definition at line 236 of file recodebeam.h.

◆ kMinCertainty

constexpr float tesseract::RecodeBeamSearch::kMinCertainty = -20.0f
staticconstexpr

Definition at line 243 of file recodebeam.h.

◆ kNumBeams

const int tesseract::RecodeBeamSearch::kNumBeams = 2 * NC_COUNT * kNumLengths
static

Definition at line 248 of file recodebeam.h.

◆ kNumLengths

const int tesseract::RecodeBeamSearch::kNumLengths = RecodedCharID::kMaxCodeLen + 1
static

Definition at line 245 of file recodebeam.h.

◆ segmentedTimesteps

std::vector<std::vector<std::vector<std::pair<const char *, float> > > > tesseract::RecodeBeamSearch::segmentedTimesteps

Definition at line 232 of file recodebeam.h.

◆ timesteps

std::vector<std::vector<std::pair<const char *, float> > > tesseract::RecodeBeamSearch::timesteps

Definition at line 231 of file recodebeam.h.


The documentation for this class was generated from the following files: