25#ifndef DISABLED_LEGACY_ENGINE
37 if (tessedit_ambigs_training) {
38 tessedit_tess_adaption_mode.set_value(0);
39 tessedit_enable_doc_dict.set_value(
false);
41 getDict().stopper_no_acceptable_choices.set_value(
true);
44 std::string output_fname = filename;
45 const char *lastdot = strrchr(output_fname.c_str(),
'.');
46 if (lastdot !=
nullptr) {
47 output_fname[lastdot - output_fname.c_str()] =
'\0';
49 output_fname +=
".txt";
50 FILE *output_file = fopen(output_fname.c_str(),
"a+");
51 if (output_file ==
nullptr) {
52 tprintf(
"Error: Could not open file %s\n", output_fname.c_str());
60 while (page_res_it->
block() !=
nullptr && page_res_it->
word() ==
nullptr) {
64 if (page_res_it->
word() !=
nullptr) {
71 if (tbox->
left() < 0) {
72 tbox->
rotate(FCOORD(0.0, -1.0));
87 volatile ETEXT_DESC *monitor, FILE *output_file) {
88 std::string box_fname = filename;
89 const char *lastdot = strrchr(box_fname.c_str(),
'.');
90 if (lastdot !=
nullptr) {
91 box_fname[lastdot - box_fname.c_str()] =
'\0';
95 FILE *box_file = fopen(box_fname.c_str(),
"r");
96 if (box_file ==
nullptr) {
97 tprintf(
"Error: Could not open file %s\n", box_fname.c_str());
111 int examined_words = 0;
113 keep_going = read_t(&page_res_it, &tbox);
114 keep_going &=
ReadNextBox(applybox_page, &line_number, box_file, label, &bbox);
119 keep_going = read_t(&page_res_it, &tbox);
121 keep_going =
ReadNextBox(applybox_page, &line_number, box_file, label, &bbox);
127 keep_going = read_t(&page_res_it, &tbox);
129 keep_going =
ReadNextBox(applybox_page, &line_number, box_file, label, &bbox);
139 }
while (keep_going);
147 if (page_res_it.
word()) {
154 if (examined_words < 0.85 * total_words) {
156 "TODO(antonova): clean up recog_training_segmented; "
157 " It examined only a small fraction of the ambigs image.\n");
159 tprintf(
"recog_training_segmented: examined %d / %d words.\n", examined_words, total_words);
164 const char *label, FILE *output_file) {
166 float certainty = 0.0f;
167 for (
int i = 0;
i < length; ++
i) {
170 rating += blob_choice->
rating();
171 if (certainty > blob_choice->
certainty()) {
175 fprintf(output_file,
"\t%s\t%.4f\t%.4f\n", label, rating, certainty);
180static void PrintMatrixPaths(
int col,
int dim,
const MATRIX &ratings,
int length,
181 const BLOB_CHOICE **blob_choices,
const UNICHARSET &unicharset,
182 const char *label, FILE *output_file) {
183 for (
int row = col; row < dim && row - col < ratings.bandwidth(); ++row) {
185 BLOB_CHOICE_IT bc_it(ratings.get(col, row));
186 for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
187 blob_choices[length] = bc_it.data();
189 PrintMatrixPaths(row + 1, dim, ratings, length + 1, blob_choices, unicharset, label,
192 PrintPath(length + 1, blob_choices, unicharset, label, output_file);
215 std::vector<UNICHAR_ID> encoding;
217 tprintf(
"Not outputting illegal unichar %s\n", label);
223 const auto **blob_choices =
new const BLOB_CHOICE *[dim];
224 PrintMatrixPaths(0, dim, *werd_res->
ratings, 0, blob_choices,
unicharset, label, output_file);
225 delete[] blob_choices;
void tprintf(const char *format,...)
const int16_t kMaxBoxEdgeDiff
bool ReadNextBox(int *line_number, FILE *box_file, std::string &utf8_str, TBOX *bounding_box)
void classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordData *word_data)
void SetupWordPassN(int pass_n, WordData *word)
Dict & getDict() override
void recog_training_segmented(const char *filename, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
void ambigs_classify_and_output(const char *label, PAGE_RES_IT *pr_it, FILE *output_file)
FILE * init_recog_training(const char *filename)
WERD_CHOICE * best_choice
const UNICHARSET * uch_set
void SetupFake(const UNICHARSET &uch)
BLOCK_RES * block() const
WERD_RES * restart_page()
UNICHAR_ID unichar_id() const
void rotate(const FCOORD &vec)
TDimension bottom() const
TBOX bounding_box() const
bool encode_string(const char *str, bool give_up_on_failure, std::vector< UNICHAR_ID > *encoding, std::vector< char > *lengths, unsigned *encoded_length) const
const char * id_to_unichar(UNICHAR_ID id) const