tesseract v5.3.3.20231005
tesseract::ErrorCounter Class Reference

#include <errorcounter.h>

Static Public Member Functions

static double ComputeErrorRate (ShapeClassifier *classifier, int report_level, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, const std::vector< Image > &page_images, SampleIterator *it, double *unichar_error, double *scaled_error, std::string *fonts_report)
 
static void DebugNewErrors (ShapeClassifier *new_classifier, ShapeClassifier *old_classifier, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, const std::vector< Image > &page_images, SampleIterator *it)
 

Detailed Description

Definition at line 94 of file errorcounter.h.

Member Function Documentation

◆ ComputeErrorRate()

double tesseract::ErrorCounter::ComputeErrorRate ( ShapeClassifier *  classifier,
int  report_level,
CountTypes  boosting_mode,
const FontInfoTable &  fontinfo_table,
const std::vector< Image > &  page_images,
SampleIterator *  it,
double *  unichar_error,
double *  scaled_error,
std::string *  fonts_report 
)
static

Definition at line 44 of file errorcounter.cpp.

48 {
49 const int fontsize = it->sample_set()->NumFonts();
50 ErrorCounter counter(classifier->GetUnicharset(), fontsize);
51 std::vector<UnicharRating> results;
52
53 clock_t start = clock();
54 unsigned total_samples = 0;
55 double unscaled_error = 0.0;
56 // Set a number of samples on which to run the classify debug mode.
57 int error_samples = report_level > 3 ? report_level * report_level : 0;
58 // Iterate over all the samples, accumulating errors.
59 for (it->Begin(); !it->AtEnd(); it->Next()) {
60 TrainingSample *mutable_sample = it->MutableSample();
61 int page_index = mutable_sample->page_num();
62 Image page_pix =
63 0 <= page_index && page_index < page_images.size() ? page_images[page_index] : nullptr;
64 // No debug, no keep this.
65 classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID, &results);
66 bool debug_it = false;
67 int correct_id = mutable_sample->class_id();
68 if (counter.unicharset_.has_special_codes() &&
69 (correct_id == UNICHAR_SPACE || correct_id == UNICHAR_JOINED ||
70 correct_id == UNICHAR_BROKEN)) {
71 // This is junk so use the special counter.
72 debug_it = counter.AccumulateJunk(report_level > 3, results, mutable_sample);
73 } else {
74 debug_it = counter.AccumulateErrors(report_level > 3, boosting_mode, fontinfo_table, results,
75 mutable_sample);
76 }
77 if (debug_it && error_samples > 0) {
78 // Running debug, keep the correct answer, and debug the classifier.
79 tprintf("Error on sample %d: %s Classifier debug output:\n", it->GlobalSampleIndex(),
80 it->sample_set()->SampleToString(*mutable_sample).c_str());
81#ifndef GRAPHICS_DISABLED
82 classifier->DebugDisplay(*mutable_sample, page_pix, correct_id);
83#endif
84 --error_samples;
85 }
86 ++total_samples;
87 }
88 const double total_time = 1.0 * (clock() - start) / CLOCKS_PER_SEC;
89 // Create the appropriate error report.
90 unscaled_error = counter.ReportErrors(report_level, boosting_mode, fontinfo_table, *it,
91 unichar_error, fonts_report);
92 if (scaled_error != nullptr) {
93 *scaled_error = counter.scaled_error_;
94 }
95 if (report_level > 1 && total_samples > 0) {
96 // It is useful to know the time in microseconds/char.
97 tprintf("Errors computed in %.2fs at %.1f μs/char\n", total_time,
98 1000000.0 * total_time / total_samples);
99 }
100 return unscaled_error;
101}
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
@ UNICHAR_SPACE
Definition: unicharset.h:36
@ UNICHAR_BROKEN
Definition: unicharset.h:38
@ UNICHAR_JOINED
Definition: unicharset.h:37

◆ DebugNewErrors()

void tesseract::ErrorCounter::DebugNewErrors ( ShapeClassifier *  new_classifier,
ShapeClassifier *  old_classifier,
CountTypes  boosting_mode,
const FontInfoTable &  fontinfo_table,
const std::vector< Image > &  page_images,
SampleIterator *  it 
)
static

Definition at line 109 of file errorcounter.cpp.

111 {
112 int fontsize = it->sample_set()->NumFonts();
113 ErrorCounter old_counter(old_classifier->GetUnicharset(), fontsize);
114 ErrorCounter new_counter(new_classifier->GetUnicharset(), fontsize);
115 std::vector<UnicharRating> results;
116
117 int total_samples = 0;
118 int error_samples = 25;
119 int total_new_errors = 0;
120 // Iterate over all the samples, accumulating errors.
121 for (it->Begin(); !it->AtEnd(); it->Next()) {
122 TrainingSample *mutable_sample = it->MutableSample();
123 int page_index = mutable_sample->page_num();
124 Image page_pix =
125 0 <= page_index && page_index < page_images.size() ? page_images[page_index] : nullptr;
126 // No debug, no keep this.
127 old_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID,
128 &results);
129 int correct_id = mutable_sample->class_id();
130 if (correct_id != 0 && !old_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,
131 results, mutable_sample)) {
132 // old classifier was correct, check the new one.
133 new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID,
134 &results);
135 if (correct_id != 0 && new_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,
136 results, mutable_sample)) {
137 tprintf("New Error on sample %d: Classifier debug output:\n", it->GlobalSampleIndex());
138 ++total_new_errors;
139 new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 1, correct_id, &results);
140 if (results.size() > 0 && error_samples > 0) {
141#ifndef GRAPHICS_DISABLED
142 new_classifier->DebugDisplay(*mutable_sample, page_pix, correct_id);
143#endif
144 --error_samples;
145 }
146 }
147 }
148 ++total_samples;
149 }
150 tprintf("Total new errors = %d\n", total_new_errors);
151}

The documentation for this class was generated from the following files: