31 std::vector<std::string> filenames;
33 tprintf(
"Failed to load list of eval filenames from %s\n", filenames_file);
54 if (total_pages_ == 0) {
55 result +=
"No test data at iteration " + std::to_string(iteration);
58 if (!LockIfNotRunning()) {
59 result +=
"Previous test incomplete, skipping test at iteration " + std::to_string(iteration);
63 std::string prev_result = test_result_;
65 if (training_errors !=
nullptr) {
66 test_iteration_ = iteration;
67 test_training_errors_ = training_errors;
68 test_model_mgr_ = model_mgr;
69 test_training_stage_ = training_stage;
70 std::thread t(&LSTMTester::ThreadFunc,
this);
87 return "Deserialize failed";
89 int eval_iteration = 0;
90 double char_error = 0.0;
91 double word_error = 0.0;
93 while (error_count < total_pages_) {
104 std::vector<int> ocr_labels;
105 std::vector<int> xcoords;
107 std::string ocr_text = trainer.
DecodeLabels(ocr_labels);
108 tprintf(
"OCR :%s\n", ocr_text.c_str());
110 tprintf(
"Line BCER=%f, BWER=%f\n\n",
117 char_error *= 100.0 / total_pages_;
118 word_error *= 100.0 / total_pages_;
119 std::stringstream result;
120 result.imbue(std::locale::classic());
121 result << std::fixed << std::setprecision(3);
122 if (iteration != 0 || training_stage != 0) {
123 result <<
"At iteration " << iteration
124 <<
", stage " << training_stage <<
", ";
126 result <<
"BCER eval=" << char_error <<
", BWER eval=" << word_error;
133void LSTMTester::ThreadFunc() {
135 RunEvalSync(test_iteration_, test_training_errors_, test_model_mgr_, test_training_stage_,
142bool LSTMTester::LockIfNotRunning() {
143 std::lock_guard<std::mutex> lock(running_mutex_);
144 if (async_running_) {
147 async_running_ =
true;
152void LSTMTester::UnlockRunning() {
153 std::lock_guard<std::mutex> lock(running_mutex_);
154 async_running_ =
false;
void tprintf(const char *format,...)
bool LoadFileLinesToStrings(const char *filename, std::vector< std::string > *lines)
const std::string & transcription() const
const ImageData * GetPageBySerial(int serial)
TESS_API bool LoadDocuments(const std::vector< std::string > &filenames, CachingStrategy cache_strategy, FileReader reader)
TESS_API int TotalPages()
bool GetComponent(TessdataType type, TFile *fp)
std::string DecodeLabels(const std::vector< int > &labels)
void LabelsFromOutputs(const NetworkIO &outputs, std::vector< int > *labels, std::vector< int > *xcoords)
void SetIteration(int iteration)
std::string RunEvalAsync(int iteration, const double *training_errors, const TessdataManager &model_mgr, int training_stage)
std::string RunEvalSync(int iteration, const double *training_errors, const TessdataManager &model_mgr, int training_stage, int verbosity)
LSTMTester(int64_t max_memory)
bool LoadAllEvalData(const char *filenames_file)
Trainability PrepareForBackward(const ImageData *trainingdata, NetworkIO *fwd_outputs, NetworkIO *targets)
bool InitCharSet(const std::string &traineddata_path)
double NewSingleError(ErrorTypes type) const
bool DeSerialize(const TessdataManager *mgr, TFile *fp)