tesseract v5.3.3.20231005
lstmtrainer.cpp
///////////////////////////////////////////////////////////////////////
// File: lstmtrainer.cpp
// Description: Top-level line trainer class for LSTM-based networks.
// Author: Ray Smith
//
// (C) Copyright 2013, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////

#define _USE_MATH_DEFINES // needed to get definition of M_SQRT1_2

// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
# include "config_auto.h"
#endif

#include <cmath>
#include <iomanip> // for std::setprecision
#include <locale>  // for std::locale::classic
#include <string>
#include "lstmtrainer.h"

#include <allheaders.h>
#include "boxread.h"
#include "ctc.h"
#include "imagedata.h"
#include "input.h"
#include "networkbuilder.h"
#include "ratngs.h"
#include "recodebeam.h"
#ifdef INCLUDE_TENSORFLOW
# include "tfnetwork.h"
#endif
#include "tprintf.h"

namespace tesseract {

// Min actual error rate increase to constitute divergence.
const double kMinDivergenceRate = 50.0;
// Min iterations since last best before acting on a stall.
const int kMinStallIterations = 10000;
// Fraction of current char error rate that sub_trainer_ has to be ahead
// before we declare the sub_trainer_ a success and switch to it.
const double kSubTrainerMarginFraction = 3.0 / 128;
// Factor to reduce learning rate on divergence.
const double kLearningRateDecay = M_SQRT1_2;
// LR adjustment iterations.
const int kNumAdjustmentIterations = 100;
// How often to add data to the error_graph_.
const int kErrorGraphInterval = 1000;
// Number of training images to train between calls to MaintainCheckpoints.
const int kNumPagesPerBatch = 100;
// Min percent error rate to consider start-up phase over.
const int kMinStartedErrorRate = 75;
// Error rate at which to transition to stage 1.
const double kStageTransitionThreshold = 10.0;
// Confidence beyond which the truth is more likely wrong than the recognizer.
const double kHighConfidence = 0.9375; // 15/16.
// Fraction of weight sign-changing total to constitute a definite improvement.
const double kImprovementFraction = 15.0 / 16.0;
// Fraction of last written best to make it worth writing another.
const double kBestCheckpointFraction = 31.0 / 32.0;
#ifndef GRAPHICS_DISABLED
// Scale factor for display of target activations of CTC.
const int kTargetXScale = 5;
const int kTargetYScale = 100;
#endif // !GRAPHICS_DISABLED

LSTMTrainer::LSTMTrainer()
    : randomly_rotate_(false), training_data_(0), sub_trainer_(nullptr) {
  EmptyConstructor();
  debug_interval_ = 0;
}

LSTMTrainer::LSTMTrainer(const char *model_base, const char *checkpoint_name,
                         int debug_interval, int64_t max_memory)
    : randomly_rotate_(false),
      training_data_(max_memory),
      sub_trainer_(nullptr) {
  EmptyConstructor();
  debug_interval_ = debug_interval;
  model_base_ = model_base;
  checkpoint_name_ = checkpoint_name;
}

LSTMTrainer::~LSTMTrainer() {
#ifndef GRAPHICS_DISABLED
  delete align_win_;
  delete target_win_;
  delete ctc_win_;
  delete recon_win_;
#endif
}

// Tries to deserialize a trainer from the given file and silently returns
// false in case of failure.
bool LSTMTrainer::TryLoadingCheckpoint(const char *filename,
                                       const char *old_traineddata) {
  std::vector<char> data;
  if (!LoadDataFromFile(filename, &data)) {
    return false;
  }
  tprintf("Loaded file %s, unpacking...\n", filename);
  if (!ReadTrainingDump(data, *this)) {
    return false;
  }
  if (IsIntMode()) {
    tprintf("Error, %s is an integer (fast) model, cannot continue training\n",
            filename);
    return false;
  }
  if (((old_traineddata == nullptr || *old_traineddata == '\0') &&
       network_->NumOutputs() == recoder_.code_range()) ||
      filename == old_traineddata) {
    return true; // Normal checkpoint load complete.
  }
  tprintf("Code range changed from %d to %d!\n", network_->NumOutputs(),
          recoder_.code_range());
  if (old_traineddata == nullptr || *old_traineddata == '\0') {
    tprintf("Must supply the old traineddata for code conversion!\n");
    return false;
  }
  TessdataManager old_mgr;
  ASSERT_HOST(old_mgr.Init(old_traineddata));
  TFile fp;
  if (!old_mgr.GetComponent(TESSDATA_LSTM_UNICHARSET, &fp)) {
    return false;
  }
  UNICHARSET old_chset;
  if (!old_chset.load_from_file(&fp, false)) {
    return false;
  }
  if (!old_mgr.GetComponent(TESSDATA_LSTM_RECODER, &fp)) {
    return false;
  }
  UnicharCompress old_recoder;
  if (!old_recoder.DeSerialize(&fp)) {
    return false;
  }
  std::vector<int> code_map = MapRecoder(old_chset, old_recoder);
  // Set the null_char_ to the new value.
  int old_null_char = null_char_;
  SetNullChar();
  // Map the softmax(s) in the network.
  network_->RemapOutputs(old_recoder.code_range(), code_map);
  tprintf("Previous null char=%d mapped to %d\n", old_null_char, null_char_);
  return true;
}
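
// Example: a minimal resume-or-convert sketch (hypothetical driver code, not
// part of this file; the paths are illustrative). With old_traineddata ==
// nullptr this is a plain resume; passing the old traineddata enables the
// code-mapping path above when the output code range has changed.
#if 0
LSTMTrainer trainer("/tmp/model/base", "/tmp/model/checkpoint", 0, 0);
if (!trainer.TryLoadingCheckpoint("/tmp/model/checkpoint", nullptr)) {
  tprintf("No usable checkpoint; start from a fresh network instead.\n");
}
#endif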

// Initializes the trainer with a network_spec in the network description.
// net_flags control network behavior according to the NetworkFlags enum.
// There isn't really much difference between them - only where the effects
// are implemented.
// For other args see NetworkBuilder::InitNetwork.
// Note: Be sure to call InitCharSet before InitNetwork!
bool LSTMTrainer::InitNetwork(const char *network_spec, int append_index,
                              int net_flags, float weight_range,
                              float learning_rate, float momentum,
                              float adam_beta) {
  mgr_.SetVersionString(mgr_.VersionString() + ":" + network_spec);
  adam_beta_ = adam_beta;
  learning_rate_ = learning_rate;
  momentum_ = momentum;
  SetNullChar();
  if (!NetworkBuilder::InitNetwork(recoder_.code_range(), network_spec,
                                   append_index, net_flags, weight_range,
                                   &randomizer_, &network_)) {
    return false;
  }
  network_str_ += network_spec;
  tprintf("Built network:%s from request %s\n", network_->spec().c_str(),
          network_spec);
  tprintf(
      "Training parameters:\n Debug interval = %d,"
      " weights = %g, learning rate = %g, momentum=%g\n",
      debug_interval_, weight_range, learning_rate_, momentum_);
  tprintf("null char=%d\n", null_char_);
  return true;
}
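
// Example: building a fresh network from a VGSL spec (a sketch only; the spec
// string and hyperparameters below are illustrative, not canonical). The
// charset must already be initialized (InitCharSet) so that
// recoder_.code_range() is valid when this runs.
#if 0
trainer.InitNetwork("[1,36,0,1 Ct3,3,16 Mp3,3 Lfys48 Lfx96 Lrx96 Lfx192 O1c111]",
                    /*append_index=*/-1, /*net_flags=*/0,
                    /*weight_range=*/0.1f, /*learning_rate=*/1e-3f,
                    /*momentum=*/0.5f, /*adam_beta=*/0.999f);
#endif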

// Initializes a trainer from a serialized TFNetworkModel proto.
// Returns the global step of TensorFlow graph or 0 if failed.
#ifdef INCLUDE_TENSORFLOW
int LSTMTrainer::InitTensorFlowNetwork(const std::string &tf_proto) {
  delete network_;
  TFNetwork *tf_net = new TFNetwork("TensorFlow");
  training_iteration_ = tf_net->InitFromProtoStr(tf_proto);
  if (training_iteration_ == 0) {
    tprintf("InitFromProtoStr failed!!\n");
    return 0;
  }
  network_ = tf_net;
  ASSERT_HOST(recoder_.code_range() == tf_net->num_classes());
  return training_iteration_;
}
#endif

// Resets all the iteration counters for fine tuning or training a head,
// where we want the error reporting to reset.
void LSTMTrainer::InitIterations() {
  sample_iteration_ = 0;
  training_iteration_ = 0;
  learning_iteration_ = 0;
  prev_sample_iteration_ = 0;
  best_error_rate_ = 100.0;
  best_iteration_ = 0;
  worst_error_rate_ = 0.0;
  worst_iteration_ = 0;
  stall_iteration_ = kMinStallIterations;
  best_error_history_.clear();
  best_error_iterations_.clear();
  improvement_steps_ = kMinStallIterations;
  perfect_delay_ = 0;
  last_perfect_training_iteration_ = 0;
  for (int i = 0; i < ET_COUNT; ++i) {
    best_error_rates_[i] = 100.0;
    worst_error_rates_[i] = 0.0;
    error_buffers_[i].clear();
    error_buffers_[i].resize(kRollingBufferSize_);
    error_rates_[i] = 100.0;
  }
  error_rate_of_last_saved_best_ = kMinStartedErrorRate;
}

// If the training sample is usable, grid searches for the optimal
// dict_ratio/cert_offset, and returns the results in a string of space-
// separated triplets of ratio,offset=worderr.
Trainability LSTMTrainer::GridSearchDictParams(
    const ImageData *trainingdata, int iteration, double min_dict_ratio,
    double dict_ratio_step, double max_dict_ratio, double min_cert_offset,
    double cert_offset_step, double max_cert_offset, std::string &results) {
  sample_iteration_ = iteration;
  NetworkIO fwd_outputs, targets;
  Trainability result =
      PrepareForBackward(trainingdata, &fwd_outputs, &targets);
  if (result == UNENCODABLE || result == HI_PRECISION_ERR || dict_ == nullptr) {
    return result;
  }

  // Encode/decode the truth to get the normalization.
  std::vector<int> truth_labels, ocr_labels, xcoords;
  ASSERT_HOST(EncodeString(trainingdata->transcription(), &truth_labels));
  // NO-dict error.
  RecodeBeamSearch base_search(recoder_, null_char_, SimpleTextOutput(),
                               nullptr);
  base_search.Decode(fwd_outputs, 1.0, 0.0, RecodeBeamSearch::kMinCertainty,
                     nullptr);
  base_search.ExtractBestPathAsLabels(&ocr_labels, &xcoords);
  std::string truth_text = DecodeLabels(truth_labels);
  std::string ocr_text = DecodeLabels(ocr_labels);
  double baseline_error = ComputeWordError(&truth_text, &ocr_text);
  results += "0,0=" + std::to_string(baseline_error);

  RecodeBeamSearch search(recoder_, null_char_, SimpleTextOutput(), dict_);
  for (double r = min_dict_ratio; r < max_dict_ratio; r += dict_ratio_step) {
    for (double c = min_cert_offset; c < max_cert_offset;
         c += cert_offset_step) {
      search.Decode(fwd_outputs, r, c, RecodeBeamSearch::kMinCertainty,
                    nullptr);
      search.ExtractBestPathAsLabels(&ocr_labels, &xcoords);
      truth_text = DecodeLabels(truth_labels);
      ocr_text = DecodeLabels(ocr_labels);
      // This is destructive on both strings.
      double word_error = ComputeWordError(&truth_text, &ocr_text);
      if ((r == min_dict_ratio && c == min_cert_offset) ||
          !std::isfinite(word_error)) {
        std::string t = DecodeLabels(truth_labels);
        std::string o = DecodeLabels(ocr_labels);
        tprintf("r=%g, c=%g, truth=%s, ocr=%s, wderr=%g, truth[0]=%d\n", r, c,
                t.c_str(), o.c_str(), word_error, truth_labels[0]);
      }
      results += " " + std::to_string(r);
      results += "," + std::to_string(c);
      results += "=" + std::to_string(word_error);
    }
  }
  return result;
}
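
// Example invocation (a sketch with illustrative ranges; trainingdata is a
// hypothetical sample): sweeps dict_ratio in [0, 2) by 0.25 and cert_offset
// in [-8, 0) by 1, appending a "ratio,offset=worderr" triplet to results for
// every grid point.
#if 0
std::string results;
trainer.GridSearchDictParams(trainingdata, /*iteration=*/0,
                             /*min_dict_ratio=*/0.0, /*dict_ratio_step=*/0.25,
                             /*max_dict_ratio=*/2.0, /*min_cert_offset=*/-8.0,
                             /*cert_offset_step=*/1.0, /*max_cert_offset=*/0.0,
                             results);
#endif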

// Provides output on the distribution of weight values.
void LSTMTrainer::DebugNetwork() {
  network_->DebugWeights();
}

// Loads a set of lstmf files that were created using the lstm.train config to
// tesseract into memory ready for training. Returns false if nothing was
// loaded.
bool LSTMTrainer::LoadAllTrainingData(const std::vector<std::string> &filenames,
                                      CachingStrategy cache_strategy,
                                      bool randomly_rotate) {
  randomly_rotate_ = randomly_rotate;
  training_data_.Clear();
  return training_data_.LoadDocuments(filenames, cache_strategy,
                                      LoadDataFromFile);
}

// Keeps track of best and locally worst char error_rate and launches tests
// using tester, when a new min or max is reached.
// Writes checkpoints at appropriate times and builds and returns a log message
// to indicate progress. Returns false if nothing interesting happened.
bool LSTMTrainer::MaintainCheckpoints(const TestCallback &tester,
                                      std::stringstream &log_msg) {
  PrepareLogMsg(log_msg);
  double error_rate = CharError();
  int iteration = learning_iteration();
  if (iteration >= stall_iteration_ &&
      error_rate > best_error_rate_ * (1.0 + kSubTrainerMarginFraction) &&
      best_error_rate_ < kMinStartedErrorRate && !best_trainer_.empty()) {
    // It hasn't got any better in a long while, and is a margin worse than the
    // best, so go back to the best model and try a different learning rate.
    StartSubtrainer(log_msg);
  }
  SubTrainerResult sub_trainer_result = STR_NONE;
  if (sub_trainer_ != nullptr) {
    sub_trainer_result = UpdateSubtrainer(log_msg);
    if (sub_trainer_result == STR_REPLACED) {
      // Reset the inputs, as we have overwritten *this.
      error_rate = CharError();
      iteration = learning_iteration();
      PrepareLogMsg(log_msg);
    }
  }
  bool result = true; // Something interesting happened.
  std::vector<char> rec_model_data;
  if (error_rate < best_error_rate_) {
    SaveRecognitionDump(&rec_model_data);
    log_msg << " New best BCER = " << error_rate;
    log_msg << UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
    // If sub_trainer_ is not nullptr, either *this beat it to a new best, or it
    // just overwrote *this. In either case, we have finished with it.
    sub_trainer_.reset();
    stall_iteration_ = learning_iteration() + kMinStallIterations;
    if (TransitionTrainingStage(kStageTransitionThreshold)) {
      log_msg << " Transitioned to stage " << CurrentTrainingStage();
    }
    SaveTrainingDump(NO_BEST_TRAINER, *this, &best_trainer_);
    if (error_rate < error_rate_of_last_saved_best_ * kBestCheckpointFraction) {
      std::string best_model_name = DumpFilename();
      if (!SaveDataToFile(best_trainer_, best_model_name.c_str())) {
        log_msg << " failed to write best model:";
      } else {
        log_msg << " wrote best model:";
        error_rate_of_last_saved_best_ = best_error_rate_;
      }
      log_msg << best_model_name;
    }
  } else if (error_rate > worst_error_rate_) {
    SaveRecognitionDump(&rec_model_data);
    log_msg << " New worst BCER = " << error_rate;
    log_msg << UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
    if (worst_error_rate_ > best_error_rate_ + kMinDivergenceRate &&
        best_error_rate_ < kMinStartedErrorRate && !best_trainer_.empty()) {
      // Error rate has ballooned. Go back to the best model.
      log_msg << "\nDivergence! ";
      // Copy best_trainer_ before reading it, as it will get overwritten.
      std::vector<char> revert_data(best_trainer_);
      if (ReadTrainingDump(revert_data, *this)) {
        LogIterations("Reverted to", log_msg);
        ReduceLearningRates(this, log_msg);
      } else {
        LogIterations("Failed to Revert at", log_msg);
      }
      // If it fails again, we will wait twice as long before reverting again.
      stall_iteration_ = iteration + 2 * (iteration - learning_iteration());
      // Re-save the best trainer with the new learning rates and stall
      // iteration.
      SaveTrainingDump(NO_BEST_TRAINER, *this, &best_trainer_);
    }
  } else {
    // Something interesting happened only if the sub_trainer_ was trained.
    result = sub_trainer_result != STR_NONE;
  }
  if (checkpoint_name_.length() > 0) {
    // Write a current checkpoint.
    std::vector<char> checkpoint;
    if (!SaveTrainingDump(FULL, *this, &checkpoint) ||
        !SaveDataToFile(checkpoint, checkpoint_name_.c_str())) {
      log_msg << " failed to write checkpoint.";
    } else {
      log_msg << " wrote checkpoint.";
    }
  }
  return result;
}
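
// Example: the shape of the outer training loop (a hypothetical driver,
// modeled on the lstmtraining tool; max_iterations is illustrative): train
// kNumPagesPerBatch lines, then let MaintainCheckpoints handle logging,
// checkpointing, sub-trainer trials and divergence recovery.
#if 0
while (trainer.training_iteration() < max_iterations) {
  for (int i = 0; i < kNumPagesPerBatch; ++i) {
    trainer.TrainOnLine(&trainer, /*batch=*/false);
  }
  std::stringstream log_msg;
  trainer.MaintainCheckpoints(/*tester=*/nullptr, log_msg);
  tprintf("%s\n", log_msg.str().c_str());
}
#endif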

// Builds a string containing a progress message with current error rates.
void LSTMTrainer::PrepareLogMsg(std::stringstream &log_msg) const {
  LogIterations("At", log_msg);
  log_msg << std::fixed << std::setprecision(3)
          << ", mean rms=" << error_rates_[ET_RMS]
          << "%, delta=" << error_rates_[ET_DELTA]
          << "%, BCER train=" << error_rates_[ET_CHAR_ERROR]
          << "%, BWER train=" << error_rates_[ET_WORD_RECERR]
          << "%, skip ratio=" << error_rates_[ET_SKIP_RATIO] << "%,";
}

// Appends <intro_str> iteration learning_iteration()/training_iteration()/
// sample_iteration() to the log_msg.
void LSTMTrainer::LogIterations(const char *intro_str,
                                std::stringstream &log_msg) const {
  log_msg << intro_str
          << " iteration " << learning_iteration()
          << "/" << training_iteration()
          << "/" << sample_iteration();
}

// Returns true and increments the training_stage_ if the error rate has just
// passed through the given threshold for the first time.
bool LSTMTrainer::TransitionTrainingStage(float error_threshold) {
  if (best_error_rate_ < error_threshold &&
      training_stage_ + 1 < num_training_stages_) {
    ++training_stage_;
    return true;
  }
  return false;
}

// Writes to the given file. Returns false in case of error.
bool LSTMTrainer::Serialize(SerializeAmount serialize_amount,
                            const TessdataManager *mgr, TFile *fp) const {
  if (!LSTMRecognizer::Serialize(mgr, fp)) {
    return false;
  }
  if (!fp->Serialize(&learning_iteration_)) {
    return false;
  }
  if (!fp->Serialize(&prev_sample_iteration_)) {
    return false;
  }
  if (!fp->Serialize(&perfect_delay_)) {
    return false;
  }
  if (!fp->Serialize(&last_perfect_training_iteration_)) {
    return false;
  }
  for (const auto &error_buffer : error_buffers_) {
    if (!fp->Serialize(error_buffer)) {
      return false;
    }
  }
  if (!fp->Serialize(&error_rates_[0], countof(error_rates_))) {
    return false;
  }
  if (!fp->Serialize(&training_stage_)) {
    return false;
  }
  uint8_t amount = serialize_amount;
  if (!fp->Serialize(&amount)) {
    return false;
  }
  if (serialize_amount == LIGHT) {
    return true; // We are done.
  }
  if (!fp->Serialize(&best_error_rate_)) {
    return false;
  }
  if (!fp->Serialize(&best_error_rates_[0], countof(best_error_rates_))) {
    return false;
  }
  if (!fp->Serialize(&best_iteration_)) {
    return false;
  }
  if (!fp->Serialize(&worst_error_rate_)) {
    return false;
  }
  if (!fp->Serialize(&worst_error_rates_[0], countof(worst_error_rates_))) {
    return false;
  }
  if (!fp->Serialize(&worst_iteration_)) {
    return false;
  }
  if (!fp->Serialize(&stall_iteration_)) {
    return false;
  }
  if (!fp->Serialize(best_model_data_)) {
    return false;
  }
  if (!fp->Serialize(worst_model_data_)) {
    return false;
  }
  if (serialize_amount != NO_BEST_TRAINER && !fp->Serialize(best_trainer_)) {
    return false;
  }
  std::vector<char> sub_data;
  if (sub_trainer_ != nullptr &&
      !SaveTrainingDump(LIGHT, *sub_trainer_, &sub_data)) {
    return false;
  }
  if (!fp->Serialize(sub_data)) {
    return false;
  }
  if (!fp->Serialize(best_error_history_)) {
    return false;
  }
  if (!fp->Serialize(best_error_iterations_)) {
    return false;
  }
  return fp->Serialize(&improvement_steps_);
}

// Reads from the given file. Returns false in case of error.
// NOTE: It is assumed that the trainer is never read cross-endian.
bool LSTMTrainer::DeSerialize(const TessdataManager *mgr, TFile *fp) {
  if (!LSTMRecognizer::DeSerialize(mgr, fp)) {
    return false;
  }
  if (!fp->DeSerialize(&learning_iteration_)) {
    // Special case. If we successfully decoded the recognizer, but fail here
    // then it means we were just given a recognizer, so issue a warning and
    // allow it.
    tprintf("Warning: LSTMTrainer deserialized an LSTMRecognizer!\n");
    learning_iteration_ = 0;
    network_->SetEnableTraining(TS_ENABLED);
    return true;
  }
  if (!fp->DeSerialize(&prev_sample_iteration_)) {
    return false;
  }
  if (!fp->DeSerialize(&perfect_delay_)) {
    return false;
  }
  if (!fp->DeSerialize(&last_perfect_training_iteration_)) {
    return false;
  }
  for (auto &error_buffer : error_buffers_) {
    if (!fp->DeSerialize(error_buffer)) {
      return false;
    }
  }
  if (!fp->DeSerialize(&error_rates_[0], countof(error_rates_))) {
    return false;
  }
  if (!fp->DeSerialize(&training_stage_)) {
    return false;
  }
  uint8_t amount;
  if (!fp->DeSerialize(&amount)) {
    return false;
  }
  if (amount == LIGHT) {
    return true; // Don't read the rest.
  }
  if (!fp->DeSerialize(&best_error_rate_)) {
    return false;
  }
  if (!fp->DeSerialize(&best_error_rates_[0], countof(best_error_rates_))) {
    return false;
  }
  if (!fp->DeSerialize(&best_iteration_)) {
    return false;
  }
  if (!fp->DeSerialize(&worst_error_rate_)) {
    return false;
  }
  if (!fp->DeSerialize(&worst_error_rates_[0], countof(worst_error_rates_))) {
    return false;
  }
  if (!fp->DeSerialize(&worst_iteration_)) {
    return false;
  }
  if (!fp->DeSerialize(&stall_iteration_)) {
    return false;
  }
  if (!fp->DeSerialize(best_model_data_)) {
    return false;
  }
  if (!fp->DeSerialize(worst_model_data_)) {
    return false;
  }
  if (amount != NO_BEST_TRAINER && !fp->DeSerialize(best_trainer_)) {
    return false;
  }
  std::vector<char> sub_data;
  if (!fp->DeSerialize(sub_data)) {
    return false;
  }
  if (sub_data.empty()) {
    sub_trainer_ = nullptr;
  } else {
    sub_trainer_ = std::make_unique<LSTMTrainer>();
    if (!ReadTrainingDump(sub_data, *sub_trainer_)) {
      return false;
    }
  }
  if (!fp->DeSerialize(best_error_history_)) {
    return false;
  }
  if (!fp->DeSerialize(best_error_iterations_)) {
    return false;
  }
  return fp->DeSerialize(&improvement_steps_);
}

// De-serializes the saved best_trainer_ into sub_trainer_, and adjusts the
// learning rates (by scaling reduction, or layer specific, according to
// NF_LAYER_SPECIFIC_LR).
void LSTMTrainer::StartSubtrainer(std::stringstream &log_msg) {
  sub_trainer_ = std::make_unique<LSTMTrainer>();
  if (!ReadTrainingDump(best_trainer_, *sub_trainer_)) {
    log_msg << " Failed to revert to previous best for trial!";
    sub_trainer_.reset();
  } else {
    log_msg << " Trial sub_trainer_ from iteration "
            << sub_trainer_->training_iteration();
    // Reduce learning rate so it doesn't diverge this time.
    sub_trainer_->ReduceLearningRates(this, log_msg);
    // If it fails again, we will wait twice as long before reverting again.
    int stall_offset =
        learning_iteration() - sub_trainer_->learning_iteration();
    stall_iteration_ = learning_iteration() + 2 * stall_offset;
    sub_trainer_->stall_iteration_ = stall_iteration_;
    // Re-save the best trainer with the new learning rates and stall iteration.
    SaveTrainingDump(NO_BEST_TRAINER, *sub_trainer_, &best_trainer_);
  }
}

// While the sub_trainer_ is behind the current training iteration and its
// training error is at least kSubTrainerMarginFraction better than the
// current training error, trains the sub_trainer_, and returns STR_UPDATED if
// it did anything. If it catches up, and has a better error rate than the
// current best, as well as a margin over the current error rate, then the
// trainer in *this is replaced with sub_trainer_, and STR_REPLACED is
// returned. STR_NONE is returned if the subtrainer wasn't good enough to
// receive any training iterations.
SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::stringstream &log_msg) {
  double training_error = CharError();
  double sub_error = sub_trainer_->CharError();
  double sub_margin = (training_error - sub_error) / sub_error;
  if (sub_margin >= kSubTrainerMarginFraction) {
    log_msg << " sub_trainer=" << sub_error
            << " margin=" << 100.0 * sub_margin << "\n";
    // Catch up to current iteration.
    int end_iteration = training_iteration();
    while (sub_trainer_->training_iteration() < end_iteration &&
           sub_margin >= kSubTrainerMarginFraction) {
      int target_iteration =
          sub_trainer_->training_iteration() + kNumPagesPerBatch;
      while (sub_trainer_->training_iteration() < target_iteration) {
        sub_trainer_->TrainOnLine(this, false);
      }
      std::stringstream batch_log("Sub:");
      batch_log.imbue(std::locale::classic());
      sub_trainer_->PrepareLogMsg(batch_log);
      batch_log << "\n";
      tprintf("UpdateSubtrainer:%s", batch_log.str().c_str());
      log_msg << batch_log.str();
      sub_error = sub_trainer_->CharError();
      sub_margin = (training_error - sub_error) / sub_error;
    }
    if (sub_error < best_error_rate_ &&
        sub_margin >= kSubTrainerMarginFraction) {
      // The sub_trainer_ has won the race to a new best. Switch to it.
      std::vector<char> updated_trainer;
      SaveTrainingDump(LIGHT, *sub_trainer_, &updated_trainer);
      ReadTrainingDump(updated_trainer, *this);
      log_msg << " Sub trainer wins at iteration "
              << training_iteration() << "\n";
      return STR_REPLACED;
    }
    return STR_UPDATED;
  }
  return STR_NONE;
}
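
// Worked example of the margin test above (illustrative numbers): with
// training BCER 5.0% and sub_trainer_ BCER 4.8%, sub_margin =
// (5.0 - 4.8) / 4.8 = 0.0417, which exceeds kSubTrainerMarginFraction
// (3/128 = 0.0234), so the sub_trainer_ receives training iterations; it
// replaces *this only if it also beats best_error_rate_ while keeping that
// margin.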

// Reduces network learning rates, either for everything, or for layers
// independently, according to NF_LAYER_SPECIFIC_LR.
void LSTMTrainer::ReduceLearningRates(LSTMTrainer *samples_trainer,
                                      std::stringstream &log_msg) {
  if (network_->TestFlag(NF_LAYER_SPECIFIC_LR)) {
    int num_reduced = ReduceLayerLearningRates(
        kLearningRateDecay, kNumAdjustmentIterations, samples_trainer);
    log_msg << "\nReduced learning rate on layers: " << num_reduced;
  } else {
    ScaleLearningRate(kLearningRateDecay);
    log_msg << "\nReduced learning rate to :" << learning_rate_;
  }
  log_msg << "\n";
}

// Considers reducing the learning rate independently for each layer down by
// factor(<1), or leaving it the same, by double-training the given number of
// samples and minimizing the amount of changing of sign of weight updates.
// Even if it looks like all weights should remain the same, an adjustment
// will be made to guarantee a different result when reverting to an old best.
// Returns the number of layer learning rates that were reduced.
int LSTMTrainer::ReduceLayerLearningRates(TFloat factor, int num_samples,
                                          LSTMTrainer *samples_trainer) {
  enum WhichWay {
    LR_DOWN, // Learning rate will go down by factor.
    LR_SAME, // Learning rate will stay the same.
    LR_COUNT // Size of arrays.
  };
  std::vector<std::string> layers = EnumerateLayers();
  int num_layers = layers.size();
  std::vector<int> num_weights(num_layers);
  std::vector<TFloat> bad_sums[LR_COUNT];
  std::vector<TFloat> ok_sums[LR_COUNT];
  for (int i = 0; i < LR_COUNT; ++i) {
    bad_sums[i].resize(num_layers, 0.0);
    ok_sums[i].resize(num_layers, 0.0);
  }
  auto momentum_factor = 1 / (1 - momentum_);
  std::vector<char> orig_trainer;
  samples_trainer->SaveTrainingDump(LIGHT, *this, &orig_trainer);
  for (int i = 0; i < num_layers; ++i) {
    Network *layer = GetLayer(layers[i]);
    num_weights[i] = layer->IsTraining() ? layer->num_weights() : 0;
  }
  int iteration = sample_iteration();
  for (int s = 0; s < num_samples; ++s) {
    // Which way will we modify the learning rate?
    for (int ww = 0; ww < LR_COUNT; ++ww) {
      // Transfer momentum to learning rate and adjust by the ww factor.
      auto ww_factor = momentum_factor;
      if (ww == LR_DOWN) {
        ww_factor *= factor;
      }
      // Make a copy of *this, so we can mess about without damaging anything.
      LSTMTrainer copy_trainer;
      samples_trainer->ReadTrainingDump(orig_trainer, copy_trainer);
      // Clear the updates, doing nothing else.
      copy_trainer.network_->Update(0.0, 0.0, 0.0, 0);
      // Adjust the learning rate in each layer.
      for (int i = 0; i < num_layers; ++i) {
        if (num_weights[i] == 0) {
          continue;
        }
        copy_trainer.ScaleLayerLearningRate(layers[i], ww_factor);
      }
      copy_trainer.SetIteration(iteration);
      // Train on the sample, but keep the update in updates_ instead of
      // applying to the weights.
      const ImageData *trainingdata =
          copy_trainer.TrainOnLine(samples_trainer, true);
      if (trainingdata == nullptr) {
        continue;
      }
      // We'll now use this trainer again for each layer.
      std::vector<char> updated_trainer;
      samples_trainer->SaveTrainingDump(LIGHT, copy_trainer, &updated_trainer);
      for (int i = 0; i < num_layers; ++i) {
        if (num_weights[i] == 0) {
          continue;
        }
        LSTMTrainer layer_trainer;
        samples_trainer->ReadTrainingDump(updated_trainer, layer_trainer);
        Network *layer = layer_trainer.GetLayer(layers[i]);
        // Update the weights in just the layer, using Adam if enabled.
        layer->Update(0.0, momentum_, adam_beta_,
                      layer_trainer.training_iteration_ + 1);
        // Zero the updates matrix again.
        layer->Update(0.0, 0.0, 0.0, 0);
        // Train again on the same sample, again holding back the updates.
        layer_trainer.TrainOnLine(trainingdata, true);
        // Count the sign changes in the updates in layer vs in copy_trainer.
        float before_bad = bad_sums[ww][i];
        float before_ok = ok_sums[ww][i];
        layer->CountAlternators(*copy_trainer.GetLayer(layers[i]),
                                &ok_sums[ww][i], &bad_sums[ww][i]);
        float bad_frac =
            bad_sums[ww][i] + ok_sums[ww][i] - before_bad - before_ok;
        if (bad_frac > 0.0f) {
          bad_frac = (bad_sums[ww][i] - before_bad) / bad_frac;
        }
      }
    }
    ++iteration;
  }
  int num_lowered = 0;
  for (int i = 0; i < num_layers; ++i) {
    if (num_weights[i] == 0) {
      continue;
    }
    Network *layer = GetLayer(layers[i]);
    float lr = GetLayerLearningRate(layers[i]);
    TFloat total_down = bad_sums[LR_DOWN][i] + ok_sums[LR_DOWN][i];
    TFloat total_same = bad_sums[LR_SAME][i] + ok_sums[LR_SAME][i];
    TFloat frac_down = bad_sums[LR_DOWN][i] / total_down;
    TFloat frac_same = bad_sums[LR_SAME][i] / total_same;
    tprintf("Layer %d=%s: lr %g->%g%%, lr %g->%g%%", i, layer->name().c_str(),
            lr * factor, 100.0 * frac_down, lr, 100.0 * frac_same);
    if (frac_down < frac_same * kImprovementFraction) {
      tprintf(" REDUCED\n");
      ScaleLayerLearningRate(layers[i], factor);
      ++num_lowered;
    } else {
      tprintf(" SAME\n");
    }
  }
  if (num_lowered == 0) {
    // Just lower everything to make sure.
    for (int i = 0; i < num_layers; ++i) {
      if (num_weights[i] > 0) {
        ScaleLayerLearningRate(layers[i], factor);
        ++num_lowered;
      }
    }
  }
  return num_lowered;
}
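
// Worked example of the per-layer decision above (illustrative numbers): if
// the reduced rate flips the sign of 40% of a layer's weight updates between
// the two training passes while the unchanged rate flips 45%, then
// 0.40 < 0.45 * kImprovementFraction (15/16), i.e. 0.40 < 0.42, so that
// layer's learning rate is reduced by factor.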

// Converts the string to integer class labels, with appropriate null_char_s
// in between if not in SimpleTextOutput mode. Returns false on failure.
/* static */
bool LSTMTrainer::EncodeString(const std::string &str,
                               const UNICHARSET &unicharset,
                               const UnicharCompress *recoder, bool simple_text,
                               int null_char, std::vector<int> *labels) {
  if (str.c_str() == nullptr || str.length() <= 0) {
    tprintf("Empty truth string!\n");
    return false;
  }
  unsigned err_index;
  std::vector<int> internal_labels;
  labels->clear();
  if (!simple_text) {
    labels->push_back(null_char);
  }
  std::string cleaned = unicharset.CleanupString(str.c_str());
  if (unicharset.encode_string(cleaned.c_str(), true, &internal_labels, nullptr,
                               &err_index)) {
    bool success = true;
    for (auto internal_label : internal_labels) {
      if (recoder != nullptr) {
        // Re-encode labels via recoder.
        RecodedCharID code;
        int len = recoder->EncodeUnichar(internal_label, &code);
        if (len > 0) {
          for (int j = 0; j < len; ++j) {
            labels->push_back(code(j));
            if (!simple_text) {
              labels->push_back(null_char);
            }
          }
        } else {
          success = false;
          err_index = 0;
          break;
        }
      } else {
        labels->push_back(internal_label);
        if (!simple_text) {
          labels->push_back(null_char);
        }
      }
    }
    if (success) {
      return true;
    }
  }
  tprintf("Encoding of string failed! Failure bytes:");
  while (err_index < cleaned.size()) {
    tprintf(" %x", cleaned[err_index++] & 0xff);
  }
  tprintf("\n");
  return false;
}
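
// Example (a sketch over a hypothetical charset): if 'a' and 'b' recode to
// the single codes 5 and 6 and null_char is 0, then with simple_text=false
// EncodeString("ab", ...) produces the null-interleaved labels
// {0, 5, 0, 6, 0}.
#if 0
std::vector<int> labels;
LSTMTrainer::EncodeString("ab", unicharset, &recoder, /*simple_text=*/false,
                          /*null_char=*/0, &labels);
#endif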

// Performs forward-backward on the given trainingdata.
// Returns a Trainability enum to indicate the suitability of the sample.
Trainability LSTMTrainer::TrainOnLine(const ImageData *trainingdata,
                                      bool batch) {
  NetworkIO fwd_outputs, targets;
  Trainability trainable =
      PrepareForBackward(trainingdata, &fwd_outputs, &targets);
  ++sample_iteration_;
  if (trainable == UNENCODABLE || trainable == NOT_BOXED) {
    return trainable; // Sample was unusable.
  }
  bool debug =
      debug_interval_ > 0 && training_iteration() % debug_interval_ == 0;
  // Run backprop on the output.
  NetworkIO bp_deltas;
  if (network_->IsTraining() &&
      (trainable != PERFECT ||
       training_iteration() >
           last_perfect_training_iteration_ + perfect_delay_)) {
    network_->Backward(debug, targets, &scratch_space_, &bp_deltas);
    network_->Update(learning_rate_, batch ? -1.0f : momentum_, adam_beta_,
                     training_iteration_ + 1);
  }
#ifndef GRAPHICS_DISABLED
  if (debug_interval_ == 1 && debug_win_ != nullptr) {
    delete debug_win_->AwaitEvent(SVET_CLICK);
  }
#endif // !GRAPHICS_DISABLED
  // Roll the memory of past means.
  RollErrorBuffers();
  return trainable;
}

// Prepares the ground truth, runs forward, and prepares the targets.
// Returns a Trainability enum to indicate the suitability of the sample.
Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata,
                                             NetworkIO *fwd_outputs,
                                             NetworkIO *targets) {
  if (trainingdata == nullptr) {
    tprintf("Null trainingdata.\n");
    return UNENCODABLE;
  }
  // Ensure repeatability of random elements even across checkpoints.
  bool debug =
      debug_interval_ > 0 && training_iteration() % debug_interval_ == 0;
  std::vector<int> truth_labels;
  if (!EncodeString(trainingdata->transcription(), &truth_labels)) {
    tprintf("Can't encode transcription: '%s' in language '%s'\n",
            trainingdata->transcription().c_str(),
            trainingdata->language().c_str());
    return UNENCODABLE;
  }
  bool upside_down = false;
  if (randomly_rotate_) {
    // This ensures consistent training results.
    SetRandomSeed();
    upside_down = randomizer_.SignedRand(1.0) > 0.0;
    if (upside_down) {
      // Modify the truth labels to match the rotation:
      // Apart from space and null, increment the label. This changes the
      // script-id to the same script-id but upside-down.
      // The labels need to be reversed in order, as the first is now the last.
      for (auto &truth_label : truth_labels) {
        if (truth_label != UNICHAR_SPACE && truth_label != null_char_) {
          ++truth_label;
        }
      }
      std::reverse(truth_labels.begin(), truth_labels.end());
    }
  }
  unsigned w = 0;
  while (w < truth_labels.size() &&
         (truth_labels[w] == UNICHAR_SPACE || truth_labels[w] == null_char_)) {
    ++w;
  }
  if (w == truth_labels.size()) {
    tprintf("Blank transcription: %s\n", trainingdata->transcription().c_str());
    return UNENCODABLE;
  }
  float image_scale;
  NetworkIO inputs;
  bool invert = trainingdata->boxes().empty();
  if (!RecognizeLine(*trainingdata, invert ? 0.5f : 0.0f, debug, invert,
                     upside_down, &image_scale, &inputs, fwd_outputs)) {
    tprintf("Image %s not trainable\n", trainingdata->imagefilename().c_str());
    return UNENCODABLE;
  }
  targets->Resize(*fwd_outputs, network_->NumOutputs());
  LossType loss_type = OutputLossType();
  if (loss_type == LT_SOFTMAX) {
    if (!ComputeTextTargets(*fwd_outputs, truth_labels, targets)) {
      tprintf("Compute simple targets failed for %s!\n",
              trainingdata->imagefilename().c_str());
      return UNENCODABLE;
    }
  } else if (loss_type == LT_CTC) {
    if (!ComputeCTCTargets(truth_labels, fwd_outputs, targets)) {
      tprintf("Compute CTC targets failed for %s!\n",
              trainingdata->imagefilename().c_str());
      return UNENCODABLE;
    }
  } else {
    tprintf("Logistic outputs not implemented yet!\n");
    return UNENCODABLE;
  }
  std::vector<int> ocr_labels;
  std::vector<int> xcoords;
  LabelsFromOutputs(*fwd_outputs, &ocr_labels, &xcoords);
  // CTC does not produce correct target labels to begin with.
  if (loss_type != LT_CTC) {
    LabelsFromOutputs(*targets, &truth_labels, &xcoords);
  }
  if (!DebugLSTMTraining(inputs, *trainingdata, *fwd_outputs, truth_labels,
                         *targets)) {
    tprintf("Input width was %d\n", inputs.Width());
    return UNENCODABLE;
  }
  std::string ocr_text = DecodeLabels(ocr_labels);
  std::string truth_text = DecodeLabels(truth_labels);
  targets->SubtractAllFromFloat(*fwd_outputs);
  if (debug_interval_ != 0) {
    if (truth_text != ocr_text) {
      tprintf("Iteration %d: BEST OCR TEXT : %s\n", training_iteration(),
              ocr_text.c_str());
    }
  }
  double char_error = ComputeCharError(truth_labels, ocr_labels);
  double word_error = ComputeWordError(&truth_text, &ocr_text);
  double delta_error = ComputeErrorRates(*targets, char_error, word_error);
  if (debug_interval_ != 0) {
    tprintf("File %s line %d %s:\n", trainingdata->imagefilename().c_str(),
            trainingdata->page_number(), delta_error == 0.0 ? "(Perfect)" : "");
  }
  if (delta_error == 0.0) {
    return PERFECT;
  }
  if (targets->AnySuspiciousTruth(kHighConfidence)) {
    return HI_PRECISION_ERR;
  }
  return TRAINABLE;
}

// Writes the trainer to memory, so that the current training state can be
// restored. *this must always be the master trainer that retains the only
// copy of the training data and language model. trainer is the model that is
// actually serialized.
bool LSTMTrainer::SaveTrainingDump(SerializeAmount serialize_amount,
                                   const LSTMTrainer &trainer,
                                   std::vector<char> *data) const {
  TFile fp;
  fp.OpenWrite(data);
  return trainer.Serialize(serialize_amount, &mgr_, &fp);
}

// Restores the model to *this.
bool LSTMTrainer::ReadLocalTrainingDump(const TessdataManager *mgr,
                                        const char *data, int size) {
  if (size == 0) {
    tprintf("Warning: data size is 0 in LSTMTrainer::ReadLocalTrainingDump\n");
    return false;
  }
  TFile fp;
  fp.Open(data, size);
  return DeSerialize(mgr, &fp);
}

// Writes the full recognition traineddata to the given filename.
bool LSTMTrainer::SaveTraineddata(const char *filename) {
  std::vector<char> recognizer_data;
  SaveRecognitionDump(&recognizer_data);
  mgr_.OverwriteEntry(TESSDATA_LSTM, &recognizer_data[0],
                      recognizer_data.size());
  return mgr_.SaveFile(filename, SaveDataToFile);
}

// Writes the recognizer to memory, so that it can be used for testing later.
void LSTMTrainer::SaveRecognitionDump(std::vector<char> *data) const {
  TFile fp;
  fp.OpenWrite(data);
  network_->SetEnableTraining(TS_TEMP_DISABLE);
  ASSERT_HOST(LSTMRecognizer::Serialize(&mgr_, &fp));
  network_->SetEnableTraining(TS_RE_ENABLE);
}

// Returns a suitable filename for a training dump, based on the model_base_,
// best_error_rate_, best_iteration_ and training_iteration_.
std::string LSTMTrainer::DumpFilename() const {
  std::stringstream filename;
  filename.imbue(std::locale::classic());
  filename << model_base_ << std::fixed << std::setprecision(3)
           << "_" << best_error_rate_
           << "_" << best_iteration_
           << "_" << training_iteration_
           << ".checkpoint";
  return filename.str();
}
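
// Example (illustrative values): with model_base_ "/tmp/model/base",
// best_error_rate_ 1.499, best_iteration_ 5000 and training_iteration_ 12000,
// this returns "/tmp/model/base_1.499_5000_12000.checkpoint".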

// Fills the whole error buffer of the given type with the given value.
void LSTMTrainer::FillErrorBuffer(double new_error, ErrorTypes type) {
  for (int i = 0; i < kRollingBufferSize_; ++i) {
    error_buffers_[type][i] = new_error;
  }
  error_rates_[type] = 100.0 * new_error;
}

// Helper generates a map from each current recoder_ code (ie softmax index)
// to the corresponding old_recoder code, or -1 if there isn't one.
std::vector<int> LSTMTrainer::MapRecoder(
    const UNICHARSET &old_chset, const UnicharCompress &old_recoder) const {
  int num_new_codes = recoder_.code_range();
  int num_new_unichars = GetUnicharset().size();
  std::vector<int> code_map(num_new_codes, -1);
  for (int c = 0; c < num_new_codes; ++c) {
    int old_code = -1;
    // Find all new unichar_ids that recode to something that includes c.
    // The <= is to include the null char, which may be beyond the unicharset.
    for (int uid = 0; uid <= num_new_unichars; ++uid) {
      RecodedCharID codes;
      int length = recoder_.EncodeUnichar(uid, &codes);
      int code_index = 0;
      while (code_index < length && codes(code_index) != c) {
        ++code_index;
      }
      if (code_index == length) {
        continue;
      }
      // The old unicharset must have the same unichar.
      int old_uid =
          uid < num_new_unichars
              ? old_chset.unichar_to_id(GetUnicharset().id_to_unichar(uid))
              : old_chset.size() - 1;
      if (old_uid == INVALID_UNICHAR_ID) {
        continue;
      }
      // The encoding of old_uid at the same code_index is the old code.
      RecodedCharID old_codes;
      if (code_index < old_recoder.EncodeUnichar(old_uid, &old_codes)) {
        old_code = old_codes(code_index);
        break;
      }
    }
    code_map[c] = old_code;
  }
  return code_map;
}

// Private version of InitCharSet above finishes the job after initializing
// the mgr_ data member.
void LSTMTrainer::InitCharSet() {
  EmptyConstructor();
  training_flags_ = TF_COMPRESS_UNICHARSET;
  // Initialize the unicharset and recoder.
  if (!LoadCharsets(&mgr_)) {
    ASSERT_HOST(
        "Must provide a traineddata containing lstm_unicharset and"
        " lstm_recoder!\n" != nullptr);
  }
  SetNullChar();
}

// Helper computes and sets the null_char_.
void LSTMTrainer::SetNullChar() {
  null_char_ = (GetUnicharset().has_special_codes()) ? UNICHAR_BROKEN
                                                     : GetUnicharset().size();
  RecodedCharID code;
  recoder_.EncodeUnichar(null_char_, &code);
  null_char_ = code(0);
}

// Factored sub-constructor sets up reasonable default values.
void LSTMTrainer::EmptyConstructor() {
#ifndef GRAPHICS_DISABLED
  align_win_ = nullptr;
  target_win_ = nullptr;
  ctc_win_ = nullptr;
  recon_win_ = nullptr;
#endif
  checkpoint_iteration_ = 0;
  training_stage_ = 0;
  num_training_stages_ = 2;
  InitIterations();
}

// Outputs the string and periodically displays the given network inputs
// as an image in the given window, and the corresponding labels at the
// corresponding x_starts.
// Returns false if the truth string is empty.
bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs,
                                    const ImageData &trainingdata,
                                    const NetworkIO &fwd_outputs,
                                    const std::vector<int> &truth_labels,
                                    const NetworkIO &outputs) {
  const std::string &truth_text = DecodeLabels(truth_labels);
  if (truth_text.c_str() == nullptr || truth_text.length() <= 0) {
    tprintf("Empty truth string at decode time!\n");
    return false;
  }
  if (debug_interval_ != 0) {
    // Get class labels, xcoords and string.
    std::vector<int> labels;
    std::vector<int> xcoords;
    LabelsFromOutputs(outputs, &labels, &xcoords);
    std::string text = DecodeLabels(labels);
    tprintf("Iteration %d: GROUND TRUTH : %s\n", training_iteration(),
            truth_text.c_str());
    if (truth_text != text) {
      tprintf("Iteration %d: ALIGNED TRUTH : %s\n", training_iteration(),
              text.c_str());
    }
    if (debug_interval_ > 0 && training_iteration() % debug_interval_ == 0) {
      tprintf("TRAINING activation path for truth string %s\n",
              truth_text.c_str());
      DebugActivationPath(outputs, labels, xcoords);
#ifndef GRAPHICS_DISABLED
      DisplayForward(inputs, labels, xcoords, "LSTMTraining", &align_win_);
      if (OutputLossType() == LT_CTC) {
        DisplayTargets(fwd_outputs, "CTC Outputs", &ctc_win_);
        DisplayTargets(outputs, "CTC Targets", &target_win_);
      }
#endif
    }
  }
  return true;
}

#ifndef GRAPHICS_DISABLED

// Displays the network targets as a line graph.
void LSTMTrainer::DisplayTargets(const NetworkIO &targets,
                                 const char *window_name, ScrollView **window) {
  int width = targets.Width();
  int num_features = targets.NumFeatures();
  Network::ClearWindow(true, window_name, width * kTargetXScale, kTargetYScale,
                       window);
  for (int c = 0; c < num_features; ++c) {
    int color = c % (ScrollView::GREEN_YELLOW - 1) + 2;
    (*window)->Pen(static_cast<ScrollView::Color>(color));
    int start_t = -1;
    for (int t = 0; t < width; ++t) {
      double target = targets.f(t)[c];
      target *= kTargetYScale;
      if (target >= 1) {
        if (start_t < 0) {
          (*window)->SetCursor(t - 1, 0);
          start_t = t;
        }
        (*window)->DrawTo(t, target);
      } else if (start_t >= 0) {
        (*window)->DrawTo(t, 0);
        (*window)->DrawTo(start_t - 1, 0);
        start_t = -1;
      }
    }
    if (start_t >= 0) {
      (*window)->DrawTo(width, 0);
      (*window)->DrawTo(start_t - 1, 0);
    }
  }
  (*window)->Update();
}

#endif // !GRAPHICS_DISABLED

// Builds a no-compromises target where the first positions should be the
// truth labels and the rest is padded with the null_char_.
bool LSTMTrainer::ComputeTextTargets(const NetworkIO &outputs,
                                     const std::vector<int> &truth_labels,
                                     NetworkIO *targets) {
  if (truth_labels.size() > targets->Width()) {
    tprintf("Error: transcription %s too long to fit into target of width %d\n",
            DecodeLabels(truth_labels).c_str(), targets->Width());
    return false;
  }
  int i = 0;
  for (auto truth_label : truth_labels) {
    targets->SetActivations(i, truth_label, 1.0);
    ++i;
  }
  for (i = truth_labels.size(); i < targets->Width(); ++i) {
    targets->SetActivations(i, null_char_, 1.0);
  }
  return true;
}

// Builds a target using standard CTC. truth_labels should be pre-padded with
// nulls wherever desired. They don't have to be between all labels.
// outputs is input-output, as it gets clipped to minimum probability.
bool LSTMTrainer::ComputeCTCTargets(const std::vector<int> &truth_labels,
                                    NetworkIO *outputs, NetworkIO *targets) {
  // Bottom-clip outputs to a minimum probability.
  CTC::NormalizeProbs(outputs);
  return CTC::ComputeCTCTargets(truth_labels, null_char_,
                                outputs->float_array(), targets);
}

// Computes network errors, and stores the results in the rolling buffers,
// along with the supplied text_error.
// Returns the delta error of the current sample (not running average.)
double LSTMTrainer::ComputeErrorRates(const NetworkIO &deltas,
                                      double char_error, double word_error) {
  UpdateErrorBuffer(ComputeRMSError(deltas), ET_RMS);
  // Delta error is the fraction of timesteps with >0.5 error in the top choice
  // score. If zero, then the top choice characters are guaranteed correct,
  // even when there is residue in the RMS error.
  double delta_error = ComputeWinnerError(deltas);
  UpdateErrorBuffer(delta_error, ET_DELTA);
  UpdateErrorBuffer(word_error, ET_WORD_RECERR);
  UpdateErrorBuffer(char_error, ET_CHAR_ERROR);
  // Skip ratio measures the difference between sample_iteration_ and
  // training_iteration_, which reflects the number of unusable samples,
  // usually due to unencodable truth text, or the text not fitting in the
  // space for the output.
  double skip_count = sample_iteration_ - prev_sample_iteration_;
  UpdateErrorBuffer(skip_count, ET_SKIP_RATIO);
  return delta_error;
}

// Computes the network activation RMS error rate.
double LSTMTrainer::ComputeRMSError(const NetworkIO &deltas) {
  double total_error = 0.0;
  int width = deltas.Width();
  int num_classes = deltas.NumFeatures();
  for (int t = 0; t < width; ++t) {
    const float *class_errs = deltas.f(t);
    for (int c = 0; c < num_classes; ++c) {
      double error = class_errs[c];
      total_error += error * error;
    }
  }
  return sqrt(total_error / (width * num_classes));
}

// Computes network activation winner error rate. (Number of values that are
// in error by >= 0.5 divided by number of time-steps.) More closely related
// to final character error than RMS, but still directly calculable from
// just the deltas. Because of the binary nature of the targets, zero winner
// error is a sufficient but not necessary condition for zero char error.
double LSTMTrainer::ComputeWinnerError(const NetworkIO &deltas) {
  int num_errors = 0;
  int width = deltas.Width();
  int num_classes = deltas.NumFeatures();
  for (int t = 0; t < width; ++t) {
    const float *class_errs = deltas.f(t);
    for (int c = 0; c < num_classes; ++c) {
      float abs_delta = std::fabs(class_errs[c]);
      // TODO(rays) Filtering cases where the delta is very large to cut out
      // GT errors doesn't work. Find a better way or get better truth.
      if (0.5 <= abs_delta) {
        ++num_errors;
      }
    }
  }
  return static_cast<double>(num_errors) / width;
}

// Computes a very simple bag of chars char error rate.
double LSTMTrainer::ComputeCharError(const std::vector<int> &truth_str,
                                     const std::vector<int> &ocr_str) {
  std::vector<int> label_counts(NumOutputs());
  unsigned truth_size = 0;
  for (auto ch : truth_str) {
    if (ch != null_char_) {
      ++label_counts[ch];
      ++truth_size;
    }
  }
  for (auto ch : ocr_str) {
    if (ch != null_char_) {
      --label_counts[ch];
    }
  }
  unsigned char_errors = 0;
  for (auto label_count : label_counts) {
    char_errors += abs(label_count);
  }
  // Limit BCER to interval [0,1] and avoid division by zero.
  if (truth_size <= char_errors) {
    return (char_errors == 0) ? 0.0 : 1.0;
  }
  return static_cast<double>(char_errors) / truth_size;
}
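
// Worked example (illustrative): truth "aab" gives counts {a:2, b:1} and
// truth_size 3; OCR "abb" decrements them to {a:1, b:-1}; the summed
// magnitudes give char_errors = 2, so the BCER is 2/3.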

// Computes word recall error rate using a very simple bag of words algorithm.
// NOTE that this is destructive on both input strings.
double LSTMTrainer::ComputeWordError(std::string *truth_str,
                                     std::string *ocr_str) {
  using StrMap = std::unordered_map<std::string, int, std::hash<std::string>>;
  std::vector<std::string> truth_words = split(*truth_str, ' ');
  if (truth_words.empty()) {
    return 0.0;
  }
  std::vector<std::string> ocr_words = split(*ocr_str, ' ');
  StrMap word_counts;
  for (const auto &truth_word : truth_words) {
    std::string truth_word_string(truth_word.c_str());
    auto it = word_counts.find(truth_word_string);
    if (it == word_counts.end()) {
      word_counts.insert(std::make_pair(truth_word_string, 1));
    } else {
      ++it->second;
    }
  }
  for (const auto &ocr_word : ocr_words) {
    std::string ocr_word_string(ocr_word.c_str());
    auto it = word_counts.find(ocr_word_string);
    if (it == word_counts.end()) {
      word_counts.insert(std::make_pair(ocr_word_string, -1));
    } else {
      --it->second;
    }
  }
  int word_recall_errs = 0;
  for (const auto &word_count : word_counts) {
    if (word_count.second > 0) {
      word_recall_errs += word_count.second;
    }
  }
  return static_cast<double>(word_recall_errs) / truth_words.size();
}
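
// Worked example (illustrative): truth "the cat sat" vs OCR "the mat" leaves
// counts {the:0, cat:1, sat:1, mat:-1}; only the positive residues (missed
// truth words) count, so word_recall_errs = 2 and the error is 2/3.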

// Updates the error buffer and corresponding mean of the given type with
// the new_error.
void LSTMTrainer::UpdateErrorBuffer(double new_error, ErrorTypes type) {
  int index = training_iteration_ % kRollingBufferSize_;
  error_buffers_[type][index] = new_error;
  // Compute the mean error.
  int mean_count =
      std::min<int>(training_iteration_ + 1, error_buffers_[type].size());
  double buffer_sum = 0.0;
  for (int i = 0; i < mean_count; ++i) {
    buffer_sum += error_buffers_[type][i];
  }
  double mean = buffer_sum / mean_count;
  // Trim precision to 1/1000 of 1%.
  error_rates_[type] = IntCastRounded(100000.0 * mean) / 1000.0;
}

// Rolls error buffers and reports the current means.
void LSTMTrainer::RollErrorBuffers() {
  prev_sample_iteration_ = sample_iteration_;
  if (NewSingleError(ET_DELTA) > 0.0) {
    ++learning_iteration_;
  } else {
    last_perfect_training_iteration_ = training_iteration_;
  }
  ++training_iteration_;
  if (debug_interval_ != 0) {
    tprintf("Mean rms=%g%%, delta=%g%%, train=%g%%(%g%%), skip ratio=%g%%\n",
            error_rates_[ET_RMS], error_rates_[ET_DELTA],
            error_rates_[ET_CHAR_ERROR], error_rates_[ET_WORD_RECERR],
            error_rates_[ET_SKIP_RATIO]);
  }
}

// Given that error_rate is either a new min or max, updates the best/worst
// error rates, and record of progress.
// Tester is an externally supplied callback function that tests on some
// data set with a given model and records the error rates in a graph.
std::string LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate,
                                          const std::vector<char> &model_data,
                                          const TestCallback &tester) {
  if (error_rate > best_error_rate_ &&
      iteration < best_iteration_ + kErrorGraphInterval) {
    // Too soon to record a new point.
    if (tester != nullptr && !worst_model_data_.empty()) {
      mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0],
                          worst_model_data_.size());
      return tester(worst_iteration_, nullptr, mgr_, CurrentTrainingStage());
    } else {
      return "";
    }
  }
  std::string result;
  // NOTE: there are 2 asymmetries here:
  // 1. We are computing the global minimum, but the local maximum in between.
  // 2. If the tester returns an empty string, indicating that it is busy,
  //    call it repeatedly on new local maxima to test the previous min, but
  //    not the other way around, as there is little point testing the maxima
  //    between very frequent minima.
  if (error_rate < best_error_rate_) {
    // This is a new (global) minimum.
    if (tester != nullptr && !worst_model_data_.empty()) {
      mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0],
                          worst_model_data_.size());
      result = tester(worst_iteration_, worst_error_rates_, mgr_,
                      CurrentTrainingStage());
      worst_model_data_.clear();
      best_model_data_ = model_data;
    }
    best_error_rate_ = error_rate;
    memcpy(best_error_rates_, error_rates_, sizeof(error_rates_));
    best_iteration_ = iteration;
    best_error_history_.push_back(error_rate);
    best_error_iterations_.push_back(iteration);
    // Compute 2% decay time.
    double two_percent_more = error_rate + 2.0;
    int i;
    for (i = best_error_history_.size() - 1;
         i >= 0 && best_error_history_[i] < two_percent_more; --i) {
    }
    int old_iteration = i >= 0 ? best_error_iterations_[i] : 0;
    improvement_steps_ = iteration - old_iteration;
    tprintf("2 Percent improvement time=%d, best error was %g @ %d\n",
            improvement_steps_, i >= 0 ? best_error_history_[i] : 100.0,
            old_iteration);
  } else if (error_rate > best_error_rate_) {
    // This is a new (local) maximum.
    if (tester != nullptr) {
      if (!best_model_data_.empty()) {
        mgr_.OverwriteEntry(TESSDATA_LSTM, &best_model_data_[0],
                            best_model_data_.size());
        result = tester(best_iteration_, best_error_rates_, mgr_,
                        CurrentTrainingStage());
      } else if (!worst_model_data_.empty()) {
        // Allow for multiple data points with "worst" error rate.
        mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0],
                            worst_model_data_.size());
        result = tester(worst_iteration_, worst_error_rates_, mgr_,
                        CurrentTrainingStage());
      }
      if (result.length() > 0) {
        best_model_data_.clear();
      }
      worst_model_data_ = model_data;
    }
  }
  worst_error_rate_ = error_rate;
  memcpy(worst_error_rates_, error_rates_, sizeof(error_rates_));
  worst_iteration_ = iteration;
  return result;
}

} // namespace tesseract.