#include <lstmtrainer.h>

Inheritance diagram for tesseract::LSTMTrainer:

Public Member Functions
	LSTMTrainer ()

	LSTMTrainer (const char *model_base, const char *checkpoint_name, int debug_interval, int64_t max_memory)

virtual	~LSTMTrainer ()

bool	TryLoadingCheckpoint (const char *filename, const char *old_traineddata)

bool	InitCharSet (const std::string &traineddata_path)

void	InitCharSet (const TessdataManager &mgr)

bool	InitNetwork (const char *network_spec, int append_index, int net_flags, float weight_range, float learning_rate, float momentum, float adam_beta)

int	InitTensorFlowNetwork (const std::string &tf_proto)

void	InitIterations ()

double	ActivationError () const

double	CharError () const

const double *	error_rates () const

double	best_error_rate () const

int	best_iteration () const

int	learning_iteration () const

int32_t	improvement_steps () const

void	set_perfect_delay (int delay)

const std::vector< char > &	best_trainer () const

double	NewSingleError (ErrorTypes type) const

double	LastSingleError (ErrorTypes type) const

const DocumentCache &	training_data () const

DocumentCache *	mutable_training_data ()

Trainability	GridSearchDictParams (const ImageData *trainingdata, int iteration, double min_dict_ratio, double dict_ratio_step, double max_dict_ratio, double min_cert_offset, double cert_offset_step, double max_cert_offset, std::string &results)

void	DebugNetwork ()

bool	LoadAllTrainingData (const std::vector< std::string > &filenames, CachingStrategy cache_strategy, bool randomly_rotate)

bool	MaintainCheckpoints (const TestCallback &tester, std::stringstream &log_msg)

bool	MaintainCheckpointsSpecific (int iteration, const std::vector< char > train_model, const std::vector< char > rec_model, TestCallback tester, std::stringstream &log_msg)

void	PrepareLogMsg (std::stringstream &log_msg) const

void	LogIterations (const char *intro_str, std::stringstream &log_msg) const

bool	TransitionTrainingStage (float error_threshold)

int	CurrentTrainingStage () const

bool	Serialize (SerializeAmount serialize_amount, const TessdataManager *mgr, TFile *fp) const

bool	DeSerialize (const TessdataManager *mgr, TFile *fp)

void	StartSubtrainer (std::stringstream &log_msg)

SubTrainerResult	UpdateSubtrainer (std::stringstream &log_msg)

void	ReduceLearningRates (LSTMTrainer *samples_trainer, std::stringstream &log_msg)

int	ReduceLayerLearningRates (TFloat factor, int num_samples, LSTMTrainer *samples_trainer)

bool	EncodeString (const std::string &str, std::vector< int > *labels) const

const ImageData *	TrainOnLine (LSTMTrainer *samples_trainer, bool batch)

Trainability	TrainOnLine (const ImageData *trainingdata, bool batch)

Trainability	PrepareForBackward (const ImageData *trainingdata, NetworkIO *fwd_outputs, NetworkIO *targets)

bool	SaveTrainingDump (SerializeAmount serialize_amount, const LSTMTrainer &trainer, std::vector< char > *data) const

bool	ReadTrainingDump (const std::vector< char > &data, LSTMTrainer &trainer) const

bool	ReadSizedTrainingDump (const char *data, int size, LSTMTrainer &trainer) const

bool	ReadLocalTrainingDump (const TessdataManager *mgr, const char *data, int size)

void	SetupCheckpointInfo ()

bool	SaveTraineddata (const char *filename)

void	SaveRecognitionDump (std::vector< char > *data) const

std::string	DumpFilename () const

void	FillErrorBuffer (double new_error, ErrorTypes type)

std::vector< int >	MapRecoder (const UNICHARSET &old_chset, const UnicharCompress &old_recoder) const

Public Member Functions inherited from tesseract::LSTMRecognizer
	LSTMRecognizer ()

	LSTMRecognizer (const std::string &language_data_path_prefix)

	~LSTMRecognizer ()

int	NumOutputs () const

int	training_iteration () const

int	sample_iteration () const

float	learning_rate () const

LossType	OutputLossType () const

bool	SimpleTextOutput () const

bool	IsIntMode () const

bool	IsRecoding () const

bool	IsTensorFlow () const

std::vector< std::string >	EnumerateLayers () const

Network *	GetLayer (const std::string &id) const

float	GetLayerLearningRate (const std::string &id) const

const char *	GetNetwork () const

float	GetAdamBeta () const

float	GetMomentum () const

void	ScaleLearningRate (double factor)

void	ScaleLayerLearningRate (const std::string &id, double factor)

void	SetLearningRate (float learning_rate)

void	SetLayerLearningRate (const std::string &id, float learning_rate)

void	ConvertToInt ()

const UNICHARSET &	GetUnicharset () const

UNICHARSET &	GetUnicharset ()

const UnicharCompress &	GetRecoder () const

const Dict *	GetDict () const

Dict *	GetDict ()

void	SetIteration (int iteration)

int	NumInputs () const

int	null_char () const

bool	Load (const ParamsVectors params, const std::string &lang, TessdataManager mgr)

bool	Serialize (const TessdataManager *mgr, TFile *fp) const

bool	DeSerialize (const TessdataManager *mgr, TFile *fp)

bool	LoadCharsets (const TessdataManager *mgr)

bool	LoadRecoder (TFile *fp)

bool	LoadDictionary (const ParamsVectors params, const std::string &lang, TessdataManager mgr)

void	RecognizeLine (const ImageData &image_data, float invert_threshold, bool debug, double worst_dict_cert, const TBOX &line_box, PointerVector< WERD_RES > *words, int lstm_choice_mode=0, int lstm_choice_amount=5)

void	OutputStats (const NetworkIO &outputs, float min_output, float mean_output, float *sd)

bool	RecognizeLine (const ImageData &image_data, float invert_threshold, bool debug, bool re_invert, bool upside_down, float scale_factor, NetworkIO inputs, NetworkIO *outputs)

std::string	DecodeLabels (const std::vector< int > &labels)

void	DisplayForward (const NetworkIO &inputs, const std::vector< int > &labels, const std::vector< int > &label_coords, const char *window_name, ScrollView **window)

void	LabelsFromOutputs (const NetworkIO &outputs, std::vector< int > *labels, std::vector< int > *xcoords)

Static Public Member Functions
static bool	EncodeString (const std::string &str, const UNICHARSET &unicharset, const UnicharCompress *recoder, bool simple_text, int null_char, std::vector< int > *labels)

Protected Member Functions
void	InitCharSet ()

void	SetNullChar ()

void	EmptyConstructor ()

bool	DebugLSTMTraining (const NetworkIO &inputs, const ImageData &trainingdata, const NetworkIO &fwd_outputs, const std::vector< int > &truth_labels, const NetworkIO &outputs)

void	DisplayTargets (const NetworkIO &targets, const char *window_name, ScrollView **window)

bool	ComputeTextTargets (const NetworkIO &outputs, const std::vector< int > &truth_labels, NetworkIO *targets)

bool	ComputeCTCTargets (const std::vector< int > &truth_labels, NetworkIO *outputs, NetworkIO *targets)

double	ComputeErrorRates (const NetworkIO &deltas, double char_error, double word_error)

double	ComputeRMSError (const NetworkIO &deltas)

double	ComputeWinnerError (const NetworkIO &deltas)

double	ComputeCharError (const std::vector< int > &truth_str, const std::vector< int > &ocr_str)

double	ComputeWordError (std::string *truth_str, std::string *ocr_str)

void	UpdateErrorBuffer (double new_error, ErrorTypes type)

void	RollErrorBuffers ()

std::string	UpdateErrorGraph (int iteration, double error_rate, const std::vector< char > &model_data, const TestCallback &tester)

Protected Member Functions inherited from tesseract::LSTMRecognizer
void	SetRandomSeed ()

void	DisplayLSTMOutput (const std::vector< int > &labels, const std::vector< int > &xcoords, int height, ScrollView *window)

void	DebugActivationPath (const NetworkIO &outputs, const std::vector< int > &labels, const std::vector< int > &xcoords)

void	DebugActivationRange (const NetworkIO &outputs, const char *label, int best_choice, int x_start, int x_end)

void	LabelsViaReEncode (const NetworkIO &output, std::vector< int > labels, std::vector< int > xcoords)

void	LabelsViaSimpleText (const NetworkIO &output, std::vector< int > labels, std::vector< int > xcoords)

const char *	DecodeLabel (const std::vector< int > &labels, unsigned start, unsigned end, int decoded)

const char *	DecodeSingleLabel (int label)

Protected Attributes
ScrollView *	align_win_

ScrollView *	target_win_

ScrollView *	ctc_win_

ScrollView *	recon_win_

int	debug_interval_

int	checkpoint_iteration_

std::string	model_base_

std::string	checkpoint_name_

bool	randomly_rotate_

DocumentCache	training_data_

std::string	best_model_name_

int	num_training_stages_

double	best_error_rate_

double	best_error_rates_ [ET_COUNT]

int	best_iteration_

double	worst_error_rate_

double	worst_error_rates_ [ET_COUNT]

int	worst_iteration_

int	stall_iteration_

std::vector< char >	best_model_data_

std::vector< char >	worst_model_data_

std::vector< char >	best_trainer_

std::unique_ptr< LSTMTrainer >	sub_trainer_

float	error_rate_of_last_saved_best_

int	training_stage_

std::vector< double >	best_error_history_

std::vector< int32_t >	best_error_iterations_

int32_t	improvement_steps_

int	learning_iteration_

int	prev_sample_iteration_

int	perfect_delay_

int	last_perfect_training_iteration_

std::vector< double >	error_buffers_ [ET_COUNT]

double	error_rates_ [ET_COUNT]

TessdataManager	mgr_

Protected Attributes inherited from tesseract::LSTMRecognizer
Network *	network_

CCUtil	ccutil_

UnicharCompress	recoder_

std::string	network_str_

int32_t	training_flags_

int32_t	training_iteration_

int32_t	sample_iteration_

int32_t	null_char_

float	learning_rate_

float	momentum_

float	adam_beta_

TRand	randomizer_

NetworkScratch	scratch_space_

Dict *	dict_

RecodeBeamSearch *	search_

ScrollView *	debug_win_

Static Protected Attributes
static const int	kRollingBufferSize_ = 1000

Detailed Description

Definition at line 84 of file lstmtrainer.h.

Constructor & Destructor Documentation

◆ LSTMTrainer() [1/2]

tesseract::LSTMTrainer::LSTMTrainer ( )

Definition at line 77 of file lstmtrainer.cpp.

    : randomly_rotate_(false), training_data_(0), sub_trainer_(nullptr) {
  // Default construction: no random rotation, a training-data cache built
  // with argument 0, and no sub-trainer.
  EmptyConstructor();
  // A zero interval makes DebugLSTMTraining() skip all of its debug output.
  debug_interval_ = 0;
}

◆ LSTMTrainer() [2/2]

tesseract::LSTMTrainer::LSTMTrainer	(	const char *	model_base,
		const char *	checkpoint_name,
		int	debug_interval,
		int64_t	max_memory
	)

Definition at line 83 of file lstmtrainer.cpp.

    : randomly_rotate_(false),
      training_data_(max_memory),
      sub_trainer_(nullptr) {
  // Reset windows, stage counters and iteration statistics first; the
  // assignments below then record the caller's settings.
  EmptyConstructor();
  debug_interval_ = debug_interval;
  // NOTE(review): both names are assigned from raw C strings; presumably
  // callers never pass nullptr here -- confirm.
  model_base_ = model_base;
  checkpoint_name_ = checkpoint_name;
}

◆ ~LSTMTrainer()

tesseract::LSTMTrainer::~LSTMTrainer ( )

virtual

Definition at line 94 of file lstmtrainer.cpp.

                          {
  // The debug window pointers are only initialised (to nullptr, in
  // EmptyConstructor) when graphics support is compiled in, so they are only
  // released under the same condition. Deleting a null pointer is a no-op.
#ifndef GRAPHICS_DISABLED
  delete align_win_;
  delete target_win_;
  delete ctc_win_;
  delete recon_win_;
#endif
}

Member Function Documentation

◆ ActivationError()

double tesseract::LSTMTrainer::ActivationError ( ) const

inline

Definition at line 130 of file lstmtrainer.h.

                                 {
    // Error rate currently stored in the ET_DELTA slot of error_rates_.
    return error_rates_[ET_DELTA];
  }

◆ best_error_rate()

double tesseract::LSTMTrainer::best_error_rate ( ) const

inline

Definition at line 139 of file lstmtrainer.h.

                                 {
    // Current value of best_error_rate_ (InitIterations() resets it to 100.0).
    return best_error_rate_;
  }

◆ best_iteration()

int tesseract::LSTMTrainer::best_iteration ( ) const

inline

Definition at line 142 of file lstmtrainer.h.

                             {
    // Current value of best_iteration_ (InitIterations() resets it to 0).
    return best_iteration_;
  }

◆ best_trainer()

const std::vector< char > & tesseract::LSTMTrainer::best_trainer ( ) const

inline

Definition at line 154 of file lstmtrainer.h.

                                              {
    // Read-only reference to the best_trainer_ byte buffer; no copy is made.
    return best_trainer_;
  }

◆ CharError()

double tesseract::LSTMTrainer::CharError ( ) const

inline

Definition at line 133 of file lstmtrainer.h.

                           {
    // Error rate currently stored in the ET_CHAR_ERROR slot of error_rates_.
    return error_rates_[ET_CHAR_ERROR];
  }

◆ ComputeCharError()

double tesseract::LSTMTrainer::ComputeCharError	(	const std::vector< int > &	truth_str,
		const std::vector< int > &	ocr_str
	)

protected

Definition at line 1323 of file lstmtrainer.cpp.

                                                                    {
  // Bag-of-labels comparison: every non-null truth label adds one to its
  // counter and every non-null OCR label subtracts one, so the absolute
  // values left over count the labels that do not pair up.
  std::vector<int> balance(NumOutputs());
  unsigned num_truth = 0;
  for (size_t i = 0; i < truth_str.size(); ++i) {
    const int label = truth_str[i];
    if (label == null_char_) {
      continue;
    }
    ++balance[label];
    ++num_truth;
  }
  for (size_t i = 0; i < ocr_str.size(); ++i) {
    const int label = ocr_str[i];
    if (label == null_char_) {
      continue;
    }
    --balance[label];
  }
  unsigned mismatches = 0;
  for (size_t i = 0; i < balance.size(); ++i) {
    mismatches += abs(balance[i]);
  }
  // Limit BCER to interval [0,1] and avoid division by zero.
  if (mismatches == 0) {
    return 0.0;
  }
  if (num_truth <= mismatches) {
    return 1.0;
  }
  return static_cast<double>(mismatches) / num_truth;
}

◆ ComputeCTCTargets()

bool tesseract::LSTMTrainer::ComputeCTCTargets	(	const std::vector< int > &	truth_labels,
		NetworkIO *	outputs,
		NetworkIO *	targets
	)

protected

Definition at line 1254 of file lstmtrainer.cpp.

                                                                            {
  // Bottom-clip outputs to a minimum probability.
  // `outputs` is handed over as a mutable pointer, so it is presumably
  // adjusted in place before the targets are computed from it.
  CTC::NormalizeProbs(outputs);
  // Target construction is delegated to CTC; its result is returned as-is.
  return CTC::ComputeCTCTargets(truth_labels, null_char_,
                                outputs->float_array(), targets);
}

◆ ComputeErrorRates()

double tesseract::LSTMTrainer::ComputeErrorRates	(	const NetworkIO &	deltas,
		double	char_error,
		double	word_error
	)

protected

Definition at line 1265 of file lstmtrainer.cpp.

                                                                            {
  // Records this sample's RMS, delta, word and char errors plus the skip
  // count through UpdateErrorBuffer(), and returns the delta error.
  UpdateErrorBuffer(ComputeRMSError(deltas), ET_RMS);
  // Delta error is the fraction of timesteps with >0.5 error in the top choice
  // score. If zero, then the top choice characters are guaranteed correct,
  // even when there is residue in the RMS error.
  double delta_error = ComputeWinnerError(deltas);
  UpdateErrorBuffer(delta_error, ET_DELTA);
  // char_error and word_error are supplied by the caller and stored unchanged.
  UpdateErrorBuffer(word_error, ET_WORD_RECERR);
  UpdateErrorBuffer(char_error, ET_CHAR_ERROR);
  // Skip ratio measures the difference between sample_iteration_ and
  // training_iteration_, which reflects the number of unusable samples,
  // usually due to unencodable truth text, or the text not fitting in the
  // space for the output.
  // Note that the value computed here is sample_iteration_ minus
  // prev_sample_iteration_.
  double skip_count = sample_iteration_ - prev_sample_iteration_;
  UpdateErrorBuffer(skip_count, ET_SKIP_RATIO);
  return delta_error;
}

◆ ComputeRMSError()

double tesseract::LSTMTrainer::ComputeRMSError ( const NetworkIO & deltas )

protected

Definition at line 1285 of file lstmtrainer.cpp.

                                                           {
  // Root-mean-square of every delta value over all timesteps and classes.
  const int width = deltas.Width();
  const int num_classes = deltas.NumFeatures();
  // An empty delta array carries no error. Without this guard the division
  // below would be 0/0 and the resulting NaN would be passed on to the
  // error buffers by ComputeErrorRates().
  if (width <= 0 || num_classes <= 0) {
    return 0.0;
  }
  double total_error = 0.0;
  for (int t = 0; t < width; ++t) {
    const float *class_errs = deltas.f(t);
    for (int c = 0; c < num_classes; ++c) {
      double error = class_errs[c];
      total_error += error * error;
    }
  }
  // Form the element count in double so a large width * num_classes product
  // cannot overflow int.
  return sqrt(total_error / (static_cast<double>(width) * num_classes));
}

◆ ComputeTextTargets()

bool tesseract::LSTMTrainer::ComputeTextTargets	(	const NetworkIO &	outputs,
		const std::vector< int > &	truth_labels,
		NetworkIO *	targets
	)

protected

Definition at line 1232 of file lstmtrainer.cpp.

                                                         {
  // Builds simple-text targets: timestep t is set to truth label t, and every
  // remaining timestep up to the target width is set to the null character.
  // The `outputs` argument is not consulted. Returns false when the truth is
  // longer than the target.
  if (truth_labels.size() > targets->Width()) {
    tprintf("Error: transcription %s too long to fit into target of width %d\n",
            DecodeLabels(truth_labels).c_str(), targets->Width());
    return false;
  }
  const int num_truth = truth_labels.size();
  for (int t = 0; t < num_truth; ++t) {
    targets->SetActivations(t, truth_labels[t], 1.0);
  }
  // Pad the tail with the null character.
  for (int t = num_truth; t < targets->Width(); ++t) {
    targets->SetActivations(t, null_char_, 1.0);
  }
  return true;
}

◆ ComputeWinnerError()

double tesseract::LSTMTrainer::ComputeWinnerError ( const NetworkIO & deltas )

protected

Definition at line 1304 of file lstmtrainer.cpp.

                                                              {
  // Counts the delta entries whose magnitude is at least 0.5 and returns
  // that count divided by the number of timesteps.
  const int width = deltas.Width();
  // With no timesteps there is nothing to get wrong. Without this guard the
  // final division would be 0/0 and yield NaN.
  if (width <= 0) {
    return 0.0;
  }
  const int num_classes = deltas.NumFeatures();
  int num_errors = 0;
  for (int t = 0; t < width; ++t) {
    const float *class_errs = deltas.f(t);
    for (int c = 0; c < num_classes; ++c) {
      float abs_delta = std::fabs(class_errs[c]);
      // TODO(rays) Filtering cases where the delta is very large to cut out
      // GT errors doesn't work. Find a better way or get better truth.
      if (0.5 <= abs_delta) {
        ++num_errors;
      }
    }
  }
  return static_cast<double>(num_errors) / width;
}

◆ ComputeWordError()

double tesseract::LSTMTrainer::ComputeWordError	(	std::string *	truth_str,
		std::string *	ocr_str
	)

protected

Definition at line 1351 of file lstmtrainer.cpp.

                                                         {
  // Signed per-word tally: each truth word counts up and each OCR word counts
  // down, so a positive remainder is a truth word the OCR text did not
  // reproduce. The result is those misses divided by the truth word count.
  const std::vector<std::string> truth_words = split(*truth_str, ' ');
  if (truth_words.empty()) {
    return 0.0;
  }
  const std::vector<std::string> ocr_words = split(*ocr_str, ' ');
  std::unordered_map<std::string, int, std::hash<std::string>> tally;
  // Keys are rebuilt from c_str(), i.e. cut at the first NUL byte.
  // operator[] value-initialises a missing entry to 0 before it is adjusted.
  for (size_t i = 0; i < truth_words.size(); ++i) {
    ++tally[std::string(truth_words[i].c_str())];
  }
  for (size_t i = 0; i < ocr_words.size(); ++i) {
    --tally[std::string(ocr_words[i].c_str())];
  }
  int missed = 0;
  for (auto it = tally.begin(); it != tally.end(); ++it) {
    if (it->second > 0) {
      missed += it->second;
    }
  }
  return static_cast<double>(missed) / truth_words.size();
}

◆ CurrentTrainingStage()

int tesseract::LSTMTrainer::CurrentTrainingStage ( ) const

inline

Definition at line 217 of file lstmtrainer.h.

                                   {
    // Current value of training_stage_ (EmptyConstructor() starts it at 0).
    return training_stage_;
  }

◆ DebugLSTMTraining()

bool tesseract::LSTMTrainer::DebugLSTMTraining	(	const NetworkIO &	inputs,
		const ImageData &	trainingdata,
		const NetworkIO &	fwd_outputs,
		const std::vector< int > &	truth_labels,
		const NetworkIO &	outputs
	)

protected

Definition at line 1154 of file lstmtrainer.cpp.

                                                              {
  // Prints the decoded truth and aligned text for the current sample and, on
  // every debug_interval_-th training iteration, the activation path and the
  // debug windows. Returns false only when the truth decodes to nothing.
  const std::string truth_text = DecodeLabels(truth_labels);
  if (truth_text.empty()) {
    tprintf("Empty truth string at decode time!\n");
    return false;
  }
  // A zero interval switches all debug output off.
  if (debug_interval_ == 0) {
    return true;
  }
  // Get class labels, xcoords and string.
  std::vector<int> labels;
  std::vector<int> xcoords;
  LabelsFromOutputs(outputs, &labels, &xcoords);
  const std::string aligned_text = DecodeLabels(labels);
  tprintf("Iteration %d: GROUND  TRUTH : %s\n", training_iteration(),
          truth_text.c_str());
  if (aligned_text != truth_text) {
    tprintf("Iteration %d: ALIGNED TRUTH : %s\n", training_iteration(),
            aligned_text.c_str());
  }
  // The detailed dump needs a positive interval and an iteration that is a
  // multiple of it.
  const bool detailed =
      debug_interval_ > 0 && training_iteration() % debug_interval_ == 0;
  if (detailed) {
    tprintf("TRAINING activation path for truth string %s\n",
            truth_text.c_str());
    DebugActivationPath(outputs, labels, xcoords);
#ifndef GRAPHICS_DISABLED
    DisplayForward(inputs, labels, xcoords, "LSTMTraining", &align_win_);
    if (OutputLossType() == LT_CTC) {
      DisplayTargets(fwd_outputs, "CTC Outputs", &ctc_win_);
      DisplayTargets(outputs, "CTC Targets", &target_win_);
    }
#endif
  }
  return true;
}

◆ DebugNetwork()

void tesseract::LSTMTrainer::DebugNetwork ( )

Definition at line 289 of file lstmtrainer.cpp.

                               {
  // Forwards to the network's own weight dump.
  // NOTE(review): network_ is dereferenced unchecked; presumably callers only
  // use this once a network has been built or loaded -- confirm.
  network_->DebugWeights();
}

◆ DeSerialize()

bool tesseract::LSTMTrainer::DeSerialize	(	const TessdataManager *	mgr,
		TFile *	fp
	)

Definition at line 511 of file lstmtrainer.cpp.

                                                                   {
  // Reads the trainer state from fp, field by field, in a fixed order.
  // Returns false as soon as any read fails, with one exception noted below.
  // The recognizer part is read first by the base class.
  if (!LSTMRecognizer::DeSerialize(mgr, fp)) {
    return false;
  }
  if (!fp->DeSerialize(&learning_iteration_)) {
    // Special case. If we successfully decoded the recognizer, but fail here
    // then it means we were just given a recognizer, so issue a warning and
    // allow it.
    tprintf("Warning: LSTMTrainer deserialized an LSTMRecognizer!\n");
    learning_iteration_ = 0;
    network_->SetEnableTraining(TS_ENABLED);
    return true;
  }
  if (!fp->DeSerialize(&prev_sample_iteration_)) {
    return false;
  }
  if (!fp->DeSerialize(&perfect_delay_)) {
    return false;
  }
  if (!fp->DeSerialize(&last_perfect_training_iteration_)) {
    return false;
  }
  // One rolling error buffer per error type.
  for (auto &error_buffer : error_buffers_) {
    if (!fp->DeSerialize(error_buffer)) {
      return false;
    }
  }
  if (!fp->DeSerialize(&error_rates_[0], countof(error_rates_))) {
    return false;
  }
  if (!fp->DeSerialize(&training_stage_)) {
    return false;
  }
  // A single byte records how much of the remaining state was written.
  uint8_t amount;
  if (!fp->DeSerialize(&amount)) {
    return false;
  }
  if (amount == LIGHT) {
    return true; // Don't read the rest.
  }
  // Best/worst statistics and the corresponding model dumps.
  if (!fp->DeSerialize(&best_error_rate_)) {
    return false;
  }
  if (!fp->DeSerialize(&best_error_rates_[0], countof(best_error_rates_))) {
    return false;
  }
  if (!fp->DeSerialize(&best_iteration_)) {
    return false;
  }
  if (!fp->DeSerialize(&worst_error_rate_)) {
    return false;
  }
  if (!fp->DeSerialize(&worst_error_rates_[0], countof(worst_error_rates_))) {
    return false;
  }
  if (!fp->DeSerialize(&worst_iteration_)) {
    return false;
  }
  if (!fp->DeSerialize(&stall_iteration_)) {
    return false;
  }
  if (!fp->DeSerialize(best_model_data_)) {
    return false;
  }
  if (!fp->DeSerialize(worst_model_data_)) {
    return false;
  }
  // best_trainer_ is only read when the amount is not NO_BEST_TRAINER.
  if (amount != NO_BEST_TRAINER && !fp->DeSerialize(best_trainer_)) {
    return false;
  }
  // The sub-trainer is stored as a nested dump; an empty dump means none.
  std::vector<char> sub_data;
  if (!fp->DeSerialize(sub_data)) {
    return false;
  }
  if (sub_data.empty()) {
    sub_trainer_ = nullptr;
  } else {
    sub_trainer_ = std::make_unique<LSTMTrainer>();
    if (!ReadTrainingDump(sub_data, *sub_trainer_)) {
      return false;
    }
  }
  if (!fp->DeSerialize(best_error_history_)) {
    return false;
  }
  if (!fp->DeSerialize(best_error_iterations_)) {
    return false;
  }
  // The last field decides the overall result.
  return fp->DeSerialize(&improvement_steps_);
}

◆ DisplayTargets()

void tesseract::LSTMTrainer::DisplayTargets	(	const NetworkIO &	targets,
		const char *	window_name,
		ScrollView **	window
	)

protected

Definition at line 1195 of file lstmtrainer.cpp.

                                                                               {
  // Draws the target activations into *window: for each feature, one outline
  // per run of timesteps whose scaled target value is at least 1.
  int width = targets.Width();
  int num_features = targets.NumFeatures();
  // ClearWindow receives the ScrollView** itself, so it can presumably create
  // the window when *window is still null -- TODO confirm.
  Network::ClearWindow(true, window_name, width * kTargetXScale, kTargetYScale,
                       window);
  for (int c = 0; c < num_features; ++c) {
    // Cycle through the pen colours; the +2 offset skips the first two values
    // of the ScrollView::Color enumeration.
    int color = c % (ScrollView::GREEN_YELLOW - 1) + 2;
    (*window)->Pen(static_cast<ScrollView::Color>(color));
    // start_t is the first timestep of the run being drawn, or -1 outside one.
    int start_t = -1;
    for (int t = 0; t < width; ++t) {
      double target = targets.f(t)[c];
      target *= kTargetYScale;
      if (target >= 1) {
        if (start_t < 0) {
          // Start a new run one step to the left, on the baseline.
          (*window)->SetCursor(t - 1, 0);
          start_t = t;
        }
        (*window)->DrawTo(t, target);
      } else if (start_t >= 0) {
        // Run ended: drop to the baseline and close the outline.
        (*window)->DrawTo(t, 0);
        (*window)->DrawTo(start_t - 1, 0);
        start_t = -1;
      }
    }
    // Close a run that is still open at the right-hand edge.
    if (start_t >= 0) {
      (*window)->DrawTo(width, 0);
      (*window)->DrawTo(start_t - 1, 0);
    }
  }
  (*window)->Update();
}

◆ DumpFilename()

std::string tesseract::LSTMTrainer::DumpFilename ( ) const

Definition at line 1053 of file lstmtrainer.cpp.

                                          {
  // Produces
  // "<model_base>_<best error rate>_<best iteration>_<training iteration>.checkpoint"
  // with the error rate in fixed notation, three decimals. The classic locale
  // keeps the number formatting independent of the global locale.
  std::stringstream name;
  name.imbue(std::locale::classic());
  name << model_base_;
  name << std::fixed << std::setprecision(3);
  name << "_" << best_error_rate_;
  name << "_" << best_iteration_;
  name << "_" << training_iteration_;
  name << ".checkpoint";
  return name.str();
}

◆ EmptyConstructor()

void tesseract::LSTMTrainer::EmptyConstructor ( )

protected

Definition at line 1137 of file lstmtrainer.cpp.

                                   {
  // Shared initialisation used by the constructors and by InitCharSet().
  checkpoint_iteration_ = 0;
  training_stage_ = 0;
  num_training_stages_ = 2;
#ifndef GRAPHICS_DISABLED
  // All debug window pointers start out null.
  align_win_ = target_win_ = ctc_win_ = recon_win_ = nullptr;
#endif
  // Iteration counters, error statistics and rolling buffers.
  InitIterations();
}

◆ EncodeString() [1/2]

bool tesseract::LSTMTrainer::EncodeString	(	const std::string &	str,
		const UNICHARSET &	unicharset,
		const UnicharCompress *	recoder,
		bool	simple_text,
		int	null_char,
		std::vector< int > *	labels
	)

static

Definition at line 813 of file lstmtrainer.cpp.

                                                                      {
  // Converts str into a label sequence in *labels: the string is cleaned and
  // encoded through unicharset, and each resulting id is optionally re-encoded
  // through recoder. Unless simple_text is set, null_char is placed before the
  // first label and after every label. Returns false on an empty string or
  // when encoding fails; the failing bytes are then printed.
  if (str.c_str() == nullptr || str.length() <= 0) {
    tprintf("Empty truth string!\n");
    return false;
  }
  unsigned err_index;
  std::vector<int> internal_labels;
  // Note that *labels is cleared up front, so on failure it may be left
  // holding a partial sequence.
  labels->clear();
  if (!simple_text) {
    labels->push_back(null_char);
  }
  std::string cleaned = unicharset.CleanupString(str.c_str());
  // NOTE(review): err_index is only given a value by encode_string() or by the
  // recoder failure branch below; presumably encode_string() always sets it
  // when it returns false -- confirm.
  if (unicharset.encode_string(cleaned.c_str(), true, &internal_labels, nullptr,
                               &err_index)) {
    bool success = true;
    for (auto internal_label : internal_labels) {
      if (recoder != nullptr) {
        // Re-encode labels via recoder.
        RecodedCharID code;
        int len = recoder->EncodeUnichar(internal_label, &code);
        if (len > 0) {
          for (int j = 0; j < len; ++j) {
            labels->push_back(code(j));
            if (!simple_text) {
              labels->push_back(null_char);
            }
          }
        } else {
          // The recoder cannot express this unichar. With err_index at 0 the
          // whole cleaned string is dumped below.
          success = false;
          err_index = 0;
          break;
        }
      } else {
        labels->push_back(internal_label);
        if (!simple_text) {
          labels->push_back(null_char);
        }
      }
    }
    if (success) {
      return true;
    }
  }
  // Print every byte of the cleaned string from err_index onwards, in hex.
  tprintf("Encoding of string failed! Failure bytes:");
  while (err_index < cleaned.size()) {
    tprintf(" %x", cleaned[err_index++] & 0xff);
  }
  tprintf("\n");
  return false;
}

◆ EncodeString() [2/2]

bool tesseract::LSTMTrainer::EncodeString	(	const std::string &	str,
		std::vector< int > *	labels
	)		const

inline

Definition at line 254 of file lstmtrainer.h.

                                                                        {
    // Convenience overload: calls the static EncodeString() with this
    // trainer's unicharset, its recoder (only when IsRecoding() is true), its
    // simple-text setting and its null character.
    return EncodeString(str, GetUnicharset(),
                        IsRecoding() ? &recoder_ : nullptr, SimpleTextOutput(),
                        null_char_, labels);
  }

◆ error_rates()

const double * tesseract::LSTMTrainer::error_rates ( ) const

inline

Definition at line 136 of file lstmtrainer.h.

                                    {
    // Pointer to the first element of the error_rates_ array, which holds
    // ET_COUNT entries.
    return error_rates_;
  }

◆ FillErrorBuffer()

void tesseract::LSTMTrainer::FillErrorBuffer	(	double	new_error,
		ErrorTypes	type
	)

Definition at line 1065 of file lstmtrainer.cpp.

                                                                   {
  // Overwrites the first kRollingBufferSize_ entries of the buffer for this
  // error type with new_error.
  std::vector<double> &buffer = error_buffers_[type];
  int slot = kRollingBufferSize_;
  while (slot-- > 0) {
    buffer[slot] = new_error;
  }
  // The published rate is the same value scaled by 100.
  error_rates_[type] = 100.0 * new_error;
}

◆ GridSearchDictParams()

Trainability tesseract::LSTMTrainer::GridSearchDictParams	(	const ImageData *	trainingdata,
		int	iteration,
		double	min_dict_ratio,
		double	dict_ratio_step,
		double	max_dict_ratio,
		double	min_cert_offset,
		double	cert_offset_step,
		double	max_cert_offset,
		std::string &	results
	)

Definition at line 236 of file lstmtrainer.cpp.

                                                                         {
  // Runs the forward pass for one sample and appends to `results` the word
  // error of a decode without dictionary ("0,0=<err>"), followed by one
  // " <ratio>,<offset>=<err>" entry per grid point decoded with the
  // dictionary. Returns the Trainability reported by PrepareForBackward().
  sample_iteration_ = iteration;
  NetworkIO fwd_outputs, targets;
  Trainability result =
      PrepareForBackward(trainingdata, &fwd_outputs, &targets);
  // Nothing to search when the sample is unusable or there is no dictionary.
  if (result == UNENCODABLE || result == HI_PRECISION_ERR || dict_ == nullptr) {
    return result;
  }
 
  // Encode/decode the truth to get the normalization.
  std::vector<int> truth_labels, ocr_labels, xcoords;
  ASSERT_HOST(EncodeString(trainingdata->transcription(), &truth_labels));
  // NO-dict error.
  RecodeBeamSearch base_search(recoder_, null_char_, SimpleTextOutput(),
                               nullptr);
  base_search.Decode(fwd_outputs, 1.0, 0.0, RecodeBeamSearch::kMinCertainty,
                     nullptr);
  base_search.ExtractBestPathAsLabels(&ocr_labels, &xcoords);
  std::string truth_text = DecodeLabels(truth_labels);
  std::string ocr_text = DecodeLabels(ocr_labels);
  double baseline_error = ComputeWordError(&truth_text, &ocr_text);
  results += "0,0=" + std::to_string(baseline_error);
 
  RecodeBeamSearch search(recoder_, null_char_, SimpleTextOutput(), dict_);
  // NOTE(review): both loops compare with `<`, so max_dict_ratio and
  // max_cert_offset themselves are excluded, and a step that is not positive
  // would never terminate -- presumably callers always pass positive steps.
  for (double r = min_dict_ratio; r < max_dict_ratio; r += dict_ratio_step) {
    for (double c = min_cert_offset; c < max_cert_offset;
         c += cert_offset_step) {
      search.Decode(fwd_outputs, r, c, RecodeBeamSearch::kMinCertainty,
                    nullptr);
      search.ExtractBestPathAsLabels(&ocr_labels, &xcoords);
      truth_text = DecodeLabels(truth_labels);
      ocr_text = DecodeLabels(ocr_labels);
      // This is destructive on both strings.
      // NOTE(review): ComputeWordError() as shown in this file only reads the
      // two strings, so the remark above may be out of date -- confirm.
      double word_error = ComputeWordError(&truth_text, &ocr_text);
      // Log the first grid point, and any point whose error is not finite.
      if ((r == min_dict_ratio && c == min_cert_offset) ||
          !std::isfinite(word_error)) {
        std::string t = DecodeLabels(truth_labels);
        std::string o = DecodeLabels(ocr_labels);
        tprintf("r=%g, c=%g, truth=%s, ocr=%s, wderr=%g, truth[0]=%d\n", r, c,
                t.c_str(), o.c_str(), word_error, truth_labels[0]);
      }
      results += " " + std::to_string(r);
      results += "," + std::to_string(c);
      results += "=" + std::to_string(word_error);
    }
  }
  return result;
}

◆ improvement_steps()

int32_t tesseract::LSTMTrainer::improvement_steps ( ) const

inline

Definition at line 148 of file lstmtrainer.h.

                                    {
    // Current value of improvement_steps_ (InitIterations() resets it to
    // kMinStallIterations).
    return improvement_steps_;
  }

◆ InitCharSet() [1/3]

void tesseract::LSTMTrainer::InitCharSet ( )

protected

Definition at line 1115 of file lstmtrainer.cpp.

                              {
  // Resets the trainer state, then loads the unicharset and recoder from
  // mgr_ and derives the null character. Loading must succeed.
  EmptyConstructor();
  training_flags_ = TF_COMPRESS_UNICHARSET;
  // Initialize the unicharset and recoder.
  if (!LoadCharsets(&mgr_)) {
    // A string literal is never null, so comparing it against nullptr would
    // always pass and the failure would go unnoticed. Negating the literal
    // makes the assertion fail here while keeping the message in the asserted
    // expression.
    ASSERT_HOST(
        !"Must provide a traineddata containing lstm_unicharset and"
        " lstm_recoder!\n");
  }
  SetNullChar();
}

◆ InitCharSet() [2/3]

bool tesseract::LSTMTrainer::InitCharSet ( const std::string & traineddata_path )

inline

Definition at line 100 of file lstmtrainer.h.

                                                      {
    // Opens the traineddata at the given path into mgr_. The charset is only
    // initialised when that succeeds, and the outcome is returned.
    if (!mgr_.Init(traineddata_path.c_str())) {
      return false;
    }
    InitCharSet();
    return true;
  }

◆ InitCharSet() [3/3]

void tesseract::LSTMTrainer::InitCharSet ( const TessdataManager & mgr )

inline

Definition at line 107 of file lstmtrainer.h.

                                               {
    // Copy the caller's manager into mgr_, then initialise the charset from
    // that copy.
    mgr_ = mgr;
    InitCharSet();
  }

◆ InitIterations()

void tesseract::LSTMTrainer::InitIterations ( )

Definition at line 208 of file lstmtrainer.cpp.

                                 {
  // Puts every counter, best/worst statistic and rolling error buffer back
  // into its starting state.
  sample_iteration_ = training_iteration_ = 0;
  learning_iteration_ = prev_sample_iteration_ = 0;
  best_iteration_ = worst_iteration_ = 0;
  perfect_delay_ = last_perfect_training_iteration_ = 0;

  // The best rate starts at 100 and the worst rate at 0.
  best_error_rate_ = 100.0;
  worst_error_rate_ = 0.0;
  stall_iteration_ = kMinStallIterations;
  improvement_steps_ = kMinStallIterations;
  best_error_history_.clear();
  best_error_iterations_.clear();

  for (int type = 0; type < ET_COUNT; ++type) {
    best_error_rates_[type] = error_rates_[type] = 100.0;
    worst_error_rates_[type] = 0.0;
    // Leaves exactly kRollingBufferSize_ zeros in the buffer.
    error_buffers_[type].assign(kRollingBufferSize_, 0.0);
  }
  error_rate_of_last_saved_best_ = kMinStartedErrorRate;
}

◆ InitNetwork()

bool tesseract::LSTMTrainer::InitNetwork	(	const char *	network_spec,
		int	append_index,
		int	net_flags,
		float	weight_range,
		float	learning_rate,
		float	momentum,
		float	adam_beta
	)

Definition at line 164 of file lstmtrainer.cpp.

                                               {
  // Builds network_ from network_spec and stores the training parameters.
  // Returns false if NetworkBuilder::InitNetwork() fails, true otherwise.

  // Append the spec to the version string held by mgr_.
  mgr_.SetVersionString(mgr_.VersionString() + ":" + network_spec);
  learning_rate_ = learning_rate;
  momentum_ = momentum;
  adam_beta_ = adam_beta;
  SetNullChar();
  const bool built = NetworkBuilder::InitNetwork(
      recoder_.code_range(), network_spec, append_index, net_flags,
      weight_range, &randomizer_, &network_);
  if (!built) {
    return false;
  }
  network_str_ += network_spec;
  // Report what was built and the parameters it will be trained with.
  tprintf("Built network:%s from request %s\n", network_->spec().c_str(),
          network_spec);
  tprintf(
      "Training parameters:\n  Debug interval = %d,"
      " weights = %g, learning rate = %g, momentum=%g\n",
      debug_interval_, weight_range, learning_rate_, momentum_);
  tprintf("null char=%d\n", null_char_);
  return true;
}

◆ InitTensorFlowNetwork()

int tesseract::LSTMTrainer::InitTensorFlowNetwork ( const std::string & tf_proto )

◆ LastSingleError()

double tesseract::LSTMTrainer::LastSingleError ( ErrorTypes type ) const

inline

Definition at line 164 of file lstmtrainer.h.

                                                {
    // Slot for (training_iteration() - 1) modulo the buffer size; adding
    // kRollingBufferSize_ first keeps the dividend from going negative.
    const auto previous_slot =
        (training_iteration() + kRollingBufferSize_ - 1) % kRollingBufferSize_;
    return error_buffers_[type][previous_slot];
  }

◆ learning_iteration()

int tesseract::LSTMTrainer::learning_iteration ( ) const

inline

Definition at line 145 of file lstmtrainer.h.

                                 {
    // Accessor for the learning_iteration_ counter.
    return this->learning_iteration_;
  }

◆ LoadAllTrainingData()

bool tesseract::LSTMTrainer::LoadAllTrainingData	(	const std::vector< std::string > &	filenames,
		CachingStrategy	cache_strategy,
		bool	randomly_rotate
	)

Definition at line 296 of file lstmtrainer.cpp.

                                                            {
  // Remembers the rotation setting, discards any previously loaded documents
  // and loads the given files. Returns the result of LoadDocuments().
  randomly_rotate_ = randomly_rotate;
  training_data_.Clear();
  const bool loaded = training_data_.LoadDocuments(filenames, cache_strategy,
                                                   LoadDataFromFile);
  return loaded;
}

◆ LogIterations()

void tesseract::LSTMTrainer::LogIterations	(	const char *	intro_str,
		std::stringstream &	log_msg
	)		const

Definition at line 407 of file lstmtrainer.cpp.

                                                                {
  // Appends "<intro_str> iteration <learning>/<training>/<sample>" to log_msg.
  log_msg << intro_str;
  log_msg << " iteration " << learning_iteration();
  log_msg << "/" << training_iteration();
  log_msg << "/" << sample_iteration();
}

◆ MaintainCheckpoints()

bool tesseract::LSTMTrainer::MaintainCheckpoints	(	const TestCallback &	tester,
		std::stringstream &	log_msg
	)

Definition at line 309 of file lstmtrainer.cpp.

                                                                {
  // Keeps track of the best and worst error rates seen so far, starting or
  // updating a sub-trainer and reverting to the best model when appropriate,
  // and writes the best-model and checkpoint files. A description of what
  // happened is appended to log_msg.
  // Returns true on a new best or new worst error rate; otherwise returns
  // whether the sub-trainer produced a result other than STR_NONE.
  PrepareLogMsg(log_msg);
  double error_rate = CharError();
  int iteration = learning_iteration();
  if (iteration >= stall_iteration_ &&
      error_rate > best_error_rate_ * (1.0 + kSubTrainerMarginFraction) &&
      best_error_rate_ < kMinStartedErrorRate && !best_trainer_.empty()) {
    // It hasn't got any better in a long while, and is a margin worse than the
    // best, so go back to the best model and try a different learning rate.
    StartSubtrainer(log_msg);
  }
  SubTrainerResult sub_trainer_result = STR_NONE;
  if (sub_trainer_ != nullptr) {
    sub_trainer_result = UpdateSubtrainer(log_msg);
    if (sub_trainer_result == STR_REPLACED) {
      // Reset the inputs, as we have overwritten *this.
      error_rate = CharError();
      iteration = learning_iteration();
      PrepareLogMsg(log_msg);
    }
  }
  bool result = true; // Something interesting happened.
  std::vector<char> rec_model_data;
  if (error_rate < best_error_rate_) {
    // New best: record it, drop the sub-trainer, push the stall point out and
    // refresh the in-memory copy of the best trainer.
    SaveRecognitionDump(&rec_model_data);
    log_msg << " New best BCER = " << error_rate;
    log_msg << UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
    // If sub_trainer_ is not nullptr, either *this beat it to a new best, or it
    // just overwrote *this. In either case, we have finished with it.
    sub_trainer_.reset();
    stall_iteration_ = learning_iteration() + kMinStallIterations;
    if (TransitionTrainingStage(kStageTransitionThreshold)) {
      log_msg << " Transitioned to stage " << CurrentTrainingStage();
    }
    SaveTrainingDump(NO_BEST_TRAINER, *this, &best_trainer_);
    // Only write the best model to disk when it beats the last saved one by
    // the kBestCheckpointFraction factor.
    if (error_rate < error_rate_of_last_saved_best_ * kBestCheckpointFraction) {
      std::string best_model_name = DumpFilename();
      if (!SaveDataToFile(best_trainer_, best_model_name.c_str())) {
        log_msg << " failed to write best model:";
      } else {
        log_msg << " wrote best model:";
        error_rate_of_last_saved_best_ = best_error_rate_;
      }
      log_msg << best_model_name;
    }
  } else if (error_rate > worst_error_rate_) {
    SaveRecognitionDump(&rec_model_data);
    log_msg << " New worst BCER = " << error_rate;
    log_msg << UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
    if (worst_error_rate_ > best_error_rate_ + kMinDivergenceRate &&
        best_error_rate_ < kMinStartedErrorRate && !best_trainer_.empty()) {
      // Error rate has ballooned. Go back to the best model.
      log_msg << "\nDivergence! ";
      // Copy best_trainer_ before reading it, as it will get overwritten.
      std::vector<char> revert_data(best_trainer_);
      if (ReadTrainingDump(revert_data, *this)) {
        LogIterations("Reverted to", log_msg);
        ReduceLearningRates(this, log_msg);
      } else {
        LogIterations("Failed to Revert at", log_msg);
      }
      // If it fails again, we will wait twice as long before reverting again.
      // Note: `iteration` was captured before the revert, whereas
      // learning_iteration() is read from the (possibly reverted) trainer.
      stall_iteration_ = iteration + 2 * (iteration - learning_iteration());
      // Re-save the best trainer with the new learning rates and stall
      // iteration.
      SaveTrainingDump(NO_BEST_TRAINER, *this, &best_trainer_);
    }
  } else {
    // Something interesting happened only if the sub_trainer_ was trained.
    result = sub_trainer_result != STR_NONE;
  }
  if (checkpoint_name_.length() > 0) {
    // Write a current checkpoint.
    std::vector<char> checkpoint;
    if (!SaveTrainingDump(FULL, *this, &checkpoint) ||
        !SaveDataToFile(checkpoint, checkpoint_name_.c_str())) {
      log_msg << " failed to write checkpoint.";
    } else {
      log_msg << " wrote checkpoint.";
    }
  }
  return result;
}

◆ MaintainCheckpointsSpecific()

bool tesseract::LSTMTrainer::MaintainCheckpointsSpecific	(	int	iteration,
		const std::vector< char > *	train_model,
		const std::vector< char > *	rec_model,
		TestCallback	tester,
		std::stringstream &	log_msg
	)

◆ MapRecoder()

std::vector< int > tesseract::LSTMTrainer::MapRecoder	(	const UNICHARSET &	old_chset,
		const UnicharCompress &	old_recoder
	)		const

Definition at line 1074 of file lstmtrainer.cpp.

                                                                           {
  // Builds a map from each code of the current recoder_ to the matching code
  // of old_recoder, or -1 where no match is found. A match is the code that
  // old_recoder places at the same position when encoding the same unichar.
  const int code_count = recoder_.code_range();
  const int unichar_count = GetUnicharset().size();
  std::vector<int> code_map(code_count, -1);
  for (int new_code = 0; new_code < code_count; ++new_code) {
    int mapped_code = -1;
    // Scan the new unichar_ids for one whose encoding contains new_code.
    // The <= is to include the null char, which may be beyond the unicharset.
    for (int uid = 0; uid <= unichar_count; ++uid) {
      RecodedCharID new_codes;
      const int new_length = recoder_.EncodeUnichar(uid, &new_codes);
      // Position of new_code within the encoding, or new_length if absent.
      int position = 0;
      for (; position < new_length; ++position) {
        if (new_codes(position) == new_code) {
          break;
        }
      }
      if (position == new_length) {
        continue;
      }
      // The old unicharset must have the same unichar.
      int old_uid;
      if (uid < unichar_count) {
        old_uid = old_chset.unichar_to_id(GetUnicharset().id_to_unichar(uid));
      } else {
        old_uid = old_chset.size() - 1;
      }
      if (old_uid == INVALID_UNICHAR_ID) {
        continue;
      }
      // The old code is the one at the same position in the old encoding.
      RecodedCharID old_codes;
      const int old_length = old_recoder.EncodeUnichar(old_uid, &old_codes);
      if (position < old_length) {
        mapped_code = old_codes(position);
        break;
      }
    }
    code_map[new_code] = mapped_code;
  }
  return code_map;
}

◆ mutable_training_data()

DocumentCache * tesseract::LSTMTrainer::mutable_training_data ( )

inline

Definition at line 172 of file lstmtrainer.h.

                                         {
    // Hands out a mutable pointer to the member document cache.
    return &this->training_data_;
  }

◆ NewSingleError()

double tesseract::LSTMTrainer::NewSingleError ( ErrorTypes type ) const

inline

Definition at line 158 of file lstmtrainer.h.

                                               {
    // Slot for training_iteration() modulo the rolling buffer size.
    const auto current_slot = training_iteration() % kRollingBufferSize_;
    return error_buffers_[type][current_slot];
  }

◆ PrepareForBackward()

Trainability tesseract::LSTMTrainer::PrepareForBackward	(	const ImageData *	trainingdata,
		NetworkIO *	fwd_outputs,
		NetworkIO *	targets
	)

Definition at line 902 of file lstmtrainer.cpp.

                                                                 {
  // Runs the forward pass on trainingdata and fills *targets with the deltas
  // to back-propagate (targets minus *fwd_outputs), updating the error rates
  // on the way.
  // Returns UNENCODABLE if the sample cannot be used (null data, unencodable
  // or blank transcription, recognition/target computation failure), PERFECT
  // if the delta error is zero, HI_PRECISION_ERR if AnySuspiciousTruth()
  // reports a problem, and TRAINABLE otherwise.
  if (trainingdata == nullptr) {
    tprintf("Null trainingdata.\n");
    return UNENCODABLE;
  }
  // Ensure repeatability of random elements even across checkpoints.
  bool debug =
      debug_interval_ > 0 && training_iteration() % debug_interval_ == 0;
  std::vector<int> truth_labels;
  if (!EncodeString(trainingdata->transcription(), &truth_labels)) {
    tprintf("Can't encode transcription: '%s' in language '%s'\n",
            trainingdata->transcription().c_str(),
            trainingdata->language().c_str());
    return UNENCODABLE;
  }
  bool upside_down = false;
  if (randomly_rotate_) {
    // This ensures consistent training results.
    SetRandomSeed();
    upside_down = randomizer_.SignedRand(1.0) > 0.0;
    if (upside_down) {
      // Modify the truth labels to match the rotation:
      // Apart from space and null, increment the label. This changes the
      // script-id to the same script-id but upside-down.
      // The labels need to be reversed in order, as the first is now the last.
      // Iterate by reference: iterating by value would increment a temporary
      // copy and leave truth_labels unchanged.
      for (auto &truth_label : truth_labels) {
        if (truth_label != UNICHAR_SPACE && truth_label != null_char_) {
          ++truth_label;
        }
      }
      std::reverse(truth_labels.begin(), truth_labels.end());
    }
  }
  // Reject transcriptions that consist only of spaces and null characters.
  unsigned w = 0;
  while (w < truth_labels.size() &&
         (truth_labels[w] == UNICHAR_SPACE || truth_labels[w] == null_char_)) {
    ++w;
  }
  if (w == truth_labels.size()) {
    tprintf("Blank transcription: %s\n", trainingdata->transcription().c_str());
    return UNENCODABLE;
  }
  float image_scale;
  NetworkIO inputs;
  // Inversion is only tried when the sample carries no boxes.
  bool invert = trainingdata->boxes().empty();
  if (!RecognizeLine(*trainingdata, invert ? 0.5f : 0.0f, debug, invert, upside_down,
                     &image_scale, &inputs, fwd_outputs)) {
    tprintf("Image %s not trainable\n", trainingdata->imagefilename().c_str());
    return UNENCODABLE;
  }
  targets->Resize(*fwd_outputs, network_->NumOutputs());
  // Compute the targets according to the loss type of the network output.
  LossType loss_type = OutputLossType();
  if (loss_type == LT_SOFTMAX) {
    if (!ComputeTextTargets(*fwd_outputs, truth_labels, targets)) {
      tprintf("Compute simple targets failed for %s!\n",
              trainingdata->imagefilename().c_str());
      return UNENCODABLE;
    }
  } else if (loss_type == LT_CTC) {
    if (!ComputeCTCTargets(truth_labels, fwd_outputs, targets)) {
      tprintf("Compute CTC targets failed for %s!\n",
              trainingdata->imagefilename().c_str());
      return UNENCODABLE;
    }
  } else {
    tprintf("Logistic outputs not implemented yet!\n");
    return UNENCODABLE;
  }
  std::vector<int> ocr_labels;
  std::vector<int> xcoords;
  LabelsFromOutputs(*fwd_outputs, &ocr_labels, &xcoords);
  // CTC does not produce correct target labels to begin with.
  if (loss_type != LT_CTC) {
    LabelsFromOutputs(*targets, &truth_labels, &xcoords);
  }
  if (!DebugLSTMTraining(inputs, *trainingdata, *fwd_outputs, truth_labels,
                         *targets)) {
    tprintf("Input width was %d\n", inputs.Width());
    return UNENCODABLE;
  }
  std::string ocr_text = DecodeLabels(ocr_labels);
  std::string truth_text = DecodeLabels(truth_labels);
  // From here on *targets holds the deltas rather than the raw targets.
  targets->SubtractAllFromFloat(*fwd_outputs);
  if (debug_interval_ != 0) {
    if (truth_text != ocr_text) {
      tprintf("Iteration %d: BEST OCR TEXT : %s\n", training_iteration(),
              ocr_text.c_str());
    }
  }
  double char_error = ComputeCharError(truth_labels, ocr_labels);
  double word_error = ComputeWordError(&truth_text, &ocr_text);
  double delta_error = ComputeErrorRates(*targets, char_error, word_error);
  if (debug_interval_ != 0) {
    tprintf("File %s line %d %s:\n", trainingdata->imagefilename().c_str(),
            trainingdata->page_number(), delta_error == 0.0 ? "(Perfect)" : "");
  }
  if (delta_error == 0.0) {
    return PERFECT;
  }
  if (targets->AnySuspiciousTruth(kHighConfidence)) {
    return HI_PRECISION_ERR;
  }
  return TRAINABLE;
}

◆ PrepareLogMsg()

void tesseract::LSTMTrainer::PrepareLogMsg ( std::stringstream & log_msg ) const

Definition at line 395 of file lstmtrainer.cpp.

                                                              {
  // Appends the iteration counters followed by the current mean error rates,
  // formatted as fixed-point with three decimals, to log_msg.
  LogIterations("At", log_msg);
  log_msg << std::fixed << std::setprecision(3);
  log_msg << ", mean rms=" << error_rates_[ET_RMS];
  log_msg << "%, delta=" << error_rates_[ET_DELTA];
  log_msg << "%, BCER train=" << error_rates_[ET_CHAR_ERROR];
  log_msg << "%, BWER train=" << error_rates_[ET_WORD_RECERR];
  log_msg << "%, skip ratio=" << error_rates_[ET_SKIP_RATIO] << "%,";
}

◆ ReadLocalTrainingDump()

bool tesseract::LSTMTrainer::ReadLocalTrainingDump	(	const TessdataManager *	mgr,
		const char *	data,
		int	size
	)

Definition at line 1022 of file lstmtrainer.cpp.

                                                                    {
  // Deserializes *this from the given memory buffer. An empty buffer is
  // reported with a warning and treated as failure.
  if (size != 0) {
    TFile buffer_file;
    buffer_file.Open(data, size);
    return DeSerialize(mgr, &buffer_file);
  }
  tprintf("Warning: data size is 0 in LSTMTrainer::ReadLocalTrainingDump\n");
  return false;
}

◆ ReadSizedTrainingDump()

bool tesseract::LSTMTrainer::ReadSizedTrainingDump	(	const char *	data,
		int	size,
		LSTMTrainer &	trainer
	)		const

inline

Definition at line 307 of file lstmtrainer.h.

                                                         {
    // Forwards to the target trainer, passing this trainer's mgr_.
    const bool loaded = trainer.ReadLocalTrainingDump(&mgr_, data, size);
    return loaded;
  }

◆ ReadTrainingDump()

bool tesseract::LSTMTrainer::ReadTrainingDump	(	const std::vector< char > &	data,
		LSTMTrainer &	trainer
	)		const

inline

Definition at line 300 of file lstmtrainer.h.

                                                    {
    // An empty dump cannot be read; otherwise defer to the sized overload.
    return !data.empty() &&
           ReadSizedTrainingDump(data.data(), data.size(), trainer);
  }

◆ ReduceLayerLearningRates()

int tesseract::LSTMTrainer::ReduceLayerLearningRates	(	TFloat	factor,
		int	num_samples,
		LSTMTrainer *	samples_trainer
	)

Definition at line 694 of file lstmtrainer.cpp.

                                                                        {
  // Decides per layer whether to scale its learning rate by `factor`.
  // For each of num_samples samples and for each option (LR_DOWN: rate scaled
  // by factor, LR_SAME: rate unchanged) a copy of the trainer is trained on
  // one sample with the updates held back. Then, for each trainable layer,
  // the update is applied to that layer alone, the sample is trained on again
  // and CountAlternators() accumulates into ok_sums/bad_sums.
  // A layer is lowered when its bad fraction with the reduced rate is below
  // kImprovementFraction times the one with the unchanged rate. If no layer
  // qualifies, every trainable layer is lowered.
  // Returns the number of layers whose learning rate was scaled.
  enum WhichWay {
    LR_DOWN, // Learning rate will go down by factor.
    LR_SAME, // Learning rate will stay the same.
    LR_COUNT // Size of arrays.
  };
  std::vector<std::string> layers = EnumerateLayers();
  int num_layers = layers.size();
  std::vector<int> num_weights(num_layers);
  std::vector<TFloat> bad_sums[LR_COUNT];
  std::vector<TFloat> ok_sums[LR_COUNT];
  for (int i = 0; i < LR_COUNT; ++i) {
    bad_sums[i].resize(num_layers, 0.0);
    ok_sums[i].resize(num_layers, 0.0);
  }
  // NOTE(review): divides by zero if momentum_ == 1 - confirm that callers
  // never configure that value.
  auto momentum_factor = 1 / (1 - momentum_);
  std::vector<char> orig_trainer;
  samples_trainer->SaveTrainingDump(LIGHT, *this, &orig_trainer);
  // Layers that are not training get a weight count of 0 and are skipped
  // everywhere below.
  for (int i = 0; i < num_layers; ++i) {
    Network *layer = GetLayer(layers[i]);
    num_weights[i] = layer->IsTraining() ? layer->num_weights() : 0;
  }
  int iteration = sample_iteration();
  for (int s = 0; s < num_samples; ++s) {
    // Which way will we modify the learning rate?
    for (int ww = 0; ww < LR_COUNT; ++ww) {
      // Transfer momentum to learning rate and adjust by the ww factor.
      auto ww_factor = momentum_factor;
      if (ww == LR_DOWN) {
        ww_factor *= factor;
      }
      // Make a copy of *this, so we can mess about without damaging anything.
      LSTMTrainer copy_trainer;
      samples_trainer->ReadTrainingDump(orig_trainer, copy_trainer);
      // Clear the updates, doing nothing else.
      copy_trainer.network_->Update(0.0, 0.0, 0.0, 0);
      // Adjust the learning rate in each layer.
      for (int i = 0; i < num_layers; ++i) {
        if (num_weights[i] == 0) {
          continue;
        }
        copy_trainer.ScaleLayerLearningRate(layers[i], ww_factor);
      }
      // Both options (ww) of one pass start from the same sample iteration.
      copy_trainer.SetIteration(iteration);
      // Train on the sample, but keep the update in updates_ instead of
      // applying to the weights.
      const ImageData *trainingdata =
          copy_trainer.TrainOnLine(samples_trainer, true);
      if (trainingdata == nullptr) {
        continue;
      }
      // We'll now use this trainer again for each layer.
      std::vector<char> updated_trainer;
      samples_trainer->SaveTrainingDump(LIGHT, copy_trainer, &updated_trainer);
      for (int i = 0; i < num_layers; ++i) {
        if (num_weights[i] == 0) {
          continue;
        }
        LSTMTrainer layer_trainer;
        samples_trainer->ReadTrainingDump(updated_trainer, layer_trainer);
        Network *layer = layer_trainer.GetLayer(layers[i]);
        // Update the weights in just the layer, using Adam if enabled.
        layer->Update(0.0, momentum_, adam_beta_,
                      layer_trainer.training_iteration_ + 1);
        // Zero the updates matrix again.
        layer->Update(0.0, 0.0, 0.0, 0);
        // Train again on the same sample, again holding back the updates.
        layer_trainer.TrainOnLine(trainingdata, true);
        // Count the sign changes in the updates in layer vs in copy_trainer.
        float before_bad = bad_sums[ww][i];
        float before_ok = ok_sums[ww][i];
        layer->CountAlternators(*copy_trainer.GetLayer(layers[i]),
                                &ok_sums[ww][i], &bad_sums[ww][i]);
        // NOTE(review): bad_frac (the bad fraction contributed by this sample)
        // is computed but never read in the code visible here.
        float bad_frac =
            bad_sums[ww][i] + ok_sums[ww][i] - before_bad - before_ok;
        if (bad_frac > 0.0f) {
          bad_frac = (bad_sums[ww][i] - before_bad) / bad_frac;
        }
      }
    }
    ++iteration;
  }
  int num_lowered = 0;
  for (int i = 0; i < num_layers; ++i) {
    if (num_weights[i] == 0) {
      continue;
    }
    Network *layer = GetLayer(layers[i]);
    float lr = GetLayerLearningRate(layers[i]);
    // NOTE(review): total_down/total_same stay 0 if no sample contributed any
    // counts for this layer, so the divisions below can divide by zero -
    // confirm whether that case can occur.
    TFloat total_down = bad_sums[LR_DOWN][i] + ok_sums[LR_DOWN][i];
    TFloat total_same = bad_sums[LR_SAME][i] + ok_sums[LR_SAME][i];
    TFloat frac_down = bad_sums[LR_DOWN][i] / total_down;
    TFloat frac_same = bad_sums[LR_SAME][i] / total_same;
    tprintf("Layer %d=%s: lr %g->%g%%, lr %g->%g%%", i, layer->name().c_str(),
            lr * factor, 100.0 * frac_down, lr, 100.0 * frac_same);
    if (frac_down < frac_same * kImprovementFraction) {
      tprintf(" REDUCED\n");
      ScaleLayerLearningRate(layers[i], factor);
      ++num_lowered;
    } else {
      tprintf(" SAME\n");
    }
  }
  if (num_lowered == 0) {
    // Just lower everything to make sure.
    for (int i = 0; i < num_layers; ++i) {
      if (num_weights[i] > 0) {
        ScaleLayerLearningRate(layers[i], factor);
        ++num_lowered;
      }
    }
  }
  return num_lowered;
}

◆ ReduceLearningRates()

void tesseract::LSTMTrainer::ReduceLearningRates	(	LSTMTrainer *	samples_trainer,
		std::stringstream &	log_msg
	)

Definition at line 675 of file lstmtrainer.cpp.

                                                                {
  // Lowers the learning rate by kLearningRateDecay: globally when the network
  // has no layer-specific rates, otherwise per layer, and logs the outcome.
  if (!network_->TestFlag(NF_LAYER_SPECIFIC_LR)) {
    ScaleLearningRate(kLearningRateDecay);
    log_msg << "\nReduced learning rate to :" << learning_rate_;
  } else {
    const int layers_reduced = ReduceLayerLearningRates(
        kLearningRateDecay, kNumAdjustmentIterations, samples_trainer);
    log_msg << "\nReduced learning rate on layers: " << layers_reduced;
  }
  log_msg << "\n";
}

◆ RollErrorBuffers()

void tesseract::LSTMTrainer::RollErrorBuffers ( )

protected

Definition at line 1405 of file lstmtrainer.cpp.

                                   {
  // Advances the training iteration. A sample with a positive delta error
  // counts as a learning iteration; otherwise the current iteration is
  // recorded as the last perfect one.
  prev_sample_iteration_ = sample_iteration_;
  const bool had_delta_error = NewSingleError(ET_DELTA) > 0.0;
  if (!had_delta_error) {
    last_perfect_training_iteration_ = training_iteration_;
  } else {
    ++learning_iteration_;
  }
  ++training_iteration_;
  if (debug_interval_ == 0) {
    return;
  }
  tprintf("Mean rms=%g%%, delta=%g%%, train=%g%%(%g%%), skip ratio=%g%%\n",
          error_rates_[ET_RMS], error_rates_[ET_DELTA],
          error_rates_[ET_CHAR_ERROR], error_rates_[ET_WORD_RECERR],
          error_rates_[ET_SKIP_RATIO]);
}

◆ SaveRecognitionDump()

void tesseract::LSTMTrainer::SaveRecognitionDump ( std::vector< char > * data ) const

Definition at line 1043 of file lstmtrainer.cpp.

                                                                 {
  // Serializes the recognizer part only into *data. Training is temporarily
  // disabled on the network around the serialization and re-enabled after.
  TFile dump_file;
  dump_file.OpenWrite(data);
  network_->SetEnableTraining(TS_TEMP_DISABLE);
  ASSERT_HOST(LSTMRecognizer::Serialize(&mgr_, &dump_file));
  network_->SetEnableTraining(TS_RE_ENABLE);
}

◆ SaveTraineddata()

bool tesseract::LSTMTrainer::SaveTraineddata ( const char * filename )

Definition at line 1034 of file lstmtrainer.cpp.

                                                      {
  // Replaces the TESSDATA_LSTM entry of mgr_ with a fresh recognition dump
  // and writes the whole traineddata to filename.
  std::vector<char> lstm_blob;
  SaveRecognitionDump(&lstm_blob);
  mgr_.OverwriteEntry(TESSDATA_LSTM, lstm_blob.data(), lstm_blob.size());
  const bool saved = mgr_.SaveFile(filename, SaveDataToFile);
  return saved;
}

◆ SaveTrainingDump()

bool tesseract::LSTMTrainer::SaveTrainingDump	(	SerializeAmount	serialize_amount,
		const LSTMTrainer &	trainer,
		std::vector< char > *	data
	)		const

Definition at line 1013 of file lstmtrainer.cpp.

                                                                {
  // Serializes `trainer` (not *this) into *data, using this trainer's mgr_.
  TFile dump_file;
  dump_file.OpenWrite(data);
  const bool written = trainer.Serialize(serialize_amount, &mgr_, &dump_file);
  return written;
}

◆ Serialize()

bool tesseract::LSTMTrainer::Serialize	(	SerializeAmount	serialize_amount,
		const TessdataManager *	mgr,
		TFile *	fp
	)		const

Definition at line 427 of file lstmtrainer.cpp.

                                                                         {
  // Writes the trainer to fp. Fields are written in a fixed order and the
  // function returns false at the first failing write. A LIGHT dump stops
  // after the serialize_amount byte; NO_BEST_TRAINER omits best_trainer_.

  // Recognizer state followed by the counters that every dump carries.
  const bool counters_ok = LSTMRecognizer::Serialize(mgr, fp) &&
                           fp->Serialize(&learning_iteration_) &&
                           fp->Serialize(&prev_sample_iteration_) &&
                           fp->Serialize(&perfect_delay_) &&
                           fp->Serialize(&last_perfect_training_iteration_);
  if (!counters_ok) {
    return false;
  }
  for (const auto &rolling_buffer : error_buffers_) {
    if (!fp->Serialize(rolling_buffer)) {
      return false;
    }
  }
  uint8_t amount = serialize_amount;
  const bool rates_ok =
      fp->Serialize(&error_rates_[0], countof(error_rates_)) &&
      fp->Serialize(&training_stage_) && fp->Serialize(&amount);
  if (!rates_ok) {
    return false;
  }
  if (serialize_amount == LIGHT) {
    return true; // We are done.
  }
  // Best/worst statistics and the saved models.
  const bool stats_ok =
      fp->Serialize(&best_error_rate_) &&
      fp->Serialize(&best_error_rates_[0], countof(best_error_rates_)) &&
      fp->Serialize(&best_iteration_) && fp->Serialize(&worst_error_rate_) &&
      fp->Serialize(&worst_error_rates_[0], countof(worst_error_rates_)) &&
      fp->Serialize(&worst_iteration_) && fp->Serialize(&stall_iteration_) &&
      fp->Serialize(best_model_data_) && fp->Serialize(worst_model_data_);
  if (!stats_ok) {
    return false;
  }
  if (serialize_amount != NO_BEST_TRAINER && !fp->Serialize(best_trainer_)) {
    return false;
  }
  // The sub-trainer, if any, is embedded as a LIGHT dump; otherwise an empty
  // vector is written in its place.
  std::vector<char> sub_trainer_dump;
  if (sub_trainer_ != nullptr &&
      !SaveTrainingDump(LIGHT, *sub_trainer_, &sub_trainer_dump)) {
    return false;
  }
  return fp->Serialize(sub_trainer_dump) &&
         fp->Serialize(best_error_history_) &&
         fp->Serialize(best_error_iterations_) &&
         fp->Serialize(&improvement_steps_);
}

◆ set_perfect_delay()

void tesseract::LSTMTrainer::set_perfect_delay ( int delay )

inline

Definition at line 151 of file lstmtrainer.h.

                                    {
    // Stores the delay that TrainOnLine() adds to
    // last_perfect_training_iteration_ before training on perfect samples.
    this->perfect_delay_ = delay;
  }

◆ SetNullChar()

void tesseract::LSTMTrainer::SetNullChar ( )

protected

Definition at line 1128 of file lstmtrainer.cpp.

                              {
  // Picks the unichar id that stands for null: UNICHAR_BROKEN when the
  // unicharset has special codes, otherwise the id just past its end. Then
  // replaces it with the first code of its recoded form.
  if (GetUnicharset().has_special_codes()) {
    null_char_ = UNICHAR_BROKEN;
  } else {
    null_char_ = GetUnicharset().size();
  }
  RecodedCharID recoded;
  recoder_.EncodeUnichar(null_char_, &recoded);
  null_char_ = recoded(0);
}

◆ SetupCheckpointInfo()

void tesseract::LSTMTrainer::SetupCheckpointInfo ( )

◆ StartSubtrainer()

void tesseract::LSTMTrainer::StartSubtrainer ( std::stringstream & log_msg )

Definition at line 605 of file lstmtrainer.cpp.

                                                          {
  // Creates sub_trainer_ from the saved best trainer and reduces its learning
  // rates. If the best trainer cannot be read back, sub_trainer_ is dropped
  // again and the failure is logged.
  sub_trainer_ = std::make_unique<LSTMTrainer>();
  if (!ReadTrainingDump(best_trainer_, *sub_trainer_)) {
    log_msg << " Failed to revert to previous best for trial!";
    sub_trainer_.reset();
    return;
  }
  log_msg << " Trial sub_trainer_ from iteration "
          << sub_trainer_->training_iteration();
  // Reduce learning rate so it doesn't diverge this time.
  sub_trainer_->ReduceLearningRates(this, log_msg);
  // If it fails again, we will wait twice as long before reverting again.
  const int iterations_since_best =
      learning_iteration() - sub_trainer_->learning_iteration();
  stall_iteration_ = learning_iteration() + 2 * iterations_since_best;
  sub_trainer_->stall_iteration_ = stall_iteration_;
  // Re-save the best trainer with the new learning rates and stall iteration.
  SaveTrainingDump(NO_BEST_TRAINER, *sub_trainer_, &best_trainer_);
}

◆ training_data()

const DocumentCache & tesseract::LSTMTrainer::training_data ( ) const

inline

Definition at line 169 of file lstmtrainer.h.

                                             {
    // Read-only access to the member document cache.
    return this->training_data_;
  }

◆ TrainOnLine() [1/2]

Trainability tesseract::LSTMTrainer::TrainOnLine	(	const ImageData *	trainingdata,
		bool	batch
	)

Definition at line 869 of file lstmtrainer.cpp.

                                                  {
  // Trains on a single sample: forward pass, then (when applicable) backprop
  // and weight update, then rolls the error buffers. The sample counter is
  // advanced even when the sample turns out to be unusable.
  NetworkIO fwd_outputs, targets;
  const Trainability trainable =
      PrepareForBackward(trainingdata, &fwd_outputs, &targets);
  ++sample_iteration_;
  const bool unusable = trainable == UNENCODABLE || trainable == NOT_BOXED;
  if (unusable) {
    return trainable; // Sample was unusable.
  }
  const bool debug =
      debug_interval_ > 0 && training_iteration() % debug_interval_ == 0;
  // Perfect samples are only trained on once perfect_delay_ iterations have
  // passed since the last perfect one.
  const bool past_perfect_delay =
      training_iteration() > last_perfect_training_iteration_ + perfect_delay_;
  // Run backprop on the output.
  NetworkIO bp_deltas;
  if (network_->IsTraining() && (trainable != PERFECT || past_perfect_delay)) {
    network_->Backward(debug, targets, &scratch_space_, &bp_deltas);
    // In batch mode a momentum of -1 is passed instead of momentum_.
    const float update_momentum = batch ? -1.0f : momentum_;
    network_->Update(learning_rate_, update_momentum, adam_beta_,
                     training_iteration_ + 1);
  }
#ifndef GRAPHICS_DISABLED
  if (debug_interval_ == 1 && debug_win_ != nullptr) {
    debug_win_->AwaitEvent(SVET_CLICK);
  }
#endif // !GRAPHICS_DISABLED
  // Roll the memory of past means.
  RollErrorBuffers();
  return trainable;
}

◆ TrainOnLine() [2/2]

const ImageData * tesseract::LSTMTrainer::TrainOnLine	(	LSTMTrainer *	samples_trainer,
		bool	batch
	)

inline

Definition at line 268 of file lstmtrainer.h.

                                                                         {
    // Fetches the page for the current sample iteration from samples_trainer
    // and trains on it. Returns the page, or nullptr when there is no page or
    // the sample was unusable.
    const ImageData *image =
        samples_trainer->training_data_.GetPageBySerial(sample_iteration());
    if (image == nullptr) {
      // Nothing to train on, but still move on to the next sample.
      ++sample_iteration_;
      return image;
    }
    const Trainability trainable = TrainOnLine(image, batch);
    const bool unusable = trainable == UNENCODABLE || trainable == NOT_BOXED;
    return unusable ? nullptr : image; // nullptr: sample was unusable.
  }

◆ TransitionTrainingStage()

bool tesseract::LSTMTrainer::TransitionTrainingStage ( float error_threshold )

Definition at line 417 of file lstmtrainer.cpp.

                                                               {
  // Moves on to the next training stage when the best error rate is below
  // error_threshold and a further stage exists. Returns whether it moved.
  const bool below_threshold = best_error_rate_ < error_threshold;
  if (!below_threshold || !(training_stage_ + 1 < num_training_stages_)) {
    return false;
  }
  ++training_stage_;
  return true;
}

◆ TryLoadingCheckpoint()

bool tesseract::LSTMTrainer::TryLoadingCheckpoint	(	const char *	filename,
		const char *	old_traineddata
	)

Definition at line 105 of file lstmtrainer.cpp.

                                                                    {
  // Loads a training checkpoint from filename into *this.
  // If the network's output count differs from the current recoder's code
  // range, old_traineddata must name the traineddata the checkpoint was
  // trained with; its unicharset and recoder are then used to remap the
  // network outputs to the current codes.
  // Returns false if the file cannot be loaded or unpacked, if it holds an
  // integer model, or if the old charset data is missing or unreadable.
  std::vector<char> data;
  if (!LoadDataFromFile(filename, &data)) {
    return false;
  }
  tprintf("Loaded file %s, unpacking...\n", filename);
  if (!ReadTrainingDump(data, *this)) {
    return false;
  }
  if (IsIntMode()) {
    tprintf("Error, %s is an integer (fast) model, cannot continue training\n",
            filename);
    return false;
  }
  // NOTE(review): `filename == old_traineddata` compares the pointers, not the
  // string contents - presumably callers pass the very same pointer to request
  // a plain load; confirm against the call sites.
  if (((old_traineddata == nullptr || *old_traineddata == '\0') &&
       network_->NumOutputs() == recoder_.code_range()) ||
      filename == old_traineddata) {
    return true; // Normal checkpoint load complete.
  }
  tprintf("Code range changed from %d to %d!\n", network_->NumOutputs(),
          recoder_.code_range());
  if (old_traineddata == nullptr || *old_traineddata == '\0') {
    tprintf("Must supply the old traineddata for code conversion!\n");
    return false;
  }
  // Load the old unicharset and recoder; fp is reused for both components.
  TessdataManager old_mgr;
  ASSERT_HOST(old_mgr.Init(old_traineddata));
  TFile fp;
  if (!old_mgr.GetComponent(TESSDATA_LSTM_UNICHARSET, &fp)) {
    return false;
  }
  UNICHARSET old_chset;
  if (!old_chset.load_from_file(&fp, false)) {
    return false;
  }
  if (!old_mgr.GetComponent(TESSDATA_LSTM_RECODER, &fp)) {
    return false;
  }
  UnicharCompress old_recoder;
  if (!old_recoder.DeSerialize(&fp)) {
    return false;
  }
  std::vector<int> code_map = MapRecoder(old_chset, old_recoder);
  // Set the null_char_ to the new value.
  // The old value is captured first, purely for the log message below.
  int old_null_char = null_char_;
  SetNullChar();
  // Map the softmax(s) in the network.
  network_->RemapOutputs(old_recoder.code_range(), code_map);
  tprintf("Previous null char=%d mapped to %d\n", old_null_char, null_char_);
  return true;
}

◆ UpdateErrorBuffer()

void tesseract::LSTMTrainer::UpdateErrorBuffer	(	double	new_error,
		ErrorTypes	type
	)

protected

Definition at line 1389 of file lstmtrainer.cpp.

                                                                     {
  // Stores new_error in the rolling buffer for the given error type and
  // refreshes error_rates_[type] with the mean of the buffered values,
  // expressed as a percentage.
  auto &buffer = error_buffers_[type];
  buffer[training_iteration_ % kRollingBufferSize_] = new_error;
  // Average over the first min(training_iteration_ + 1, buffer size) slots.
  const int num_samples =
      std::min<int>(training_iteration_ + 1, buffer.size());
  double total = 0.0;
  for (int slot = 0; slot < num_samples; ++slot) {
    total += buffer[slot];
  }
  const double average = total / num_samples;
  // Scale to percent and keep three decimal places (1/1000 of 1%).
  error_rates_[type] = IntCastRounded(100000.0 * average) / 1000.0;
}

◆ UpdateErrorGraph()

std::string tesseract::LSTMTrainer::UpdateErrorGraph	(	int	iteration,
		double	error_rate,
		const std::vector< char > &	model_data,
		const TestCallback &	tester
	)

protected

Definition at line 1425 of file lstmtrainer.cpp.

                                                                      {
  // Records (iteration, error_rate) against the tracked best/worst points
  // and, when a tester callback is supplied, runs it on a previously stored
  // model. Returns whatever the tester returned, or an empty string if the
  // tester was not called.
  //   iteration  - iteration number that error_rate belongs to.
  //   error_rate - error rate to compare with best_error_rate_.
  //   model_data - serialized model for this iteration; copied into
  //                best_model_data_ or worst_model_data_ in the branches below.
  //   tester     - callback used to evaluate a stored model; may be null, in
  //                which case no model is tested or stored.
  if (error_rate > best_error_rate_ &&
      iteration < best_iteration_ + kErrorGraphInterval) {
    // Too soon to record a new point.
    // Still give the tester a chance to evaluate the pending worst model.
    // Note that nullptr is passed instead of worst_error_rates_ here.
    if (tester != nullptr && !worst_model_data_.empty()) {
      mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0],
                          worst_model_data_.size());
      return tester(worst_iteration_, nullptr, mgr_, CurrentTrainingStage());
    } else {
      return "";
    }
  }
  std::string result;
  // NOTE: there are 2 asymmetries here:
  // 1. We are computing the global minimum, but the local maximum in between.
  // 2. If the tester returns an empty string, indicating that it is busy,
  //    call it repeatedly on new local maxima to test the previous min, but
  //    not the other way around, as there is little point testing the maxima
  //    between very frequent minima.
  if (error_rate < best_error_rate_) {
    // This is a new (global) minimum.
    // Test the stored worst model (if any) before replacing it; model_data
    // is only kept as best_model_data_ when that test was attempted.
    if (tester != nullptr && !worst_model_data_.empty()) {
      mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0],
                          worst_model_data_.size());
      result = tester(worst_iteration_, worst_error_rates_, mgr_,
                      CurrentTrainingStage());
      worst_model_data_.clear();
      best_model_data_ = model_data;
    }
    best_error_rate_ = error_rate;
    memcpy(best_error_rates_, error_rates_, sizeof(error_rates_));
    best_iteration_ = iteration;
    best_error_history_.push_back(error_rate);
    best_error_iterations_.push_back(iteration);
    // Compute 2% decay time.
    // Walk back through the history to the most recent entry whose error is
    // at least 2.0 above the new best; i ends at -1 if there is none.
    double two_percent_more = error_rate + 2.0;
    int i;
    for (i = best_error_history_.size() - 1;
         i >= 0 && best_error_history_[i] < two_percent_more; --i) {
    }
    // With no such entry, measure from iteration 0 and report 100.0 as the
    // earlier error.
    int old_iteration = i >= 0 ? best_error_iterations_[i] : 0;
    improvement_steps_ = iteration - old_iteration;
    tprintf("2 Percent improvement time=%d, best error was %g @ %d\n",
            improvement_steps_, i >= 0 ? best_error_history_[i] : 100.0,
            old_iteration);
  } else if (error_rate > best_error_rate_) {
    // This is a new (local) maximum.
    if (tester != nullptr) {
      // Prefer testing the stored best model; fall back to the stored worst.
      if (!best_model_data_.empty()) {
        mgr_.OverwriteEntry(TESSDATA_LSTM, &best_model_data_[0],
                            best_model_data_.size());
        result = tester(best_iteration_, best_error_rates_, mgr_,
                        CurrentTrainingStage());
      } else if (!worst_model_data_.empty()) {
        // Allow for multiple data points with "worst" error rate.
        mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0],
                            worst_model_data_.size());
        result = tester(worst_iteration_, worst_error_rates_, mgr_,
                        CurrentTrainingStage());
      }
      // A non-empty result means the tester accepted the job, so the stored
      // best model is released; otherwise it is kept for a later retry.
      if (result.length() > 0) {
        best_model_data_.clear();
      }
      worst_model_data_ = model_data;
    }
  }
  // Every call that gets past the early return above overwrites the "worst"
  // record with the current point, whichever branch was taken.
  worst_error_rate_ = error_rate;
  memcpy(worst_error_rates_, error_rates_, sizeof(error_rates_));
  worst_iteration_ = iteration;
  return result;
}

◆ UpdateSubtrainer()

SubTrainerResult tesseract::LSTMTrainer::UpdateSubtrainer ( std::stringstream & log_msg )

Definition at line 633 of file lstmtrainer.cpp.

                                                                       {
  // Compares the sub-trainer's character error with this trainer's. While the
  // sub-trainer leads by at least kSubTrainerMarginFraction, it is trained in
  // batches until it reaches this trainer's iteration count.
  // Returns STR_NONE if the sub-trainer was not ahead by the margin,
  // STR_REPLACED if its state was copied into this trainer, and STR_UPDATED
  // otherwise. Progress text is appended to log_msg.
  const double training_error = CharError();
  // Relative lead of an error value over this trainer's error.
  const auto margin_of = [training_error](double error) {
    return (training_error - error) / error;
  };
  double sub_error = sub_trainer_->CharError();
  double sub_margin = margin_of(sub_error);
  // Written as a negated >= so that a NaN margin also takes this exit.
  if (!(sub_margin >= kSubTrainerMarginFraction)) {
    return STR_NONE;
  }
  log_msg << " sub_trainer=" << sub_error
          << " margin=" << 100.0 * sub_margin << "\n";
  // Catch up to current iteration.
  const int end_iteration = training_iteration();
  while (sub_trainer_->training_iteration() < end_iteration &&
         sub_margin >= kSubTrainerMarginFraction) {
    // Train one batch of kNumPagesPerBatch iterations.
    const int batch_end =
        sub_trainer_->training_iteration() + kNumPagesPerBatch;
    for (; sub_trainer_->training_iteration() < batch_end;) {
      sub_trainer_->TrainOnLine(this, false);
    }
    std::stringstream batch_log("Sub:");
    batch_log.imbue(std::locale::classic());
    sub_trainer_->PrepareLogMsg(batch_log);
    batch_log << "\n";
    tprintf("UpdateSubtrainer:%s", batch_log.str().c_str());
    log_msg << batch_log.str();
    // Re-evaluate the lead after the batch.
    sub_error = sub_trainer_->CharError();
    sub_margin = margin_of(sub_error);
  }
  const bool sub_trainer_wins = sub_error < best_error_rate_ &&
                                sub_margin >= kSubTrainerMarginFraction;
  if (!sub_trainer_wins) {
    return STR_UPDATED;
  }
  // The sub_trainer_ has won the race to a new best. Switch to it by
  // serializing its state and reading that dump back into this trainer.
  std::vector<char> updated_trainer;
  SaveTrainingDump(LIGHT, *sub_trainer_, &updated_trainer);
  ReadTrainingDump(updated_trainer, *this);
  log_msg << " Sub trainer wins at iteration "
          << training_iteration() << "\n";
  return STR_REPLACED;
}

Member Data Documentation

◆ align_win_

ScrollView* tesseract::LSTMTrainer::align_win_

protected

Definition at line 408 of file lstmtrainer.h.

◆ best_error_history_

std::vector<double> tesseract::LSTMTrainer::best_error_history_

protected

Definition at line 462 of file lstmtrainer.h.

◆ best_error_iterations_

std::vector<int32_t> tesseract::LSTMTrainer::best_error_iterations_

protected

Definition at line 463 of file lstmtrainer.h.

◆ best_error_rate_

double tesseract::LSTMTrainer::best_error_rate_

protected

Definition at line 435 of file lstmtrainer.h.

◆ best_error_rates_

double tesseract::LSTMTrainer::best_error_rates_[ET_COUNT]

protected

Definition at line 437 of file lstmtrainer.h.

◆ best_iteration_

int tesseract::LSTMTrainer::best_iteration_

protected

Definition at line 439 of file lstmtrainer.h.

◆ best_model_data_

std::vector<char> tesseract::LSTMTrainer::best_model_data_

protected

Definition at line 449 of file lstmtrainer.h.

◆ best_model_name_

std::string tesseract::LSTMTrainer::best_model_name_

protected

Definition at line 428 of file lstmtrainer.h.

◆ best_trainer_

std::vector<char> tesseract::LSTMTrainer::best_trainer_

protected

Definition at line 452 of file lstmtrainer.h.

◆ checkpoint_iteration_

int tesseract::LSTMTrainer::checkpoint_iteration_

protected

Definition at line 419 of file lstmtrainer.h.

◆ checkpoint_name_

std::string tesseract::LSTMTrainer::checkpoint_name_

protected

Definition at line 423 of file lstmtrainer.h.

◆ ctc_win_

ScrollView* tesseract::LSTMTrainer::ctc_win_

protected

Definition at line 412 of file lstmtrainer.h.

◆ debug_interval_

int tesseract::LSTMTrainer::debug_interval_

protected

Definition at line 417 of file lstmtrainer.h.

◆ error_buffers_

std::vector<double> tesseract::LSTMTrainer::error_buffers_[ET_COUNT]

protected

Definition at line 484 of file lstmtrainer.h.

◆ error_rate_of_last_saved_best_

float tesseract::LSTMTrainer::error_rate_of_last_saved_best_

protected

Definition at line 457 of file lstmtrainer.h.

◆ error_rates_

double tesseract::LSTMTrainer::error_rates_[ET_COUNT]

protected

Definition at line 486 of file lstmtrainer.h.

◆ improvement_steps_

int32_t tesseract::LSTMTrainer::improvement_steps_

protected

Definition at line 465 of file lstmtrainer.h.

◆ kRollingBufferSize_

const int tesseract::LSTMTrainer::kRollingBufferSize_ = 1000

static protected

Definition at line 483 of file lstmtrainer.h.

◆ last_perfect_training_iteration_

int tesseract::LSTMTrainer::last_perfect_training_iteration_

protected

Definition at line 480 of file lstmtrainer.h.

◆ learning_iteration_

int tesseract::LSTMTrainer::learning_iteration_

protected

Definition at line 469 of file lstmtrainer.h.

◆ mgr_

TessdataManager tesseract::LSTMTrainer::mgr_

protected

Definition at line 488 of file lstmtrainer.h.

◆ model_base_

std::string tesseract::LSTMTrainer::model_base_

protected

Definition at line 421 of file lstmtrainer.h.

◆ num_training_stages_

int tesseract::LSTMTrainer::num_training_stages_

protected

Definition at line 430 of file lstmtrainer.h.

◆ perfect_delay_

int tesseract::LSTMTrainer::perfect_delay_

protected

Definition at line 477 of file lstmtrainer.h.

◆ prev_sample_iteration_

int tesseract::LSTMTrainer::prev_sample_iteration_

protected

Definition at line 471 of file lstmtrainer.h.

◆ randomly_rotate_

bool tesseract::LSTMTrainer::randomly_rotate_

protected

Definition at line 425 of file lstmtrainer.h.

◆ recon_win_

ScrollView* tesseract::LSTMTrainer::recon_win_

protected

Definition at line 414 of file lstmtrainer.h.

◆ stall_iteration_

int tesseract::LSTMTrainer::stall_iteration_

protected

Definition at line 447 of file lstmtrainer.h.

◆ sub_trainer_

std::unique_ptr<LSTMTrainer> tesseract::LSTMTrainer::sub_trainer_

protected

Definition at line 455 of file lstmtrainer.h.

◆ target_win_

ScrollView* tesseract::LSTMTrainer::target_win_

protected

Definition at line 410 of file lstmtrainer.h.

◆ training_data_

DocumentCache tesseract::LSTMTrainer::training_data_

protected

Definition at line 426 of file lstmtrainer.h.

◆ training_stage_

int tesseract::LSTMTrainer::training_stage_

protected

Definition at line 459 of file lstmtrainer.h.

◆ worst_error_rate_

double tesseract::LSTMTrainer::worst_error_rate_

protected

Definition at line 441 of file lstmtrainer.h.

◆ worst_error_rates_

double tesseract::LSTMTrainer::worst_error_rates_[ET_COUNT]

protected

Definition at line 443 of file lstmtrainer.h.

◆ worst_iteration_

int tesseract::LSTMTrainer::worst_iteration_

protected

Definition at line 445 of file lstmtrainer.h.

◆ worst_model_data_

std::vector<char> tesseract::LSTMTrainer::worst_model_data_

protected

Definition at line 450 of file lstmtrainer.h.

The documentation for this class was generated from the following files:

/media/home/debian/src/github/tesseract-ocr/tesseract/src/training/unicharset/lstmtrainer.h
/media/home/debian/src/github/tesseract-ocr/tesseract/src/training/unicharset/lstmtrainer.cpp

Public Member Functions

Static Public Member Functions

Protected Member Functions

Protected Attributes

Static Protected Attributes

Detailed Description

Constructor & Destructor Documentation

◆ LSTMTrainer() [1/2]

◆ LSTMTrainer() [2/2]

◆ ~LSTMTrainer()

Member Function Documentation

◆ ActivationError()

◆ best_error_rate()

◆ best_iteration()

◆ best_trainer()

◆ CharError()

◆ ComputeCharError()

◆ ComputeCTCTargets()

◆ ComputeErrorRates()

◆ ComputeRMSError()

◆ ComputeTextTargets()

◆ ComputeWinnerError()

◆ ComputeWordError()

◆ CurrentTrainingStage()

◆ DebugLSTMTraining()

◆ DebugNetwork()

◆ DeSerialize()

◆ DisplayTargets()

◆ DumpFilename()

◆ EmptyConstructor()

◆ EncodeString() [1/2]

◆ EncodeString() [2/2]

◆ error_rates()

◆ FillErrorBuffer()

◆ GridSearchDictParams()

◆ improvement_steps()

◆ InitCharSet() [1/3]

◆ InitCharSet() [2/3]

◆ InitCharSet() [3/3]

◆ InitIterations()

◆ InitNetwork()

◆ InitTensorFlowNetwork()

◆ LastSingleError()

◆ learning_iteration()

◆ LoadAllTrainingData()

◆ LogIterations()

◆ MaintainCheckpoints()

◆ MaintainCheckpointsSpecific()

◆ MapRecoder()

◆ mutable_training_data()

◆ NewSingleError()

◆ PrepareForBackward()

◆ PrepareLogMsg()

◆ ReadLocalTrainingDump()

◆ ReadSizedTrainingDump()

◆ ReadTrainingDump()

◆ ReduceLayerLearningRates()

◆ ReduceLearningRates()

◆ RollErrorBuffers()

◆ SaveRecognitionDump()

◆ SaveTraineddata()

◆ SaveTrainingDump()

◆ Serialize()

◆ set_perfect_delay()

◆ SetNullChar()

◆ SetupCheckpointInfo()

◆ StartSubtrainer()

◆ training_data()

◆ TrainOnLine() [1/2]

◆ TrainOnLine() [2/2]

◆ TransitionTrainingStage()

◆ TryLoadingCheckpoint()

◆ UpdateErrorBuffer()

◆ UpdateErrorGraph()

◆ UpdateSubtrainer()

Member Data Documentation

◆ align_win_

◆ best_error_history_

◆ best_error_iterations_

◆ best_error_rate_