bool LSTM::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scratch,
                    NetworkIO *back_deltas) {
#ifndef GRAPHICS_DISABLED
  if (debug) {
    DisplayBackward(fwd_deltas);
  }
#endif
  back_deltas->ResizeToMap(fwd_deltas.int_mode(), input_map_, ni_);
  // Scratch space.
  // Output errors from deltas with recurrence from sourceerr.
  NetworkScratch::FloatVec outputerr;
  outputerr.Init(ns_, scratch);
  // Recurrent error in the state/source.
  NetworkScratch::FloatVec curr_stateerr, curr_sourceerr;
  curr_stateerr.Init(ns_, scratch);
  curr_sourceerr.Init(na_, scratch);
  ZeroVector<TFloat>(ns_, curr_stateerr);
  ZeroVector<TFloat>(na_, curr_sourceerr);
  // Errors in the gates.
  NetworkScratch::FloatVec gate_errors[WT_COUNT];
  for (auto &gate_error : gate_errors) {
    gate_error.Init(ns_, scratch);
  }
  // Rotating buffers of width buf_width allow storage of the recurrent
  // time-steps used only for true 2-D. Stores one full strip of the major direction.
  int buf_width = Is2D() ? input_map_.Size(FD_WIDTH) : 1;
  std::vector<NetworkScratch::FloatVec> stateerr, sourceerr;
  if (Is2D()) {
    stateerr.resize(buf_width);
    sourceerr.resize(buf_width);
    for (int t = 0; t < buf_width; ++t) {
      stateerr[t].Init(ns_, scratch);
      sourceerr[t].Init(na_, scratch);
      ZeroVector<TFloat>(ns_, stateerr[t]);
      ZeroVector<TFloat>(na_, sourceerr[t]);
    }
  }
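  // Note: these revolving buffers are indexed below with Modulo(t, buf_width),
  // so the backward pass only ever keeps one row's worth of saved stateerr and
  // sourceerr for the 2-D recurrence, rather than the whole image.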
  // Parallel-generated sourceerr from each of the gates.
  NetworkScratch::FloatVec sourceerr_temps[WT_COUNT];
  for (auto &sourceerr_temp : sourceerr_temps) {
    sourceerr_temp.Init(na_, scratch);
  }
  int width = input_width_;
  // Transposed gate errors stored over all timesteps for sum outer.
  NetworkScratch::GradientStore gate_errors_t[WT_COUNT];
  for (auto &w : gate_errors_t) {
    w.Init(ns_, width, scratch);
  }
  // Used only if softmax_ != nullptr.
  NetworkScratch::FloatVec softmax_errors;
  NetworkScratch::GradientStore softmax_errors_t;
  if (softmax_ != nullptr) {
    softmax_errors.Init(no_, scratch);
    softmax_errors_t.Init(no_, width, scratch);
  }
  TFloat state_clip = Is2D() ? 9.0 : 4.0;
#if DEBUG_DETAIL > 1
  tprintf("fwd_deltas:%s\n", name_.c_str());
  fwd_deltas.Print(10);
#endif
  StrideMap::Index dest_index(input_map_);
  dest_index.InitToLast();
  // Used only by NT_LSTM_SUMMARY.
  StrideMap::Index src_index(fwd_deltas.stride_map());
  src_index.InitToLast();
  do {
    int t = dest_index.t();
    bool at_last_x = dest_index.IsLast(FD_WIDTH);
    // up_pos is the 2-D back step, down_pos is the 2-D fwd step; each is only
    // valid if >= 0.
    int up_pos = -1;
    int down_pos = -1;
    if (Is2D()) {
      if (dest_index.index(FD_HEIGHT) > 0) {
        StrideMap::Index up_index(dest_index);
        if (up_index.AddOffset(-1, FD_HEIGHT)) {
          up_pos = up_index.t();
        }
      }
      if (!dest_index.IsLast(FD_HEIGHT)) {
        StrideMap::Index down_index(dest_index);
        if (down_index.AddOffset(1, FD_HEIGHT)) {
          down_pos = down_index.t();
        }
      }
    }
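    // Note: up_pos indexes the row above, whose forward state is needed for the
    // GFS gate error; down_pos indexes the row below, which this backward scan
    // has already processed, so its saved stateerr/sourceerr can be reused.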
    // Index of the 2-D revolving buffers (sourceerr, stateerr).
    int mod_t = Modulo(t, buf_width);
    // Zero the state in the major direction only at the end of every row.
    if (at_last_x) {
      ZeroVector<TFloat>(na_, curr_sourceerr);
      ZeroVector<TFloat>(ns_, curr_stateerr);
    }
    // Setup the outputerr.
    if (type_ == NT_LSTM_SUMMARY) {
      if (dest_index.IsLast(FD_WIDTH)) {
        fwd_deltas.ReadTimeStep(src_index.t(), outputerr);
        src_index.Decrement();
      } else {
        ZeroVector<TFloat>(ns_, outputerr);
      }
    } else if (softmax_ == nullptr) {
      fwd_deltas.ReadTimeStep(t, outputerr);
    } else {
      softmax_->BackwardTimeStep(fwd_deltas, t, softmax_errors, softmax_errors_t.get(), outputerr);
    }
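    // outputerr now holds the error on this timestep's output: for
    // NT_LSTM_SUMMARY only the last x position of each row receives a delta,
    // with an attached softmax the delta is first backpropagated through that
    // layer, and otherwise it is read directly from fwd_deltas.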
    if (!at_last_x) {
      AccumulateVector(ns_, curr_sourceerr + ni_ + nf_, outputerr);
    }
    if (down_pos >= 0) {
      AccumulateVector(ns_, sourceerr[mod_t] + ni_ + nf_ + ns_, outputerr);
    }
    // Apply the 1-d forget gates.
    if (!at_last_x) {
      const float *next_node_gf1 = node_values_[GF1].f(t + 1);
      for (int i = 0; i < ns_; ++i) {
        curr_stateerr[i] *= next_node_gf1[i];
      }
    }
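    // This is the recurrent part of the cell-state gradient: curr_stateerr,
    // carried over from the previous (t + 1) iteration, is scaled by that
    // step's forget gate, in effect dE/ds(t) starts as GF1(t+1) * dE/ds(t+1),
    // before this step's own contributions are added below.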
    if (Is2D() && t + 1 < width) {
      for (int i = 0; i < ns_; ++i) {
        if (which_fg_[t + 1][i] != 1) {
          curr_stateerr[i] = 0.0;
        }
      }
      if (down_pos >= 0) {
        const float *right_node_gfs = node_values_[GFS].f(down_pos);
        const TFloat *right_stateerr = stateerr[mod_t];
        for (int i = 0; i < ns_; ++i) {
          if (which_fg_[down_pos][i] == 2) {
            curr_stateerr[i] += right_stateerr[i] * right_node_gfs[i];
          }
        }
      }
    }
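    // which_fg_ records which forget-gate direction was chosen per cell in the
    // forward pass: the carried 1-D error survives only where direction 1 (x)
    // was chosen at t + 1, and the saved error of the row below is added,
    // scaled by its GFS gate, where direction 2 (y) was chosen there.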
    state_.FuncMultiply3Add<HPrime>(node_values_[GO], t, outputerr, curr_stateerr);
    // Clip stateerr_ to a sane range.
    ClipVector<TFloat>(ns_, -state_clip, state_clip, curr_stateerr);
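    // Output-path contribution to the state error, in effect
    // dE/ds(t) += outputerr * GO(t) * H'(s(t)), followed by clipping to
    // +/- state_clip to keep the recurrent gradient numerically stable.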
#if DEBUG_DETAIL > 1
    if (t + 10 > width) {
      tprintf("t=%d, stateerr=", t);
      for (int i = 0; i < ns_; ++i) {
        tprintf(" %g,%g,%g", curr_stateerr[i], outputerr[i], curr_sourceerr[ni_ + nf_ + i]);
      }
      tprintf("\n");
    }
#endif
    // Matrix multiply to get the source errors.
    PARALLEL_IF_OPENMP(GFS)

    // Cell inputs.
    node_values_[CI].FuncMultiply3<GPrime>(t, node_values_[GI], t, curr_stateerr,
                                           gate_errors[CI]);
    ClipVector(ns_, -kErrClip, kErrClip, gate_errors[CI].get());
    gate_weights_[CI].VectorDotMatrix(gate_errors[CI], sourceerr_temps[CI]);
    gate_errors_t[CI].get()->WriteStrided(t, gate_errors[CI]);
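    // Cell-input error, roughly delta_CI = dE/ds * GI * g'(CI). VectorDotMatrix
    // backpropagates it through the CI gate weights into sourceerr_temps[CI];
    // WriteStrided stores it transposed for the weight-gradient sum after the loop.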

    SECTION_IF_OPENMP
    // Input Gates.
    node_values_[GI].FuncMultiply3<FPrime>(t, node_values_[CI], t, curr_stateerr,
                                           gate_errors[GI]);
    ClipVector(ns_, -kErrClip, kErrClip, gate_errors[GI].get());
    gate_weights_[GI].VectorDotMatrix(gate_errors[GI], sourceerr_temps[GI]);
    gate_errors_t[GI].get()->WriteStrided(t, gate_errors[GI]);

    SECTION_IF_OPENMP
    // 1-D forget gates.
    if (t > 0) {
      node_values_[GF1].FuncMultiply3<FPrime>(t, state_, t - 1, curr_stateerr,
                                              gate_errors[GF1]);
      ClipVector(ns_, -kErrClip, kErrClip, gate_errors[GF1].get());
      gate_weights_[GF1].VectorDotMatrix(gate_errors[GF1], sourceerr_temps[GF1]);
    } else {
      memset(gate_errors[GF1], 0, ns_ * sizeof(gate_errors[GF1][0]));
      memset(sourceerr_temps[GF1], 0, na_ * sizeof(*sourceerr_temps[GF1]));
    }
    gate_errors_t[GF1].get()->WriteStrided(t, gate_errors[GF1]);
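    // 1-D forget-gate error, roughly delta_GF1 = dE/ds * s(t-1) * f'(GF1);
    // at t == 0 there is no previous state, so the gate error and its source
    // contribution are simply zeroed.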

    // 2-D forget gates.
    if (up_pos >= 0) {
      node_values_[GFS].FuncMultiply3<FPrime>(t, state_, up_pos, curr_stateerr,
                                              gate_errors[GFS]);
      ClipVector(ns_, -kErrClip, kErrClip, gate_errors[GFS].get());
      gate_weights_[GFS].VectorDotMatrix(gate_errors[GFS], sourceerr_temps[GFS]);
    } else {
      memset(gate_errors[GFS], 0, ns_ * sizeof(gate_errors[GFS][0]));
      memset(sourceerr_temps[GFS], 0, na_ * sizeof(*sourceerr_temps[GFS]));
    }
    if (Is2D()) {
      gate_errors_t[GFS].get()->WriteStrided(t, gate_errors[GFS]);
    }

    SECTION_IF_OPENMP
    // Output gates.
    state_.Func2Multiply3<HFunc, FPrime>(node_values_[GO], t, outputerr, gate_errors[GO]);
    ClipVector(ns_, -kErrClip, kErrClip, gate_errors[GO].get());
    gate_weights_[GO].VectorDotMatrix(gate_errors[GO], sourceerr_temps[GO]);
    gate_errors_t[GO].get()->WriteStrided(t, gate_errors[GO]);
    END_PARALLEL_IF_OPENMP

    SumVectors(na_, sourceerr_temps[CI], sourceerr_temps[GI], sourceerr_temps[GF1],
               sourceerr_temps[GO], sourceerr_temps[GFS], curr_sourceerr);
    back_deltas->WriteTimeStep(t, curr_sourceerr);
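    // The per-gate source errors are summed into curr_sourceerr, which is both
    // written to back_deltas for the layer below and re-read (via the ni_ + nf_
    // offsets above) as the recurrent output error of the next-earlier timestep.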
    // Save states for use by the 2nd dimension only if needed.
    if (Is2D()) {
      CopyVector(ns_, curr_stateerr, stateerr[mod_t]);
      CopyVector(na_, curr_sourceerr, sourceerr[mod_t]);
    }
  } while (dest_index.Decrement());
#if DEBUG_DETAIL > 2
  for (int w = 0; w < WT_COUNT; ++w) {
    gate_errors_t[w].get()->PrintUnTransposed(10);
  }
#endif
  // Transposed copies of source_ and state_ speed up the outer-product sums below.
  NetworkScratch::GradientStore source_t, state_t;
  source_t.Init(na_, width, scratch);
  source_.Transpose(source_t.get());
  state_t.Init(ns_, width, scratch);
  state_.Transpose(state_t.get());
#ifdef _OPENMP
#  pragma omp parallel for num_threads(GFS) if (!Is2D())
#endif
  for (int w = 0; w < WT_COUNT; ++w) {
    if (w == GFS && !Is2D()) {
      continue;
    }
    gate_weights_[w].SumOuterTransposed(*gate_errors_t[w], *source_t, false);
  }
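  // Weight-gradient accumulation: for each gate, SumOuterTransposed computes in
  // effect dW += sum over t of gate_error(t) outer source(t), using the
  // transposed copies so the sum runs over contiguous memory. The GFS gate is
  // skipped when the network is not 2-D.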
  if (softmax_ != nullptr) {
    softmax_->FinishBackward(*softmax_errors_t);
  }
  return needs_to_backprop_;
}