// Excerpts from the LSTM network implementation (lstm.cpp).

#ifdef _OPENMP
#define PARALLEL_IF_OPENMP(__num_threads) \
  PRAGMA(omp parallel if (__num_threads > 1) num_threads(__num_threads)) { \
    PRAGMA(omp sections nowait) { \
      PRAGMA(omp section) {
#define SECTION_IF_OPENMP \
        } \
      PRAGMA(omp section) {
#define END_PARALLEL_IF_OPENMP \
        } \
      } /* end of sections */ \
    } /* end of parallel section */

// Define the portable PRAGMA macro.
#ifdef _MSC_VER  // Different _Pragma
#define PRAGMA(x) __pragma(x)
#else
#define PRAGMA(x) _Pragma(#x)
#endif  // _MSC_VER

#else  // _OPENMP
#define PARALLEL_IF_OPENMP(__num_threads)
#define SECTION_IF_OPENMP
#define END_PARALLEL_IF_OPENMP
#endif  // _OPENMP
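// Usage sketch (illustrative only; ComputeGateA/ComputeGateB are
// hypothetical functions, not part of this file):
//
//   PARALLEL_IF_OPENMP(2)
//     ComputeGateA();   // first omp section
//   SECTION_IF_OPENMP
//     ComputeGateB();   // second omp section, may run concurrently
//   END_PARALLEL_IF_OPENMP
//
// With _OPENMP defined, each part runs in its own omp section; without it,
// all three macros expand to nothing and the calls run sequentially.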
// LSTM::LSTM (constructor):
      is_2d_(two_dimensional),
      // ...
  if (two_dimensional) na_ += ns_;
  // ...
    tprintf("%d is invalid type of LSTM!\n", type);
// LSTM::OutputShape:
  if (softmax_ != NULL) return softmax_->OutputShape(result);
// LSTM::SetEnableTraining:
  for (int w = 0; w < WT_COUNT; ++w) {
    if (w == GFS && !Is2D()) continue;
    // ...
  }
// LSTM::InitWeights:
  for (int w = 0; w < WT_COUNT; ++w) {
    if (w == GFS && !Is2D()) continue;
    // ...
  }
  if (softmax_ != NULL) {
    num_weights_ += softmax_->InitWeights(range, randomizer);
  }

// LSTM::RemapOutputs:
  if (softmax_ != NULL) {
    // ...
  }
// LSTM::ConvertToInt:
  for (int w = 0; w < WT_COUNT; ++w) {
    if (w == GFS && !Is2D()) continue;
    gate_weights_[w].ConvertToInt();
  }
  if (softmax_ != NULL) {
    softmax_->ConvertToInt();
  }
// LSTM::DebugWeights:
  for (int w = 0; w < WT_COUNT; ++w) {
    if (w == GFS && !Is2D()) continue;
    // ...
  }
  if (softmax_ != NULL) {
    softmax_->DebugWeights();
  }
180 if (fp->
FWrite(&na_,
sizeof(na_), 1) != 1)
return false;
181 for (
int w = 0; w <
WT_COUNT; ++w) {
182 if (w ==
GFS && !
Is2D())
continue;
185 if (softmax_ != NULL && !softmax_->
Serialize(fp))
return false;
192 if (fp->
FReadEndian(&na_,
sizeof(na_), 1) != 1)
return false;
201 for (
int w = 0; w <
WT_COUNT; ++w) {
202 if (w ==
GFS && !
Is2D())
continue;
206 is_2d_ = na_ - nf_ ==
ni_ + 2 * ns_;
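// na_ was built as ni_ + nf_ + ns_ (+ ns_ again in 2-D mode), so after
// subtracting nf_, equality with ni_ + 2 * ns_ reveals whether the
// serialized network carried the extra 2-D recurrence.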
  // ...
  if (softmax_ == nullptr) return false;
// LSTM::Forward:
  input_width_ = input.Width();
  if (softmax_ != NULL)
    output->ResizeFloat(input, no_);
  // ...
  ResizeForward(input);
  // Temporary storage of forward computation for each gate.
  for (int i = 0; i < WT_COUNT; ++i) temp_lines[i].Init(ns_, scratch);
  // Single-timestep buffers for the current state and output.
  curr_state.Init(ns_, scratch);
  ZeroVector<double>(ns_, curr_state);
  curr_output.Init(ns_, scratch);
  ZeroVector<double>(ns_, curr_output);
  // Rotating buffers for the 2-D state and output, one strip wide.
  for (int i = 0; i < buf_width; ++i) {
    states[i].Init(ns_, scratch);
    ZeroVector<double>(ns_, states[i]);
    outputs[i].Init(ns_, scratch);
    ZeroVector<double>(ns_, outputs[i]);
  }
  // Used only if this is a softmax LSTM.
  if (softmax_ != NULL) {
    softmax_output.Init(no_, scratch);
    ZeroVector<double>(no_, softmax_output);
    int rounded_softmax_inputs = gate_weights_[CI].RoundInputs(ns_);
    // ...
    int_output.Resize2d(true, 1, rounded_softmax_inputs, scratch);
  }
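// RoundInputs presumably rounds ns_ up to the integer-SIMD register
// granularity (an assumption, not stated here), so int_output can be read
// in whole registers by the integer matrix code.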
  curr_input.Init(na_, scratch);
  // ... (for each timestep:)
  int t = src_index.t();
  // True if there is a valid old state for the 2nd dimension.
  bool valid_2d = Is2D();
  // ...
  // Index into the revolving 2-D buffers (outputs, states).
  int mod_t = Modulo(t, buf_width);
  // ...
  if (softmax_ != NULL) {
    // Feed back the previous timestep's softmax output as extra input.
    source_.WriteTimeStepPart(t, ni_, nf_, softmax_output);
  }
  // Matrix multiply the inputs with the source, one gate per section.
  PARALLEL_IF_OPENMP(GFS)
  // Cell inputs.
  gate_weights_[CI].MatrixDotVector(source_.i(t), temp_lines[CI]);
  FuncInplace<GFunc>(ns_, temp_lines[CI]);

  SECTION_IF_OPENMP
  // Input gates.
  gate_weights_[GI].MatrixDotVector(source_.i(t), temp_lines[GI]);
  FuncInplace<FFunc>(ns_, temp_lines[GI]);

  SECTION_IF_OPENMP
  // 1-D forget gates.
  gate_weights_[GF1].MatrixDotVector(source_.i(t), temp_lines[GF1]);
  FuncInplace<FFunc>(ns_, temp_lines[GF1]);

  // 2-D forget gates (2-D mode only).
  gate_weights_[GFS].MatrixDotVector(source_.i(t), temp_lines[GFS]);
  FuncInplace<FFunc>(ns_, temp_lines[GFS]);

  SECTION_IF_OPENMP
  // Output gates.
  gate_weights_[GO].MatrixDotVector(source_.i(t), temp_lines[GO]);
  FuncInplace<FFunc>(ns_, temp_lines[GO]);
  END_PARALLEL_IF_OPENMP
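// In standard LSTM notation (exposition only; this assumes GFunc/FFunc are
// the usual tanh/sigmoid squashers), the sections above compute, for the
// shared input vector x_t:
//   cell input    c_t  = GFunc(W_ci . x_t)
//   input gate    i_t  = FFunc(W_gi . x_t)
//   forget gates  f1_t = FFunc(W_gf1 . x_t),  fs_t = FFunc(W_gfs . x_t)
//   output gate   o_t  = FFunc(W_go . x_t)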
  // Max-pool the forget gates (in 2-D) instead of blindly adding.
  inT8* which_fg_col = which_fg_[t];
  memset(which_fg_col, 1, ns_ * sizeof(which_fg_col[0]));
  const double* stepped_state = states[mod_t];
  for (int i = 0; i < ns_; ++i) {
    if (temp_lines[GF1][i] < temp_lines[GFS][i]) {
      curr_state[i] = temp_lines[GFS][i] * stepped_state[i];
      which_fg_col[i] = 2;
    }
  }
  // ...
  FuncMultiply<HFunc>(curr_state, temp_lines[GO], ns_, curr_output);
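// Net effect (standard LSTM cell update, exposition only), with (*) the
// elementwise product:
//   state_t  = f_t (*) state_prev + i_t (*) c_t
//   output_t = o_t (*) HFunc(state_t)
// In 2-D mode the f_t (*) state_prev term is max-pooled per element: the
// 1-D (left) or 2-D (above) path with the larger forget-gate activation
// wins, and the winner is recorded in which_fg_ for the backward pass.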
  if (softmax_ != NULL) {
    // ... (run the softmax on curr_output and write its result instead)
  }
  // ...
  dest_index.Increment();
  // ...
  // At the end of a row, reset the 1-D part of the state and output.
  ZeroVector<double>(ns_, curr_state);
  ZeroVector<double>(ns_, curr_output);
// LSTM::Backward:
  outputerr.Init(ns_, scratch);
  // ...
  curr_stateerr.Init(ns_, scratch);
  curr_sourceerr.Init(na_, scratch);
  ZeroVector<double>(ns_, curr_stateerr);
  ZeroVector<double>(na_, curr_sourceerr);
  // ...
  for (int g = 0; g < WT_COUNT; ++g) gate_errors[g].Init(ns_, scratch);
  // Rotating buffers for the 2-D error state, one strip wide.
  for (int t = 0; t < buf_width; ++t) {
    stateerr[t].Init(ns_, scratch);
    sourceerr[t].Init(na_, scratch);
    ZeroVector<double>(ns_, stateerr[t]);
    ZeroVector<double>(na_, sourceerr[t]);
  }
  // Parallel-generated source errors, one per gate.
  for (int w = 0; w < WT_COUNT; ++w)
    sourceerr_temps[w].Init(na_, scratch);
  int width = input_width_;
  // Transposed gate errors, stored over all timesteps for the weight sum.
  for (int w = 0; w < WT_COUNT; ++w) {
    gate_errors_t[w].Init(ns_, width, scratch);
  }
  if (softmax_ != NULL) {
    softmax_errors.Init(no_, scratch);
    softmax_errors_t.Init(no_, width, scratch);
  }
  double state_clip = Is2D() ? 9.0 : 4.0;
  // Debug-only dump of the incoming deltas:
  fwd_deltas.Print(10);
  // ... (for each timestep, in reverse order:)
  int t = dest_index.t();
  // ...
  // Index into the revolving 2-D buffers (sourceerr, stateerr).
  int mod_t = Modulo(t, buf_width);
  // Zero the errors carried in the major direction at the end of every row.
  ZeroVector<double>(na_, curr_sourceerr);
  ZeroVector<double>(ns_, curr_stateerr);
  // Set up outputerr: for NT_LSTM_SUMMARY only the last column of a row
  // carries a delta; otherwise read it directly or via the softmax.
  if (type_ == NT_LSTM_SUMMARY) {
    // ...
    src_index.Decrement();
    // ...
    ZeroVector<double>(ns_, outputerr);
  } else if (softmax_ == NULL) {
    fwd_deltas.ReadTimeStep(t, outputerr);
  } else {
    softmax_->BackwardTimeStep(fwd_deltas, t, softmax_errors,
                               softmax_errors_t.get(), outputerr);
  }
  // Apply the 1-D forget gate to the existing state error.
  const float* next_node_gf1 = node_values_[GF1].f(t + 1);
  for (int i = 0; i < ns_; ++i) {
    curr_stateerr[i] *= next_node_gf1[i];
  }
  if (Is2D() && t + 1 < width) {
    // Errors flow back only through the forget gate that won the max-pool.
    for (int i = 0; i < ns_; ++i) {
      if (which_fg_[t + 1][i] != 1) curr_stateerr[i] = 0.0;
    }
    // ...
    const float* right_node_gfs = node_values_[GFS].f(down_pos);
    const double* right_stateerr = stateerr[mod_t];
    for (int i = 0; i < ns_; ++i) {
      if (which_fg_[down_pos][i] == 2) {
        curr_stateerr[i] += right_stateerr[i] * right_node_gfs[i];
      }
    }
  }
  // ...
  ClipVector<double>(ns_, -state_clip, state_clip, curr_stateerr);
  // Debug-only dump of the last few timesteps:
  if (t + 10 > width) {
    for (int i = 0; i < ns_; ++i)
      tprintf(" %g,%g,%g", curr_stateerr[i], outputerr[i],
              curr_sourceerr[ni_ + nf_ + i]);
  }
  // Cell inputs.
  node_values_[CI].FuncMultiply3<GPrime>(t, node_values_[GI], t,
                                         curr_stateerr, gate_errors[CI]);
  ClipVector(ns_, -kErrClip, kErrClip, gate_errors[CI].get());
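// i.e. dE/dc_t = GPrime(c_t) (*) i_t (*) dE/dstate_t, from differentiating
// state_t = ... + i_t (*) c_t (exposition only), then clipped to
// +/-kErrClip; the other gates below follow the same pattern with FPrime.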
  // Input gates.
  node_values_[GI].FuncMultiply3<FPrime>(t, node_values_[CI], t,
                                         curr_stateerr, gate_errors[GI]);
  ClipVector(ns_, -kErrClip, kErrClip, gate_errors[GI].get());
  // 1-D forget gates: only valid when there is a previous timestep.
  if (t > 0) {
    node_values_[GF1].FuncMultiply3<FPrime>(t, state_, t - 1, curr_stateerr,
                                            gate_errors[GF1]);
    ClipVector(ns_, -kErrClip, kErrClip, gate_errors[GF1].get());
    gate_weights_[GF1].VectorDotMatrix(gate_errors[GF1],
                                       sourceerr_temps[GF1]);
  } else {
    memset(gate_errors[GF1], 0, ns_ * sizeof(gate_errors[GF1][0]));
    memset(sourceerr_temps[GF1], 0, na_ * sizeof(*sourceerr_temps[GF1]));
  }
  // 2-D forget gates: only valid when there is a row above.
  if (up_pos >= 0) {
    node_values_[GFS].FuncMultiply3<FPrime>(t, state_, up_pos, curr_stateerr,
                                            gate_errors[GFS]);
    ClipVector(ns_, -kErrClip, kErrClip, gate_errors[GFS].get());
    gate_weights_[GFS].VectorDotMatrix(gate_errors[GFS],
                                       sourceerr_temps[GFS]);
  } else {
    memset(gate_errors[GFS], 0, ns_ * sizeof(gate_errors[GFS][0]));
    memset(sourceerr_temps[GFS], 0, na_ * sizeof(*sourceerr_temps[GFS]));
  }
  // Output gates.
  // ...
  ClipVector(ns_, -kErrClip, kErrClip, gate_errors[GO].get());
  SumVectors(na_, sourceerr_temps[CI], sourceerr_temps[GI],
             sourceerr_temps[GF1], sourceerr_temps[GO], sourceerr_temps[GFS],
             curr_sourceerr);
  // ...
  // Save the errors for use by the 2nd dimension.
  CopyVector(ns_, curr_stateerr, stateerr[mod_t]);
  CopyVector(na_, curr_sourceerr, sourceerr[mod_t]);
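// Every gate read the same na_-wide source vector, so the per-gate input
// deltas simply add to form the error passed back to the previous layer.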
  // Debug-only dump of the gate errors:
  for (int w = 0; w < WT_COUNT; ++w) {
    // ...
  }
  // Transposed copies of source_ and state_ speed up the weight-gradient sum.
  source_t.Init(na_, width, scratch);
  source_.Transpose(source_t.get());
  state_t.Init(ns_, width, scratch);
  state_.Transpose(state_t.get());
#ifdef _OPENMP
#pragma omp parallel for num_threads(GFS) if (!Is2D())
#endif  // _OPENMP
  for (int w = 0; w < WT_COUNT; ++w) {
    if (w == GFS && !Is2D()) continue;
    // Gate errors are the drivers for the weight errors.
    gate_weights_[w].SumOuterTransposed(*gate_errors_t[w], *source_t, false);
  }
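// Weight-gradient sketch (exposition only): for each gate w this
// accumulates dE/dW_w = sum over t of gate_error_w(t) * source(t)^T, the
// outer product of per-timestep gate deltas with the transposed inputs.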
  if (softmax_ != NULL) {
    softmax_->FinishBackward(*softmax_errors_t);
  }
// Updates the weights using the given learning rate, momentum, and adam_beta.
void LSTM::Update(float learning_rate, float momentum, float adam_beta,
                  int num_samples) {
  // ...
  for (int w = 0; w < WT_COUNT; ++w) {
    if (w == GFS && !Is2D()) continue;
    gate_weights_[w].Update(learning_rate, momentum, adam_beta, num_samples);
  }
  if (softmax_ != NULL) {
    softmax_->Update(learning_rate, momentum, adam_beta, num_samples);
  }
}
void LSTM::CountAlternators(const Network& other, double* same,
                            double* changed) const {
  // ...
  const LSTM* lstm = static_cast<const LSTM*>(&other);
  for (int w = 0; w < WT_COUNT; ++w) {
    if (w == GFS && !Is2D()) continue;
    gate_weights_[w].CountAlternators(lstm->gate_weights_[w], same, changed);
  }
  if (softmax_ != NULL) {
    softmax_->CountAlternators(*lstm->softmax_, same, changed);
  }
}
// LSTM::PrintW (debug dump of the weights):
  for (int w = 0; w < WT_COUNT; ++w) {
    if (w == GFS && !Is2D()) continue;
    tprintf("Gate %d, inputs\n", w);
    for (int i = 0; i < ni_; ++i) {
      // ...
      for (int s = 0; s < ns_; ++s)
        tprintf(" %g", gate_weights_[w].GetWeights(s)[i]);
    }
    tprintf("Gate %d, outputs\n", w);
    for (int i = ni_; i < ni_ + ns_; ++i) {
      // ...
      for (int s = 0; s < ns_; ++s)
        tprintf(" %g", gate_weights_[w].GetWeights(s)[i]);
    }
    // The extra last column (index na_) holds the bias weights.
    for (int s = 0; s < ns_; ++s)
      tprintf(" %g", gate_weights_[w].GetWeights(s)[na_]);
  }
// LSTM::PrintDW (debug dump of the weight deltas):
  for (int w = 0; w < WT_COUNT; ++w) {
    if (w == GFS && !Is2D()) continue;
    tprintf("Gate %d, inputs\n", w);
    for (int i = 0; i < ni_; ++i) {
      // ...
      for (int s = 0; s < ns_; ++s)
        tprintf(" %g", gate_weights_[w].GetDW(s, i));
    }
    tprintf("Gate %d, outputs\n", w);
    for (int i = ni_; i < ni_ + ns_; ++i) {
      // ...
      for (int s = 0; s < ns_; ++s)
        tprintf(" %g", gate_weights_[w].GetDW(s, i));
    }
    for (int s = 0; s < ns_; ++s)
      tprintf(" %g", gate_weights_[w].GetDW(s, na_));
  }
// Resizes forward data to cope with an input image of the given width.
void LSTM::ResizeForward(const NetworkIO& input) {
  int rounded_inputs = gate_weights_[CI].RoundInputs(na_);
  source_.Resize(input, rounded_inputs);
  // ...
  for (int w = 0; w < WT_COUNT; ++w) {
    if (w == GFS && !Is2D()) continue;
    // ...
  }
}