// Fragments of the WeightMatrix implementation: float/int8 weight storage,
// serialization, and training updates.

// log2 via change of base, for toolchains that lack std::log2.
return log(n) / log(2.0);
// Copies a double array into a float array element-wise.
static void DoubleToFloat(const GENERIC_2D_ARRAY<double> &src, GENERIC_2D_ARRAY<float> &dst) {
  const auto dim1 = src.dim1();
  const auto dim2 = src.dim2();
  dst.ResizeNoInit(dim1, dim2);
  for (int i = 0; i < dim1; ++i) {
    const auto *src_i = src[i];
    auto *dst_i = dst[i];
    for (int j = 0; j < dim2; ++j) {
      dst_i[j] = static_cast<float>(src_i[j]);
    }
  }
}
// Copies a float array into a double array element-wise.
static void FloatToDouble(const GENERIC_2D_ARRAY<float> &src, GENERIC_2D_ARRAY<double> &dst) {
  const auto dim1 = src.dim1();
  const auto dim2 = src.dim2();
  dst.ResizeNoInit(dim1, dim2);
  for (int i = 0; i < dim1; ++i) {
    const auto *src_i = src[i];
    auto *dst_i = dst[i];
    for (int j = 0; j < dim2; ++j) {
      dst_i[j] = static_cast<double>(src_i[j]);
    }
  }
}
static bool DeSerialize(TFile *fp, GENERIC_2D_ARRAY<TFloat> &tfloat_array) {
#ifdef FAST_FLOAT
  // On disk the data is always double; read it and narrow to float.
  GENERIC_2D_ARRAY<double> double_array;
  if (!double_array.DeSerialize(fp)) return false;
  DoubleToFloat(double_array, tfloat_array);
  return true;
#else
  return tfloat_array.DeSerialize(fp);
#endif
}
static bool Serialize(TFile *fp, const GENERIC_2D_ARRAY<TFloat> &tfloat_array) {
#ifdef FAST_FLOAT
  // Widen to double so the serialized form matches the double builds.
  GENERIC_2D_ARRAY<double> double_array;
  FloatToDouble(tfloat_array, double_array);
  return double_array.Serialize(fp);
#else
  return tfloat_array.Serialize(fp);
#endif
}
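// Net effect of the two wrappers above: the serialized weight format is always
// double, so float and double TFloat builds stay file-compatible; float builds
// simply pay a conversion through DoubleToFloat / FloatToDouble on load and
// save.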
// Computes v = W * u. If add_bias_fwd, the last column of each row is treated
// as a bias added after the dot product; if skip_bias_back, the last row of W
// is skipped entirely.
static inline void MatrixDotVectorInternal(const GENERIC_2D_ARRAY<TFloat> &w, bool add_bias_fwd,
                                           bool skip_bias_back, const TFloat *u, TFloat *v) {
  int num_results = w.dim1() - skip_bias_back;
  int extent = w.dim2() - add_bias_fwd;
  for (int i = 0; i < num_results; ++i) {
    const TFloat *wi = w[i];
    TFloat total = DotProduct(wi, u, extent);
    if (add_bias_fwd) {
      total += wi[extent];
    }
    v[i] = total;
  }
}
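// Calling convention, as seen at the call sites further down: the forward
// product (MatrixDotVector) passes (add_bias_fwd=true, skip_bias_back=false),
// so u is one element shorter than a row and the last column supplies the
// bias; the backward product (VectorDotMatrix) passes (false, true) on the
// transposed weights wf_t_, dropping the bias row from the result instead.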
// Stores the input transposed, so that the features of one sample are laid
// out with unit stride.
void TransposedArray::Transpose(const GENERIC_2D_ARRAY<TFloat> &input) {
  int width = input.dim1();
  int num_features = input.dim2();
  ResizeNoInit(num_features, width);
  for (int t = 0; t < width; ++t) {
    WriteStrided(t, input[t]);
  }
}
// WeightMatrix::InitWeightsFloat (fragment): if a randomizer is supplied, the
// float weights get signed random values of magnitude up to weight_range;
// otherwise the existing values are left untouched.
  if (randomizer != nullptr) {
    for (int i = 0; i < no; ++i) {
      for (int j = 0; j < ni; ++j) {
        wf_[i][j] = randomizer->SignedRand(weight_range);
      }
    }
  }
  use_adam_ = use_adam;
// WeightMatrix::RemapOutputs (fragment): rebuilds the output rows of wf_
// according to code_map. Row dest takes its weights from old row
// code_map[dest]; a negative entry gets the mean of all old rows instead.
  GENERIC_2D_ARRAY<TFloat> old_wf(wf_); // Copy before wf_ is resized below.
  int old_no = wf_.dim1();
  int new_no = code_map.size();
  int ni = wf_.dim2();
  std::vector<TFloat> means(ni, 0.0);
  // Accumulate, then normalize, the per-input mean over all old output rows.
  for (int c = 0; c < old_no; ++c) {
    const TFloat *weights = wf_[c];
    for (int i = 0; i < ni; ++i) {
      means[i] += weights[i];
    }
  }
  for (auto &mean : means) {
    mean /= old_no;
  }
  wf_.Resize(new_no, ni, 0.0);
  for (int dest = 0; dest < new_no; ++dest) {
    int src = code_map[dest];
    const TFloat *src_data = src >= 0 ? old_wf[src] : means.data();
    memcpy(wf_[dest], src_data, ni * sizeof(*src_data));
  }
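// A negative code_map entry means the new output has no counterpart among the
// old outputs; starting it from the mean old row rather than from zeros
// presumably gives such outputs a neutral, reasonably scaled starting point.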
// WeightMatrix::ConvertToInt (fragment): quantizes each output row of wf_ to
// int8, keeping one scale per row so the original magnitudes can be recovered.
  scales_.reserve(wi_.dim1());
  int dim2 = wi_.dim2();
  for (int t = 0; t < wi_.dim1(); ++t) {
    TFloat *f_line = wf_[t];
    int8_t *i_line = wi_[t];
    TFloat max_abs = 0.0;
    // Find the largest absolute weight in the row.
    for (int f = 0; f < dim2; ++f) {
      TFloat abs_val = fabs(f_line[f]);
      if (abs_val > max_abs) {
        max_abs = abs_val;
      }
    }
    TFloat scale = max_abs / INT8_MAX;
    scales_.push_back(scale / INT8_MAX);
    if (scale == 0.0) {
      scale = 1.0;
    }
    // Quantize: the largest weight in the row maps to +/-INT8_MAX.
    for (int f = 0; f < dim2; ++f) {
      i_line[f] = IntCastRounded(f_line[f] / scale);
    }
  }
  if (IntSimdMatrix::intSimdMatrix) {
    int32_t rounded_num_out;
    IntSimdMatrix::intSimdMatrix->Init(wi_, shaped_w_, rounded_num_out);
    scales_.resize(rounded_num_out);
  }
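// Worked example with illustrative numbers: if a row's largest |weight| is
// 0.5, then scale = 0.5 / 127, about 0.00394; a weight of 0.25 quantizes to
// round(0.25 / 0.00394) = 64, and 64 * 0.00394 recovers about 0.252, i.e. the
// original value to within rounding error. The extra division by INT8_MAX
// stored in scales_ is presumably there to fold the dequantization of the
// int8 input vector into the same per-row factor used by the SIMD kernels.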
// WeightMatrix::InitBackward (fragment): allocates the training buffers,
// sized to whichever weight matrix (int8 or float) is active.
  int no = int_mode_ ? wi_.dim1() : wf_.dim1();
  int ni = int_mode_ ? wi_.dim2() : wf_.dim2();
  dw_.Resize(no, ni, 0.0);
  updates_.Resize(no, ni, 0.0);
  if (use_adam_) {
    dw_sq_sum_.Resize(no, ni, 0.0);
  }
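// Buffer roles, based on their use further down: dw_ receives the raw gradient
// (SumOuterTransposed), updates_ accumulates the momentum / first-moment term,
// and dw_sq_sum_ holds the Adam second-moment (squared-gradient) accumulator.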
// WeightMatrix::Serialize (fragment, int mode): the per-row scales are written
// as doubles, with the in-memory 1/INT8_MAX factor removed first.
  uint32_t size = scales_.size();
  if (!fp->Serialize(&size)) return false;
  for (auto scale : scales_) {
    double value = scale * INT8_MAX;
    if (!fp->Serialize(&value)) return false;
  }
// WeightMatrix::DeSerialize (fragment, int mode): the scales come back as
// doubles and the in-memory 1/INT8_MAX factor is re-applied on load.
#ifdef FAST_FLOAT
  scales_.reserve(size);
  for (auto n = size; n > 0; n--) {
    double val;
    if (!fp->DeSerialize(&val)) return false;
    scales_.push_back(val / INT8_MAX);
  }
#else
  scales_.resize(size);
  if (!fp->DeSerialize(&scales_[0], size)) return false;
  for (auto &scale : scales_) {
    scale /= INT8_MAX;
  }
#endif
  // If a SIMD kernel is available, repack wi_ and pad the scales to the
  // kernel's rounded output count.
  if (IntSimdMatrix::intSimdMatrix) {
    int32_t rounded_num_out;
    IntSimdMatrix::intSimdMatrix->Init(wi_, shaped_w_, rounded_num_out);
    scales_.resize(rounded_num_out);
  }
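// Either way the stored scale ends up divided by INT8_MAX, matching what
// ConvertToInt keeps in memory, while the file itself always holds the plain
// per-row quantization scale as a double.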
// WeightMatrix::DeSerializeOld (fragment): reads the legacy float-based
// format. Old int-mode scales arrive as plain floats and are copied across;
// old float-mode weights and training updates arrive as float arrays and are
// converted via FloatToDouble. (float_array here is a GENERIC_2D_ARRAY<float>
// read from fp in the elided code.)
  std::vector<float> old_scales;
  if (!fp->DeSerialize(old_scales)) return false;
  scales_.reserve(old_scales.size());
  for (float old_scale : old_scales) {
    scales_.push_back(old_scale);
  }
  // ...
  FloatToDouble(float_array, wf_);
  // ...
  FloatToDouble(float_array, updates_);
// WeightMatrix::MatrixDotVector (float mode): v = W * u with the implicit
// bias column.
  MatrixDotVectorInternal(wf_, true, false, u, v);
// WeightMatrix::MultiplyAccumulate (fragment): adds the elementwise product of
// the single weight row and v into inout.
  assert(wf_.dim1() == 1);
  int n = wf_.dim2();
  const TFloat *u = wf_[0];
  for (int i = 0; i < n; ++i) {
    inout[i] += u[i] * v[i];
  }
// WeightMatrix::VectorDotMatrix: v = u * W via the transposed weights,
// skipping the bias row.
  MatrixDotVectorInternal(wf_t_, false, true, u, v);
// WeightMatrix::SumOuterTransposed (fragment): accumulates the outer product
// of the output deltas u and the (transposed) inputs v into dw_, one row per
// output. The last column of each dw_ row gets the bias gradient: the plain
// sum of that output's deltas, since the implicit bias input is 1.
  int num_outputs = dw_.dim1();
  assert(u.dim1() == num_outputs);
  int num_inputs = dw_.dim2() - 1;
  int num_samples = u.dim2();
  assert(v.dim1() == num_inputs);
#ifdef _OPENMP
#  pragma omp parallel for num_threads(4) if (in_parallel)
#endif
  for (int i = 0; i < num_outputs; ++i) {
    TFloat *dwi = dw_[i];
    const TFloat *ui = u[i];
    for (int j = 0; j < num_inputs; ++j) {
      dwi[j] = DotProduct(ui, v[j], num_samples);
    }
    TFloat total = 0.0;
    for (int k = 0; k < num_samples; ++k) {
      total += ui[k];
    }
    dwi[num_inputs] = total;
  }
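// Both operands are TransposedArray views, which presumably makes the
// per-sample elements contiguous so each DotProduct above runs over
// unit-stride memory; the OpenMP pragma parallelizes over independent output
// rows.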
// WeightMatrix::Update (fragment): one optimizer step on wf_ from the
// accumulated gradient dw_.
  if (use_adam_ && momentum > 0.0f && num_samples > 0 &&
      num_samples < kAdamCorrectionIterations) {
    learning_rate *= sqrt(1.0f - pow(adam_beta, num_samples));
    learning_rate /= 1.0f - pow(momentum, num_samples);
  }
  if (use_adam_ && num_samples > 0 && momentum > 0.0f) {
    dw_sq_sum_.SumSquares(dw_, adam_beta);
    dw_ *= learning_rate * (1.0f - momentum);
    updates_ *= momentum;
    updates_ += dw_;
    wf_.AdamUpdate(updates_, dw_sq_sum_, learning_rate * kAdamEpsilon);
  } else {
    // Plain (momentum) gradient descent.
    dw_ *= learning_rate;
    updates_ += dw_;
    if (momentum > 0.0f) {
      wf_ += updates_;
    }
    if (momentum >= 0.0f) {
      updates_ *= momentum;
    }
  }
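// Reading of the correction above: with beta1 = momentum and beta2 = adam_beta
// the effective step is lr_t = lr * sqrt(1 - beta2^t) / (1 - beta1^t), the
// standard Adam bias correction for zero-initialized moment estimates, with
// t = num_samples. Past kAdamCorrectionIterations the factor is presumably
// close enough to 1 that it is skipped.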
// WeightMatrix::AddDeltas (fragment): accumulates another matrix's gradient.
  assert(dw_.dim1() == other.dw_.dim1());
  assert(dw_.dim2() == other.dw_.dim2());
  dw_ += other.dw_;
// WeightMatrix::CountAlternators (fragment): compares this matrix's updates
// with another copy's, accumulating agreement into *same and disagreement into
// *changed according to the sign of the elementwise product.
  int num_outputs = updates_.dim1();
  int num_inputs = updates_.dim2();
  assert(num_outputs == other.updates_.dim1());
  assert(num_inputs == other.updates_.dim2());
  for (int i = 0; i < num_outputs; ++i) {
    const TFloat *this_i = updates_[i];
    const TFloat *other_i = other.updates_[i];
    for (int j = 0; j < num_inputs; ++j) {
      TFloat product = this_i[j] * other_i[j];
      if (product < 0.0) {
        *changed -= product;
      } else {
        *same += product;
      }
    }
  }
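// Because the magnitudes of the products are accumulated rather than merely
// counted, *same / (*same + *changed) gives a magnitude-weighted measure of
// how often two copies of this matrix (e.g. separately trained replicas) move
// their weights in the same direction.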
// Buckets a weight by magnitude: bucket k holds weights of magnitude near
// 2^-k, with the last bucket catching zeros and anything smaller.
static void HistogramWeight(TFloat weight, STATS *histogram) {
  int bucket = kHistogramBuckets - 1;
  if (weight != 0.0) {
    TFloat logval = -log2(fabs(weight));
    bucket = ClipToRange(IntCastRounded(logval), 0, kHistogramBuckets - 1);
  }
  histogram->add(bucket, 1);
}
// WeightMatrix::Debug2D (fragment, int mode): histogram the dequantized
// weights, reapplying each row's scale.
  for (int i = 0; i < wi_.dim1(); ++i) {
    for (int j = 0; j < wi_.dim2(); ++j) {
      HistogramWeight(wi_[i][j] * scales_[i], &histogram);
    }
  }
// Float mode: histogram the weights directly.
  for (int i = 0; i < wf_.dim1(); ++i) {
    for (int j = 0; j < wf_.dim2(); ++j) {
      HistogramWeight(wf_[i][j], &histogram);
    }
  }
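// Taken together with HistogramWeight above, Debug2D presumably prints msg
// followed by a histogram whose bucket k counts weights of magnitude near
// 2^-k, a quick way to spot saturated (bucket 0) or vanishing (last bucket)
// weights, including quantization effects in int mode.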