#if (defined(_MSC_VER) && _MSC_VER < 1900) || defined(ANDROID)
// Pre-VS2015 MSVC and older Android toolchains lack log2(); emulate it.
static inline double log2(double n) {
  return log(n) / log(2.0);
}
#endif
// Copies the whole input transposed, converted to double, into *this.
void TransposedArray::Transpose(const GENERIC_2D_ARRAY<double>& input) {
  int width = input.dim1();
  int num_features = input.dim2();
  ResizeNoInit(num_features, width);
  for (int t = 0; t < width; ++t) WriteStrided(t, input[t]);
}
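// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): what the transpose
// buys. After transposing a width x num_features array, each feature's values
// across all timesteps are contiguous, which is the layout the per-feature
// dot products in SumOuterTransposed below want. Names here are hypothetical.
#include <vector>

static std::vector<double> TransposeCopy(const std::vector<double>& in,
                                         int width, int num_features) {
  // in is row-major [t][f]; out is row-major [f][t].
  std::vector<double> out(in.size());
  for (int t = 0; t < width; ++t)
    for (int f = 0; f < num_features; ++f)
      out[f * width + t] = in[t * num_features + f];
  return out;
}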
// Sets up the network for training. Initializes the weights with signed
// random values of magnitude up to weight_range when a randomizer is given.
int WeightMatrix::InitWeightsFloat(int no, int ni, bool use_adam,
                                   float weight_range, TRand* randomizer) {
  int_mode_ = false;
  wf_.Resize(no, ni, 0.0);
  if (randomizer != nullptr) {
    for (int i = 0; i < no; ++i) {
      for (int j = 0; j < ni; ++j) {
        wf_[i][j] = randomizer->SignedRand(weight_range);
      }
    }
  }
  use_adam_ = use_adam;
  InitBackward();
  return ni * no;
}
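// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): a stand-in for
// TRand::SignedRand using <random>, assuming SignedRand(range) returns a
// uniform value in [-range, range]. The function name is hypothetical.
#include <random>

static double SignedUniform(std::mt19937* rng, double range) {
  std::uniform_real_distribution<double> dist(-range, range);
  return dist(*rng);
}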
// Changes the number of outputs to the size of code_map. Each new output
// copies the old weights from code_map[dest] where non-negative, and takes
// the mean over all old outputs where negative.
int WeightMatrix::RemapOutputs(const std::vector<int>& code_map) {
  GENERIC_2D_ARRAY<double> old_wf(wf_);
  int old_no = wf_.dim1();
  int new_no = code_map.size();
  int ni = wf_.dim2();
  std::vector<double> means(ni, 0.0);
  for (int c = 0; c < old_no; ++c) {
    const double* weights = wf_[c];
    for (int i = 0; i < ni; ++i) means[i] += weights[i];
  }
  for (double& mean : means) mean /= old_no;
  wf_.ResizeNoInit(new_no, ni);
  InitBackward();
  for (int dest = 0; dest < new_no; ++dest) {
    int src = code_map[dest];
    const double* src_data = src >= 0 ? old_wf[src] : means.data();
    memcpy(wf_[dest], src_data, ni * sizeof(*src_data));
  }
  return ni * new_no;
}
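// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): RemapOutputs' code_map
// semantics on plain std::vectors. Row dest of the result copies old row
// code_map[dest]; a negative entry selects the column-wise mean of all old
// rows. The function name is hypothetical.
#include <vector>

static std::vector<std::vector<double>> RemapRows(
    const std::vector<std::vector<double>>& old_rows,
    const std::vector<int>& code_map) {
  const size_t ni = old_rows.empty() ? 0 : old_rows[0].size();
  std::vector<double> means(ni, 0.0);
  for (const auto& row : old_rows)
    for (size_t i = 0; i < ni; ++i) means[i] += row[i];
  for (double& m : means) m /= old_rows.size();
  std::vector<std::vector<double>> new_rows(code_map.size());
  for (size_t dest = 0; dest < code_map.size(); ++dest)
    new_rows[dest] = code_map[dest] >= 0 ? old_rows[code_map[dest]] : means;
  return new_rows;
}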
// Converts a float network to an int network. Each set of input weights that
// corresponds to a single output is converted independently: the absolute
// maximum is scaled to INT8_MAX and the scale factor is kept per row.
void WeightMatrix::ConvertToInt() {
  wi_.ResizeNoInit(wf_.dim1(), wf_.dim2());
  scales_.init_to_size(wi_.dim1(), 0.0);
  int dim2 = wi_.dim2();
  for (int t = 0; t < wi_.dim1(); ++t) {
    double* f_line = wf_[t];
    int8_t* i_line = wi_[t];
    double max_abs = 0.0;
    for (int f = 0; f < dim2; ++f) {
      double abs_val = fabs(f_line[f]);
      if (abs_val > max_abs) max_abs = abs_val;
    }
    double scale = max_abs / INT8_MAX;
    scales_[t] = scale;
    if (scale == 0.0) scale = 1.0;
    for (int f = 0; f < dim2; ++f) {
      i_line[f] = IntCastRounded(f_line[f] / scale);
    }
  }
  // The float weights are no longer needed; shrink them to a placeholder.
  wf_.Resize(1, 1, 0.0);
  int_mode_ = true;
  multiplier_.reset(IntSimdMatrix::GetFastestMultiplier());
  if (multiplier_ != nullptr) multiplier_->Init(wi_);
}
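// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the per-row int8
// quantization ConvertToInt applies, isolated for one row. Dequantizing as
// q * scale recovers each weight to within scale / 2. The function name is
// hypothetical.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

static double QuantizeRow(const std::vector<double>& w,
                          std::vector<int8_t>* q) {
  double max_abs = 0.0;
  for (double x : w) max_abs = std::max(max_abs, std::fabs(x));
  double scale = max_abs / 127;   // 127 == INT8_MAX
  if (scale == 0.0) scale = 1.0;  // All-zero row: any scale round-trips.
  q->resize(w.size());
  for (size_t i = 0; i < w.size(); ++i)
    (*q)[i] = static_cast<int8_t>(std::lround(w[i] / scale));
  return scale;  // Kept per row so int products can be rescaled to double.
}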
// Allocates any needed memory for running Backward, and zeroes the deltas,
// thus eliminating any existing momentum.
void WeightMatrix::InitBackward() {
  int no = int_mode_ ? wi_.dim1() : wf_.dim1();
  int ni = int_mode_ ? wi_.dim2() : wf_.dim2();
  dw_.Resize(no, ni, 0.0);
  updates_.Resize(no, ni, 0.0);
  wf_t_.Transpose(wf_);
  if (use_adam_) dw_sq_sum_.Resize(no, ni, 0.0);
}
bool WeightMatrix::Serialize(bool training, TFile* fp) const {
  // Write a heading byte so DeSerialize can tell int8 weights, Adam state
  // and the new double format apart from the legacy float format.
  uint8_t mode =
      (int_mode_ ? kInt8Flag : 0) | (use_adam_ ? kAdamFlag : 0) | kDoubleFlag;
  if (fp->FWrite(&mode, sizeof(mode), 1) != 1) return false;
  if (int_mode_) {
    if (!wi_.Serialize(fp)) return false;
    if (!scales_.Serialize(fp)) return false;
  } else {
    if (!wf_.Serialize(fp)) return false;
    if (training && !updates_.Serialize(fp)) return false;
    if (training && use_adam_ && !dw_sq_sum_.Serialize(fp)) return false;
  }
  return true;
}
bool WeightMatrix::DeSerialize(bool training, TFile* fp) {
  uint8_t mode;
  if (fp->FRead(&mode, sizeof(mode), 1) != 1) return false;
  int_mode_ = (mode & kInt8Flag) != 0;
  use_adam_ = (mode & kAdamFlag) != 0;
  if ((mode & kDoubleFlag) == 0) return DeSerializeOld(training, fp);
  if (int_mode_) {
    if (!wi_.DeSerialize(fp)) return false;
    if (!scales_.DeSerialize(fp)) return false;
    multiplier_.reset(IntSimdMatrix::GetFastestMultiplier());
    if (multiplier_ != nullptr) multiplier_->Init(wi_);
  } else {
    if (!wf_.DeSerialize(fp)) return false;
    if (training) {
      InitBackward();
      if (!updates_.DeSerialize(fp)) return false;
      if (use_adam_ && !dw_sq_sum_.DeSerialize(fp)) return false;
    }
  }
  return true;
}
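// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): round-tripping the
// heading byte. The flag values here are hypothetical; only the roles (int8
// weights, Adam state, double format) come from the code above. A clear
// double bit routes the reader to DeSerializeOld.
#include <cassert>
#include <cstdint>

namespace {
constexpr uint8_t kFlagInt8 = 1, kFlagAdam = 2, kFlagDouble = 4;  // hypothetical
}

static uint8_t EncodeMode(bool int8_weights, bool adam) {
  return (int8_weights ? kFlagInt8 : 0) | (adam ? kFlagAdam : 0) | kFlagDouble;
}

static void CheckRoundTrip() {
  uint8_t mode = EncodeMode(/*int8_weights=*/true, /*adam=*/false);
  assert((mode & kFlagInt8) != 0);
  assert((mode & kFlagAdam) == 0);
  assert((mode & kFlagDouble) != 0);  // New format; legacy files lack this bit.
}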
// Excerpt from DeSerializeOld, which reads the legacy all-float format for
// backward compatibility:
if (!wi_.DeSerialize(fp)) return false;
// Legacy scales were serialized as float; widen them to double.
for (int i = 0; i < old_scales.size(); ++i) scales_[i] = old_scales[i];
// Float weight and update matrices are converted to double on load.
FloatToDouble(float_array, &wf_);
FloatToDouble(float_array, &updates_);
// Computes matrix.vector v = Wu; u gets an implicit trailing 1 for the bias.
void WeightMatrix::MatrixDotVector(const double* u, double* v) const {
  MatrixDotVectorInternal(wf_, true, false, u, v);
}

void WeightMatrix::MatrixDotVector(const int8_t* u, double* v) const {
  multiplier_->MatrixDotVector(wi_, scales_, u, v);
}

// Adds the component-wise products of *this[0] and v to inout (peep weights).
void WeightMatrix::MultiplyAccumulate(const double* v, double* inout) {
  int n = wf_.dim2();
  const double* u = wf_[0];
  for (int i = 0; i < n; ++i) {
    inout[i] += u[i] * v[i];
  }
}

// Computes vector.matrix v = uW, skipping the bias row of W.
void WeightMatrix::VectorDotMatrix(const double* u, double* v) const {
  MatrixDotVectorInternal(wf_t_, false, true, u, v);
}
// Computes dw_ = u * v^T: the weight gradient as the outer product of the
// transposed deltas u and transposed inputs v, summed over samples.
void WeightMatrix::SumOuterTransposed(const TransposedArray& u,
                                      const TransposedArray& v,
                                      bool in_parallel) {
  int num_outputs = dw_.dim1();
  // v lacks the trailing bias element, which is an implicit 1 per sample.
  int num_inputs = dw_.dim2() - 1;
  int num_samples = u.dim2();
#pragma omp parallel for num_threads(4) if (in_parallel)
  for (int i = 0; i < num_outputs; ++i) {
    double* dwi = dw_[i];
    const double* ui = u[i];
    for (int j = 0; j < num_inputs; ++j) {
      dwi[j] = DotProduct(ui, v[j], num_samples);
    }
    // The bias gradient is just the sum of the deltas, since its input is 1.
    double total = 0.0;
    for (int k = 0; k < num_samples; ++k) total += ui[k];
    dwi[num_inputs] = total;
  }
}
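// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the sum the loop above
// computes, written as plain math code: dw[i][j] = sum_k u[i][k] * v[j][k],
// with the bias column dw[i][num_inputs] = sum_k u[i][k] * 1. Keeping u and v
// transposed makes every inner sum run over contiguous memory. The name is
// hypothetical.
#include <vector>

static double GradEntry(const std::vector<double>& u_i,    // deltas per sample
                        const std::vector<double>& v_j) {  // inputs per sample
  double total = 0.0;
  for (size_t k = 0; k < u_i.size(); ++k) total += u_i[k] * v_j[k];
  return total;
}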
// Updates the weights using the given learning rate, momentum and adam_beta.
// num_samples drives the Adam bias-correction factor.
void WeightMatrix::Update(double learning_rate, double momentum,
                          double adam_beta, int num_samples) {
  if (use_adam_ && num_samples > 0 && num_samples < kAdamCorrectionIterations) {
    learning_rate *= sqrt(1.0 - pow(adam_beta, num_samples));
    learning_rate /= 1.0 - pow(momentum, num_samples);
  }
  if (use_adam_ && num_samples > 0 && momentum > 0.0) {
    dw_sq_sum_.SumSquares(dw_, adam_beta);
    dw_ *= learning_rate * (1.0 - momentum);
    updates_ *= momentum;
    updates_ += dw_;
    wf_.AdamUpdate(updates_, dw_sq_sum_, learning_rate * kAdamEpsilon);
  } else {
    dw_ *= learning_rate;
    updates_ += dw_;
    if (momentum > 0.0) wf_ += updates_;
    if (momentum >= 0.0) updates_ *= momentum;
  }
  wf_t_.Transpose(wf_);
}
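// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the textbook Adam step
// the branch above approximates, for one scalar weight. The code above folds
// the learning rate and (1 - momentum) into updates_ before AdamUpdate and
// applies the sqrt/pow bias correction to learning_rate instead; the epsilon
// value and names here are hypothetical.
#include <cmath>

static double AdamStep(double w, double grad, double* m, double* v,
                       double lr, double beta1, double beta2) {
  *m = beta1 * *m + (1.0 - beta1) * grad;         // 1st-moment EMA (momentum)
  *v = beta2 * *v + (1.0 - beta2) * grad * grad;  // 2nd-moment EMA (SumSquares)
  const double kEps = 1e-8;  // hypothetical; plays the role of kAdamEpsilon
  return w + lr * *m / (std::sqrt(*v) + kEps);  // grad already points downhill
}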
// Sums the products of weight updates in *this and other, splitting into
// positive (same direction) and negative (different direction) products.
void WeightMatrix::CountAlternators(const WeightMatrix& other, double* same,
                                    double* changed) const {
  int num_outputs = updates_.dim1();
  int num_inputs = updates_.dim2();
  for (int i = 0; i < num_outputs; ++i) {
    const double* this_i = updates_[i];
    const double* other_i = other.updates_[i];
    for (int j = 0; j < num_inputs; ++j) {
      double product = this_i[j] * other_i[j];
      if (product < 0.0)
        *changed -= product;
      else
        *same += product;
    }
  }
}
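// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the per-weight sign
// convention above in isolation. Agreeing update signs add |a*b| to *same;
// opposing signs add |a*b| to *changed, since negating a negative product
// makes it positive. The function name is hypothetical.
static void CountOne(double a, double b, double* same, double* changed) {
  double product = a * b;
  if (product < 0.0)
    *changed -= product;  // product < 0, so this adds |product|.
  else
    *same += product;
}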
// Helper computes an integer histogram bucket for a weight and adds it
// to the histogram.
const int kHistogramBuckets = 16;
static void HistogramWeight(double weight, STATS* histogram) {
  int bucket = kHistogramBuckets - 1;
  if (weight != 0.0) {
    double logval = -log2(fabs(weight));
    bucket = ClipToRange(IntCastRounded(logval), 0, kHistogramBuckets - 1);
  }
  histogram->add(bucket, 1);
}
void WeightMatrix::Debug2D(const char* msg) {
  STATS histogram(0, kHistogramBuckets);
  if (int_mode_) {
    for (int i = 0; i < wi_.dim1(); ++i) {
      for (int j = 0; j < wi_.dim2(); ++j) {
        HistogramWeight(wi_[i][j] * scales_[i], &histogram);
      }
    }
  } else {
    for (int i = 0; i < wf_.dim1(); ++i) {
      for (int j = 0; j < wf_.dim2(); ++j) {
        HistogramWeight(wf_[i][j], &histogram);
      }
    }
  }
  tprintf("%s\n", msg);
  histogram.print();
}
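// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the bucketing rule in
// isolation. Bucket b collects weights of magnitude about 2^-b, so e.g.
// |w| = 0.25 lands in bucket 2, |w| >= 1 clips to bucket 0, and exact zeros
// fall in the last bucket. The function name is hypothetical.
#include <algorithm>
#include <cmath>

static int BucketOf(double weight, int num_buckets) {
  if (weight == 0.0) return num_buckets - 1;
  int b = static_cast<int>(std::lround(-std::log2(std::fabs(weight))));
  return std::min(std::max(b, 0), num_buckets - 1);
}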
// Computes and returns the dot product of the two n-vectors u and v.
double WeightMatrix::DotProduct(const double* u, const double* v, int n) {
  double total = 0.0;
  for (int k = 0; k < n; ++k) total += u[k] * v[k];
  return total;
}

// Converts an array of float to the corresponding array of double.
void WeightMatrix::FloatToDouble(const GENERIC_2D_ARRAY<float>& wf,
                                 GENERIC_2D_ARRAY<double>* wd) {
  int dim1 = wf.dim1();
  int dim2 = wf.dim2();
  wd->ResizeNoInit(dim1, dim2);
  for (int i = 0; i < dim1; ++i) {
    const float* wfi = wf[i];
    double* wdi = (*wd)[i];
    for (int j = 0; j < dim2; ++j) wdi[j] = static_cast<double>(wfi[j]);
  }
}
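// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): why this scalar
// DotProduct, DotProductSSE and DotProductAVX can return slightly different
// results for the same vectors. Floating-point addition is not associative,
// and the SIMD variants accumulate lane-wise partial sums in a different
// order:
#include <cstdio>

int main() {
  double a = 1e16, b = -1e16, c = 1.0;
  std::printf("%g vs %g\n", (a + b) + c, a + (b + c));  // prints: 1 vs 0
  return 0;
}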
// Computes matrix.vector v = Wu.
// u is of size W.dim2() - add_bias_fwd and the output v is of size
// W.dim1() - skip_bias_back.
// If add_bias_fwd, u is imagined to have an extra element at the end with
// value 1, to implement the bias weight.
// If skip_bias_back, the last row of w is skipped.
static void MatrixDotVectorInternal(const GENERIC_2D_ARRAY<double>& w,
                                    bool add_bias_fwd, bool skip_bias_back,
                                    const double* u, double* v) {
  int num_results = w.dim1() - skip_bias_back;
  int extent = w.dim2() - add_bias_fwd;
  for (int i = 0; i < num_results; ++i) {
    const double* wi = w[i];
    double total = DotProduct(wi, u, extent);
    if (add_bias_fwd) total += wi[extent];  // The bias value.
    v[i] = total;
  }
}
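// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the add_bias_fwd
// convention in isolation. A weight row stores ni weights plus a trailing
// bias, and the input behaves as if it had a trailing 1, so a row
// [w0 w1 b] applied to [u0 u1] yields w0*u0 + w1*u1 + b. The function name
// is hypothetical.
#include <vector>

static std::vector<double> ApplyWithBias(
    const std::vector<std::vector<double>>& w,  // each row: ni weights + bias
    const std::vector<double>& u) {             // ni inputs
  std::vector<double> v(w.size());
  for (size_t i = 0; i < w.size(); ++i) {
    double total = w[i].back();  // bias, the weight of the implicit input 1
    for (size_t j = 0; j < u.size(); ++j) total += w[i][j] * u[j];
    v[i] = total;
  }
  return v;
}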