// Copies the whole input, transposed, into *this.
void TransposedArray::Transpose(const GENERIC_2D_ARRAY<double>& input) {
  int width = input.dim1();
  int num_features = input.dim2();
  ResizeNoInit(num_features, width);
  for (int t = 0; t < width; ++t) WriteStrided(t, input[t]);
}
// Sets up the network for training. Initializes the weights to random values
// in [-weight_range, weight_range] using the given randomizer, if non-NULL.
int WeightMatrix::InitWeightsFloat(int no, int ni, bool use_adam,
                                   float weight_range, TRand* randomizer) {
  int_mode_ = false;
  wf_.Resize(no, ni, 0.0);
  if (randomizer != NULL) {
    for (int i = 0; i < no; ++i) {
      for (int j = 0; j < ni; ++j) {
        wf_[i][j] = randomizer->SignedRand(weight_range);
      }
    }
  }
  use_adam_ = use_adam;
  InitBackward();
  return ni * no;
}
// Changes the number of outputs to the size of the given code_map, copying
// the old weight matrix entries for each output from code_map[output] where
// non-negative, and using the mean of the existing weights for all outputs
// with a negative code_map entry. Returns the new number of weights.
int WeightMatrix::RemapOutputs(const std::vector<int>& code_map) {
  GENERIC_2D_ARRAY<double> old_wf(wf_);
  int old_no = wf_.dim1();
  int new_no = code_map.size();
  int ni = wf_.dim2();
  std::vector<double> means(ni, 0.0);
  for (int c = 0; c < old_no; ++c) {
    const double* weights = wf_[c];
    for (int i = 0; i < ni; ++i) means[i] += weights[i];
  }
  for (double& mean : means) mean /= old_no;
  wf_.ResizeNoInit(new_no, ni);
  InitBackward();
  for (int dest = 0; dest < new_no; ++dest) {
    int src = code_map[dest];
    const double* src_data = src >= 0 ? old_wf[src] : means.data();
    memcpy(wf_[dest], src_data, ni * sizeof(*src_data));
  }
  return ni * new_no;
}
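// Illustrative sketch, not part of weightmatrix.cpp: the code_map contract
// that RemapOutputs implements, shown on plain std::vector types. Entry dest
// holds the old output row to copy, or -1 to fall back to the mean row.
// RemapRows and all names below are hypothetical.
#include <vector>

static void RemapRows(const std::vector<std::vector<double>>& old_rows,
                      const std::vector<double>& means,
                      const std::vector<int>& code_map,
                      std::vector<std::vector<double>>* new_rows) {
  new_rows->resize(code_map.size());
  for (size_t dest = 0; dest < code_map.size(); ++dest) {
    int src = code_map[dest];
    (*new_rows)[dest] = src >= 0 ? old_rows[src] : means;
  }
}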
// Converts a float network to an int network. Each set of input weights that
// corresponds to a single output weight is converted independently: compute
// the max absolute value of the weight set, scale so the max absolute value
// becomes MAX_INT8, round to integer, and store a multiplicative scale factor
// that will reproduce the original value, subject to rounding errors.
void WeightMatrix::ConvertToInt() {
  wi_.ResizeNoInit(wf_.dim1(), wf_.dim2());
  scales_.init_to_size(wi_.dim1(), 0.0);
  int dim2 = wi_.dim2();
  for (int t = 0; t < wi_.dim1(); ++t) {
    double* f_line = wf_[t];
    inT8* i_line = wi_[t];
    double max_abs = 0.0;
    for (int f = 0; f < dim2; ++f) {
      double abs_val = fabs(f_line[f]);
      if (abs_val > max_abs) max_abs = abs_val;
    }
    double scale = max_abs / MAX_INT8;
    scales_[t] = scale;
    if (scale == 0.0) scale = 1.0;
    for (int f = 0; f < dim2; ++f) {
      i_line[f] = IntCastRounded(f_line[f] / scale);
    }
  }
  wf_.Resize(1, 1, 0.0);  // The double weights are no longer needed.
  int_mode_ = true;
  if (multiplier_ != nullptr) multiplier_->Init(wi_);
}
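// Illustrative sketch, not part of weightmatrix.cpp: the per-row quantization
// round-trip performed above, on std types. Each weight w becomes
// round(w / scale) with scale = max|w| / 127 (MAX_INT8), so w is recovered as
// quantized[i] * scale up to rounding error. QuantizeRow is hypothetical.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

static double QuantizeRow(const std::vector<double>& row,
                          std::vector<int8_t>* quantized) {
  double max_abs = 0.0;
  for (double w : row) max_abs = std::max(max_abs, std::fabs(w));
  double scale = max_abs / 127;
  double divisor = scale == 0.0 ? 1.0 : scale;  // Avoid dividing by zero.
  quantized->clear();
  for (double w : row)
    quantized->push_back(static_cast<int8_t>(std::lround(w / divisor)));
  return scale;
}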
// Allocates any needed memory for running Backward, and zeroes the deltas,
// thus eliminating any existing momentum.
void WeightMatrix::InitBackward() {
  int no = int_mode_ ? wi_.dim1() : wf_.dim1();
  int ni = int_mode_ ? wi_.dim2() : wf_.dim2();
  dw_.Resize(no, ni, 0.0);
  updates_.Resize(no, ni, 0.0);
  wf_t_.Transpose(wf_);
  if (use_adam_) dw_sq_sum_.Resize(no, ni, 0.0);
}
// Writes to the given file. Returns false in case of error.
bool WeightMatrix::Serialize(bool training, TFile* fp) const {
  uinT8 mode = (int_mode_ ? kInt8Flag : 0) |
               (use_adam_ ? kAdamFlag : 0) | kDoubleFlag;
  if (fp->FWrite(&mode, sizeof(mode), 1) != 1) return false;
  if (int_mode_) {
    if (!wi_.Serialize(fp)) return false;
    if (!scales_.Serialize(fp)) return false;
  } else {
    if (!wf_.Serialize(fp)) return false;
    if (training && !updates_.Serialize(fp)) return false;
    if (training && use_adam_ && !dw_sq_sum_.Serialize(fp)) return false;
  }
  return true;
}
// Reads from the given file. Returns false in case of error.
bool WeightMatrix::DeSerialize(bool training, TFile* fp) {
  uinT8 mode = 0;
  if (fp->FRead(&mode, sizeof(mode), 1) != 1) return false;
  int_mode_ = (mode & kInt8Flag) != 0;
  use_adam_ = (mode & kAdamFlag) != 0;
  if ((mode & kDoubleFlag) == 0) return DeSerializeOld(training, fp);
  if (int_mode_) {
    if (!wi_.DeSerialize(fp)) return false;
    if (!scales_.DeSerialize(fp)) return false;
    // Init the multiplier.
    if (multiplier_ != nullptr) multiplier_->Init(wi_);
  } else {
    if (!wf_.DeSerialize(fp)) return false;
    if (training) {
      InitBackward();
      if (!updates_.DeSerialize(fp)) return false;
      if (use_adam_ && !dw_sq_sum_.DeSerialize(fp)) return false;
    }
  }
  return true;
}
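// Illustrative sketch, not part of weightmatrix.cpp: how the single mode byte
// read above selects the variant, assuming the flag values used elsewhere in
// this file (kInt8Flag = 1, kAdamFlag = 4, kDoubleFlag = 128). WeightMode and
// DecodeMode are hypothetical names.
#include <cstdint>

struct WeightMode {
  bool int_mode;   // inT8 weights plus per-row scales follow.
  bool use_adam;   // Adam accumulators are serialized when training.
  bool is_double;  // New double format; otherwise DeSerializeOld applies.
};

static WeightMode DecodeMode(uint8_t mode) {
  return {(mode & 1) != 0, (mode & 4) != 0, (mode & 128) != 0};
}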
// As DeSerialize, but reads the old (float) format for backward compatibility.
bool WeightMatrix::DeSerializeOld(bool training, TFile* fp) {
  GENERIC_2D_ARRAY<float> float_array;
  if (int_mode_) {
    if (!wi_.DeSerialize(fp)) return false;
    GenericVector<float> old_scales;
    if (!old_scales.DeSerialize(fp)) return false;
    scales_.resize_no_init(old_scales.size());
    for (int i = 0; i < old_scales.size(); ++i) scales_[i] = old_scales[i];
  } else {
    if (!float_array.DeSerialize(fp)) return false;
    FloatToDouble(float_array, &wf_);
  }
  if (training) {
    InitBackward();
    if (!float_array.DeSerialize(fp)) return false;
    FloatToDouble(float_array, &updates_);
  }
  return true;
}
// Computes matrix.vector v = Wu, where u is imagined to have an extra
// trailing element of value 1 to implement the bias, so u is of size
// W.dim2() - 1 and the output v is of size W.dim1().
void WeightMatrix::MatrixDotVector(const double* u, double* v) const {
  MatrixDotVectorInternal(wf_, true, false, u, v);
}

// Integer version of the above, using the quantized weights and scales.
void WeightMatrix::MatrixDotVector(const inT8* u, double* v) const {
  multiplier_->MatrixDotVector(wi_, scales_, u, v);
}

// MatrixDotVector for peep weights: adds the component-wise products of
// *this[0] and v to inout.
void WeightMatrix::MultiplyAccumulate(const double* v, double* inout) {
  int n = wf_.dim2();
  const double* u = wf_[0];
  for (int i = 0; i < n; ++i) {
    inout[i] += u[i] * v[i];
  }
}

// Computes vector.matrix v = uW on the transposed weights, dropping the
// result for the bias row, so v is of size W.dim2() - 1.
void WeightMatrix::VectorDotMatrix(const double* u, double* v) const {
  MatrixDotVectorInternal(wf_t_, false, true, u, v);
}
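// Illustrative sketch, not part of weightmatrix.cpp: the implicit-bias trick
// used by the forward product above, on std types. u behaves as if it had a
// trailing 1, so row i contributes dot(w[i][0..n-1], u) + w[i][n] without u
// actually storing the 1. MatVecWithImplicitBias is hypothetical; each row of
// w is assumed to have u.size() + 1 entries, the last being the bias weight.
#include <vector>

static std::vector<double> MatVecWithImplicitBias(
    const std::vector<std::vector<double>>& w, const std::vector<double>& u) {
  std::vector<double> v(w.size(), 0.0);
  for (size_t i = 0; i < w.size(); ++i) {
    double total = 0.0;
    for (size_t j = 0; j < u.size(); ++j) total += w[i][j] * u[j];
    v[i] = total + w[i][u.size()];  // The bias weight multiplies the implied 1.
  }
  return v;
}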
// Fills dw_[i][j] with the dot product u[i][] . v[j][], where u is the
// gradients and v is the (transposed) inputs. Matching MatrixDotVector,
// v[last][] is missing, presumed 1.0, which yields the bias gradient.
// Runs in parallel if requested.
void WeightMatrix::SumOuterTransposed(const TransposedArray& u,
                                      const TransposedArray& v,
                                      bool in_parallel) {
  int num_outputs = dw_.dim1();
  int num_inputs = dw_.dim2() - 1;
  int num_samples = u.dim2();
#pragma omp parallel for num_threads(4) if (in_parallel)
  for (int i = 0; i < num_outputs; ++i) {
    double* dwi = dw_[i];
    const double* ui = u[i];
    for (int j = 0; j < num_inputs; ++j) {
      dwi[j] = DotProduct(ui, v[j], num_samples);
    }
    // The last element of v is missing, presumed 1.0.
    double total = 0.0;
    for (int k = 0; k < num_samples; ++k) total += ui[k];
    dwi[num_inputs] = total;
  }
}
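// Illustrative sketch, not part of weightmatrix.cpp: SumOuterTransposed is an
// outer product summed over samples, dw[i][j] = sum_k u[i][k] * v[j][k], plus
// a bias column holding the per-output gradient sums. SumOuter is a
// hypothetical standalone version on std types.
#include <vector>

static void SumOuter(const std::vector<std::vector<double>>& u,  // [no][samples]
                     const std::vector<std::vector<double>>& v,  // [ni][samples]
                     std::vector<std::vector<double>>* dw) {     // [no][ni + 1]
  size_t num_samples = u.empty() ? 0 : u[0].size();
  dw->assign(u.size(), std::vector<double>(v.size() + 1, 0.0));
  for (size_t i = 0; i < u.size(); ++i) {
    for (size_t j = 0; j < v.size(); ++j)
      for (size_t k = 0; k < num_samples; ++k) (*dw)[i][j] += u[i][k] * v[j][k];
    // The bias column: gradients summed against the implied input of 1.0.
    for (size_t k = 0; k < num_samples; ++k) (*dw)[i][v.size()] += u[i][k];
  }
}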
// Updates the weights using the given learning rate, momentum and adam_beta.
// num_samples is used in the Adam correction factor.
void WeightMatrix::Update(double learning_rate, double momentum,
                          double adam_beta, int num_samples) {
  if (use_adam_ && num_samples > 0 && num_samples < kAdamCorrectionIterations) {
    learning_rate *= sqrt(1.0 - pow(adam_beta, num_samples));
    learning_rate /= 1.0 - pow(momentum, num_samples);
  }
  if (use_adam_ && num_samples > 0 && momentum > 0.0) {
    dw_sq_sum_.SumSquares(dw_, adam_beta);
    dw_ *= learning_rate * (1.0 - momentum);
    updates_ *= momentum;
    updates_ += dw_;
    wf_.AdamUpdate(updates_, dw_sq_sum_, learning_rate * kAdamEpsilon);
  } else {
    dw_ *= learning_rate;
    updates_ += dw_;
    if (momentum > 0.0) wf_ += updates_;
    if (momentum >= 0.0) updates_ *= momentum;
  }
  wf_t_.Transpose(wf_);
}
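// Illustrative sketch, not part of weightmatrix.cpp: the learning-rate warm-up
// applied above follows the Adam bias-correction terms
// sqrt(1 - beta2^t) / (1 - beta1^t), with momentum as beta1 and adam_beta as
// beta2. AdamCorrectedRate is a hypothetical standalone helper.
#include <cmath>

static double AdamCorrectedRate(double learning_rate, double momentum,
                                double adam_beta, int num_samples) {
  learning_rate *= std::sqrt(1.0 - std::pow(adam_beta, num_samples));
  learning_rate /= 1.0 - std::pow(momentum, num_samples);
  return learning_rate;
}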
// Sums the products of weight updates in *this and other, splitting into
// positive (same direction) in *same and negative (different direction) in
// *changed.
void WeightMatrix::CountAlternators(const WeightMatrix& other, double* same,
                                    double* changed) const {
  int num_outputs = updates_.dim1();
  int num_inputs = updates_.dim2();
  for (int i = 0; i < num_outputs; ++i) {
    const double* this_i = updates_[i];
    const double* other_i = other.updates_[i];
    for (int j = 0; j < num_inputs; ++j) {
      double product = this_i[j] * other_i[j];
      if (product < 0.0)
        *changed -= product;
      else
        *same += product;
    }
  }
}
// Helper computes an integer histogram bucket for a weight and adds it
// to the histogram.
static void HistogramWeight(double weight, STATS* histogram) {
  int bucket = kHistogramBuckets - 1;
  if (weight != 0.0) {
    double logval = -log2(fabs(weight));
    bucket = ClipToRange(IntCastRounded(logval), 0, kHistogramBuckets - 1);
  }
  histogram->add(bucket, 1);
}
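// Illustrative sketch, not part of weightmatrix.cpp: the bucketing above maps
// a weight of magnitude about 2^-b to bucket b, clipped to
// [0, kHistogramBuckets - 1], so 0.25 lands in bucket 2, anything >= 1.0 in
// bucket 0, and exact zeros in the last bucket. WeightBucket is hypothetical.
#include <algorithm>
#include <cmath>

static int WeightBucket(double weight, int num_buckets) {
  if (weight == 0.0) return num_buckets - 1;
  int bucket = static_cast<int>(std::lround(-std::log2(std::fabs(weight))));
  return std::min(std::max(bucket, 0), num_buckets - 1);
}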
// Prints a histogram of the weight magnitudes, labeled with the given msg.
void WeightMatrix::Debug2D(const char* msg) {
  STATS histogram(0, kHistogramBuckets);
  if (int_mode_) {
    for (int i = 0; i < wi_.dim1(); ++i) {
      for (int j = 0; j < wi_.dim2(); ++j) {
        HistogramWeight(wi_[i][j] * scales_[i], &histogram);
      }
    }
  } else {
    for (int i = 0; i < wf_.dim1(); ++i) {
      for (int j = 0; j < wf_.dim2(); ++j) {
        HistogramWeight(wf_[i][j], &histogram);
      }
    }
  }
  tprintf("%s\n", msg);
  histogram.print();
}
// Computes and returns the dot product of the two n-vectors u and v,
// dispatching to the AVX or SSE implementation when available.
double WeightMatrix::DotProduct(const double* u, const double* v, int n) {
  if (IsAVXAvailable()) return DotProductAVX(u, v, n);
  if (IsSSEAvailable()) return DotProductSSE(u, v, n);
  double total = 0.0;
  for (int k = 0; k < n; ++k) total += u[k] * v[k];
  return total;
}
// Utility function converts an array of float to the corresponding array
// of double.
void WeightMatrix::FloatToDouble(const GENERIC_2D_ARRAY<float>& wf,
                                 GENERIC_2D_ARRAY<double>* wd) {
  int dim1 = wf.dim1();
  int dim2 = wf.dim2();
  wd->ResizeNoInit(dim1, dim2);
  for (int i = 0; i < dim1; ++i) {
    const float* wfi = wf[i];
    double* wdi = (*wd)[i];
    for (int j = 0; j < dim2; ++j) wdi[j] = static_cast<double>(wfi[j]);
  }
}
// Computes matrix.vector v = Wu. u is of size W.dim2() - add_bias_fwd and v
// is of size W.dim1() - skip_bias_back. If add_bias_fwd, u is imagined to
// have an extra element at the end with value 1, to implement the bias
// weight. If skip_bias_back, the output corresponding to the last element of
// dim1 is dropped, as happens on the backward pass over the transposed matrix.
void WeightMatrix::MatrixDotVectorInternal(const GENERIC_2D_ARRAY<double>& w,
                                           bool add_bias_fwd,
                                           bool skip_bias_back,
                                           const double* u, double* v) {
  int num_results = w.dim1() - skip_bias_back;
  int extent = w.dim2() - add_bias_fwd;
  for (int i = 0; i < num_results; ++i) {
    const double* wi = w[i];
    double total = DotProduct(wi, u, extent);
    if (add_bias_fwd) total += wi[extent];  // The bias weight multiplies 1.
    v[i] = total;
  }
}