22#include <allheaders.h>
39 i_.
ResizeNoInit(width, num_features, GetPadding(num_features));
78 for (
int t = 0; t < width; ++t) {
93 if (end_x < full_width) {
96 int fill_size = num_features * (full_width - end_x);
108 if (end_y < full_height) {
112 int fill_size = num_features * full_width * (full_height - end_y);
127static void ComputeBlackWhite(
Image pix,
float *black,
float *white) {
128 int width = pixGetWidth(pix);
129 int height = pixGetHeight(pix);
130 STATS mins(0, 255), maxes(0, 255);
133 l_uint32 *line = pixGetData(pix) + pixGetWpl(pix) *
y;
134 int prev = GET_DATA_BYTE(line, 0);
135 int curr = GET_DATA_BYTE(line, 1);
136 for (
int x = 1;
x + 1 < width; ++
x) {
137 int next = GET_DATA_BYTE(line,
x + 1);
138 if ((curr < prev && curr <=
next) || (curr <= prev && curr <
next)) {
142 if ((curr > prev && curr >=
next) || (curr >= prev && curr >
next)) {
150 if (mins.get_total() == 0) {
153 if (maxes.get_total() == 0) {
156 *black = mins.ile(0.25);
157 *white = maxes.ile(0.75);
164 std::vector<Image> pixes(1, pix);
173 int target_height = shape.
height();
174 int target_width = shape.
width();
175 std::vector<std::pair<int, int>> h_w_pairs;
176 for (
auto &&pix : pixes) {
178 int width = pixGetWidth(var_pix);
179 if (target_width != 0) {
180 width = target_width;
182 int height = pixGetHeight(var_pix);
183 if (target_height != 0) {
184 height = target_height;
186 h_w_pairs.emplace_back(height, width);
191 for (
size_t b = 0; b < pixes.size(); ++b) {
192 Image pix = pixes[b];
193 float black = 0.0f, white = 255.0f;
194 if (shape.
depth() != 3) {
195 ComputeBlackWhite(pix, &black, &white);
197 float contrast = (white - black) / 2.0f;
198 if (contrast <= 0.0f) {
201 if (shape.
height() == 1) {
217 int width = pixGetWidth(pix);
218 int height = pixGetHeight(pix);
219 int wpl = pixGetWpl(pix);
226 bool color = num_features == 3;
227 if (width > target_width) {
228 width = target_width;
230 uint32_t *line = pixGetData(pix);
231 for (
int y = 0;
y < target_height; ++
y, line += wpl) {
234 for (
x = 0;
x < width; ++
x, ++t) {
237 for (
int c = COLOR_RED; c <= COLOR_BLUE; ++c) {
238 int pixel = GET_DATA_BYTE(line +
x, c);
239 SetPixel(t,
f++, pixel, black, contrast);
242 int pixel = GET_DATA_BYTE(line,
x);
243 SetPixel(t, 0, pixel, black, contrast);
247 for (;
x < target_width; ++
x) {
248 Randomize(t++, 0, num_features, randomizer);
259 int width = pixGetWidth(pix);
260 int height = pixGetHeight(pix);
262 int wpl = pixGetWpl(pix);
267 if (width > target_width) {
268 width = target_width;
271 for (
x = 0;
x < width; ++
x, ++t) {
272 for (
int y = 0;
y < height; ++
y) {
273 uint32_t *line = pixGetData(pix) + wpl *
y;
274 int pixel = GET_DATA_BYTE(line,
x);
278 for (;
x < target_width; ++
x) {
291 float float_pixel = (pixel - black) / contrast - 1.0f;
293 i_[t][
f] = ClipToRange<int>(
IntCastRounded((INT8_MAX + 1) * float_pixel), -INT8_MAX, INT8_MAX);
295 f_[t][
f] = float_pixel;
305 int feature_factor = 1;
306 if (num_features == 3) {
311 Image pix = pixCreate(im_width, im_height * num_features, 32);
319 const int8_t *features = i_[t];
320 for (
int y = 0;
y < num_features; ++
y, im_y += im_height) {
321 int pixel = features[
y * feature_factor];
323 int red = ClipToRange<int>(pixel + 128, 0, 255);
324 int green = red, blue = red;
325 if (feature_factor == 3) {
327 green = ClipToRange<int>(features[
y * feature_factor + 1] + 128, 0, 255);
328 blue = ClipToRange<int>(features[
y * feature_factor + 2] + 128, 0, 255);
329 }
else if (num_features > 3) {
332 red = abs(pixel) * 2;
341 pixSetPixel(pix, im_x, im_y,
342 (red << L_RED_SHIFT) | (green << L_GREEN_SHIFT) | (blue << L_BLUE_SHIFT));
345 const float *features = f_[t];
346 for (
int y = 0;
y < num_features; ++
y, im_y += im_height) {
347 float pixel = features[
y * feature_factor];
349 int red = ClipToRange<int>(
IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);
350 int green = red, blue = red;
351 if (feature_factor == 3) {
353 pixel = features[
y * feature_factor + 1];
354 green = ClipToRange<int>(
IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);
355 pixel = features[
y * feature_factor + 2];
356 blue = ClipToRange<int>(
IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);
357 }
else if (num_features > 3) {
360 red = ClipToRange<int>(
IntCastRounded(std::fabs(pixel) * 255), 0, 255);
369 pixSetPixel(pix, im_x, im_y,
370 (red << L_RED_SHIFT) | (green << L_GREEN_SHIFT) | (blue << L_BLUE_SHIFT));
380 for (
int y = 0;
y < num_features; ++
y) {
381 for (
int t = 0; t <
Width(); ++t) {
382 if (num == 0 || t < num || t + num >=
Width()) {
384 tprintf(
" %g",
static_cast<float>(i_[t][
y]) / INT8_MAX);
398 memcpy(i_[dest_t], src.i_[src_t], i_.
dim2() *
sizeof(i_[0][0]));
400 memcpy(f_[dest_t], src.f_[src_t], f_.
dim2() *
sizeof(f_[0][0]));
406 const NetworkIO &src,
int src_t,
int src_offset) {
409 memcpy(i_[dest_t] + dest_offset, src.i_[src_t] + src_offset, num_features *
sizeof(i_[0][0]));
411 memcpy(f_[dest_t] + dest_offset, src.f_[src_t] + src_offset, num_features *
sizeof(f_[0][0]));
418 int8_t *line = i_[t] + offset;
419 for (
int i = 0;
i < num_features; ++
i) {
424 float *line = f_[t] + offset;
425 for (
int i = 0;
i < num_features; ++
i) {
433 float *certainty)
const {
434 if (t_end <= t_start) {
438 float min_score = 0.0f;
440 if (c == not_this || c == null_ch) {
444 if (max_char < 0 || *rating < min_score) {
455 float *certainty)
const {
459 if (t_end <= t_start || t_end <= 0) {
462 float ratings[3] = {0.0f, 0.0f, 0.0f};
463 float certs[3] = {0.0f, 0.0f, 0.0f};
464 for (
int t = t_start; t < t_end; ++t) {
465 const float *line = f_[t];
469 ratings[2] = FLT_MAX;
473 for (
int i = 2;
i >= 1; --
i) {
474 if (ratings[
i] > ratings[
i - 1]) {
475 ratings[
i] = ratings[
i - 1];
476 certs[
i] = certs[
i - 1];
480 if (zero < certs[2]) {
484 if (score < certs[1]) {
489 if (zero < certs[0]) {
493 int best_i = ratings[2] < ratings[1] ? 2 : 1;
494 *rating = ratings[best_i] + t_end - t_start;
495 *certainty = certs[best_i];
504 float best_score = -FLT_MAX;
505 const float *line = f_[t];
506 for (
int i = 0;
i < f_.
dim2(); ++
i) {
507 if (line[
i] > best_score &&
i != not_this &&
i != not_that) {
508 best_score = line[
i];
512 if (score !=
nullptr) {
521 int length = labels.size();
522 int last_start = end - length;
525 for (
int s = start; s <= last_start; ++s) {
527 if (score > best_score || best_start < 0) {
538 int length = labels.size();
540 for (
int i = 0;
i < length; ++
i) {
541 score += f_(start +
i, labels[
i]);
551 float bad_score = (1.0f - ok_score) / (num_classes - 1);
552 float *targets = f_[t];
553 for (
int i = 0;
i < num_classes; ++
i) {
554 targets[
i] = bad_score;
556 targets[label] = ok_score;
567 float *targets = f_[t];
568 for (
int c = 0; c < num_classes; ++c) {
570 targets[c] += (1.0 - targets[c]) * (2 / 3.0);
593 for (
int t = 0; t <
Width(); ++t) {
594 const float *features = f_[t];
595 for (
int y = 0;
y < num_features; ++
y) {
596 float grad = features[
y];
597 if (grad < -confidence_thr) {
599 if ((t == 0 || f_[t - 1][
y] < confidence_thr / 2) &&
600 (t + 1 ==
Width() || f_[t + 1][
y] < confidence_thr / 2)) {
612 const int8_t *line = i_[t];
613 for (
int i = 0;
i < i_.
dim2(); ++
i) {
617 const float *line = f_[t];
618 for (
int i = 0;
i < f_.
dim2(); ++
i) {
628 const int8_t *line = i_[t];
629 for (
int i = 0;
i < num_features; ++
i) {
630 inout[
i] +=
static_cast<TFloat>(line[
i]) / INT8_MAX;
633 const float *line = f_[t];
634 for (
int i = 0;
i < num_features; ++
i) {
643 const int8_t *line = i_[t] + offset;
644 for (
int i = 0;
i < num_features; ++
i) {
645 inout[
i] +=
static_cast<float>(line[
i]) / INT8_MAX;
648 const float *line = f_[t] + offset;
649 for (
int i = 0;
i < num_features; ++
i) {
664 int8_t *line = i_[t] + offset;
665 for (
int i = 0;
i < num_features; ++
i) {
666 line[
i] = ClipToRange<int>(
IntCastRounded(input[
i] * INT8_MAX), -INT8_MAX, INT8_MAX);
669 float *line = f_[t] + offset;
670 for (
int i = 0;
i < num_features; ++
i) {
671 line[
i] =
static_cast<float>(input[
i]);
681 int8_t *dest_line = i_[dest_t];
682 const int8_t *src_line = src.i_[src_t];
683 for (
int i = 0;
i < dim; ++
i) {
684 if (dest_line[
i] < src_line[
i]) {
685 dest_line[
i] = src_line[
i];
691 float *dest_line = f_[dest_t];
692 const float *src_line = src.f_[src_t];
693 for (
int i = 0;
i < dim; ++
i) {
694 if (dest_line[
i] < src_line[
i]) {
695 dest_line[
i] = src_line[
i];
709 const int *max_line = maxes[t];
710 const float *fwd_line = fwd.f_[t];
711 int num_features = fwd.f_.
dim2();
712 for (
int i = 0;
i < num_features; ++
i) {
713 f_[max_line[
i]][
i] = fwd_line[
i];
720 float min_max = 0.0f;
723 for (
int t = 0; t < width; ++t) {
724 float max_value = -FLT_MAX;
726 const int8_t *column = i_[t];
727 for (
int i = 0;
i < num_features; ++
i) {
728 if (column[
i] > max_value) {
729 max_value = column[
i];
733 const float *column = f_[t];
734 for (
int i = 0;
i < num_features; ++
i) {
735 if (column[
i] > max_value) {
736 max_value = column[
i];
740 if (t == 0 || max_value < min_max) {
756 for (
int t = 0; t < width; ++t) {
757 int8_t *out_line = i_[t];
758 const int8_t *base_line = base_output.i_[t];
759 const int8_t *comb_line = combiner_output.i_[t];
760 float base_weight =
static_cast<float>(comb_line[no]) / INT8_MAX;
761 float boost_weight = 1.0f - base_weight;
762 for (
int i = 0;
i < no; ++
i) {
763 out_line[
i] =
IntCastRounded(base_line[
i] * base_weight + comb_line[
i] * boost_weight);
767 for (
int t = 0; t < width; ++t) {
768 float *out_line = f_[t];
769 const float *base_line = base_output.f_[t];
770 const float *comb_line = combiner_output.f_[t];
771 float base_weight = comb_line[no];
772 float boost_weight = 1.0f - base_weight;
773 for (
int i = 0;
i < no; ++
i) {
774 out_line[
i] = base_line[
i] * base_weight + comb_line[
i] * boost_weight;
789 for (
int t = 0; t < width; ++t) {
790 const float *delta_line = fwd_deltas.f_[t];
791 const float *base_line = base_output.f_[t];
792 float *comb_line = f_[t];
793 float base_weight = comb_line[no];
794 float boost_weight = 1.0f - base_weight;
795 float max_base_delta = 0.0;
796 for (
int i = 0;
i < no; ++
i) {
798 float output = base_line[
i] * base_weight + comb_line[
i] * boost_weight;
800 float comb_target = delta_line[
i] +
output;
801 comb_line[
i] = comb_target - comb_line[
i];
802 float base_delta = std::fabs(comb_target - base_line[
i]);
803 if (base_delta > max_base_delta) {
804 max_base_delta = base_delta;
807 if (max_base_delta >= 0.5) {
810 comb_line[no] = 0.0 - base_weight;
813 for (
int i = 0;
i < no; ++
i) {
815 if (comb_line[
i] > 0.0) {
819 comb_line[no] = 1.0 - base_weight;
849 float src_max = src.f_.
MaxAbs();
851 float scale_max = scale.f_.
MaxAbs();
853 if (src_max > 0.0f) {
854 float factor = scale_max / src_max;
855 for (
int t = 0; t < src.
Width(); ++t) {
856 const float *src_ptr = src.f_[t];
857 float *dest_ptr = f_[t];
858 for (
int i = 0;
i < src.f_.
dim2(); ++
i) {
859 dest_ptr[
i] = src_ptr[
i] * factor;
870 Resize(src, num_features);
878 int fwd_t = fwd_index.
t();
879 int rev_t = rev_index.
t();
880 for (
int x = 0;
x < width; ++
x) {
890 Resize(src, num_features);
908 stride_map_ = src.stride_map_;
931 int width = src.
Width();
936 for (
int t = 0; t < width; ++t) {
937 memcpy(i_[t] + feature_offset, src.i_[t], num_features *
sizeof(i_[t][0]));
939 for (
int t = width; t < i_.
dim1(); ++t) {
940 memset(i_[t], 0, num_features *
sizeof(i_[t][0]));
943 for (
int t = 0; t < width; ++t) {
944 memcpy(f_[t] + feature_offset, src.f_[t], num_features *
sizeof(f_[t][0]));
946 for (
int t = width; t < f_.
dim1(); ++t) {
947 memset(f_[t], 0, num_features *
sizeof(f_[t][0]));
950 return num_features + feature_offset;
956 Resize(src, num_features);
957 int width = src.
Width();
960 for (
int t = 0; t < width; ++t) {
961 memcpy(i_[t], src.i_[t] + feature_offset, num_features *
sizeof(i_[t][0]));
964 for (
int t = 0; t < width; ++t) {
965 memcpy(f_[t], src.f_[t] + feature_offset, num_features *
sizeof(f_[t][0]));
974 for (
int t = 0; t < width; ++t) {
975 dest->WriteStrided(t, f_[t]);
984 for (
int i = 0;
i < dim; ++
i) {
985 v[
i] = ClipToRange<float>(v[
i], -range, range);
992int NetworkIO::GetPadding(
int num_features) {
void tprintf(const char *format,...)
int IntCastRounded(double x)
const float kMinCertainty
void ZeroVector(unsigned n, T *vec)
void ResizeNoInit(int size1, int size2, int pad=0)
int RoundInputs(int size) const
static const IntSimdMatrix * intSimdMatrix
double SignedRand(double range)
void FromPix(const StaticShape &shape, const Image pix, TRand *randomizer)
void Resize(const NetworkIO &src, int num_features)
void WriteTimeStepPart(int t, int offset, int num_features, const TFloat *input)
void ComputeCombinerDeltas(const NetworkIO &fwd_deltas, const NetworkIO &base_output)
void ResizeXTo1(const NetworkIO &src, int num_features)
void ZeroInvalidElements()
void MaxpoolTimeStep(int dest_t, const NetworkIO &src, int src_t, int *max_line)
void FromPixes(const StaticShape &shape, const std::vector< Image > &pixes, TRand *randomizer)
void CopyTimeStepGeneral(int dest_t, int dest_offset, int num_features, const NetworkIO &src, int src_t, int src_offset)
void ClipVector(int t, float range)
void AddTimeStepPart(int t, int offset, int num_features, float *inout) const
void WriteTimeStep(int t, const TFloat *input)
void CopyWithXReversal(const NetworkIO &src)
void Print(int num) const
void ScoresOverRange(int t_start, int t_end, int choice, int null_ch, float *rating, float *certainty) const
static float ProbToCertainty(float prob)
int CopyPacking(const NetworkIO &src, int feature_offset)
void CopyWithXYTranspose(const NetworkIO &src)
void ReadTimeStep(int t, TFloat *output) const
void Copy1DGreyImage(int batch, Image pix, float black, float contrast, TRand *randomizer)
void Copy2DImage(int batch, Image pix, float black, float contrast, TRand *randomizer)
void Resize2d(bool int_mode, int width, int num_features)
void MaxpoolBackward(const NetworkIO &fwd, const GENERIC_2D_ARRAY< int > &maxes)
void AddTimeStep(int t, TFloat *inout) const
void SetActivations(int t, int label, float ok_score)
void CombineOutputs(const NetworkIO &base_output, const NetworkIO &combiner_output)
void Transpose(TransposedArray *dest) const
void ResizeScaled(const NetworkIO &src, int x_scale, int y_scale, int num_features)
bool AnySuspiciousTruth(float confidence_thr) const
void CopyWithYReversal(const NetworkIO &src)
void Randomize(int t, int offset, int num_features, TRand *randomizer)
void CopyUnpacking(const NetworkIO &src, int feature_offset, int num_features)
void EnsureBestLabel(int t, int label)
void AddAllToFloat(const NetworkIO &src)
const StrideMap & stride_map() const
void SubtractAllFromFloat(const NetworkIO &src)
TFloat ScoreOfLabels(const std::vector< int > &labels, int start) const
void ResizeToMap(bool int_mode, const StrideMap &stride_map, int num_features)
const int8_t * i(int t) const
void CopyAll(const NetworkIO &src)
void CopyTimeStepFrom(int dest_t, const NetworkIO &src, int src_t)
int BestChoiceOverRange(int t_start, int t_end, int not_this, int null_ch, float *rating, float *certainty) const
void CopyWithNormalization(const NetworkIO &src, const NetworkIO &scale)
void SetPixel(int t, int f, int pixel, float black, float contrast)
int PositionOfBestMatch(const std::vector< int > &labels, int start, int end) const
int BestLabel(int t, float *score) const
int Size(FlexDimensions dimension) const
void ScaleXY(int x_factor, int y_factor)
void SetStride(const std::vector< std::pair< int, int > > &h_w_pairs)
int index(FlexDimensions dimension) const
bool AddOffset(int offset, FlexDimensions dimension)
int MaxIndexOfDim(FlexDimensions dim) const