17#if defined(__ARM_NEON)
27#if defined(FAST_FLOAT) && defined(__ARM_ARCH_ISA_A64)
30 float32x4_t result0123 = vdupq_n_f32(0.0f);
31 float32x4_t result4567 = vdupq_n_f32(0.0f);
34 float32x4_t u0 = vld1q_f32(u);
35 float32x4_t v0 = vld1q_f32(v);
36 float32x4_t u4 = vld1q_f32(u + 4);
37 float32x4_t v4 = vld1q_f32(v + 4);
38 result0123 = vfmaq_f32(result0123, u0, v0);
39 result4567 = vfmaq_f32(result4567, u4, v4);
44 float total = vaddvq_f32(result0123);
45 total += vaddvq_f32(result4567);
58#if defined(OPENMP_SIMD) || defined(_OPENMP)
59#pragma omp simd reduction(+:total)
61 for (
int k = 0; k < n; k++) {
TFloat DotProductNEON(const TFloat *u, const TFloat *v, int n)