tesseract v5.3.3.20231005
dotproductneon.cpp
Go to the documentation of this file.
1
2// File: dotproductneon.cpp
3// Description: Dot product function for ARM NEON.
4// Author: Stefan Weil
5//
6// Licensed under the Apache License, Version 2.0 (the "License");
7// you may not use this file except in compliance with the License.
8// You may obtain a copy of the License at
9// http://www.apache.org/licenses/LICENSE-2.0
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
16
17#if defined(__ARM_NEON)
18
19#include <arm_neon.h>
20#include "dotproduct.h"
21
22namespace tesseract {
23
24// Documentation:
25// https://developer.arm.com/architectures/instruction-sets/intrinsics/
26
27#if defined(FAST_FLOAT) && defined(__ARM_ARCH_ISA_A64)
28
// Computes and returns the dot product of the two n-element float vectors
// u and v using NEON fused multiply-accumulate intrinsics.
// Two independent 4-lane accumulators are maintained, so every pass of the
// vector loop consumes 8 elements from each input; the remaining (fewer than
// 8) elements are folded in by a scalar loop. If n is not positive, neither
// loop runs and the result is 0.
float DotProductNEON(const float *u, const float *v, int n) {
  float32x4_t acc_lo = vdupq_n_f32(0.0f);
  float32x4_t acc_hi = vdupq_n_f32(0.0f);
  // Vector part: 8 multiply-accumulates (two 4-lane FMAs) per pass.
  for (; n >= 8; n -= 8, u += 8, v += 8) {
    const float32x4_t u_lo = vld1q_f32(u);
    const float32x4_t v_lo = vld1q_f32(v);
    const float32x4_t u_hi = vld1q_f32(u + 4);
    const float32x4_t v_hi = vld1q_f32(v + 4);
    acc_lo = vfmaq_f32(acc_lo, u_lo, v_lo);
    acc_hi = vfmaq_f32(acc_hi, u_hi, v_hi);
  }
  // Reduce each accumulator across its lanes; the low accumulator goes first
  // so the summation order stays fixed.
  float sum = vaddvq_f32(acc_lo);
  sum += vaddvq_f32(acc_hi);
  // Scalar tail for the elements that did not fill a group of 8.
  for (int i = 0; i < n; ++i) {
    sum += u[i] * v[i];
  }
  return sum;
}
52
53#else
54
55// Computes and returns the dot product of the two n-vectors u and v.
56TFloat DotProductNEON(const TFloat *u, const TFloat *v, int n) {
57 TFloat total = 0;
58#if defined(OPENMP_SIMD) || defined(_OPENMP)
59#pragma omp simd reduction(+:total)
60#endif
61 for (int k = 0; k < n; k++) {
62 total += u[k] * v[k];
63 }
64 return total;
65}
66
67#endif
68
69} // namespace tesseract
70
71#endif /* __ARM_NEON */
TFloat DotProductNEON(const TFloat *u, const TFloat *v, int n)
double TFloat
Definition: tesstypes.h:39