37 static constexpr size_t numElements = 4;
39 using NativeType = float32x4_t;
45 static NeonRegister load (
const float* d) {
return { vld1q_f32 (d) }; }
46 static NeonRegister broadcast (
float x) {
return { vdupq_n_f32 (x) }; }
50 void store (
float* d)
const { vst1q_f32 (d, value); }
54 static NeonRegister andNot (NeonRegister a, NeonRegister b) {
return { vreinterpretq_f32_u32 (vandq_u32 (vreinterpretq_u32_f32 (a.value), vreinterpretq_u32_f32 (b.value))) }; }
58 static NeonRegister
abs (NeonRegister x) {
return { vabsq_f32 (x.value) }; }
59 static NeonRegister mul (NeonRegister a, NeonRegister b) {
return { vmulq_f32 (a.value, b.value) }; }
60 static NeonRegister add (NeonRegister a, NeonRegister b) {
return { vaddq_f32 (a.value, b.value) }; }
61 static NeonRegister sub (NeonRegister a, NeonRegister b) {
return { vsubq_f32 (a.value, b.value) }; }
62 static NeonRegister
max (NeonRegister a, NeonRegister b) {
return { vmaxq_f32 (a.value, b.value) }; }
63 static NeonRegister
min (NeonRegister a, NeonRegister b) {
return { vminq_f32 (a.value, b.value) }; }
68struct NeonRegister<double>
70 static constexpr size_t numElements = 2;
72 using NativeType = float64x2_t;
78 static NeonRegister load (
const double* d) {
return { vld1q_f64 (d) }; }
79 static NeonRegister broadcast (
double x) {
return { vdupq_n_f64 (x) }; }
83 void store (
double* d)
const { vst1q_f64 (d, value); }
87 static NeonRegister andNot (NeonRegister a, NeonRegister b) {
return { vreinterpretq_f64_u64 (vandq_u64 (vreinterpretq_u64_f64 (a.value), vreinterpretq_u64_f64 (b.value))) }; }
91 static NeonRegister
abs (NeonRegister x) {
return { vabsq_f64 (x.value) }; }
92 static NeonRegister mul (NeonRegister a, NeonRegister b) {
return { vmulq_f64 (a.value, b.value) }; }
93 static NeonRegister add (NeonRegister a, NeonRegister b) {
return { vaddq_f64 (a.value, b.value) }; }
94 static NeonRegister sub (NeonRegister a, NeonRegister b) {
return { vsubq_f64 (a.value, b.value) }; }
95 static NeonRegister
max (NeonRegister a, NeonRegister b) {
return { vmaxq_f64 (a.value, b.value) }; }
96 static NeonRegister
min (NeonRegister a, NeonRegister b) {
return { vminq_f64 (a.value, b.value) }; }
101struct NeonRegister<int32_t>
103 static constexpr size_t numElements = 4;
105 using NativeType = int32x4_t;
111 static NeonRegister load (
const int32_t* d) {
return { vld1q_s32 (d) }; }
112 static NeonRegister broadcast (int32_t x) {
return { vdupq_n_s32 (x) }; }
116 void store (int32_t* d)
const { vst1q_s32 (d, value); }
123 static NeonRegister
abs (NeonRegister x) {
return { vabsq_s32 (x.value) }; }
124 static NeonRegister mul (NeonRegister a, NeonRegister b) {
return { vmulq_s32 (a.value, b.value) }; }
125 static NeonRegister add (NeonRegister a, NeonRegister b) {
return { vaddq_s32 (a.value, b.value) }; }
126 static NeonRegister sub (NeonRegister a, NeonRegister b) {
return { vsubq_s32 (a.value, b.value) }; }
127 static NeonRegister
max (NeonRegister a, NeonRegister b) {
return { vmaxq_s32 (a.value, b.value) }; }
128 static NeonRegister
min (NeonRegister a, NeonRegister b) {
return { vminq_s32 (a.value, b.value) }; }
133struct NeonRegister<uint32_t>
135 static constexpr size_t numElements = 4;
137 using NativeType = uint32x4_t;
143 static NeonRegister load (
const uint32_t* d) {
return { vld1q_u32 (d) }; }
144 static NeonRegister broadcast (uint32_t x) {
return { vdupq_n_u32 (x) }; }
148 void store (uint32_t* d)
const { vst1q_u32 (d, value); }
155 static NeonRegister mul (NeonRegister a, NeonRegister b) {
return { vmulq_u32 (a.value, b.value) }; }
156 static NeonRegister add (NeonRegister a, NeonRegister b) {
return { vaddq_u32 (a.value, b.value) }; }
157 static NeonRegister sub (NeonRegister a, NeonRegister b) {
return { vsubq_u32 (a.value, b.value) }; }
158 static NeonRegister
max (NeonRegister a, NeonRegister b) {
return { vmaxq_u32 (a.value, b.value) }; }
159 static NeonRegister
min (NeonRegister a, NeonRegister b) {
return { vminq_u32 (a.value, b.value) }; }
164struct NeonRegister<int64_t>
166 static constexpr size_t numElements = 2;
168 using NativeType = int64x2_t;
174 static NeonRegister load (
const int64_t* d) {
return { vld1q_s64 (d) }; }
175 static NeonRegister broadcast (int64_t x) {
return { vdupq_n_s64 (x) }; }
179 void store (int64_t* d)
const { vst1q_s64 (d, value); }
186 static NeonRegister
abs (NeonRegister x) {
return { vabsq_s64 (x.value) }; }
187 static NeonRegister add (NeonRegister a, NeonRegister b) {
return { vaddq_s64 (a.value, b.value) }; }
188 static NeonRegister sub (NeonRegister a, NeonRegister b) {
return { vsubq_s64 (a.value, b.value) }; }
193struct NeonRegister<uint64_t>
195 static constexpr size_t numElements = 2;
197 using NativeType = uint64x2_t;
203 static NeonRegister load (
const uint64_t* d) {
return { vld1q_u64 (d) }; }
204 static NeonRegister broadcast (uint64_t x) {
return { vdupq_n_u64 (x) }; }
208 void store (uint64_t* d)
const { vst1q_u64 (d, value); }
215 static NeonRegister add (NeonRegister a, NeonRegister b) {
return { vaddq_u64 (a.value, b.value) }; }
216 static NeonRegister sub (NeonRegister a, NeonRegister b) {
return { vsubq_u64 (a.value, b.value) }; }
constexpr ExpressionChainBuilder< expressions::Max > max
Computes the maximum value of the source values.
Definition: Max.h:194
constexpr ExpressionChainBuilder< expressions::Abs > abs
Computes the absolute value of the source values.
Definition: Abs.h:133
constexpr ExpressionChainBuilder< expressions::Min > min
Computes the minimum value of the source values.
Definition: Min.h:194
The main namespace of the VCTR project.
Definition: Array.h:24
Definition: NeonRegister.h:28