29 static constexpr NeonRegister broadcast (
const T&) {
return {}; }
38template <CompareOp,
class>
/** Lane-wise "a < b" for four floats, implemented with vcltq_f32. */
template <>
struct NeonCompare<CompareOp::less, float>
{
    static auto cmp (float32x4_t a, float32x4_t b)
    {
        const auto laneMask = vcltq_f32 (a, b);
        return laneMask;
    }
};
/** Lane-wise "a < b" for two doubles, implemented with vcltq_f64. */
template <>
struct NeonCompare<CompareOp::less, double>
{
    static auto cmp (float64x2_t a, float64x2_t b)
    {
        const auto laneMask = vcltq_f64 (a, b);
        return laneMask;
    }
};
/** Lane-wise "a <= b" for four floats, implemented with vcleq_f32. */
template <>
struct NeonCompare<CompareOp::lessOrEqual, float>
{
    static auto cmp (float32x4_t a, float32x4_t b)
    {
        const auto laneMask = vcleq_f32 (a, b);
        return laneMask;
    }
};
/** Lane-wise "a <= b" for two doubles, implemented with vcleq_f64. */
template <>
struct NeonCompare<CompareOp::lessOrEqual, double>
{
    static auto cmp (float64x2_t a, float64x2_t b)
    {
        const auto laneMask = vcleq_f64 (a, b);
        return laneMask;
    }
};
/** Lane-wise "a > b" for four floats, implemented with vcgtq_f32. */
template <>
struct NeonCompare<CompareOp::greater, float>
{
    static auto cmp (float32x4_t a, float32x4_t b)
    {
        const auto laneMask = vcgtq_f32 (a, b);
        return laneMask;
    }
};
/** Lane-wise "a > b" for two doubles, implemented with vcgtq_f64. */
template <>
struct NeonCompare<CompareOp::greater, double>
{
    static auto cmp (float64x2_t a, float64x2_t b)
    {
        const auto laneMask = vcgtq_f64 (a, b);
        return laneMask;
    }
};
/** Lane-wise "a >= b" for four floats, implemented with vcgeq_f32. */
template <>
struct NeonCompare<CompareOp::greaterOrEqual, float>
{
    static auto cmp (float32x4_t a, float32x4_t b)
    {
        const auto laneMask = vcgeq_f32 (a, b);
        return laneMask;
    }
};
/** Lane-wise "a >= b" for two doubles, implemented with vcgeq_f64. */
template <>
struct NeonCompare<CompareOp::greaterOrEqual, double>
{
    static auto cmp (float64x2_t a, float64x2_t b)
    {
        const auto laneMask = vcgeq_f64 (a, b);
        return laneMask;
    }
};
/** Lane-wise "a == b" for four floats, implemented with vceqq_f32. */
template <>
struct NeonCompare<CompareOp::equal, float>
{
    static auto cmp (float32x4_t a, float32x4_t b)
    {
        const auto laneMask = vceqq_f32 (a, b);
        return laneMask;
    }
};
/** Lane-wise "a == b" for two doubles, implemented with vceqq_f64. */
template <>
struct NeonCompare<CompareOp::equal, double>
{
    static auto cmp (float64x2_t a, float64x2_t b)
    {
        const auto laneMask = vceqq_f64 (a, b);
        return laneMask;
    }
};
/** Lane-wise "a != b" for four floats: the bit-wise inverse of the vceqq_f32 equality mask. */
template <>
struct NeonCompare<CompareOp::notEqual, float>
{
    static auto cmp (float32x4_t a, float32x4_t b)
    {
        const auto equalMask = vceqq_f32 (a, b);
        return vmvnq_u32 (equalMask);
    }
};
/** Lane-wise "a != b" for two doubles: the bit-wise inverse of the vceqq_f64 equality mask. */
template <>
struct NeonCompare<CompareOp::notEqual, double>
{
    static auto cmp (float64x2_t a, float64x2_t b)
    {
        const auto equalMask = vceqq_f64 (a, b);

        // The inversion is carried out on a 32 bit view of the mask and the result is viewed as 64 bit lanes again
        const auto equalAsU32    = vreinterpretq_u32_u64 (equalMask);
        const auto notEqualAsU32 = vmvnq_u32 (equalAsU32);

        return vreinterpretq_u64_u32 (notEqualAsU32);
    }
};
57struct NeonRegister<float>
/** The NEON vector type wrapped by this register. */
using NativeType = float32x4_t;

/** Number of float lanes in one register (evaluates to 4). */
static constexpr size_t numElements = sizeof (NativeType) / sizeof (float);
/** Reads four consecutive floats starting at d into a register. */
static NeonRegister load (const float* d)
{
    const auto lanes = vld1q_f32 (d);
    return { lanes };
}
/** Returns a register with every lane set to x. */
static NeonRegister broadcast (float x)
{
    const auto splat = vdupq_n_f32 (x);
    return { splat };
}
/** Writes the four float lanes held in value to memory starting at d. */
void store (float* d) const
{
    vst1q_f32 (d, value);
}
/** Compares a and b lane by lane with the operation selected by op.
    The mask returned by detail::NeonCompare is reinterpreted as a float register.
*/
template <CompareOp op>
static NeonRegister compare (NeonRegister a, NeonRegister b)
{
    const auto laneMask = detail::NeonCompare<op, float>::cmp (a.value, b.value);
    return { vreinterpretq_f32_u32 (laneMask) };
}
/** Computes a AND (NOT b) on the raw bit patterns of both registers. */
static NeonRegister bitwiseAndNot (NeonRegister a, NeonRegister b)
{
    const auto bitsA = vreinterpretq_u32_f32 (a.value);
    const auto bitsB = vreinterpretq_u32_f32 (b.value);

    // vbicq_u32 (x, y) yields x & ~y, i.e. the same bits as vandq_u32 (x, vmvnq_u32 (y))
    return { vreinterpretq_f32_u32 (vbicq_u32 (bitsA, bitsB)) };
}
/** Computes a AND b on the raw bit patterns of both registers. */
static NeonRegister bitwiseAnd (NeonRegister a, NeonRegister b)
{
    const auto bitsA = vreinterpretq_u32_f32 (a.value);
    const auto bitsB = vreinterpretq_u32_f32 (b.value);

    return { vreinterpretq_f32_u32 (vandq_u32 (bitsA, bitsB)) };
}
/** Bit-wise select: takes bits from b where mask has a 1 bit and from a where mask has a 0 bit. */
static NeonRegister bitwiseBlend (NeonRegister a, NeonRegister b, NeonRegister mask)
{
    const auto selector = vreinterpretq_u32_f32 (mask.value);
    return { vbslq_f32 (selector, b.value, a.value) };
}
/** Lane-wise absolute value. */
static NeonRegister abs (NeonRegister x)
{
    const auto magnitude = vabsq_f32 (x.value);
    return { magnitude };
}
/** Rounds every lane towards minus infinity (vrndmq_f32). */
static NeonRegister floor (NeonRegister x)
{
    const auto roundedDown = vrndmq_f32 (x.value);
    return { roundedDown };
}
/** Rounds every lane towards plus infinity (vrndpq_f32). */
static NeonRegister ceil (NeonRegister x)
{
    const auto roundedUp = vrndpq_f32 (x.value);
    return { roundedUp };
}
/** Lane-wise product a * b. */
static NeonRegister mul (NeonRegister a, NeonRegister b)
{
    const auto product = vmulq_f32 (a.value, b.value);
    return { product };
}
/** Lane-wise quotient a / b. */
static NeonRegister div (NeonRegister a, NeonRegister b)
{
    const auto quotient = vdivq_f32 (a.value, b.value);
    return { quotient };
}
/** Lane-wise sum a + b. */
static NeonRegister add (NeonRegister a, NeonRegister b)
{
    const auto sum = vaddq_f32 (a.value, b.value);
    return { sum };
}
/** Lane-wise difference a - b. */
static NeonRegister sub (NeonRegister a, NeonRegister b)
{
    const auto difference = vsubq_f32 (a.value, b.value);
    return { difference };
}
/** Lane-wise maximum of a and b. */
static NeonRegister max (NeonRegister a, NeonRegister b)
{
    const auto larger = vmaxq_f32 (a.value, b.value);
    return { larger };
}
/** Lane-wise minimum of a and b. */
static NeonRegister min (NeonRegister a, NeonRegister b)
{
    const auto smaller = vminq_f32 (a.value, b.value);
    return { smaller };
}
/** Fused multiply-add: returns c + a * b for every lane. */
static NeonRegister fma (NeonRegister a, NeonRegister b, NeonRegister c)
{
    // vfmaq_f32 takes the accumulator as its first argument
    const auto accumulated = vfmaq_f32 (c.value, a.value, b.value);
    return { accumulated };
}
/** Fused multiply-subtract: returns c - a * b for every lane.
    NOTE(review): this is the operand order vfmsq_f32 implements; confirm callers do not expect a * b - c.
*/
static NeonRegister fms (NeonRegister a, NeonRegister b, NeonRegister c)
{
    // vfmsq_f32 takes the accumulator as its first argument
    const auto accumulated = vfmsq_f32 (c.value, a.value, b.value);
    return { accumulated };
}
// Float to int32 register conversion and bit pattern reinterpretation. Both are only
// declared here; their inline definitions follow after the integer register specialisations.
102 static NeonRegister<int32_t> convertToInt (NeonRegister x);
103 static NeonRegister<int32_t> reinterpretAsInt (NeonRegister x);
108struct NeonRegister<double>
/** The NEON vector type wrapped by this register. */
using NativeType = float64x2_t;

/** Number of double lanes in one register (evaluates to 2). */
static constexpr size_t numElements = sizeof (NativeType) / sizeof (double);
/** Reads two consecutive doubles starting at d into a register. */
static NeonRegister load (const double* d)
{
    const auto lanes = vld1q_f64 (d);
    return { lanes };
}
/** Returns a register with every lane set to x. */
static NeonRegister broadcast (double x)
{
    const auto splat = vdupq_n_f64 (x);
    return { splat };
}
/** Writes the two double lanes held in value to memory starting at d. */
void store (double* d) const
{
    vst1q_f64 (d, value);
}
/** Compares a and b lane by lane with the operation selected by op.
    The mask returned by detail::NeonCompare is reinterpreted as a double register.
*/
template <CompareOp op>
static NeonRegister compare (NeonRegister a, NeonRegister b)
{
    const auto laneMask = detail::NeonCompare<op, double>::cmp (a.value, b.value);
    return { vreinterpretq_f64_u64 (laneMask) };
}
/** Computes a AND (NOT b) on the raw bit patterns of both registers. */
static NeonRegister bitwiseAndNot (NeonRegister a, NeonRegister b)
{
    // The bit operation is carried out on a 32 bit lane view of the 128 bit registers
    const auto bitsA = vreinterpretq_u32_f64 (a.value);
    const auto bitsB = vreinterpretq_u32_f64 (b.value);

    // vbicq_u32 (x, y) yields x & ~y, i.e. the same bits as vandq_u32 (x, vmvnq_u32 (y))
    return { vreinterpretq_f64_u32 (vbicq_u32 (bitsA, bitsB)) };
}
/** Computes a AND b on the raw bit patterns of both registers. */
static NeonRegister bitwiseAnd (NeonRegister a, NeonRegister b)
{
    // The bit operation is carried out on a 32 bit lane view of the 128 bit registers
    const auto bitsA = vreinterpretq_u32_f64 (a.value);
    const auto bitsB = vreinterpretq_u32_f64 (b.value);

    return { vreinterpretq_f64_u32 (vandq_u32 (bitsA, bitsB)) };
}
/** Bit-wise select: takes bits from b where mask has a 1 bit and from a where mask has a 0 bit. */
static NeonRegister bitwiseBlend (NeonRegister a, NeonRegister b, NeonRegister mask)
{
    const auto selector = vreinterpretq_u64_f64 (mask.value);
    return { vbslq_f64 (selector, b.value, a.value) };
}
/** Lane-wise absolute value. */
static NeonRegister abs (NeonRegister x)
{
    const auto magnitude = vabsq_f64 (x.value);
    return { magnitude };
}
/** Rounds every lane towards minus infinity (vrndmq_f64). */
static NeonRegister floor (NeonRegister x)
{
    const auto roundedDown = vrndmq_f64 (x.value);
    return { roundedDown };
}
/** Rounds every lane towards plus infinity (vrndpq_f64). */
static NeonRegister ceil (NeonRegister x)
{
    const auto roundedUp = vrndpq_f64 (x.value);
    return { roundedUp };
}
/** Lane-wise product a * b. */
static NeonRegister mul (NeonRegister a, NeonRegister b)
{
    const auto product = vmulq_f64 (a.value, b.value);
    return { product };
}
/** Lane-wise quotient a / b. */
static NeonRegister div (NeonRegister a, NeonRegister b)
{
    const auto quotient = vdivq_f64 (a.value, b.value);
    return { quotient };
}
/** Lane-wise sum a + b. */
static NeonRegister add (NeonRegister a, NeonRegister b)
{
    const auto sum = vaddq_f64 (a.value, b.value);
    return { sum };
}
/** Lane-wise difference a - b. */
static NeonRegister sub (NeonRegister a, NeonRegister b)
{
    const auto difference = vsubq_f64 (a.value, b.value);
    return { difference };
}
/** Lane-wise maximum of a and b. */
static NeonRegister max (NeonRegister a, NeonRegister b)
{
    const auto larger = vmaxq_f64 (a.value, b.value);
    return { larger };
}
/** Lane-wise minimum of a and b. */
static NeonRegister min (NeonRegister a, NeonRegister b)
{
    const auto smaller = vminq_f64 (a.value, b.value);
    return { smaller };
}
/** Fused multiply-add: returns c + a * b for every lane. */
static NeonRegister fma (NeonRegister a, NeonRegister b, NeonRegister c)
{
    // vfmaq_f64 takes the accumulator as its first argument
    const auto accumulated = vfmaq_f64 (c.value, a.value, b.value);
    return { accumulated };
}
/** Fused multiply-subtract: returns c - a * b for every lane.
    NOTE(review): this is the operand order vfmsq_f64 implements; confirm callers do not expect a * b - c.
*/
static NeonRegister fms (NeonRegister a, NeonRegister b, NeonRegister c)
{
    // vfmsq_f64 takes the accumulator as its first argument
    const auto accumulated = vfmsq_f64 (c.value, a.value, b.value);
    return { accumulated };
}
// Double to int64 register conversion and bit pattern reinterpretation. Both are only
// declared here; their inline definitions follow after the integer register specialisations.
153 static NeonRegister<int64_t> convertToInt (NeonRegister x);
154 static NeonRegister<int64_t> reinterpretAsInt (NeonRegister x);
159struct NeonRegister<int32_t>
/** The NEON vector type wrapped by this register. */
using NativeType = int32x4_t;

/** Number of int32_t lanes in one register (evaluates to 4). */
static constexpr size_t numElements = sizeof (NativeType) / sizeof (int32_t);
/** Reads four consecutive int32_t values starting at d into a register. */
static NeonRegister load (const int32_t* d)
{
    const auto lanes = vld1q_s32 (d);
    return { lanes };
}
/** Returns a register with every lane set to x. */
static NeonRegister broadcast (int32_t x)
{
    const auto splat = vdupq_n_s32 (x);
    return { splat };
}
/** Writes the four int32_t lanes held in value to memory starting at d. */
void store (int32_t* d) const
{
    vst1q_s32 (d, value);
}
/** Bit-wise a AND b. */
static NeonRegister bitwiseAnd (NeonRegister a, NeonRegister b)
{
    const auto conjunction = vandq_s32 (a.value, b.value);
    return { conjunction };
}
/** Bit-wise a OR b. */
static NeonRegister bitwiseOr (NeonRegister a, NeonRegister b)
{
    const auto disjunction = vorrq_s32 (a.value, b.value);
    return { disjunction };
}
/** Lane-wise absolute value (vabsq_s32). */
static NeonRegister abs (NeonRegister x)
{
    const auto magnitude = vabsq_s32 (x.value);
    return { magnitude };
}
/** Lane-wise product a * b. */
static NeonRegister mul (NeonRegister a, NeonRegister b)
{
    const auto product = vmulq_s32 (a.value, b.value);
    return { product };
}
/** Lane-wise sum a + b. */
static NeonRegister add (NeonRegister a, NeonRegister b)
{
    const auto sum = vaddq_s32 (a.value, b.value);
    return { sum };
}
/** Lane-wise difference a - b. */
static NeonRegister sub (NeonRegister a, NeonRegister b)
{
    const auto difference = vsubq_s32 (a.value, b.value);
    return { difference };
}
/** Lane-wise maximum of a and b. */
static NeonRegister max (NeonRegister a, NeonRegister b)
{
    const auto larger = vmaxq_s32 (a.value, b.value);
    return { larger };
}
/** Lane-wise minimum of a and b. */
static NeonRegister min (NeonRegister a, NeonRegister b)
{
    const auto smaller = vminq_s32 (a.value, b.value);
    return { smaller };
}
/** Converts every int32_t lane to the corresponding float value (vcvtq_f32_s32). */
static NeonRegister<float> convertToFp (NeonRegister x)
{
    const auto asFloats = vcvtq_f32_s32 (x.value);
    return { asFloats };
}
/** Returns the unchanged bit pattern of x viewed as four float lanes. */
static NeonRegister<float> reinterpretAsFp (NeonRegister x)
{
    const auto sameBits = vreinterpretq_f32_s32 (x.value);
    return { sameBits };
}
198struct NeonRegister<uint32_t>
/** The NEON vector type wrapped by this register. */
using NativeType = uint32x4_t;

/** Number of uint32_t lanes in one register (evaluates to 4). */
static constexpr size_t numElements = sizeof (NativeType) / sizeof (uint32_t);
/** Reads four consecutive uint32_t values starting at d into a register. */
static NeonRegister load (const uint32_t* d)
{
    const auto lanes = vld1q_u32 (d);
    return { lanes };
}
/** Returns a register with every lane set to x. */
static NeonRegister broadcast (uint32_t x)
{
    const auto splat = vdupq_n_u32 (x);
    return { splat };
}
/** Writes the four uint32_t lanes held in value to memory starting at d. */
void store (uint32_t* d) const
{
    vst1q_u32 (d, value);
}
/** Bit-wise a AND b. */
static NeonRegister bitwiseAnd (NeonRegister a, NeonRegister b)
{
    const auto conjunction = vandq_u32 (a.value, b.value);
    return { conjunction };
}
/** Bit-wise a OR b. */
static NeonRegister bitwiseOr (NeonRegister a, NeonRegister b)
{
    const auto disjunction = vorrq_u32 (a.value, b.value);
    return { disjunction };
}
/** Lane-wise product a * b. */
static NeonRegister mul (NeonRegister a, NeonRegister b)
{
    const auto product = vmulq_u32 (a.value, b.value);
    return { product };
}
/** Lane-wise sum a + b. */
static NeonRegister add (NeonRegister a, NeonRegister b)
{
    const auto sum = vaddq_u32 (a.value, b.value);
    return { sum };
}
/** Lane-wise difference a - b. */
static NeonRegister sub (NeonRegister a, NeonRegister b)
{
    const auto difference = vsubq_u32 (a.value, b.value);
    return { difference };
}
/** Lane-wise maximum of a and b. */
static NeonRegister max (NeonRegister a, NeonRegister b)
{
    const auto larger = vmaxq_u32 (a.value, b.value);
    return { larger };
}
/** Lane-wise minimum of a and b. */
static NeonRegister min (NeonRegister a, NeonRegister b)
{
    const auto smaller = vminq_u32 (a.value, b.value);
    return { smaller };
}
231struct NeonRegister<int64_t>
/** The NEON vector type wrapped by this register. */
using NativeType = int64x2_t;

/** Number of int64_t lanes in one register (evaluates to 2). */
static constexpr size_t numElements = sizeof (NativeType) / sizeof (int64_t);
/** Reads two consecutive int64_t values starting at d into a register. */
static NeonRegister load (const int64_t* d)
{
    const auto lanes = vld1q_s64 (d);
    return { lanes };
}
/** Returns a register with every lane set to x. */
static NeonRegister broadcast (int64_t x)
{
    const auto splat = vdupq_n_s64 (x);
    return { splat };
}
/** Writes the two int64_t lanes held in value to memory starting at d. */
void store (int64_t* d) const
{
    vst1q_s64 (d, value);
}
/** Bit-wise a AND b. */
static NeonRegister bitwiseAnd (NeonRegister a, NeonRegister b)
{
    const auto conjunction = vandq_s64 (a.value, b.value);
    return { conjunction };
}
/** Bit-wise a OR b. */
static NeonRegister bitwiseOr (NeonRegister a, NeonRegister b)
{
    const auto disjunction = vorrq_s64 (a.value, b.value);
    return { disjunction };
}
/** Lane-wise absolute value (vabsq_s64). */
static NeonRegister abs (NeonRegister x)
{
    const auto magnitude = vabsq_s64 (x.value);
    return { magnitude };
}
/** Lane-wise sum a + b. */
static NeonRegister add (NeonRegister a, NeonRegister b)
{
    const auto sum = vaddq_s64 (a.value, b.value);
    return { sum };
}
/** Lane-wise difference a - b. */
static NeonRegister sub (NeonRegister a, NeonRegister b)
{
    const auto difference = vsubq_s64 (a.value, b.value);
    return { difference };
}
/** Converts every int64_t lane to the corresponding double value (vcvtq_f64_s64). */
static NeonRegister<double> convertToFp (NeonRegister x)
{
    const auto asDoubles = vcvtq_f64_s64 (x.value);
    return { asDoubles };
}
/** Returns the unchanged bit pattern of x viewed as two double lanes. */
static NeonRegister<double> reinterpretAsFp (NeonRegister x)
{
    const auto sameBits = vreinterpretq_f64_s64 (x.value);
    return { sameBits };
}
267struct NeonRegister<uint64_t>
/** The NEON vector type wrapped by this register. */
using NativeType = uint64x2_t;

/** Number of uint64_t lanes in one register (evaluates to 2). */
static constexpr size_t numElements = sizeof (NativeType) / sizeof (uint64_t);
/** Reads two consecutive uint64_t values starting at d into a register. */
static NeonRegister load (const uint64_t* d)
{
    const auto lanes = vld1q_u64 (d);
    return { lanes };
}
/** Returns a register with every lane set to x. */
static NeonRegister broadcast (uint64_t x)
{
    const auto splat = vdupq_n_u64 (x);
    return { splat };
}
/** Writes the two uint64_t lanes held in value to memory starting at d. */
void store (uint64_t* d) const
{
    vst1q_u64 (d, value);
}
/** Bit-wise a AND b. */
static NeonRegister bitwiseAnd (NeonRegister a, NeonRegister b)
{
    const auto conjunction = vandq_u64 (a.value, b.value);
    return { conjunction };
}
/** Bit-wise a OR b. */
static NeonRegister bitwiseOr (NeonRegister a, NeonRegister b)
{
    const auto disjunction = vorrq_u64 (a.value, b.value);
    return { disjunction };
}
/** Lane-wise sum a + b. */
static NeonRegister add (NeonRegister a, NeonRegister b)
{
    const auto sum = vaddq_u64 (a.value, b.value);
    return { sum };
}
/** Lane-wise difference a - b. */
static NeonRegister sub (NeonRegister a, NeonRegister b)
{
    const auto difference = vsubq_u64 (a.value, b.value);
    return { difference };
}
/** Converts the four float lanes of x to int32_t lanes using vcvtq_s32_f32 (rounds towards zero). */
inline NeonRegister<int32_t> NeonRegister<float>::convertToInt (NeonRegister<float> x)
{
    const auto asInts = vcvtq_s32_f32 (x.value);
    return { asInts };
}
/** Returns the unchanged bit pattern of x viewed as four int32_t lanes. */
inline NeonRegister<int32_t> NeonRegister<float>::reinterpretAsInt (NeonRegister<float> x)
{
    const auto sameBits = vreinterpretq_s32_f32 (x.value);
    return { sameBits };
}
/** Converts the two double lanes of x to int64_t lanes using vcvtq_s64_f64 (rounds towards zero). */
inline NeonRegister<int64_t> NeonRegister<double>::convertToInt (NeonRegister<double> x)
{
    const auto asInts = vcvtq_s64_f64 (x.value);
    return { asInts };
}
/** Returns the unchanged bit pattern of x viewed as two int64_t lanes. */
inline NeonRegister<int64_t> NeonRegister<double>::reinterpretAsInt (NeonRegister<double> x)
{
    const auto sameBits = vreinterpretq_s64_f64 (x.value);
    return { sameBits };
}
constexpr ExpressionChainBuilder< expressions::Max > max
Computes the maximum value of the source values.
Definition: Max.h:198
constexpr ExpressionChainBuilder< expressions::Abs > abs
Computes the absolute value of the source values.
Definition: Abs.h:135
constexpr ExpressionChainBuilder< expressions::Min > min
Computes the minimum value of the source values.
Definition: Min.h:198
The main namespace of the VCTR project.
Definition: Array.h:24
CompareOp
Possible types of (SIMD) compare operations.
Definition: SIMDHelpers.h:63
Definition: NeonRegister.h:28