29 static constexpr NeonRegister broadcast (
const T&) {
return {}; }
38template <CompareOp,
class>
41template <>
struct NeonCompare<
CompareOp::less, float> {
static auto cmp (float32x4_t a, float32x4_t b) {
return vcltq_f32 (a, b); } };
42template <>
struct NeonCompare<
CompareOp::less, double> {
static auto cmp (float64x2_t a, float64x2_t b) {
return vcltq_f64 (a, b); } };
43template <>
struct NeonCompare<
CompareOp::lessOrEqual, float> {
static auto cmp (float32x4_t a, float32x4_t b) {
return vcleq_f32 (a, b); } };
44template <>
struct NeonCompare<
CompareOp::lessOrEqual, double> {
static auto cmp (float64x2_t a, float64x2_t b) {
return vcleq_f64 (a, b); } };
45template <>
struct NeonCompare<
CompareOp::greater, float> {
static auto cmp (float32x4_t a, float32x4_t b) {
return vcgtq_f32 (a, b); } };
46template <>
struct NeonCompare<
CompareOp::greater, double> {
static auto cmp (float64x2_t a, float64x2_t b) {
return vcgtq_f64 (a, b); } };
47template <>
struct NeonCompare<
CompareOp::greaterOrEqual, float> {
static auto cmp (float32x4_t a, float32x4_t b) {
return vcgeq_f32 (a, b); } };
48template <>
struct NeonCompare<
CompareOp::greaterOrEqual, double> {
static auto cmp (float64x2_t a, float64x2_t b) {
return vcgeq_f64 (a, b); } };
49template <>
struct NeonCompare<
CompareOp::equal, float> {
static auto cmp (float32x4_t a, float32x4_t b) {
return vceqq_f32 (a, b); } };
50template <>
struct NeonCompare<
CompareOp::equal, double> {
static auto cmp (float64x2_t a, float64x2_t b) {
return vceqq_f64 (a, b); } };
51template <>
struct NeonCompare<
CompareOp::notEqual, float> {
static auto cmp (float32x4_t a, float32x4_t b) {
return vmvnq_u32 (vceqq_f32 (a, b)); } };
52template <>
struct NeonCompare<
CompareOp::notEqual, double> {
static auto cmp (float64x2_t a, float64x2_t b) {
return vreinterpretq_u64_u32 (vmvnq_u32 (vreinterpretq_u32_u64 (vceqq_f64 (a, b)))); } };
57struct NeonRegister<float>
/** Number of float elements per register in this specialization. */
59 static constexpr size_t numElements = 4;
/** The NEON vector type wrapped by this specialization. */
61 using NativeType = float32x4_t;
67 static NeonRegister load (
const float* d) {
return { vld1q_f32 (d) }; }
68 static NeonRegister broadcast (
float x) {
return { vdupq_n_f32 (x) }; }
72 void store (
float* d)
const { vst1q_f32 (d, value); }
76 template <CompareOp op>
77 static NeonRegister compare (NeonRegister a, NeonRegister b) {
return { vreinterpretq_f32_u32 (detail::NeonCompare<op, float>::cmp (a.value, b.value)) }; }
82 static NeonRegister bitwiseAndNot (NeonRegister a, NeonRegister b) {
return { vreinterpretq_f32_u32 (vandq_u32 (vreinterpretq_u32_f32 (a.value), vmvnq_u32 (vreinterpretq_u32_f32 (b.value)))) }; }
83 static NeonRegister bitwiseAnd (NeonRegister a, NeonRegister b) {
return { vreinterpretq_f32_u32 (vandq_u32 (vreinterpretq_u32_f32 (a.value), vreinterpretq_u32_f32 (b.value))) }; }
84 static NeonRegister bitwiseBlend (NeonRegister a, NeonRegister b, NeonRegister mask) {
return { vbslq_f32 (vreinterpretq_u32_f32 (mask.value), b.value, a.value) }; }
88 static NeonRegister
abs (NeonRegister x) {
return { vabsq_f32 (x.value) }; }
89 static NeonRegister floor (NeonRegister x) {
return { vrndmq_f32 (x.value) }; }
90 static NeonRegister ceil (NeonRegister x) {
return { vrndpq_f32 (x.value) }; }
91 static NeonRegister mul (NeonRegister a, NeonRegister b) {
return { vmulq_f32 (a.value, b.value) }; }
92 static NeonRegister div (NeonRegister a, NeonRegister b) {
return { vdivq_f32 (a.value, b.value) }; }
93 static NeonRegister add (NeonRegister a, NeonRegister b) {
return { vaddq_f32 (a.value, b.value) }; }
94 static NeonRegister sub (NeonRegister a, NeonRegister b) {
return { vsubq_f32 (a.value, b.value) }; }
95 static NeonRegister
max (NeonRegister a, NeonRegister b) {
return { vmaxq_f32 (a.value, b.value) }; }
96 static NeonRegister
min (NeonRegister a, NeonRegister b) {
return { vminq_f32 (a.value, b.value) }; }
97 static NeonRegister fma (NeonRegister a, NeonRegister b, NeonRegister c) {
return { vfmaq_f32 (c.value, a.value, b.value) }; }
98 static NeonRegister fms (NeonRegister a, NeonRegister b, NeonRegister c) {
return { vfmsq_f32 (c.value, a.value, b.value) }; }
103 static NeonRegister
exp (NeonRegister x) {
return { vexpf (x.value) }; }
104 static NeonRegister exp2 (NeonRegister x) {
return { vexp2f (x.value) }; }
105 static NeonRegister expm1 (NeonRegister x) {
return { vexpm1f (x.value) }; }
106 static NeonRegister log (NeonRegister x) {
return { vlogf (x.value) }; }
107 static NeonRegister log1p (NeonRegister x) {
return { vlog1pf (x.value) }; }
108 static NeonRegister
log10 (NeonRegister x) {
return { vlog10f (x.value) }; }
109 static NeonRegister logb (NeonRegister x) {
return { vlogbf (x.value) }; }
110 static NeonRegister
log2 (NeonRegister x) {
return { vlog2f (x.value) }; }
111 static NeonRegister
sin (NeonRegister x) {
return { vsinf (x.value) }; }
112 static NeonRegister
cos (NeonRegister x) {
return { vcosf (x.value) }; }
113 static NeonRegister
tan (NeonRegister x) {
return { vtanf (x.value) }; }
114 static NeonRegister
sinh (NeonRegister x) {
return { vsinhf (x.value) }; }
115 static NeonRegister
cosh (NeonRegister x) {
return { vcoshf (x.value) }; }
116 static NeonRegister
tanh (NeonRegister x) {
return { vtanhf (x.value) }; }
117 static NeonRegister
asinh (NeonRegister x) {
return { vasinhf (x.value) }; }
118 static NeonRegister
acosh (NeonRegister x) {
return { vacoshf (x.value) }; }
119 static NeonRegister
atanh (NeonRegister x) {
return { vatanhf (x.value) }; }
120 static NeonRegister
pow (NeonRegister x, NeonRegister y) {
return { vpowf (x.value, y.value) }; }
/** Overload of pow taking its second argument as an int32_t register.
    Only declared here; the inline definition appears further down in this file.
*/
121 static NeonRegister
pow (NeonRegister x, NeonRegister<int32_t> y);
/** Returns an int32_t register produced from x. Only declared here; the inline definition appears further down in this file. */
126 static NeonRegister<int32_t> convertToInt (NeonRegister x);
/** Returns x viewed as an int32_t register. Only declared here; the inline definition appears further down in this file. */
127 static NeonRegister<int32_t> reinterpretAsInt (NeonRegister x);
132struct NeonRegister<double>
/** Number of double elements per register in this specialization. */
134 static constexpr size_t numElements = 2;
/** The NEON vector type wrapped by this specialization. */
136 using NativeType = float64x2_t;
142 static NeonRegister load (
const double* d) {
return { vld1q_f64 (d) }; }
143 static NeonRegister broadcast (
double x) {
return { vdupq_n_f64 (x) }; }
147 void store (
double* d)
const { vst1q_f64 (d, value); }
151 template <CompareOp op>
152 static NeonRegister compare (NeonRegister a, NeonRegister b) {
return { vreinterpretq_f64_u64 (detail::NeonCompare<op, double>::cmp (a.value, b.value)) }; }
157 static NeonRegister bitwiseAndNot (NeonRegister a, NeonRegister b) {
return { vreinterpretq_f64_u32 (vandq_u32 (vreinterpretq_u32_f64 (a.value), vmvnq_u32 (vreinterpretq_u32_f64 (b.value)))) }; }
158 static NeonRegister bitwiseAnd (NeonRegister a, NeonRegister b) {
return { vreinterpretq_f64_u32 (vandq_u32 (vreinterpretq_u32_f64 (a.value), vreinterpretq_u32_f64 (b.value))) }; }
159 static NeonRegister bitwiseBlend (NeonRegister a, NeonRegister b, NeonRegister mask) {
return { vbslq_f64 (vreinterpretq_u64_f64 (mask.value), b.value, a.value) }; }
163 static NeonRegister
abs (NeonRegister x) {
return { vabsq_f64 (x.value) }; }
164 static NeonRegister floor (NeonRegister x) {
return { vrndmq_f64 (x.value) }; }
165 static NeonRegister ceil (NeonRegister x) {
return { vrndpq_f64 (x.value) }; }
166 static NeonRegister mul (NeonRegister a, NeonRegister b) {
return { vmulq_f64 (a.value, b.value) }; }
167 static NeonRegister div (NeonRegister a, NeonRegister b) {
return { vdivq_f64 (a.value, b.value) }; }
168 static NeonRegister add (NeonRegister a, NeonRegister b) {
return { vaddq_f64 (a.value, b.value) }; }
169 static NeonRegister sub (NeonRegister a, NeonRegister b) {
return { vsubq_f64 (a.value, b.value) }; }
170 static NeonRegister
max (NeonRegister a, NeonRegister b) {
return { vmaxq_f64 (a.value, b.value) }; }
171 static NeonRegister
min (NeonRegister a, NeonRegister b) {
return { vminq_f64 (a.value, b.value) }; }
172 static NeonRegister fma (NeonRegister a, NeonRegister b, NeonRegister c) {
return { vfmaq_f64 (c.value, a.value, b.value) }; }
173 static NeonRegister fms (NeonRegister a, NeonRegister b, NeonRegister c) {
return { vfmsq_f64 (c.value, a.value, b.value) }; }
/** Returns an int64_t register produced from x. Only declared here; the inline definition appears further down in this file. */
177 static NeonRegister<int64_t> convertToInt (NeonRegister x);
/** Returns x viewed as an int64_t register. Only declared here; the inline definition appears further down in this file. */
178 static NeonRegister<int64_t> reinterpretAsInt (NeonRegister x);
183struct NeonRegister<int32_t>
/** Number of int32_t elements per register in this specialization. */
185 static constexpr size_t numElements = 4;
/** The NEON vector type wrapped by this specialization. */
187 using NativeType = int32x4_t;
193 static NeonRegister load (
const int32_t* d) {
return { vld1q_s32 (d) }; }
194 static NeonRegister broadcast (int32_t x) {
return { vdupq_n_s32 (x) }; }
198 void store (int32_t* d)
const { vst1q_s32 (d, value); }
202 static NeonRegister bitwiseAnd (NeonRegister a, NeonRegister b) {
return { vandq_s32 (a.value, b.value) }; }
203 static NeonRegister bitwiseOr (NeonRegister a, NeonRegister b) {
return { vorrq_s32 (a.value, b.value) }; }
207 static NeonRegister
abs (NeonRegister x) {
return { vabsq_s32 (x.value) }; }
208 static NeonRegister mul (NeonRegister a, NeonRegister b) {
return { vmulq_s32 (a.value, b.value) }; }
209 static NeonRegister add (NeonRegister a, NeonRegister b) {
return { vaddq_s32 (a.value, b.value) }; }
210 static NeonRegister sub (NeonRegister a, NeonRegister b) {
return { vsubq_s32 (a.value, b.value) }; }
211 static NeonRegister
max (NeonRegister a, NeonRegister b) {
return { vmaxq_s32 (a.value, b.value) }; }
212 static NeonRegister
min (NeonRegister a, NeonRegister b) {
return { vminq_s32 (a.value, b.value) }; }
216 static NeonRegister<float> convertToFp (NeonRegister x) {
return { vcvtq_f32_s32 (x.value) }; }
217 static NeonRegister<float> reinterpretAsFp (NeonRegister x) {
return { vreinterpretq_f32_s32 (x.value) }; }
222struct NeonRegister<uint32_t>
/** Number of uint32_t elements per register in this specialization. */
224 static constexpr size_t numElements = 4;
/** The NEON vector type wrapped by this specialization. */
226 using NativeType = uint32x4_t;
232 static NeonRegister load (
const uint32_t* d) {
return { vld1q_u32 (d) }; }
233 static NeonRegister broadcast (uint32_t x) {
return { vdupq_n_u32 (x) }; }
237 void store (uint32_t* d)
const { vst1q_u32 (d, value); }
241 static NeonRegister bitwiseAnd (NeonRegister a, NeonRegister b) {
return { vandq_u32 (a.value, b.value) }; }
242 static NeonRegister bitwiseOr (NeonRegister a, NeonRegister b) {
return { vorrq_u32 (a.value, b.value) }; }
246 static NeonRegister mul (NeonRegister a, NeonRegister b) {
return { vmulq_u32 (a.value, b.value) }; }
247 static NeonRegister add (NeonRegister a, NeonRegister b) {
return { vaddq_u32 (a.value, b.value) }; }
248 static NeonRegister sub (NeonRegister a, NeonRegister b) {
return { vsubq_u32 (a.value, b.value) }; }
249 static NeonRegister
max (NeonRegister a, NeonRegister b) {
return { vmaxq_u32 (a.value, b.value) }; }
250 static NeonRegister
min (NeonRegister a, NeonRegister b) {
return { vminq_u32 (a.value, b.value) }; }
255struct NeonRegister<int64_t>
/** Number of int64_t elements per register in this specialization. */
257 static constexpr size_t numElements = 2;
/** The NEON vector type wrapped by this specialization. */
259 using NativeType = int64x2_t;
265 static NeonRegister load (
const int64_t* d) {
return { vld1q_s64 (d) }; }
266 static NeonRegister broadcast (int64_t x) {
return { vdupq_n_s64 (x) }; }
270 void store (int64_t* d)
const { vst1q_s64 (d, value); }
274 static NeonRegister bitwiseAnd (NeonRegister a, NeonRegister b) {
return { vandq_s64 (a.value, b.value) }; }
275 static NeonRegister bitwiseOr (NeonRegister a, NeonRegister b) {
return { vorrq_s64 (a.value, b.value) }; }
279 static NeonRegister
abs (NeonRegister x) {
return { vabsq_s64 (x.value) }; }
280 static NeonRegister add (NeonRegister a, NeonRegister b) {
return { vaddq_s64 (a.value, b.value) }; }
281 static NeonRegister sub (NeonRegister a, NeonRegister b) {
return { vsubq_s64 (a.value, b.value) }; }
285 static NeonRegister<double> convertToFp (NeonRegister x) {
return { vcvtq_f64_s64 (x.value) }; }
286 static NeonRegister<double> reinterpretAsFp (NeonRegister x) {
return { vreinterpretq_f64_s64 (x.value) }; }
291struct NeonRegister<uint64_t>
/** Number of uint64_t elements per register in this specialization. */
293 static constexpr size_t numElements = 2;
/** The NEON vector type wrapped by this specialization. */
295 using NativeType = uint64x2_t;
301 static NeonRegister load (
const uint64_t* d) {
return { vld1q_u64 (d) }; }
302 static NeonRegister broadcast (uint64_t x) {
return { vdupq_n_u64 (x) }; }
306 void store (uint64_t* d)
const { vst1q_u64 (d, value); }
310 static NeonRegister bitwiseAnd (NeonRegister a, NeonRegister b) {
return { vandq_u64 (a.value, b.value) }; }
311 static NeonRegister bitwiseOr (NeonRegister a, NeonRegister b) {
return { vorrq_u64 (a.value, b.value) }; }
315 static NeonRegister add (NeonRegister a, NeonRegister b) {
return { vaddq_u64 (a.value, b.value) }; }
316 static NeonRegister sub (NeonRegister a, NeonRegister b) {
return { vsubq_u64 (a.value, b.value) }; }
320inline NeonRegister<int32_t> NeonRegister<float>::convertToInt (NeonRegister<float> x) {
return { vcvtq_s32_f32 (x.value) }; }
321inline NeonRegister<int32_t> NeonRegister<float>::reinterpretAsInt (NeonRegister<float> x) {
return { vreinterpretq_s32_f32 (x.value) }; }
322inline NeonRegister<int64_t> NeonRegister<double>::convertToInt (NeonRegister<double> x) {
return { vcvtq_s64_f64 (x.value) }; }
323inline NeonRegister<int64_t> NeonRegister<double>::reinterpretAsInt (NeonRegister<double> x) {
return { vreinterpretq_s64_f64 (x.value) }; }
326inline NeonRegister<float> NeonRegister<float>::pow (NeonRegister<float> x, NeonRegister<int32_t> y) {
return { vipowf (x.value, y.value) }; }
constexpr ExpressionChainBuilder< expressions::Log10 > log10
Computes the logarithm to the base of ten of the source values.
Definition: Log10.h:84
constexpr ExpressionChainBuilder< expressions::Sin > sin
Computes the sine of each source element.
Definition: Sin.h:90
constexpr ExpressionChainBuilder< expressions::Asinh > asinh
Computes the inverse hyperbolic sine of each source element.
Definition: Asinh.h:90
constexpr ExpressionChainBuilder< expressions::Exp > exp
Computes e (Euler's number, 2.7182818...) raised to the power of each source element.
Definition: Exp.h:104
constexpr ExpressionChainBuilder< expressions::Acosh > acosh
Computes the inverse hyperbolic cosine of each source element.
Definition: Acosh.h:90
constexpr ExpressionChainBuilder< expressions::Cosh > cosh
Computes the hyperbolic cosine of each source element.
Definition: Cosh.h:90
constexpr ExpressionChainBuilder< expressions::Cos > cos
Computes the cosine of each source element.
Definition: Cos.h:90
constexpr ExpressionChainBuilder< expressions::Tan > tan
Computes the tangent of each source element.
Definition: Tan.h:90
constexpr ExpressionChainBuilder< expressions::Atanh > atanh
Computes the inverse hyperbolic tangent of each source element.
Definition: Atanh.h:90
constexpr auto pow(SrcBaseType &&bases, SrcExpType &&exponents)
Returns an expression that raises the elements in bases element-wise to the power of the elements in exponents.
Definition: Pow.h:213
constexpr ExpressionChainBuilder< expressions::Max > max
Computes the maximum value of the source values.
Definition: Max.h:198
constexpr ExpressionChainBuilder< expressions::Abs > abs
Computes the absolute value of the source values.
Definition: Abs.h:135
constexpr ExpressionChainBuilder< expressions::Log2 > log2
Computes the logarithm to the base of two of the source values.
Definition: Log2.h:91
constexpr ExpressionChainBuilder< expressions::Sinh > sinh
Computes the hyperbolic sine of each source element.
Definition: Sinh.h:90
constexpr ExpressionChainBuilder< expressions::Min > min
Computes the minimum value of the source values.
Definition: Min.h:198
constexpr ExpressionChainBuilder< expressions::Tanh > tanh
Computes the hyperbolic tangent of each source element.
Definition: Tanh.h:90
The main namespace of the VCTR project.
Definition: Array.h:24
CompareOp
Possible types of (SIMD) compare operations.
Definition: CompareOp.h:41
Definition: NeonRegister.h:28