29 static SSERegister broadcast (
const T&) {
return {}; }
/** Native SSE vector type held by this register: packed single precision floats. */
using NativeType = __m128;

/** Number of float lanes in one register (16 bytes / 4 bytes per float = 4). */
static constexpr size_t numElements = sizeof (NativeType) / sizeof (float);
45 VCTR_TARGET (
"sse4.1") static
SSERegister loadUnaligned (const
float* d) {
return { _mm_loadu_ps (d) }; }
46 VCTR_TARGET (
"sse4.1") static SSERegister loadAligned (const
float* d) {
return { _mm_load_ps (d) }; }
47 VCTR_TARGET (
"sse4.1") static SSERegister broadcast (
float x) {
return { _mm_load1_ps (&x) }; }
51 VCTR_TARGET (
"sse4.1") void storeUnaligned (
float* d)
const { _mm_storeu_ps (d, value); }
52 VCTR_TARGET (
"sse4.1") void storeAligned (
float* d)
const { _mm_store_ps (d, value); }
56 VCTR_TARGET (
"sse4.1") static SSERegister andNot (SSERegister a, SSERegister b) {
return { _mm_andnot_ps (a.value, b.value) }; }
60 VCTR_TARGET (
"sse4.1") static SSERegister mul (SSERegister a, SSERegister b) {
return { _mm_mul_ps (a.value, b.value) }; }
61 VCTR_TARGET (
"sse4.1") static SSERegister div (SSERegister a, SSERegister b) {
return { _mm_div_ps (a.value, b.value) }; }
62 VCTR_TARGET (
"sse4.1") static SSERegister add (SSERegister a, SSERegister b) {
return { _mm_add_ps (a.value, b.value) }; }
63 VCTR_TARGET (
"sse4.1") static SSERegister sub (SSERegister a, SSERegister b) {
return { _mm_sub_ps (a.value, b.value) }; }
64 VCTR_TARGET (
"sse4.1") static SSERegister
max (SSERegister a, SSERegister b) {
return { _mm_max_ps (a.value, b.value) }; }
65 VCTR_TARGET (
"sse4.1") static SSERegister
min (SSERegister a, SSERegister b) {
return { _mm_min_ps (a.value, b.value) }; }
70struct SSERegister<double>
/** Native SSE vector type held by this register: packed double precision floats. */
using NativeType = __m128d;

/** Number of double lanes in one register (16 bytes / 8 bytes per double = 2). */
static constexpr size_t numElements = sizeof (NativeType) / sizeof (double);
80 VCTR_TARGET (
"sse4.1") static SSERegister loadUnaligned (const
double* d) {
return { _mm_loadu_pd (d) }; }
81 VCTR_TARGET (
"sse4.1") static SSERegister loadAligned (const
double* d) {
return { _mm_load_pd (d) }; }
82 VCTR_TARGET (
"sse4.1") static SSERegister broadcast (
double x) {
return { _mm_load1_pd (&x) }; }
86 VCTR_TARGET (
"sse4.1") void storeUnaligned (
double* d)
const { _mm_storeu_pd (d, value); }
87 VCTR_TARGET (
"sse4.1") void storeAligned (
double* d)
const { _mm_store_pd (d, value); }
91 VCTR_TARGET (
"sse4.1") static SSERegister andNot (SSERegister a, SSERegister b) {
return { _mm_andnot_pd (a.value, b.value) }; }
95 VCTR_TARGET (
"sse4.1") static SSERegister mul (SSERegister a, SSERegister b) {
return { _mm_mul_pd (a.value, b.value) }; }
96 VCTR_TARGET (
"sse4.1") static SSERegister div (SSERegister a, SSERegister b) {
return { _mm_div_pd (a.value, b.value) }; }
97 VCTR_TARGET (
"sse4.1") static SSERegister add (SSERegister a, SSERegister b) {
return { _mm_add_pd (a.value, b.value) }; }
98 VCTR_TARGET (
"sse4.1") static SSERegister sub (SSERegister a, SSERegister b) {
return { _mm_sub_pd (a.value, b.value) }; }
99 VCTR_TARGET (
"sse4.1") static SSERegister
max (SSERegister a, SSERegister b) {
return { _mm_max_pd (a.value, b.value) }; }
100 VCTR_TARGET (
"sse4.1") static SSERegister
min (SSERegister a, SSERegister b) {
return { _mm_min_pd (a.value, b.value) }; }
105struct SSERegister<int32_t>
/** Native SSE vector type held by this register: the 128-bit integer vector. */
using NativeType = __m128i;

/** Number of int32_t lanes in one register (16 bytes / 4 bytes per element = 4). */
static constexpr size_t numElements = sizeof (NativeType) / sizeof (int32_t);
115 VCTR_TARGET (
"sse4.1") static SSERegister loadUnaligned (const int32_t* d) {
return { _mm_loadu_si128 (
reinterpret_cast<const __m128i*
> (d)) }; }
116 VCTR_TARGET (
"sse4.1") static SSERegister loadAligned (const int32_t* d) {
return { _mm_load_si128 (
reinterpret_cast<const __m128i*
> (d)) }; }
117 VCTR_TARGET (
"sse4.1") static SSERegister broadcast (int32_t x) {
return { _mm_set1_epi32 (x) }; }
121 VCTR_TARGET (
"sse4.1") void storeUnaligned (int32_t* d)
const { _mm_storeu_si128 (
reinterpret_cast<__m128i*
> (d), value); }
122 VCTR_TARGET (
"sse4.1") void storeAligned (int32_t* d)
const { _mm_store_si128 (
reinterpret_cast<__m128i*
> (d), value); }
129 VCTR_TARGET (
"sse4.1") static SSERegister
abs (SSERegister x) {
return { _mm_abs_epi32 (x.value) }; }
130 VCTR_TARGET (
"sse4.1") static SSERegister add (SSERegister a, SSERegister b) {
return { _mm_add_epi32 (a.value, b.value) }; }
131 VCTR_TARGET (
"sse4.1") static SSERegister sub (SSERegister a, SSERegister b) {
return { _mm_sub_epi32 (a.value, b.value) }; }
132 VCTR_TARGET (
"sse4.1") static SSERegister
max (SSERegister a, SSERegister b) {
return { _mm_max_epi32 (a.value, b.value) }; }
133 VCTR_TARGET (
"sse4.1") static SSERegister
min (SSERegister a, SSERegister b) {
return { _mm_min_epi32 (a.value, b.value) }; }
138struct SSERegister<uint32_t>
/** Native SSE vector type held by this register: the 128-bit integer vector. */
using NativeType = __m128i;

/** Number of uint32_t lanes in one register (16 bytes / 4 bytes per element = 4). */
static constexpr size_t numElements = sizeof (NativeType) / sizeof (uint32_t);
148 VCTR_TARGET (
"sse4.1") static SSERegister loadUnaligned (const uint32_t* d) {
return { _mm_loadu_si128 (
reinterpret_cast<const __m128i*
> (d)) }; }
149 VCTR_TARGET (
"sse4.1") static SSERegister loadAligned (const uint32_t* d) {
return { _mm_load_si128 (
reinterpret_cast<const __m128i*
> (d)) }; }
150 VCTR_TARGET (
"sse4.1") static SSERegister broadcast (uint32_t x) {
return { _mm_set1_epi32 ((int32_t) x) }; }
154 VCTR_TARGET (
"sse4.1") void storeUnaligned (uint32_t* d)
const { _mm_storeu_si128 (
reinterpret_cast<__m128i*
> (d), value); }
155 VCTR_TARGET (
"sse4.1") void storeAligned (uint32_t* d)
const { _mm_store_si128 (
reinterpret_cast<__m128i*
> (d), value); }
162 VCTR_TARGET (
"sse4.1") static SSERegister add (SSERegister a, SSERegister b) {
return { _mm_add_epi32 (a.value, b.value) }; }
163 VCTR_TARGET (
"sse4.1") static SSERegister sub (SSERegister a, SSERegister b) {
return { _mm_sub_epi32 (a.value, b.value) }; }
164 VCTR_TARGET (
"sse4.1") static SSERegister
max (SSERegister a, SSERegister b) {
return { _mm_max_epu32 (a.value, b.value) }; }
165 VCTR_TARGET (
"sse4.1") static SSERegister
min (SSERegister a, SSERegister b) {
return { _mm_min_epu32 (a.value, b.value) }; }
170struct SSERegister<int64_t>
/** Native SSE vector type held by this register: the 128-bit integer vector. */
using NativeType = __m128i;

/** Number of int64_t lanes in one register (16 bytes / 8 bytes per element = 2). */
static constexpr size_t numElements = sizeof (NativeType) / sizeof (int64_t);
180 VCTR_TARGET (
"sse4.1") static SSERegister loadUnaligned (const int64_t* d) {
return { _mm_loadu_si128 (
reinterpret_cast<const __m128i*
> (d)) }; }
181 VCTR_TARGET (
"sse4.1") static SSERegister loadAligned (const int64_t* d) {
return { _mm_load_si128 (
reinterpret_cast<const __m128i*
> (d)) }; }
182 VCTR_TARGET (
"sse4.1") static SSERegister broadcast (int64_t x) {
return { _mm_set1_epi64x (x) }; }
186 VCTR_TARGET (
"sse4.1") void storeUnaligned (int64_t* d)
const { _mm_storeu_si128 (
reinterpret_cast<__m128i*
> (d), value); }
187 VCTR_TARGET (
"sse4.1") void storeAligned (int64_t* d)
const { _mm_store_si128 (
reinterpret_cast<__m128i*
> (d), value); }
194 VCTR_TARGET (
"sse4.1") static SSERegister add (SSERegister a, SSERegister b) {
return { _mm_add_epi64 (a.value, b.value) }; }
195 VCTR_TARGET (
"sse4.1") static SSERegister sub (SSERegister a, SSERegister b) {
return { _mm_sub_epi64 (a.value, b.value) }; }
200struct SSERegister<uint64_t>
/** Native SSE vector type held by this register: the 128-bit integer vector. */
using NativeType = __m128i;

/** Number of uint64_t lanes in one register (16 bytes / 8 bytes per element = 2). */
static constexpr size_t numElements = sizeof (NativeType) / sizeof (uint64_t);
210 VCTR_TARGET (
"sse4.1") static SSERegister loadUnaligned (const uint64_t* d) {
return { _mm_loadu_si128 (
reinterpret_cast<const __m128i*
> (d)) }; }
211 VCTR_TARGET (
"sse4.1") static SSERegister loadAligned (const uint64_t* d) {
return { _mm_load_si128 (
reinterpret_cast<const __m128i*
> (d)) }; }
212 VCTR_TARGET (
"sse4.1") static SSERegister broadcast (uint64_t x) {
return { _mm_set1_epi64x ((int64_t) x) }; }
216 VCTR_TARGET (
"sse4.1") void storeUnaligned (uint64_t* d)
const { _mm_storeu_si128 (
reinterpret_cast<__m128i*
> (d), value); }
217 VCTR_TARGET (
"sse4.1") void storeAligned (uint64_t* d)
const { _mm_store_si128 (
reinterpret_cast<__m128i*
> (d), value); }
224 VCTR_TARGET (
"sse4.1") static SSERegister add (SSERegister a, SSERegister b) {
return { _mm_add_epi64 (a.value, b.value) }; }
225 VCTR_TARGET (
"sse4.1") static SSERegister sub (SSERegister a, SSERegister b) {
return { _mm_sub_epi64 (a.value, b.value) }; }
constexpr ExpressionChainBuilder< expressions::Max > max
Computes the maximum value of the source values.
Definition: Max.h:194
constexpr ExpressionChainBuilder< expressions::Abs > abs
Computes the absolute value of the source values.
Definition: Abs.h:133
constexpr ExpressionChainBuilder< expressions::Min > min
Computes the minimum value of the source values.
Definition: Min.h:194
The main namespace of the VCTR project.
Definition: Array.h:24
Definition: SSERegister.h:28