29 static AVXRegister broadcast (
const T&) {
return {}; }
/** Number of lanes reported for this register type (eight 32-bit floats fill a 256-bit __m256). */
37 static constexpr size_t numElements = 8;
/** Intrinsic vector type; presumably the type of the value member, whose declaration is not visible here. */
39 using NativeType = __m256;
45 VCTR_TARGET (
"avx") static
AVXRegister loadUnaligned (const
float* d) {
return { _mm256_loadu_ps (d) }; }
46 VCTR_TARGET (
"avx") static AVXRegister loadAligned (const
float* d) {
return { _mm256_load_ps (d) }; }
47 VCTR_TARGET (
"avx") static AVXRegister broadcast (
float x) {
return { _mm256_broadcast_ss (&x) }; }
48 VCTR_TARGET (
"avx") static AVXRegister fromSSE (SSERegister<
float> a, SSERegister<
float> b) {
return { _mm256_set_m128 (a.value, b.value) }; }
52 VCTR_TARGET (
"avx") void storeUnaligned (
float* d)
const { _mm256_storeu_ps (d, value); }
53 VCTR_TARGET (
"avx") void storeAligned (
float* d)
const { _mm256_store_ps (d, value); }
57 VCTR_TARGET (
"avx") static AVXRegister andNot (AVXRegister a, AVXRegister b) {
return { _mm256_andnot_ps (a.value, b.value) }; }
61 VCTR_TARGET (
"avx") static AVXRegister mul (AVXRegister a, AVXRegister b) {
return { _mm256_mul_ps (a.value, b.value) }; }
62 VCTR_TARGET (
"avx") static AVXRegister add (AVXRegister a, AVXRegister b) {
return { _mm256_add_ps (a.value, b.value) }; }
63 VCTR_TARGET (
"avx") static AVXRegister sub (AVXRegister a, AVXRegister b) {
return { _mm256_sub_ps (a.value, b.value) }; }
64 VCTR_TARGET (
"avx") static AVXRegister div (AVXRegister a, AVXRegister b) {
return { _mm256_div_ps (a.value, b.value) }; }
65 VCTR_TARGET (
"avx") static AVXRegister
max (AVXRegister a, AVXRegister b) {
return { _mm256_max_ps (a.value, b.value) }; }
66 VCTR_TARGET (
"avx") static AVXRegister
min (AVXRegister a, AVXRegister b) {
return { _mm256_min_ps (a.value, b.value) }; }
71struct AVXRegister<double>
/** Number of lanes reported for this register type (four 64-bit doubles fill a 256-bit __m256d). */
73 static constexpr size_t numElements = 4;
/** Intrinsic vector type; presumably the type of the value member, whose declaration is not visible here. */
75 using NativeType = __m256d;
81 VCTR_TARGET (
"avx") static AVXRegister loadUnaligned (const
double* d) {
return { _mm256_loadu_pd (d) }; }
82 VCTR_TARGET (
"avx") static AVXRegister loadAligned (const
double* d) {
return { _mm256_load_pd (d) }; }
83 VCTR_TARGET (
"avx") static AVXRegister broadcast (
double x) {
return { _mm256_broadcast_sd (&x) }; }
84 VCTR_TARGET (
"avx") static AVXRegister fromSSE (SSERegister<
double> a, SSERegister<
double> b) {
return { _mm256_set_m128d (a.value, b.value) }; }
88 VCTR_TARGET (
"avx") void storeUnaligned (
double* d)
const { _mm256_storeu_pd (d, value); }
89 VCTR_TARGET (
"avx") void storeAligned (
double* d)
const { _mm256_store_pd (d, value); }
93 VCTR_TARGET (
"avx") static AVXRegister andNot (AVXRegister a, AVXRegister b) {
return { _mm256_andnot_pd (a.value, b.value) }; }
97 VCTR_TARGET (
"avx") static AVXRegister mul (AVXRegister a, AVXRegister b) {
return { _mm256_mul_pd (a.value, b.value) }; }
98 VCTR_TARGET (
"avx") static AVXRegister add (AVXRegister a, AVXRegister b) {
return { _mm256_add_pd (a.value, b.value) }; }
99 VCTR_TARGET (
"avx") static AVXRegister sub (AVXRegister a, AVXRegister b) {
return { _mm256_sub_pd (a.value, b.value) }; }
100 VCTR_TARGET (
"avx") static AVXRegister div (AVXRegister a, AVXRegister b) {
return { _mm256_div_pd (a.value, b.value) }; }
101 VCTR_TARGET (
"avx") static AVXRegister
max (AVXRegister a, AVXRegister b) {
return { _mm256_max_pd (a.value, b.value) }; }
102 VCTR_TARGET (
"avx") static AVXRegister
min (AVXRegister a, AVXRegister b) {
return { _mm256_min_pd (a.value, b.value) }; }
107struct AVXRegister<int32_t>
/** Number of lanes reported for this register type (eight 32-bit integers fill a 256-bit __m256i). */
109 static constexpr size_t numElements = 8;
/** Intrinsic vector type; presumably the type of the value member, whose declaration is not visible here. */
111 using NativeType = __m256i;
117 VCTR_TARGET (
"avx") static AVXRegister loadUnaligned (const int32_t* d) {
return { _mm256_loadu_si256 (
reinterpret_cast<const __m256i*
> (d)) }; }
118 VCTR_TARGET (
"avx") static AVXRegister loadAligned (const int32_t* d) {
return { _mm256_load_si256 (
reinterpret_cast<const __m256i*
> (d)) }; }
119 VCTR_TARGET (
"avx") static AVXRegister broadcast (int32_t x) {
return { _mm256_set1_epi32 (x) }; }
120 VCTR_TARGET (
"avx") static AVXRegister fromSSE (SSERegister<int32_t> a, SSERegister<int32_t> b) {
return { _mm256_set_m128i (a.value, b.value) }; }
124 VCTR_TARGET (
"avx") void storeUnaligned (int32_t* d)
const { _mm256_storeu_si256 (
reinterpret_cast<__m256i*
> (d), value); }
125 VCTR_TARGET (
"avx") void storeAligned (int32_t* d)
const { _mm256_store_si256 (
reinterpret_cast<__m256i*
> (d), value); }
132 VCTR_TARGET (
"avx2") static AVXRegister
abs (AVXRegister x) {
return { _mm256_abs_epi32 (x.value) }; }
133 VCTR_TARGET (
"avx2") static AVXRegister add (AVXRegister a, AVXRegister b) {
return { _mm256_add_epi32 (a.value, b.value) }; }
134 VCTR_TARGET (
"avx2") static AVXRegister sub (AVXRegister a, AVXRegister b) {
return { _mm256_sub_epi32 (a.value, b.value) }; }
135 VCTR_TARGET (
"avx2") static AVXRegister
max (AVXRegister a, AVXRegister b) {
return { _mm256_max_epi32 (a.value, b.value) }; }
136 VCTR_TARGET (
"avx2") static AVXRegister
min (AVXRegister a, AVXRegister b) {
return { _mm256_min_epi32 (a.value, b.value) }; }
141struct AVXRegister<uint32_t>
/** Number of lanes reported for this register type (eight 32-bit integers fill a 256-bit __m256i). */
143 static constexpr size_t numElements = 8;
/** Intrinsic vector type; presumably the type of the value member, whose declaration is not visible here. */
145 using NativeType = __m256i;
151 VCTR_TARGET (
"avx") static AVXRegister loadUnaligned (const uint32_t* d) {
return { _mm256_loadu_si256 (
reinterpret_cast<const __m256i*
> (d)) }; }
152 VCTR_TARGET (
"avx") static AVXRegister loadAligned (const uint32_t* d) {
return { _mm256_load_si256 (
reinterpret_cast<const __m256i*
> (d)) }; }
153 VCTR_TARGET (
"avx") static AVXRegister broadcast (uint32_t x) {
return { _mm256_set1_epi32 ((int32_t) x) }; }
154 VCTR_TARGET (
"avx") static AVXRegister fromSSE (SSERegister<uint32_t> a, SSERegister<uint32_t> b) {
return { _mm256_set_m128i (a.value, b.value) }; }
158 VCTR_TARGET (
"avx") void storeUnaligned (uint32_t* d)
const { _mm256_storeu_si256 (
reinterpret_cast<__m256i*
> (d), value); }
159 VCTR_TARGET (
"avx") void storeAligned (uint32_t* d)
const { _mm256_store_si256 (
reinterpret_cast<__m256i*
> (d), value); }
166 VCTR_TARGET (
"avx2") static AVXRegister add (AVXRegister a, AVXRegister b) {
return { _mm256_add_epi32 (a.value, b.value) }; }
167 VCTR_TARGET (
"avx2") static AVXRegister sub (AVXRegister a, AVXRegister b) {
return { _mm256_sub_epi32 (a.value, b.value) }; }
168 VCTR_TARGET (
"avx2") static AVXRegister
max (AVXRegister a, AVXRegister b) {
return { _mm256_max_epu32 (a.value, b.value) }; }
169 VCTR_TARGET (
"avx2") static AVXRegister
min (AVXRegister a, AVXRegister b) {
return { _mm256_min_epu32 (a.value, b.value) }; }
174struct AVXRegister<int64_t>
/** Number of lanes reported for this register type (four 64-bit integers fill a 256-bit __m256i). */
176 static constexpr size_t numElements = 4;
/** Intrinsic vector type; presumably the type of the value member, whose declaration is not visible here. */
178 using NativeType = __m256i;
184 VCTR_TARGET (
"avx") static AVXRegister loadUnaligned (const int64_t* d) {
return { _mm256_loadu_si256 (
reinterpret_cast<const __m256i*
> (d)) }; }
185 VCTR_TARGET (
"avx") static AVXRegister loadAligned (const int64_t* d) {
return { _mm256_load_si256 (
reinterpret_cast<const __m256i*
> (d)) }; }
186 VCTR_TARGET (
"avx") static AVXRegister broadcast (int64_t x) {
return { _mm256_set1_epi64x (x) }; }
187 VCTR_TARGET (
"avx") static AVXRegister fromSSE (SSERegister<int64_t> a, SSERegister<int64_t> b) {
return { _mm256_set_m128i (a.value, b.value) }; }
191 VCTR_TARGET (
"avx") void storeUnaligned (int64_t* d)
const { _mm256_storeu_si256 (
reinterpret_cast<__m256i*
> (d), value); }
192 VCTR_TARGET (
"avx") void storeAligned (int64_t* d)
const { _mm256_store_si256 (
reinterpret_cast<__m256i*
> (d), value); }
199 VCTR_TARGET (
"avx2") static AVXRegister add (AVXRegister a, AVXRegister b) {
return { _mm256_add_epi64 (a.value, b.value) }; }
200 VCTR_TARGET (
"avx2") static AVXRegister sub (AVXRegister a, AVXRegister b) {
return { _mm256_sub_epi64 (a.value, b.value) }; }
205struct AVXRegister<uint64_t>
/** Number of lanes reported for this register type (four 64-bit integers fill a 256-bit __m256i). */
207 static constexpr size_t numElements = 4;
/** Intrinsic vector type; presumably the type of the value member, whose declaration is not visible here. */
209 using NativeType = __m256i;
215 VCTR_TARGET (
"avx") static AVXRegister loadUnaligned (const uint64_t* d) {
return { _mm256_loadu_si256 (
reinterpret_cast<const __m256i*
> (d)) }; }
216 VCTR_TARGET (
"avx") static AVXRegister loadAligned (const uint64_t* d) {
return { _mm256_load_si256 (
reinterpret_cast<const __m256i*
> (d)) }; }
217 VCTR_TARGET (
"avx") static AVXRegister broadcast (uint64_t x) {
return { _mm256_set1_epi64x ((int64_t) x) }; }
218 VCTR_TARGET (
"avx") static AVXRegister fromSSE (SSERegister<uint64_t> a, SSERegister<uint64_t> b) {
return { _mm256_set_m128i (a.value, b.value) }; }
222 VCTR_TARGET (
"avx") void storeUnaligned (uint64_t* d)
const { _mm256_storeu_si256 (
reinterpret_cast<__m256i*
> (d), value); }
223 VCTR_TARGET (
"avx") void storeAligned (uint64_t* d)
const { _mm256_store_si256 (
reinterpret_cast<__m256i*
> (d), value); }
230 VCTR_TARGET (
"avx2") static AVXRegister add (AVXRegister a, AVXRegister b) {
return { _mm256_add_epi64 (a.value, b.value) }; }
231 VCTR_TARGET (
"avx2") static AVXRegister sub (AVXRegister a, AVXRegister b) {
return { _mm256_sub_epi64 (a.value, b.value) }; }
constexpr ExpressionChainBuilder< expressions::Max > max
Computes the maximum value of the source values.
Definition: Max.h:194
constexpr ExpressionChainBuilder< expressions::Abs > abs
Computes the absolute value of the source values.
Definition: Abs.h:133
constexpr ExpressionChainBuilder< expressions::Min > min
Computes the minimum value of the source values.
Definition: Min.h:194
The main namespace of the VCTR project.
Definition: Array.h:24
Definition: AVXRegister.h:28