VCTR
Loading...
Searching...
No Matches
AVXRegister.h
1/*
2 ==============================================================================
3 DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
5 Copyright 2022- by sonible GmbH.
6
7 This file is part of VCTR - Versatile Container Templates Reconceptualized.
8
9 VCTR is free software: you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License version 3
11 only, as published by the Free Software Foundation.
12
13 VCTR is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU Lesser General Public License version 3 for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 version 3 along with VCTR. If not, see <https://www.gnu.org/licenses/>.
20 ==============================================================================
21*/
22
23namespace vctr
24{
25
/** Generic fallback AVX register wrapper for element types without a
    dedicated specialisation below.

    It is an empty stub: broadcast ignores its argument and returns a
    default-constructed register, so generic SIMD code paths still compile
    for unsupported element types. Real SIMD work only happens in the
    per-type specialisations that follow.
 */
template <class T>
struct AVXRegister
{
    /** Returns a default-constructed register; the value is ignored in this generic fallback. */
    static AVXRegister broadcast (const T&) { return {}; }
};
31
32#if VCTR_X64
33
/** AVX register wrapper for 8 packed single-precision floats (__m256).

    All member functions are compiled for the "avx" target via VCTR_TARGET,
    so they must only be invoked on CPUs that support AVX.
 */
template <>
struct AVXRegister<float>
{
    /** Number of float lanes in one 256 bit register. */
    static constexpr size_t numElements = 8;

    /** The underlying intrinsic register type. */
    using NativeType = __m256;
    __m256 value;

    //==============================================================================
    // Loading
    // clang-format off
    /** Loads 8 floats from d; no alignment requirement. */
    VCTR_TARGET ("avx") static AVXRegister loadUnaligned (const float* d) { return { _mm256_loadu_ps (d) }; }
    /** Loads 8 floats from d; d must be 32 byte aligned. */
    VCTR_TARGET ("avx") static AVXRegister loadAligned (const float* d) { return { _mm256_load_ps (d) }; }
    /** Returns a register with x replicated into all 8 lanes. */
    VCTR_TARGET ("avx") static AVXRegister broadcast (float x) { return { _mm256_broadcast_ss (&x) }; }
    /** Joins two SSE registers: a becomes the upper 128 bit half, b the lower (per _mm256_set_m128 (hi, lo)). */
    VCTR_TARGET ("avx") static AVXRegister fromSSE (SSERegister<float> a, SSERegister<float> b) { return { _mm256_set_m128 (a.value, b.value) }; }

    //==============================================================================
    // Storing
    /** Stores the 8 lanes to d; no alignment requirement. */
    VCTR_TARGET ("avx") void storeUnaligned (float* d) const { _mm256_storeu_ps (d, value); }
    /** Stores the 8 lanes to d; d must be 32 byte aligned. */
    VCTR_TARGET ("avx") void storeAligned (float* d) const { _mm256_store_ps (d, value); }

    //==============================================================================
    // Bit Operations
    /** Returns the bitwise (NOT a) AND b — note the intrinsic negates the first operand. */
    VCTR_TARGET ("avx") static AVXRegister andNot (AVXRegister a, AVXRegister b) { return { _mm256_andnot_ps (a.value, b.value) }; }

    //==============================================================================
    // Math — all operations act lane-wise on the 8 floats.
    VCTR_TARGET ("avx") static AVXRegister mul (AVXRegister a, AVXRegister b) { return { _mm256_mul_ps (a.value, b.value) }; }
    VCTR_TARGET ("avx") static AVXRegister add (AVXRegister a, AVXRegister b) { return { _mm256_add_ps (a.value, b.value) }; }
    VCTR_TARGET ("avx") static AVXRegister sub (AVXRegister a, AVXRegister b) { return { _mm256_sub_ps (a.value, b.value) }; }
    VCTR_TARGET ("avx") static AVXRegister div (AVXRegister a, AVXRegister b) { return { _mm256_div_ps (a.value, b.value) }; }
    VCTR_TARGET ("avx") static AVXRegister max (AVXRegister a, AVXRegister b) { return { _mm256_max_ps (a.value, b.value) }; }
    VCTR_TARGET ("avx") static AVXRegister min (AVXRegister a, AVXRegister b) { return { _mm256_min_ps (a.value, b.value) }; }
    // clang-format on
};
69
/** AVX register wrapper for 4 packed double-precision floats (__m256d).

    All member functions are compiled for the "avx" target via VCTR_TARGET,
    so they must only be invoked on CPUs that support AVX.
 */
template <>
struct AVXRegister<double>
{
    /** Number of double lanes in one 256 bit register. */
    static constexpr size_t numElements = 4;

    /** The underlying intrinsic register type. */
    using NativeType = __m256d;
    __m256d value;

    //==============================================================================
    // Loading
    // clang-format off
    /** Loads 4 doubles from d; no alignment requirement. */
    VCTR_TARGET ("avx") static AVXRegister loadUnaligned (const double* d) { return { _mm256_loadu_pd (d) }; }
    /** Loads 4 doubles from d; d must be 32 byte aligned. */
    VCTR_TARGET ("avx") static AVXRegister loadAligned (const double* d) { return { _mm256_load_pd (d) }; }
    /** Returns a register with x replicated into all 4 lanes. */
    VCTR_TARGET ("avx") static AVXRegister broadcast (double x) { return { _mm256_broadcast_sd (&x) }; }
    /** Joins two SSE registers: a becomes the upper 128 bit half, b the lower (per _mm256_set_m128d (hi, lo)). */
    VCTR_TARGET ("avx") static AVXRegister fromSSE (SSERegister<double> a, SSERegister<double> b) { return { _mm256_set_m128d (a.value, b.value) }; }

    //==============================================================================
    // Storing
    /** Stores the 4 lanes to d; no alignment requirement. */
    VCTR_TARGET ("avx") void storeUnaligned (double* d) const { _mm256_storeu_pd (d, value); }
    /** Stores the 4 lanes to d; d must be 32 byte aligned. */
    VCTR_TARGET ("avx") void storeAligned (double* d) const { _mm256_store_pd (d, value); }

    //==============================================================================
    // Bit Operations
    /** Returns the bitwise (NOT a) AND b — note the intrinsic negates the first operand. */
    VCTR_TARGET ("avx") static AVXRegister andNot (AVXRegister a, AVXRegister b) { return { _mm256_andnot_pd (a.value, b.value) }; }

    //==============================================================================
    // Math — all operations act lane-wise on the 4 doubles.
    VCTR_TARGET ("avx") static AVXRegister mul (AVXRegister a, AVXRegister b) { return { _mm256_mul_pd (a.value, b.value) }; }
    VCTR_TARGET ("avx") static AVXRegister add (AVXRegister a, AVXRegister b) { return { _mm256_add_pd (a.value, b.value) }; }
    VCTR_TARGET ("avx") static AVXRegister sub (AVXRegister a, AVXRegister b) { return { _mm256_sub_pd (a.value, b.value) }; }
    VCTR_TARGET ("avx") static AVXRegister div (AVXRegister a, AVXRegister b) { return { _mm256_div_pd (a.value, b.value) }; }
    VCTR_TARGET ("avx") static AVXRegister max (AVXRegister a, AVXRegister b) { return { _mm256_max_pd (a.value, b.value) }; }
    VCTR_TARGET ("avx") static AVXRegister min (AVXRegister a, AVXRegister b) { return { _mm256_min_pd (a.value, b.value) }; }
    // clang-format on
};
105
/** AVX register wrapper for 8 packed signed 32 bit integers (__m256i).

    Loads and stores only require AVX, but the arithmetic members are
    compiled for the "avx2" target — 256 bit integer math needs AVX2.
 */
template <>
struct AVXRegister<int32_t>
{
    /** Number of int32 lanes in one 256 bit register. */
    static constexpr size_t numElements = 8;

    /** The underlying intrinsic register type (shared by all integer specialisations). */
    using NativeType = __m256i;
    __m256i value;

    //==============================================================================
    // Loading
    // clang-format off
    /** Loads 8 values from d; no alignment requirement. The cast adapts the typed pointer to the intrinsic's __m256i*. */
    VCTR_TARGET ("avx") static AVXRegister loadUnaligned (const int32_t* d) { return { _mm256_loadu_si256 (reinterpret_cast<const __m256i*> (d)) }; }
    /** Loads 8 values from d; d must be 32 byte aligned. */
    VCTR_TARGET ("avx") static AVXRegister loadAligned (const int32_t* d) { return { _mm256_load_si256 (reinterpret_cast<const __m256i*> (d)) }; }
    /** Returns a register with x replicated into all 8 lanes. */
    VCTR_TARGET ("avx") static AVXRegister broadcast (int32_t x) { return { _mm256_set1_epi32 (x) }; }
    /** Joins two SSE registers: a becomes the upper 128 bit half, b the lower (per _mm256_set_m128i (hi, lo)). */
    VCTR_TARGET ("avx") static AVXRegister fromSSE (SSERegister<int32_t> a, SSERegister<int32_t> b) { return { _mm256_set_m128i (a.value, b.value) }; }

    //==============================================================================
    // Storing
    /** Stores the 8 lanes to d; no alignment requirement. */
    VCTR_TARGET ("avx") void storeUnaligned (int32_t* d) const { _mm256_storeu_si256 (reinterpret_cast<__m256i*> (d), value); }
    /** Stores the 8 lanes to d; d must be 32 byte aligned. */
    VCTR_TARGET ("avx") void storeAligned (int32_t* d) const { _mm256_store_si256 (reinterpret_cast<__m256i*> (d), value); }

    //==============================================================================
    // Bit Operations
    // (none implemented for this element type yet)

    //==============================================================================
    // Math — lane-wise; requires AVX2.
    /** Lane-wise absolute value. Note: INT32_MIN maps to INT32_MIN (two's complement wrap, per _mm256_abs_epi32). */
    VCTR_TARGET ("avx2") static AVXRegister abs (AVXRegister x) { return { _mm256_abs_epi32 (x.value) }; }
    VCTR_TARGET ("avx2") static AVXRegister add (AVXRegister a, AVXRegister b) { return { _mm256_add_epi32 (a.value, b.value) }; }
    VCTR_TARGET ("avx2") static AVXRegister sub (AVXRegister a, AVXRegister b) { return { _mm256_sub_epi32 (a.value, b.value) }; }
    VCTR_TARGET ("avx2") static AVXRegister max (AVXRegister a, AVXRegister b) { return { _mm256_max_epi32 (a.value, b.value) }; }
    VCTR_TARGET ("avx2") static AVXRegister min (AVXRegister a, AVXRegister b) { return { _mm256_min_epi32 (a.value, b.value) }; }
    // clang-format on
};
139
/** AVX register wrapper for 8 packed unsigned 32 bit integers (__m256i).

    Loads and stores only require AVX, but the arithmetic members are
    compiled for the "avx2" target — 256 bit integer math needs AVX2.
    min/max use the unsigned epu32 comparison variants.
 */
template <>
struct AVXRegister<uint32_t>
{
    /** Number of uint32 lanes in one 256 bit register. */
    static constexpr size_t numElements = 8;

    /** The underlying intrinsic register type (shared by all integer specialisations). */
    using NativeType = __m256i;
    __m256i value;

    //==============================================================================
    // Loading
    // clang-format off
    /** Loads 8 values from d; no alignment requirement. The cast adapts the typed pointer to the intrinsic's __m256i*. */
    VCTR_TARGET ("avx") static AVXRegister loadUnaligned (const uint32_t* d) { return { _mm256_loadu_si256 (reinterpret_cast<const __m256i*> (d)) }; }
    /** Loads 8 values from d; d must be 32 byte aligned. */
    VCTR_TARGET ("avx") static AVXRegister loadAligned (const uint32_t* d) { return { _mm256_load_si256 (reinterpret_cast<const __m256i*> (d)) }; }
    /** Returns a register with x replicated into all 8 lanes. The int32_t cast is bit-pattern preserving — the intrinsic only takes a signed argument. */
    VCTR_TARGET ("avx") static AVXRegister broadcast (uint32_t x) { return { _mm256_set1_epi32 ((int32_t) x) }; }
    /** Joins two SSE registers: a becomes the upper 128 bit half, b the lower (per _mm256_set_m128i (hi, lo)). */
    VCTR_TARGET ("avx") static AVXRegister fromSSE (SSERegister<uint32_t> a, SSERegister<uint32_t> b) { return { _mm256_set_m128i (a.value, b.value) }; }

    //==============================================================================
    // Storing
    /** Stores the 8 lanes to d; no alignment requirement. */
    VCTR_TARGET ("avx") void storeUnaligned (uint32_t* d) const { _mm256_storeu_si256 (reinterpret_cast<__m256i*> (d), value); }
    /** Stores the 8 lanes to d; d must be 32 byte aligned. */
    VCTR_TARGET ("avx") void storeAligned (uint32_t* d) const { _mm256_store_si256 (reinterpret_cast<__m256i*> (d), value); }

    //==============================================================================
    // Bit Operations
    // (none implemented for this element type yet)

    //==============================================================================
    // Math — lane-wise; requires AVX2. Addition/subtraction share the epi32
    // intrinsics with the signed case (identical bit-level behaviour).
    VCTR_TARGET ("avx2") static AVXRegister add (AVXRegister a, AVXRegister b) { return { _mm256_add_epi32 (a.value, b.value) }; }
    VCTR_TARGET ("avx2") static AVXRegister sub (AVXRegister a, AVXRegister b) { return { _mm256_sub_epi32 (a.value, b.value) }; }
    VCTR_TARGET ("avx2") static AVXRegister max (AVXRegister a, AVXRegister b) { return { _mm256_max_epu32 (a.value, b.value) }; }
    VCTR_TARGET ("avx2") static AVXRegister min (AVXRegister a, AVXRegister b) { return { _mm256_min_epu32 (a.value, b.value) }; }
    // clang-format on
};
172
/** AVX register wrapper for 4 packed signed 64 bit integers (__m256i).

    Loads and stores only require AVX; add/sub are compiled for the "avx2"
    target. Only add and sub are provided — presumably because 64 bit
    min/max intrinsics are not available below AVX-512 (TODO confirm).
 */
template <>
struct AVXRegister<int64_t>
{
    /** Number of int64 lanes in one 256 bit register. */
    static constexpr size_t numElements = 4;

    /** The underlying intrinsic register type (shared by all integer specialisations). */
    using NativeType = __m256i;
    __m256i value;

    //==============================================================================
    // Loading
    // clang-format off
    /** Loads 4 values from d; no alignment requirement. The cast adapts the typed pointer to the intrinsic's __m256i*. */
    VCTR_TARGET ("avx") static AVXRegister loadUnaligned (const int64_t* d) { return { _mm256_loadu_si256 (reinterpret_cast<const __m256i*> (d)) }; }
    /** Loads 4 values from d; d must be 32 byte aligned. */
    VCTR_TARGET ("avx") static AVXRegister loadAligned (const int64_t* d) { return { _mm256_load_si256 (reinterpret_cast<const __m256i*> (d)) }; }
    /** Returns a register with x replicated into all 4 lanes. */
    VCTR_TARGET ("avx") static AVXRegister broadcast (int64_t x) { return { _mm256_set1_epi64x (x) }; }
    /** Joins two SSE registers: a becomes the upper 128 bit half, b the lower (per _mm256_set_m128i (hi, lo)). */
    VCTR_TARGET ("avx") static AVXRegister fromSSE (SSERegister<int64_t> a, SSERegister<int64_t> b) { return { _mm256_set_m128i (a.value, b.value) }; }

    //==============================================================================
    // Storing
    /** Stores the 4 lanes to d; no alignment requirement. */
    VCTR_TARGET ("avx") void storeUnaligned (int64_t* d) const { _mm256_storeu_si256 (reinterpret_cast<__m256i*> (d), value); }
    /** Stores the 4 lanes to d; d must be 32 byte aligned. */
    VCTR_TARGET ("avx") void storeAligned (int64_t* d) const { _mm256_store_si256 (reinterpret_cast<__m256i*> (d), value); }

    //==============================================================================
    // Bit Operations
    // (none implemented for this element type yet)

    //==============================================================================
    // Math — lane-wise; requires AVX2.
    VCTR_TARGET ("avx2") static AVXRegister add (AVXRegister a, AVXRegister b) { return { _mm256_add_epi64 (a.value, b.value) }; }
    VCTR_TARGET ("avx2") static AVXRegister sub (AVXRegister a, AVXRegister b) { return { _mm256_sub_epi64 (a.value, b.value) }; }
    // clang-format on
};
203
/** AVX register wrapper for 4 packed unsigned 64 bit integers (__m256i).

    Loads and stores only require AVX; add/sub are compiled for the "avx2"
    target. Only add and sub are provided — presumably because 64 bit
    min/max intrinsics are not available below AVX-512 (TODO confirm).
 */
template <>
struct AVXRegister<uint64_t>
{
    /** Number of uint64 lanes in one 256 bit register. */
    static constexpr size_t numElements = 4;

    /** The underlying intrinsic register type (shared by all integer specialisations). */
    using NativeType = __m256i;
    __m256i value;

    //==============================================================================
    // Loading
    // clang-format off
    /** Loads 4 values from d; no alignment requirement. The cast adapts the typed pointer to the intrinsic's __m256i*. */
    VCTR_TARGET ("avx") static AVXRegister loadUnaligned (const uint64_t* d) { return { _mm256_loadu_si256 (reinterpret_cast<const __m256i*> (d)) }; }
    /** Loads 4 values from d; d must be 32 byte aligned. */
    VCTR_TARGET ("avx") static AVXRegister loadAligned (const uint64_t* d) { return { _mm256_load_si256 (reinterpret_cast<const __m256i*> (d)) }; }
    /** Returns a register with x replicated into all 4 lanes. The int64_t cast is bit-pattern preserving — the intrinsic only takes a signed argument. */
    VCTR_TARGET ("avx") static AVXRegister broadcast (uint64_t x) { return { _mm256_set1_epi64x ((int64_t) x) }; }
    /** Joins two SSE registers: a becomes the upper 128 bit half, b the lower (per _mm256_set_m128i (hi, lo)). */
    VCTR_TARGET ("avx") static AVXRegister fromSSE (SSERegister<uint64_t> a, SSERegister<uint64_t> b) { return { _mm256_set_m128i (a.value, b.value) }; }

    //==============================================================================
    // Storing
    /** Stores the 4 lanes to d; no alignment requirement. */
    VCTR_TARGET ("avx") void storeUnaligned (uint64_t* d) const { _mm256_storeu_si256 (reinterpret_cast<__m256i*> (d), value); }
    /** Stores the 4 lanes to d; d must be 32 byte aligned. */
    VCTR_TARGET ("avx") void storeAligned (uint64_t* d) const { _mm256_store_si256 (reinterpret_cast<__m256i*> (d), value); }

    //==============================================================================
    // Bit Operations
    // (none implemented for this element type yet)

    //==============================================================================
    // Math — lane-wise; requires AVX2. Addition/subtraction share the epi64
    // intrinsics with the signed case (identical bit-level behaviour).
    VCTR_TARGET ("avx2") static AVXRegister add (AVXRegister a, AVXRegister b) { return { _mm256_add_epi64 (a.value, b.value) }; }
    VCTR_TARGET ("avx2") static AVXRegister sub (AVXRegister a, AVXRegister b) { return { _mm256_sub_epi64 (a.value, b.value) }; }
    // clang-format on
};
234
235#endif
236
237} // namespace vctr
constexpr ExpressionChainBuilder< expressions::Max > max
Computes the maximum value of the source values.
Definition: Max.h:194
constexpr ExpressionChainBuilder< expressions::Abs > abs
Computes the absolute value of the source values.
Definition: Abs.h:133
constexpr ExpressionChainBuilder< expressions::Min > min
Computes the minimum value of the source values.
Definition: Min.h:194
The main namespace of the VCTR project.
Definition: Array.h:24
Definition: AVXRegister.h:28