VCTR
Loading...
Searching...
No Matches
SSERegister.h
1/*
2 ==============================================================================
3 DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
5 Copyright 2022- by sonible GmbH.
6
7 This file is part of VCTR - Versatile Container Templates Reconceptualized.
8
9 VCTR is free software: you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License version 3
11 only, as published by the Free Software Foundation.
12
13 VCTR is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU Lesser General Public License version 3 for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 version 3 along with VCTR. If not, see <https://www.gnu.org/licenses/>.
20 ==============================================================================
21*/
22
23namespace vctr
24{
25
/** Primary SSERegister template, selected for every element type T that has no
    explicit specialization.

    It declares no data members. broadcast ignores its argument and returns a
    value-initialized instance.

    NOTE(review): presumably this exists so that generic code naming
    SSERegister<T>::broadcast still compiles for element types without SSE
    support — confirm against the call sites.
 */
template <class T>
struct SSERegister
{
    /** Accepts any value of type T and returns an empty register placeholder. */
    static SSERegister broadcast (const T&) { return {}; }
};
31
32#if VCTR_X64
33
34template <>
35struct SSERegister<float>
36{
37 static constexpr size_t numElements = 4;
38
39 using NativeType = __m128;
40 __m128 value;
41
42 //==============================================================================
43 // Loading
44 // clang-format off
45 VCTR_TARGET ("sse4.1") static SSERegister loadUnaligned (const float* d) { return { _mm_loadu_ps (d) }; }
46 VCTR_TARGET ("sse4.1") static SSERegister loadAligned (const float* d) { return { _mm_load_ps (d) }; }
47 VCTR_TARGET ("sse4.1") static SSERegister broadcast (float x) { return { _mm_load1_ps (&x) }; }
48
49 //==============================================================================
50 // Storing
51 VCTR_TARGET ("sse4.1") void storeUnaligned (float* d) const { _mm_storeu_ps (d, value); }
52 VCTR_TARGET ("sse4.1") void storeAligned (float* d) const { _mm_store_ps (d, value); }
53
54 //==============================================================================
55 // Bit Operations
56 VCTR_TARGET ("sse4.1") static SSERegister andNot (SSERegister a, SSERegister b) { return { _mm_andnot_ps (a.value, b.value) }; }
57
58 //==============================================================================
59 // Math
60 VCTR_TARGET ("sse4.1") static SSERegister mul (SSERegister a, SSERegister b) { return { _mm_mul_ps (a.value, b.value) }; }
61 VCTR_TARGET ("sse4.1") static SSERegister div (SSERegister a, SSERegister b) { return { _mm_div_ps (a.value, b.value) }; }
62 VCTR_TARGET ("sse4.1") static SSERegister add (SSERegister a, SSERegister b) { return { _mm_add_ps (a.value, b.value) }; }
63 VCTR_TARGET ("sse4.1") static SSERegister sub (SSERegister a, SSERegister b) { return { _mm_sub_ps (a.value, b.value) }; }
64 VCTR_TARGET ("sse4.1") static SSERegister max (SSERegister a, SSERegister b) { return { _mm_max_ps (a.value, b.value) }; }
65 VCTR_TARGET ("sse4.1") static SSERegister min (SSERegister a, SSERegister b) { return { _mm_min_ps (a.value, b.value) }; }
66 // clang-format on
67};
68
69template <>
70struct SSERegister<double>
71{
72 static constexpr size_t numElements = 2;
73
74 using NativeType = __m128d;
75 __m128d value;
76
77 //==============================================================================
78 // Loading
79 // clang-format off
80 VCTR_TARGET ("sse4.1") static SSERegister loadUnaligned (const double* d) { return { _mm_loadu_pd (d) }; }
81 VCTR_TARGET ("sse4.1") static SSERegister loadAligned (const double* d) { return { _mm_load_pd (d) }; }
82 VCTR_TARGET ("sse4.1") static SSERegister broadcast (double x) { return { _mm_load1_pd (&x) }; }
83
84 //==============================================================================
85 // Storing
86 VCTR_TARGET ("sse4.1") void storeUnaligned (double* d) const { _mm_storeu_pd (d, value); }
87 VCTR_TARGET ("sse4.1") void storeAligned (double* d) const { _mm_store_pd (d, value); }
88
89 //==============================================================================
90 // Bit Operations
91 VCTR_TARGET ("sse4.1") static SSERegister andNot (SSERegister a, SSERegister b) { return { _mm_andnot_pd (a.value, b.value) }; }
92
93 //==============================================================================
94 // Math
95 VCTR_TARGET ("sse4.1") static SSERegister mul (SSERegister a, SSERegister b) { return { _mm_mul_pd (a.value, b.value) }; }
96 VCTR_TARGET ("sse4.1") static SSERegister div (SSERegister a, SSERegister b) { return { _mm_div_pd (a.value, b.value) }; }
97 VCTR_TARGET ("sse4.1") static SSERegister add (SSERegister a, SSERegister b) { return { _mm_add_pd (a.value, b.value) }; }
98 VCTR_TARGET ("sse4.1") static SSERegister sub (SSERegister a, SSERegister b) { return { _mm_sub_pd (a.value, b.value) }; }
99 VCTR_TARGET ("sse4.1") static SSERegister max (SSERegister a, SSERegister b) { return { _mm_max_pd (a.value, b.value) }; }
100 VCTR_TARGET ("sse4.1") static SSERegister min (SSERegister a, SSERegister b) { return { _mm_min_pd (a.value, b.value) }; }
101 // clang-format on
102};
103
104template <>
105struct SSERegister<int32_t>
106{
107 static constexpr size_t numElements = 4;
108
109 using NativeType = __m128i;
110 __m128i value;
111
112 //==============================================================================
113 // Loading
114 // clang-format off
115 VCTR_TARGET ("sse4.1") static SSERegister loadUnaligned (const int32_t* d) { return { _mm_loadu_si128 (reinterpret_cast<const __m128i*> (d)) }; }
116 VCTR_TARGET ("sse4.1") static SSERegister loadAligned (const int32_t* d) { return { _mm_load_si128 (reinterpret_cast<const __m128i*> (d)) }; }
117 VCTR_TARGET ("sse4.1") static SSERegister broadcast (int32_t x) { return { _mm_set1_epi32 (x) }; }
118
119 //==============================================================================
120 // Storing
121 VCTR_TARGET ("sse4.1") void storeUnaligned (int32_t* d) const { _mm_storeu_si128 (reinterpret_cast<__m128i*> (d), value); }
122 VCTR_TARGET ("sse4.1") void storeAligned (int32_t* d) const { _mm_store_si128 (reinterpret_cast<__m128i*> (d), value); }
123
124 //==============================================================================
125 // Bit Operations
126
127 //==============================================================================
128 // Math
129 VCTR_TARGET ("sse4.1") static SSERegister abs (SSERegister x) { return { _mm_abs_epi32 (x.value) }; }
130 VCTR_TARGET ("sse4.1") static SSERegister add (SSERegister a, SSERegister b) { return { _mm_add_epi32 (a.value, b.value) }; }
131 VCTR_TARGET ("sse4.1") static SSERegister sub (SSERegister a, SSERegister b) { return { _mm_sub_epi32 (a.value, b.value) }; }
132 VCTR_TARGET ("sse4.1") static SSERegister max (SSERegister a, SSERegister b) { return { _mm_max_epi32 (a.value, b.value) }; }
133 VCTR_TARGET ("sse4.1") static SSERegister min (SSERegister a, SSERegister b) { return { _mm_min_epi32 (a.value, b.value) }; }
134 // clang-format on
135};
136
137template <>
138struct SSERegister<uint32_t>
139{
140 static constexpr size_t numElements = 4;
141
142 using NativeType = __m128i;
143 __m128i value;
144
145 //==============================================================================
146 // Loading
147 // clang-format off
148 VCTR_TARGET ("sse4.1") static SSERegister loadUnaligned (const uint32_t* d) { return { _mm_loadu_si128 (reinterpret_cast<const __m128i*> (d)) }; }
149 VCTR_TARGET ("sse4.1") static SSERegister loadAligned (const uint32_t* d) { return { _mm_load_si128 (reinterpret_cast<const __m128i*> (d)) }; }
150 VCTR_TARGET ("sse4.1") static SSERegister broadcast (uint32_t x) { return { _mm_set1_epi32 ((int32_t) x) }; }
151
152 //==============================================================================
153 // Storing
154 VCTR_TARGET ("sse4.1") void storeUnaligned (uint32_t* d) const { _mm_storeu_si128 (reinterpret_cast<__m128i*> (d), value); }
155 VCTR_TARGET ("sse4.1") void storeAligned (uint32_t* d) const { _mm_store_si128 (reinterpret_cast<__m128i*> (d), value); }
156
157 //==============================================================================
158 // Bit Operations
159
160 //==============================================================================
161 // Math
162 VCTR_TARGET ("sse4.1") static SSERegister add (SSERegister a, SSERegister b) { return { _mm_add_epi32 (a.value, b.value) }; }
163 VCTR_TARGET ("sse4.1") static SSERegister sub (SSERegister a, SSERegister b) { return { _mm_sub_epi32 (a.value, b.value) }; }
164 VCTR_TARGET ("sse4.1") static SSERegister max (SSERegister a, SSERegister b) { return { _mm_max_epu32 (a.value, b.value) }; }
165 VCTR_TARGET ("sse4.1") static SSERegister min (SSERegister a, SSERegister b) { return { _mm_min_epu32 (a.value, b.value) }; }
166 // clang-format on
167};
168
169template <>
170struct SSERegister<int64_t>
171{
172 static constexpr size_t numElements = 2;
173
174 using NativeType = __m128i;
175 __m128i value;
176
177 //==============================================================================
178 // Loading
179 // clang-format off
180 VCTR_TARGET ("sse4.1") static SSERegister loadUnaligned (const int64_t* d) { return { _mm_loadu_si128 (reinterpret_cast<const __m128i*> (d)) }; }
181 VCTR_TARGET ("sse4.1") static SSERegister loadAligned (const int64_t* d) { return { _mm_load_si128 (reinterpret_cast<const __m128i*> (d)) }; }
182 VCTR_TARGET ("sse4.1") static SSERegister broadcast (int64_t x) { return { _mm_set1_epi64x (x) }; }
183
184 //==============================================================================
185 // Storing
186 VCTR_TARGET ("sse4.1") void storeUnaligned (int64_t* d) const { _mm_storeu_si128 (reinterpret_cast<__m128i*> (d), value); }
187 VCTR_TARGET ("sse4.1") void storeAligned (int64_t* d) const { _mm_store_si128 (reinterpret_cast<__m128i*> (d), value); }
188
189 //==============================================================================
190 // Bit Operations
191
192 //==============================================================================
193 // Math
194 VCTR_TARGET ("sse4.1") static SSERegister add (SSERegister a, SSERegister b) { return { _mm_add_epi64 (a.value, b.value) }; }
195 VCTR_TARGET ("sse4.1") static SSERegister sub (SSERegister a, SSERegister b) { return { _mm_sub_epi64 (a.value, b.value) }; }
196 // clang-format on
197};
198
199template <>
200struct SSERegister<uint64_t>
201{
202 static constexpr size_t numElements = 2;
203
204 using NativeType = __m128i;
205 __m128i value;
206
207 //==============================================================================
208 // Loading
209 // clang-format off
210 VCTR_TARGET ("sse4.1") static SSERegister loadUnaligned (const uint64_t* d) { return { _mm_loadu_si128 (reinterpret_cast<const __m128i*> (d)) }; }
211 VCTR_TARGET ("sse4.1") static SSERegister loadAligned (const uint64_t* d) { return { _mm_load_si128 (reinterpret_cast<const __m128i*> (d)) }; }
212 VCTR_TARGET ("sse4.1") static SSERegister broadcast (uint64_t x) { return { _mm_set1_epi64x ((int64_t) x) }; }
213
214 //==============================================================================
215 // Storing
216 VCTR_TARGET ("sse4.1") void storeUnaligned (uint64_t* d) const { _mm_storeu_si128 (reinterpret_cast<__m128i*> (d), value); }
217 VCTR_TARGET ("sse4.1") void storeAligned (uint64_t* d) const { _mm_store_si128 (reinterpret_cast<__m128i*> (d), value); }
218
219 //==============================================================================
220 // Bit Operations
221
222 //==============================================================================
223 // Math
224 VCTR_TARGET ("sse4.1") static SSERegister add (SSERegister a, SSERegister b) { return { _mm_add_epi64 (a.value, b.value) }; }
225 VCTR_TARGET ("sse4.1") static SSERegister sub (SSERegister a, SSERegister b) { return { _mm_sub_epi64 (a.value, b.value) }; }
226 // clang-format on
227};
228
229#endif
230
231} // namespace vctr
constexpr ExpressionChainBuilder< expressions::Max > max
Computes the maximum value of the source values.
Definition: Max.h:194
constexpr ExpressionChainBuilder< expressions::Abs > abs
Computes the absolute value of the source values.
Definition: Abs.h:133
constexpr ExpressionChainBuilder< expressions::Min > min
Computes the minimum value of the source values.
Definition: Min.h:194
The main namespace of the VCTR project.
Definition: Array.h:24
Definition: SSERegister.h:28