VCTR
Loading...
Searching...
No Matches
Config.h
/*
 ==============================================================================
 DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

 Copyright 2022- by sonible GmbH.

 This file is part of VCTR - Versatile Container Templates Reconceptualized.

 VCTR is free software: you can redistribute it and/or modify
 it under the terms of the GNU Lesser General Public License version 3
 only, as published by the Free Software Foundation.

 VCTR is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Lesser General Public License version 3 for more details.

 You should have received a copy of the GNU Lesser General Public License
 version 3 along with VCTR. If not, see <https://www.gnu.org/licenses/>.
 ==============================================================================
*/
22
//==============================================================================
// Platform defines
//==============================================================================
// The #if / #elif chain below sets at most one of VCTR_WINDOWS, VCTR_LINUX,
// VCTR_APPLE and VCTR_WASM to 1, depending on the predefined compiler macros.
// Every macro that was not selected is defined as 0 afterwards, so all four
// can be used in plain #if expressions and as constant initialisers.
#if defined(_WIN32) || defined(_WIN64)
#define VCTR_WINDOWS 1
#elif defined(LINUX) || defined(__linux__)
#define VCTR_LINUX 1
#elif defined(__APPLE_CPP__) || defined(__APPLE_CC__)
#define VCTR_APPLE 1
#elif defined(__wasm__)
#define VCTR_WASM 1
#endif

#ifndef VCTR_WINDOWS
#define VCTR_WINDOWS 0
#endif

#ifndef VCTR_LINUX
#define VCTR_LINUX 0
#endif

#ifndef VCTR_APPLE
#define VCTR_APPLE 0
#endif

#ifndef VCTR_WASM
#define VCTR_WASM 0
#endif
51
//==============================================================================
// CPU architecture defines
//==============================================================================
// AArch64 targets are reported as VCTR_ARM, everything else as VCTR_X64.
// NOTE(review): the #else branch also catches targets that are neither ARM64
// nor x64 (e.g. a __wasm__ build) - confirm that this is intended.
//
// VCTR_MAX_SIMD_REGISTER_SIZE is 16 for ARM and 32 for x64, presumably the
// widest SIMD register in bytes that the library targets on that architecture.
#if defined(__aarch64__) || defined(_M_ARM64)
#define VCTR_ARM 1
#define VCTR_X64 0
#define VCTR_MAX_SIMD_REGISTER_SIZE 16
#else
#define VCTR_ARM 0
#define VCTR_X64 1
#define VCTR_MAX_SIMD_REGISTER_SIZE 32
#endif
64
//==============================================================================
// Compiler defines
//==============================================================================
// At most one of VCTR_CLANG, VCTR_GCC and VCTR_MSVC is set to 1 by the chain
// below; the others are defined as 0 afterwards. __clang__ is tested first, so
// a clang based compiler that additionally defines __GNUC__ or _MSC_VER is
// reported as clang.
// VCTR_COMPILER_NAME expands to the bare compiler name token.
#if defined(__clang__)
#define VCTR_CLANG 1
#define VCTR_COMPILER_NAME clang
#elif defined(__GNUC__)
#define VCTR_GCC 1
#define VCTR_COMPILER_NAME GCC
#elif defined(_MSC_VER)
#define VCTR_MSVC 1
#define VCTR_COMPILER_NAME MSVC
// Since Visual Studio 2019, there is builtin SVML support in MSVC
// https://devblogs.microsoft.com/cppblog/msvc-backend-updates-in-visual-studio-2019-preview-2/
#define VCTR_HAS_SVML (_MSC_VER >= 1921)
#endif

#ifndef VCTR_CLANG
#define VCTR_CLANG 0
#endif

#ifndef VCTR_GCC
#define VCTR_GCC 0
#endif

#ifndef VCTR_MSVC
#define VCTR_MSVC 0
#endif

// Only the MSVC branch above can enable SVML; every other compiler gets 0.
#ifndef VCTR_HAS_SVML
#define VCTR_HAS_SVML 0
#endif
97
//==============================================================================
// Build type defines
//==============================================================================
// VCTR_DEBUG is 1 if DEBUG or _DEBUG is defined, or if neither NDEBUG nor
// _NDEBUG is defined. It is 0 in all other cases.
#if defined(DEBUG) || defined(_DEBUG) || ! (defined(NDEBUG) || defined(_NDEBUG))
#define VCTR_DEBUG 1
#else
#define VCTR_DEBUG 0
#endif
106
//==============================================================================
// User supplied defines
//==============================================================================
// Each of the following macros keeps a value that was defined before this
// header is processed and only falls back to the default chosen here otherwise.

// VCTR_USE_IPP: defaults to 1 if the header <ipp.h> can be found, else to 0.
#ifndef VCTR_USE_IPP
#if __has_include(<ipp.h>)
#define VCTR_USE_IPP 1
#else
#define VCTR_USE_IPP 0
#endif
#endif

// VCTR_USE_GCEM: defaults to 1 if the header <gcem.hpp> can be found, else to 0.
#ifndef VCTR_USE_GCEM
#if __has_include(<gcem.hpp>)
#define VCTR_USE_GCEM 1
#else
#define VCTR_USE_GCEM 0
#endif
#endif

// VCTR_ALIGNED_ARRAY: defaults to 1. The value is exposed as Config::alignedArray.
#ifndef VCTR_ALIGNED_ARRAY
#define VCTR_ALIGNED_ARRAY 1
#endif

#if VCTR_WINDOWS
// This is needed for the CPU instruction set check found below
#include <intrin.h>
#endif
152
153namespace vctr
154{
155
/** A set of single-bit flags, one per SIMD instruction set.

    A flag is 1 if the instruction set is reported as supported and 0 otherwise.
    Instances are created by getSupportedCPUInstructionSets.
 */
struct CPUInstructionSets
{
    uint16_t sse4_1 : 1;

    uint16_t avx : 1;

    // Note: CPUs that support AVX2 always support FMA and AVX
    uint16_t avx2 : 1;

    // Note: CPUs that support FMA always support AVX
    uint16_t fma : 1;

    uint16_t neon : 1;
};
170
171#if VCTR_WINDOWS
172namespace detail
173{
175class X64InstructionSets
176{
177private:
178 struct CPUFeatureFinder
179 {
180 CPUFeatureFinder()
181 {
182 std::array<int, 4> cpui;
183 std::vector<std::array<int, 4>> data, extdata;
184
185 // Calling __cpuid with 0x0 as the function_id argument
186 // gets the number of the highest valid function ID.
187 __cpuid (cpui.data(), 0);
188 auto nIds = cpui[0];
189
190 for (int i = 0; i <= nIds; ++i)
191 {
192 __cpuidex (cpui.data(), i, 0);
193 data.push_back (cpui);
194 }
195
196 // load bitset with flags for function 0x00000001
197 if (nIds >= 1)
198 {
199 f_1_ECX_ = data[1][2];
200 f_1_EDX_ = data[1][3];
201 }
202
203 // load bitset with flags for function 0x00000007
204 if (nIds >= 7)
205 {
206 f_7_EBX_ = data[7][1];
207 f_7_ECX_ = data[7][2];
208 }
209
210 // Calling __cpuid with 0x80000000 as the function_id argument
211 // gets the number of the highest valid extended ID.
212 __cpuid (cpui.data(), 0x80000000);
213 auto nExIds = cpui[0];
214
215 for (int i = 0x80000000; i <= nExIds; ++i)
216 {
217 __cpuidex (cpui.data(), i, 0);
218 extdata.push_back (cpui);
219 }
220
221 // load bitset with flags for function 0x80000001
222 if (nExIds >= 0x80000001)
223 {
224 f_81_ECX_ = extdata[1][2];
225 f_81_EDX_ = extdata[1][3];
226 }
227 };
228
229 std::bitset<32> f_1_ECX_;
230 std::bitset<32> f_1_EDX_;
231 std::bitset<32> f_7_EBX_;
232 std::bitset<32> f_7_ECX_;
233 std::bitset<32> f_81_ECX_;
234 std::bitset<32> f_81_EDX_;
235 };
236
237 inline static const CPUFeatureFinder cpuFeatures;
238
239public:
240 static bool hasFMA() { return cpuFeatures.f_1_ECX_[12]; }
241 static bool hasSSE41() { return cpuFeatures.f_1_ECX_[19]; }
242 static bool hasSSE42() { return cpuFeatures.f_1_ECX_[20]; }
243 static bool hasAVX() { return cpuFeatures.f_1_ECX_[28]; }
244 static bool hasAVX2() { return cpuFeatures.f_7_EBX_[5]; }
245 static bool hasAVX512F() { return cpuFeatures.f_7_EBX_[16]; }
246
247private:
248};
249} // namespace detail
250
251inline CPUInstructionSets getSupportedCPUInstructionSets()
252{
253 return {
254 .sse4_1 = detail::X64InstructionSets::hasSSE41(),
255 .avx = detail::X64InstructionSets::hasAVX(),
256 .avx2 = detail::X64InstructionSets::hasAVX2(),
257 .fma = detail::X64InstructionSets::hasFMA(),
258 .neon = false
259 };
260}
261
262#elif VCTR_ARM
263
264constexpr CPUInstructionSets getSupportedCPUInstructionSets()
265{
266 return {
267 .sse4_1 = false,
268 .avx = false,
269 .avx2 = false,
270 .fma = false,
271 .neon = true
272 };
273}
274
275#else
276
277inline CPUInstructionSets getSupportedCPUInstructionSets()
278{
279 __builtin_cpu_init();
280
281 return {
282 .sse4_1 = __builtin_cpu_supports ("sse4.1"),
283 .avx = __builtin_cpu_supports ("avx"),
284 .avx2 = __builtin_cpu_supports ("avx2"),
285 .fma = __builtin_cpu_supports ("fma"),
286 .neon = false
287 };
288}
289
290#endif
291
292namespace detail
293{
294
296template <bool... settings>
297consteval size_t trueCount()
298{
299 return (size_t (settings) + ...);
300}
301
302} // namespace detail
303
304struct Config
305{
306 static const inline auto supportedCPUInstructionSets = getSupportedCPUInstructionSets();
307
308 //==============================================================================
309 // Platform config
310 //==============================================================================
311 static constexpr bool platformWindows = VCTR_WINDOWS;
312
313 static constexpr bool platformApple = VCTR_APPLE;
314
315 static constexpr bool platformLinux = VCTR_LINUX;
316
317 static constexpr bool platformWasm = VCTR_WASM;
318
319 static_assert (detail::trueCount<platformWindows, platformApple, platformLinux, platformWasm>() == 1, "Unsupported platform or platform detection error");
320
321 //==============================================================================
322 // CPU architecture config
323 //==============================================================================
324 static constexpr bool archARM = VCTR_ARM;
325
326 static constexpr bool archX64 = VCTR_X64;
327
328 static_assert (detail::trueCount<archARM, archX64>() == 1, "Unsupported architecture or architecture detection error");
329
330 //==============================================================================
331 // Compiler config
332 //==============================================================================
333 static constexpr bool compilerClang = VCTR_CLANG;
334
335 static constexpr bool compilerGCC = VCTR_GCC;
336
337 static constexpr bool compilerMSVC = VCTR_MSVC;
338
339 static_assert (detail::trueCount<compilerClang, compilerGCC, compilerMSVC>() == 1, "Unsupported compiler or compiler detection error");
340
341 static constexpr bool hasSVML = VCTR_HAS_SVML;
342
343 //==============================================================================
344 // User supplied config
345 //==============================================================================
346 static constexpr bool hasIPP = VCTR_USE_IPP && archX64;
347
348 static constexpr bool alignedArray = VCTR_ALIGNED_ARRAY;
349
350 //==============================================================================
351 // Auto generated config
352 //==============================================================================
353 static constexpr size_t maxSIMDRegisterSize = VCTR_MAX_SIMD_REGISTER_SIZE;
354};
355
356} // namespace vctr
The main namespace of the VCTR project.
Definition: Array.h:24
Definition: Config.h:157
Definition: Config.h:305