VCTR
Loading...
Searching...
No Matches
Config.h
1/*
2 ==============================================================================
3 DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
5 Copyright 2022- by sonible GmbH.
6
7 This file is part of VCTR - Versatile Container Templates Reconceptualized.
8
9 VCTR is free software: you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License version 3
11 only, as published by the Free Software Foundation.
12
13 VCTR is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU Lesser General Public License version 3 for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 version 3 along with VCTR. If not, see <https://www.gnu.org/licenses/>.
20 ==============================================================================
21*/
22
23//==============================================================================
24// Platform defines
25//==============================================================================
// Detect the target platform. The checks are evaluated top to bottom and at most one
// branch is taken, so at most one of the platform macros is set to 1 here.
#if defined(_WIN64) || defined(_WIN32)
#define VCTR_WINDOWS 1
#elif defined(__linux__) || defined(LINUX)
#define VCTR_LINUX 1
#elif defined(__APPLE_CC__) || defined(__APPLE_CPP__)
#define VCTR_APPLE 1
#elif defined(__wasm__)
#define VCTR_WASM 1
#endif

// Every platform macro that has not been defined up to this point falls back to 0,
// so all four of them can be used in plain #if expressions and constant expressions.
#if ! defined(VCTR_WASM)
#define VCTR_WASM 0
#endif

#if ! defined(VCTR_APPLE)
#define VCTR_APPLE 0
#endif

#if ! defined(VCTR_LINUX)
#define VCTR_LINUX 0
#endif

#if ! defined(VCTR_WINDOWS)
#define VCTR_WINDOWS 0
#endif
51
52//==============================================================================
53// CPU architecture defines
54//==============================================================================
// Only 64 bit ARM is detected explicitly, every other target is treated as x64.
// VCTR_MAX_SIMD_REGISTER_SIZE is 16 on ARM and 32 on x64 (presumably the register width
// in bytes - TODO confirm against the SIMD code that uses it).
#if ! (defined(_M_ARM64) || defined(__aarch64__))
#define VCTR_X64 1
#define VCTR_ARM 0
#define VCTR_MAX_SIMD_REGISTER_SIZE 32
#else
#define VCTR_X64 0
#define VCTR_ARM 1
#define VCTR_MAX_SIMD_REGISTER_SIZE 16
#endif
64
65//==============================================================================
66// Compiler defines
67//==============================================================================
// Detect the compiler. The order of the checks matters: the first matching branch wins
// and __clang__ is tested before the more generic __GNUC__ and _MSC_VER macros.
// VCTR_COMPILER_NAME is only defined for the three compilers listed here.
#if defined(__clang__)
#define VCTR_COMPILER_NAME clang
#define VCTR_CLANG 1
#elif defined(__GNUC__)
#define VCTR_COMPILER_NAME GCC
#define VCTR_GCC 1
#elif defined(_MSC_VER)
#define VCTR_COMPILER_NAME MSVC
#define VCTR_MSVC 1
#endif

// Compiler macros that were not set above fall back to 0.
#if ! defined(VCTR_MSVC)
#define VCTR_MSVC 0
#endif

#if ! defined(VCTR_GCC)
#define VCTR_GCC 0
#endif

#if ! defined(VCTR_CLANG)
#define VCTR_CLANG 0
#endif
90
91//==============================================================================
92// Build type defines
93//==============================================================================
// A build counts as a debug build if one of the debug macros is defined or if neither
// of the release macros (NDEBUG / _NDEBUG) is defined.
#if defined(_DEBUG) || defined(DEBUG) || (! defined(NDEBUG) && ! defined(_NDEBUG))
#define VCTR_DEBUG 1
#else
#define VCTR_DEBUG 0
#endif
99
100//==============================================================================
101// User supplied defines
102//==============================================================================
103
// VCTR_ALIGNED_ARRAY: defaults to 1 unless it has already been defined, e.g. by the build system.
#if ! defined(VCTR_ALIGNED_ARRAY)
#define VCTR_ALIGNED_ARRAY 1
#endif

// VCTR_USE_IPP: unless it has already been defined, it is set to 1 if the header <ipp.h>
// can be found by the compiler and to 0 otherwise.
#if ! defined(VCTR_USE_IPP)
#if __has_include(<ipp.h>)
#define VCTR_USE_IPP 1
#else
#define VCTR_USE_IPP 0
#endif
#endif

// VCTR_USE_GCEM: unless it has already been defined, it is set to 1 if the header <gcem.hpp>
// can be found by the compiler and to 0 otherwise.
#if ! defined(VCTR_USE_GCEM)
#if __has_include(<gcem.hpp>)
#define VCTR_USE_GCEM 1
#else
#define VCTR_USE_GCEM 0
#endif
#endif
140
141#if VCTR_WINDOWS
142// This is needed for the CPU instruction set check found below
143#include <intrin.h>
144#endif
145
146namespace vctr
147{
148
/** A set of single bit flags, one per SIMD instruction set extension.

    Instances are created by getSupportedCPUInstructionSets(). The member order is part of
    the interface, since the struct is filled using designated initialisers.
 */
struct CPUInstructionSets
{
    /** Set if SSE 4.1 is available. */
    uint16_t sse4_1 : 1;

    /** Set if AVX is available. */
    uint16_t avx : 1;

    /** Set if AVX2 is available.

        Note: CPUs that support AVX2 always support FMA and AVX
     */
    uint16_t avx2 : 1;

    /** Set if FMA is available.

        Note: CPUs that support FMA always support AVX
     */
    uint16_t fma : 1;

    /** Set if Neon is available. */
    uint16_t neon : 1;
};
163
164#if VCTR_WINDOWS
165namespace detail
166{
168class X64InstructionSets
169{
170private:
171 struct CPUFeatureFinder
172 {
173 CPUFeatureFinder()
174 {
175 std::array<int, 4> cpui;
176 std::vector<std::array<int, 4>> data, extdata;
177
178 // Calling __cpuid with 0x0 as the function_id argument
179 // gets the number of the highest valid function ID.
180 __cpuid (cpui.data(), 0);
181 auto nIds = cpui[0];
182
183 for (int i = 0; i <= nIds; ++i)
184 {
185 __cpuidex (cpui.data(), i, 0);
186 data.push_back (cpui);
187 }
188
189 // load bitset with flags for function 0x00000001
190 if (nIds >= 1)
191 {
192 f_1_ECX_ = data[1][2];
193 f_1_EDX_ = data[1][3];
194 }
195
196 // load bitset with flags for function 0x00000007
197 if (nIds >= 7)
198 {
199 f_7_EBX_ = data[7][1];
200 f_7_ECX_ = data[7][2];
201 }
202
203 // Calling __cpuid with 0x80000000 as the function_id argument
204 // gets the number of the highest valid extended ID.
205 __cpuid (cpui.data(), 0x80000000);
206 auto nExIds = cpui[0];
207
208 for (int i = 0x80000000; i <= nExIds; ++i)
209 {
210 __cpuidex (cpui.data(), i, 0);
211 extdata.push_back (cpui);
212 }
213
214 // load bitset with flags for function 0x80000001
215 if (nExIds >= 0x80000001)
216 {
217 f_81_ECX_ = extdata[1][2];
218 f_81_EDX_ = extdata[1][3];
219 }
220 };
221
222 std::bitset<32> f_1_ECX_;
223 std::bitset<32> f_1_EDX_;
224 std::bitset<32> f_7_EBX_;
225 std::bitset<32> f_7_ECX_;
226 std::bitset<32> f_81_ECX_;
227 std::bitset<32> f_81_EDX_;
228 };
229
230 inline static const CPUFeatureFinder cpuFeatures;
231
232public:
233 static bool hasFMA() { return cpuFeatures.f_1_ECX_[12]; }
234 static bool hasSSE41() { return cpuFeatures.f_1_ECX_[19]; }
235 static bool hasSSE42() { return cpuFeatures.f_1_ECX_[20]; }
236 static bool hasAVX() { return cpuFeatures.f_1_ECX_[28]; }
237 static bool hasAVX2() { return cpuFeatures.f_7_EBX_[5]; }
238 static bool hasAVX512F() { return cpuFeatures.f_7_EBX_[16]; }
239
240private:
241};
242} // namespace detail
243
244inline CPUInstructionSets getSupportedCPUInstructionSets()
245{
246 return {
247 .sse4_1 = detail::X64InstructionSets::hasSSE41(),
248 .avx = detail::X64InstructionSets::hasAVX(),
249 .avx2 = detail::X64InstructionSets::hasAVX2(),
250 .fma = detail::X64InstructionSets::hasFMA(),
251 .neon = false
252 };
253}
254
255#elif VCTR_ARM
256
257constexpr CPUInstructionSets getSupportedCPUInstructionSets()
258{
259 return {
260 .sse4_1 = false,
261 .avx = false,
262 .avx2 = false,
263 .fma = false,
264 .neon = true
265 };
266}
267
268#else
269
270inline CPUInstructionSets getSupportedCPUInstructionSets()
271{
272 __builtin_cpu_init();
273
274 return {
275 .sse4_1 = __builtin_cpu_supports ("sse4.1"),
276 .avx = __builtin_cpu_supports ("avx"),
277 .avx2 = __builtin_cpu_supports ("avx2"),
278 .fma = __builtin_cpu_supports ("fma"),
279 .neon = false
280 };
281}
282
283#endif
284
285namespace detail
286{
287
289template <bool... settings>
290consteval size_t trueCount()
291{
292 return (size_t (settings) + ...);
293}
294
295} // namespace detail
296
297struct Config
298{
299 static const inline auto supportedCPUInstructionSets = getSupportedCPUInstructionSets();
300
301 //==============================================================================
302 // Platform config
303 //==============================================================================
304 static constexpr bool platformWindows = VCTR_WINDOWS;
305
306 static constexpr bool platformApple = VCTR_APPLE;
307
308 static constexpr bool platformLinux = VCTR_LINUX;
309
310 static constexpr bool platformWasm = VCTR_WASM;
311
312 static_assert (detail::trueCount<platformWindows, platformApple, platformLinux, platformWasm>() == 1, "Unsupported platform or platform detection error");
313
314 //==============================================================================
315 // CPU architecture config
316 //==============================================================================
317 static constexpr bool archARM = VCTR_ARM;
318
319 static constexpr bool archX64 = VCTR_X64;
320
321 static_assert (detail::trueCount<archARM, archX64>() == 1, "Unsupported architecture or architecture detection error");
322
323 //==============================================================================
324 // Compiler config
325 //==============================================================================
326 static constexpr bool compilerClang = VCTR_CLANG;
327
328 static constexpr bool compilerGCC = VCTR_GCC;
329
330 static constexpr bool compilerMSVC = VCTR_MSVC;
331
332 static_assert (detail::trueCount<compilerClang, compilerGCC, compilerMSVC>() == 1, "Unsupported compiler or compiler detection error");
333
334 //==============================================================================
335 // User supplied config
336 //==============================================================================
337 static constexpr bool hasIPP = VCTR_USE_IPP && archX64;
338
339 static constexpr bool alignedArray = VCTR_ALIGNED_ARRAY;
340
341 //==============================================================================
342 // Auto generated config
343 //==============================================================================
344 static constexpr size_t maxSIMDRegisterSize = VCTR_MAX_SIMD_REGISTER_SIZE;
345};
346
347} // namespace vctr
The main namespace of the VCTR project.
Definition: Array.h:24
Definition: Config.h:150
Definition: Config.h:298