VCTR
Loading...
Searching...
No Matches
Config.h
1/*
2 ==============================================================================
3 DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
5 Copyright 2022- by sonible GmbH.
6
7 This file is part of VCTR - Versatile Container Templates Reconceptualized.
8
9 VCTR is free software: you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License version 3
11 only, as published by the Free Software Foundation.
12
13 VCTR is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU Lesser General Public License version 3 for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 version 3 along with VCTR. If not, see <https://www.gnu.org/licenses/>.
20 ==============================================================================
21*/
22
23//==============================================================================
24// Platform defines
25//==============================================================================
// At most one of the platform macros is set to 1 here, chosen from the
// macros predefined by the compiler. The first matching branch wins.
#if defined(_WIN64) || defined(_WIN32)
    #define VCTR_WINDOWS 1
#elif defined(__linux__) || defined(LINUX)
    #define VCTR_LINUX 1
#elif defined(__APPLE_CC__) || defined(__APPLE_CPP__)
    #define VCTR_APPLE 1
#elif defined(__wasm__)
    #define VCTR_WASM 1
#endif

// Every platform macro that has not been defined up to this point becomes 0,
// so all four macros can safely be used in #if expressions and as values.
#if ! defined(VCTR_WINDOWS)
    #define VCTR_WINDOWS 0
#endif

#if ! defined(VCTR_LINUX)
    #define VCTR_LINUX 0
#endif

#if ! defined(VCTR_APPLE)
    #define VCTR_APPLE 0
#endif

#if ! defined(VCTR_WASM)
    #define VCTR_WASM 0
#endif
51
52//==============================================================================
53// CPU architecture defines
54//==============================================================================
// 64 bit ARM is recognised through the GCC/Clang (__aarch64__) and MSVC
// (_M_ARM64) macros. Every other target is treated as x64.
#if defined(_M_ARM64) || defined(__aarch64__)
    #define VCTR_X64 0
    #define VCTR_ARM 1
#else
    #define VCTR_X64 1
    #define VCTR_ARM 0
#endif
62
63//==============================================================================
64// Compiler defines
65//==============================================================================
// Clang is tested first. NOTE: Clang also predefines __GNUC__ (and _MSC_VER
// when running in MSVC compatible mode), so the order of these branches matters.
// VCTR_COMPILER_NAME is only defined if one of the branches matches.
#if defined(__clang__)
    #define VCTR_COMPILER_NAME clang
    #define VCTR_CLANG 1
#elif defined(__GNUC__)
    #define VCTR_COMPILER_NAME GCC
    #define VCTR_GCC 1
#elif defined(_MSC_VER)
    #define VCTR_COMPILER_NAME MSVC
    #define VCTR_MSVC 1
#endif

// Compiler macros that were not selected above evaluate to 0.
#if ! defined(VCTR_CLANG)
    #define VCTR_CLANG 0
#endif

#if ! defined(VCTR_GCC)
    #define VCTR_GCC 0
#endif

#if ! defined(VCTR_MSVC)
    #define VCTR_MSVC 0
#endif
88
89//==============================================================================
90// Build type defines
91//==============================================================================
// A build counts as a debug build if a debug macro is defined explicitly or
// if none of the release macros (NDEBUG, _NDEBUG) is defined.
#if defined(DEBUG) || defined(_DEBUG) || (! defined(NDEBUG) && ! defined(_NDEBUG))
    #define VCTR_DEBUG 1
#else
    #define VCTR_DEBUG 0
#endif
97
98//==============================================================================
99// User supplied defines
100//==============================================================================
101
// VCTR_USE_IPP can be predefined by the user. If it is not, it defaults to 1
// when the header <ipp.h> is found on the include path and to 0 otherwise.
#if ! defined(VCTR_USE_IPP) && __has_include(<ipp.h>)
    #define VCTR_USE_IPP 1
#elif ! defined(VCTR_USE_IPP)
    #define VCTR_USE_IPP 0
#endif
113
// VCTR_USE_GCEM can be predefined by the user. If it is not, it defaults to 1
// when the header <gcem.hpp> is found on the include path and to 0 otherwise.
#if ! defined(VCTR_USE_GCEM) && __has_include(<gcem.hpp>)
    #define VCTR_USE_GCEM 1
#elif ! defined(VCTR_USE_GCEM)
    #define VCTR_USE_GCEM 0
#endif
125
// VCTR_ALIGNED_ARRAY defaults to 1 unless the user predefines it. Its value is
// exposed as Config::alignedArray.
#if ! defined(VCTR_ALIGNED_ARRAY)
    #define VCTR_ALIGNED_ARRAY 1
#endif
138
139namespace vctr
140{
141
/** The SIMD instruction sets that getHighestSupportedCPUInstructionSet can report. */
enum class CPUInstructionSet
{
    // x64 instruction sets
    sse4_1, ///< SSE 4.1
    avx,    ///< AVX
    avx2,   ///< AVX2

    // ARM instruction sets
    neon, ///< NEON

    // Returned by the x64 detection code if none of the sets above is available
    fallback
};
152
153#if VCTR_WINDOWS
154
155#include <intrin.h>
156
157namespace detail
158{
160class X64InstructionSets
161{
162private:
163 struct CPUFeatureFinder
164 {
165 CPUFeatureFinder()
166 {
167 std::array<int, 4> cpui;
168 std::vector<std::array<int, 4>> data, extdata;
169
170 // Calling __cpuid with 0x0 as the function_id argument
171 // gets the number of the highest valid function ID.
172 __cpuid (cpui.data(), 0);
173 auto nIds = cpui[0];
174
175 for (int i = 0; i <= nIds; ++i)
176 {
177 __cpuidex (cpui.data(), i, 0);
178 data.push_back (cpui);
179 }
180
181 // load bitset with flags for function 0x00000001
182 if (nIds >= 1)
183 {
184 f_1_ECX_ = data[1][2];
185 f_1_EDX_ = data[1][3];
186 }
187
188 // load bitset with flags for function 0x00000007
189 if (nIds >= 7)
190 {
191 f_7_EBX_ = data[7][1];
192 f_7_ECX_ = data[7][2];
193 }
194
195 // Calling __cpuid with 0x80000000 as the function_id argument
196 // gets the number of the highest valid extended ID.
197 __cpuid (cpui.data(), 0x80000000);
198 auto nExIds = cpui[0];
199
200 for (int i = 0x80000000; i <= nExIds; ++i)
201 {
202 __cpuidex (cpui.data(), i, 0);
203 extdata.push_back (cpui);
204 }
205
206 // load bitset with flags for function 0x80000001
207 if (nExIds >= 0x80000001)
208 {
209 f_81_ECX_ = extdata[1][2];
210 f_81_EDX_ = extdata[1][3];
211 }
212 };
213
214 std::bitset<32> f_1_ECX_;
215 std::bitset<32> f_1_EDX_;
216 std::bitset<32> f_7_EBX_;
217 std::bitset<32> f_7_ECX_;
218 std::bitset<32> f_81_ECX_;
219 std::bitset<32> f_81_EDX_;
220 };
221
222 inline static const CPUFeatureFinder cpuFeatures;
223
224public:
225 static bool hasFMA() { return cpuFeatures.f_1_ECX_[12]; }
226 static bool hasSSE41() { return cpuFeatures.f_1_ECX_[19]; }
227 static bool hasSSE42() { return cpuFeatures.f_1_ECX_[20]; }
228 static bool hasAVX() { return cpuFeatures.f_1_ECX_[28]; }
229 static bool hasAVX2() { return cpuFeatures.f_7_EBX_[5]; }
230 static bool hasAVX512F() { return cpuFeatures.f_7_EBX_[16]; }
231
232private:
233};
234} // namespace detail
235
236inline CPUInstructionSet getHighestSupportedCPUInstructionSet()
237{
238 if (detail::X64InstructionSets::hasAVX2())
239 return CPUInstructionSet::avx2;
240
241 if (detail::X64InstructionSets::hasAVX())
242 return CPUInstructionSet::avx;
243
244 if (detail::X64InstructionSets::hasSSE41())
245 return CPUInstructionSet::sse4_1;
246
247 return CPUInstructionSet::fallback;
248}
249
250#elif VCTR_ARM
251
252inline CPUInstructionSet getHighestSupportedCPUInstructionSet()
253{
254 return CPUInstructionSet::neon;
255}
256
257#else
258
259inline CPUInstructionSet getHighestSupportedCPUInstructionSet()
260{
261 __builtin_cpu_init();
262
263 if (__builtin_cpu_supports ("avx2"))
264 return CPUInstructionSet::avx2;
265
266 if (__builtin_cpu_supports ("avx"))
267 return CPUInstructionSet::avx;
268
269 if (__builtin_cpu_supports ("sse4.1"))
270 return CPUInstructionSet::sse4_1;
271
272 return CPUInstructionSet::fallback;
273}
274
275#endif
276
277namespace detail
278{
279
281template <bool... settings>
282consteval size_t trueCount()
283{
284 return (size_t (settings) + ...);
285}
286
287} // namespace detail
288
289struct Config
290{
291 static const inline auto highestSupportedCPUInstructionSet = getHighestSupportedCPUInstructionSet();
292
293 static const inline auto supportsAVX2 = highestSupportedCPUInstructionSet == CPUInstructionSet::avx2;
294
295 static const inline auto supportsAVX = highestSupportedCPUInstructionSet == CPUInstructionSet::avx2 || highestSupportedCPUInstructionSet == CPUInstructionSet::avx;
296
297 //==============================================================================
298 // Platform config
299 //==============================================================================
300 static constexpr bool platformWindows = VCTR_WINDOWS;
301
302 static constexpr bool platformApple = VCTR_APPLE;
303
304 static constexpr bool platformLinux = VCTR_LINUX;
305
306 static constexpr bool platformWasm = VCTR_WASM;
307
308 static_assert (detail::trueCount<platformWindows, platformApple, platformLinux, platformWasm>() == 1, "Unsupported platform or platform detection error");
309
310 //==============================================================================
311 // CPU architecture config
312 //==============================================================================
313 static constexpr bool archARM = VCTR_ARM;
314
315 static constexpr bool archX64 = VCTR_X64;
316
317 static_assert (detail::trueCount<archARM, archX64>() == 1, "Unsupported architecture or architecture detection error");
318
319 //==============================================================================
320 // Compiler config
321 //==============================================================================
322 static constexpr bool compilerClang = VCTR_CLANG;
323
324 static constexpr bool compilerGCC = VCTR_GCC;
325
326 static constexpr bool compilerMSVC = VCTR_MSVC;
327
328 static_assert (detail::trueCount<compilerClang, compilerGCC, compilerMSVC>() == 1, "Unsupported compiler or compiler detection error");
329
330 //==============================================================================
331 // User supplied config
332 //==============================================================================
333 static constexpr bool hasIPP = VCTR_USE_IPP && archX64;
334
335 static constexpr bool alignedArray = VCTR_ALIGNED_ARRAY;
336
337 //==============================================================================
338 // Auto generated config
339 //==============================================================================
340 static constexpr size_t maxSIMDRegisterSize = archX64 ? 32 : 16;
341};
342
343} // namespace vctr
The main namespace of the VCTR project.
Definition: Array.h:24
Definition: Config.h:290