// Target platform detection: each branch tests the predefined macros of one OS / runtime.
// Branch taken for Windows targets.
26#if defined(_WIN32) || defined(_WIN64)
// Branch taken for Linux targets.
28#elif defined(LINUX) || defined(__linux__)
// Branch taken when an Apple toolchain macro is defined.
30#elif defined(__APPLE_CPP__) || defined(__APPLE_CC__)
// Branch taken for WebAssembly targets.
32#elif defined(__wasm__)
// Architecture detection: true for 64-bit ARM (GCC/Clang spell it __aarch64__, MSVC spells it _M_ARM64).
55#if defined(__aarch64__) || defined(_M_ARM64)
// Upper bound for the SIMD register size, presumably in bytes. Two alternative values are defined;
// the conditions selecting between them are on lines not visible here.
58#define VCTR_MAX_SIMD_REGISTER_SIZE 16
62#define VCTR_MAX_SIMD_REGISTER_SIZE 32
// Compiler detection: VCTR_COMPILER_NAME is set to a bare identifier (clang, GCC or MSVC).
// The condition guarding the clang definition is not visible here.
70#define VCTR_COMPILER_NAME clang
71#elif defined(__GNUC__)
73#define VCTR_COMPILER_NAME GCC
74#elif defined(_MSC_VER)
76#define VCTR_COMPILER_NAME MSVC
// SVML availability expressed as a version test: non-zero when _MSC_VER is at least 1921.
// NOTE(review): any additional conditions around this definition are not visible - confirm.
79#define VCTR_HAS_SVML (_MSC_VER >= 1921)
// Fallback definition: SVML is reported as unavailable.
95#define VCTR_HAS_SVML 0
// Debug build detection: true if DEBUG or _DEBUG is defined, or if neither NDEBUG nor _NDEBUG is defined.
101#if defined(DEBUG) || defined(_DEBUG) || ! (defined(NDEBUG) || defined(_NDEBUG))
// VCTR_USE_IPP is 1 when the header <ipp.h> can be found on the include path, 0 otherwise.
116#if __has_include(<ipp.h>)
117#define VCTR_USE_IPP 1
119#define VCTR_USE_IPP 0
// VCTR_USE_GCEM is 1 when the header <gcem.hpp> can be found on the include path, 0 otherwise.
128#if __has_include(<gcem.hpp>)
129#define VCTR_USE_GCEM 1
131#define VCTR_USE_GCEM 0
// VCTR_ALIGNED_ARRAY defaults to 1 but can be predefined by the user to override the default.
144#ifndef VCTR_ALIGNED_ARRAY
145#define VCTR_ALIGNED_ARRAY 1
// Queries the x64 CPUID instruction once and exposes the resulting feature bits through static accessors.
175class X64InstructionSets
// Helper that executes the CPUID queries and stores the raw feature registers as bitsets.
178 struct CPUFeatureFinder
// Scratch buffer for one CPUID result; the intrinsics fill it in the order EAX, EBX, ECX, EDX.
182 std::array<int, 4> cpui;
// data collects the standard leaves, extdata the extended leaves (0x80000000 and above).
183 std::vector<std::array<int, 4>> data, extdata;
// Leaf 0: EAX reports the highest supported standard leaf.
187 __cpuid (cpui.data(), 0);
// Store every standard leaf from 0 up to and including nIds, so data[i] holds leaf i.
// NOTE(review): the definition of nIds is not visible here - presumably cpui[0] from the query above.
190 for (
int i = 0; i <= nIds; ++i)
192 __cpuidex (cpui.data(), i, 0);
193 data.push_back (cpui);
// Leaf 1 feature flags: array index 2 is ECX, index 3 is EDX.
199 f_1_ECX_ = data[1][2];
200 f_1_EDX_ = data[1][3];
// Leaf 7 feature flags: array index 1 is EBX, index 2 is ECX.
// NOTE(review): data[7] only exists if nIds >= 7; a guard is not visible on the lines shown - confirm.
206 f_7_EBX_ = data[7][1];
207 f_7_ECX_ = data[7][2];
// Leaf 0x80000000: EAX reports the highest supported extended leaf.
212 __cpuid (cpui.data(), 0x80000000);
213 auto nExIds = cpui[0];
// Store every extended leaf, so extdata[k] holds leaf 0x80000000 + k.
// NOTE(review): assuming a 32-bit int, 0x80000000 does not fit into int, so both i and nExIds hold
// negative values here; the loop relies on both being converted the same way - confirm this is intended.
215 for (
int i = 0x80000000; i <= nExIds; ++i)
217 __cpuidex (cpui.data(), i, 0);
218 extdata.push_back (cpui);
// Only read leaf 0x80000001 if the CPU reports it. The literal is unsigned (assuming a 32-bit int),
// so nExIds is converted to unsigned for this comparison.
222 if (nExIds >= 0x80000001)
224 f_81_ECX_ = extdata[1][2];
225 f_81_EDX_ = extdata[1][3];
// Raw feature registers. Naming scheme: f_<leaf>_<register>_, where 81 denotes leaf 0x80000001.
229 std::bitset<32> f_1_ECX_;
230 std::bitset<32> f_1_EDX_;
231 std::bitset<32> f_7_EBX_;
232 std::bitset<32> f_7_ECX_;
233 std::bitset<32> f_81_ECX_;
234 std::bitset<32> f_81_EDX_;
// Single shared instance that all accessors below read from.
237 inline static const CPUFeatureFinder cpuFeatures;
// Feature accessors: each one returns a single bit of the stored CPUID registers.
// FMA: leaf 1, ECX bit 12.
240 static bool hasFMA() {
return cpuFeatures.f_1_ECX_[12]; }
// SSE4.1: leaf 1, ECX bit 19.
241 static bool hasSSE41() {
return cpuFeatures.f_1_ECX_[19]; }
// SSE4.2: leaf 1, ECX bit 20.
242 static bool hasSSE42() {
return cpuFeatures.f_1_ECX_[20]; }
// AVX: leaf 1, ECX bit 28.
243 static bool hasAVX() {
return cpuFeatures.f_1_ECX_[28]; }
// AVX2: leaf 7, EBX bit 5.
244 static bool hasAVX2() {
return cpuFeatures.f_7_EBX_[5]; }
// AVX-512 Foundation: leaf 7, EBX bit 16.
245 static bool hasAVX512F() {
return cpuFeatures.f_7_EBX_[16]; }
// Returns the instruction sets supported by the CPU, filled from the CPUID based
// detail::X64InstructionSets accessors.
// NOTE(review): the preprocessor condition selecting this variant is not visible on the lines shown.
251inline CPUInstructionSets getSupportedCPUInstructionSets()
// Designated initialisers: one field per instruction set.
254 .sse4_1 = detail::X64InstructionSets::hasSSE41(),
255 .avx = detail::X64InstructionSets::hasAVX(),
256 .avx2 = detail::X64InstructionSets::hasAVX2(),
257 .fma = detail::X64InstructionSets::hasFMA(),
// constexpr variant of getSupportedCPUInstructionSets. Its body and the preprocessor condition
// selecting it are not visible on the lines shown.
264constexpr CPUInstructionSets getSupportedCPUInstructionSets()
// Returns the instruction sets supported by the CPU, queried through the compiler builtins
// __builtin_cpu_init and __builtin_cpu_supports.
// NOTE(review): the preprocessor condition selecting this variant is not visible on the lines shown.
277inline CPUInstructionSets getSupportedCPUInstructionSets()
// Called before the __builtin_cpu_supports queries below.
279 __builtin_cpu_init();
// One query per instruction set; the string literal names the feature to test.
282 .sse4_1 = __builtin_cpu_supports (
"sse4.1"),
283 .avx = __builtin_cpu_supports (
"avx"),
284 .avx2 = __builtin_cpu_supports (
"avx2"),
285 .fma = __builtin_cpu_supports (
"fma"),
// Compile-time helper that returns how many of the boolean template arguments are true.
296template <
bool... settings>
297consteval size_t trueCount()
// Every bool is converted to size_t (0 or 1) and the values are summed with a unary fold.
// NOTE(review): a unary fold over + is ill-formed for an empty pack, so trueCount<>() does not compile.
299 return (
size_t (settings) + ...);
// Instruction sets reported by getSupportedCPUInstructionSets(); initialised once as a static inline member.
306 static const inline auto supportedCPUInstructionSets = getSupportedCPUInstructionSets();
// Platform flags, taken from the VCTR_WINDOWS / VCTR_APPLE / VCTR_LINUX / VCTR_WASM macros.
311 static constexpr bool platformWindows = VCTR_WINDOWS;
313 static constexpr bool platformApple = VCTR_APPLE;
315 static constexpr bool platformLinux = VCTR_LINUX;
317 static constexpr bool platformWasm = VCTR_WASM;
// Exactly one platform flag must be true, otherwise compilation fails.
319 static_assert (detail::trueCount<platformWindows, platformApple, platformLinux, platformWasm>() == 1,
"Unsupported platform or platform detection error");
// Architecture flags, taken from the VCTR_ARM / VCTR_X64 macros.
324 static constexpr bool archARM = VCTR_ARM;
326 static constexpr bool archX64 = VCTR_X64;
// Exactly one architecture flag must be true, otherwise compilation fails.
328 static_assert (detail::trueCount<archARM, archX64>() == 1,
"Unsupported architecture or architecture detection error");
// Compiler flags, taken from the VCTR_CLANG / VCTR_GCC / VCTR_MSVC macros.
333 static constexpr bool compilerClang = VCTR_CLANG;
335 static constexpr bool compilerGCC = VCTR_GCC;
337 static constexpr bool compilerMSVC = VCTR_MSVC;
// Exactly one compiler flag must be true, otherwise compilation fails.
339 static_assert (detail::trueCount<compilerClang, compilerGCC, compilerMSVC>() == 1,
"Unsupported compiler or compiler detection error");
// Mirrors the VCTR_HAS_SVML macro.
341 static constexpr bool hasSVML = VCTR_HAS_SVML;
// IPP is only reported as available when VCTR_USE_IPP is set and the target architecture is x64.
346 static constexpr bool hasIPP = VCTR_USE_IPP && archX64;
// Mirrors the VCTR_ALIGNED_ARRAY macro.
348 static constexpr bool alignedArray = VCTR_ALIGNED_ARRAY;
// Mirrors the VCTR_MAX_SIMD_REGISTER_SIZE macro.
353 static constexpr size_t maxSIMDRegisterSize = VCTR_MAX_SIMD_REGISTER_SIZE;
The main namespace of the VCTR project.
Definition: Array.h:24