26#if defined(_WIN32) || defined(_WIN64)
28#elif defined(LINUX) || defined(__linux__)
30#elif defined(__APPLE_CPP__) || defined(__APPLE_CC__)
32#elif defined(__wasm__)
55#if defined(__aarch64__) || defined(_M_ARM64)
58#define VCTR_MAX_SIMD_REGISTER_SIZE 16
62#define VCTR_MAX_SIMD_REGISTER_SIZE 32
70#define VCTR_COMPILER_NAME clang
71#elif defined(__GNUC__)
73#define VCTR_COMPILER_NAME GCC
74#elif defined(_MSC_VER)
76#define VCTR_COMPILER_NAME MSVC
94#if defined(DEBUG) || defined(_DEBUG) || ! (defined(NDEBUG) || defined(_NDEBUG))
109#if __has_include(<ipp.h>)
110#define VCTR_USE_IPP 1
112#define VCTR_USE_IPP 0
121#if __has_include(<gcem.hpp>)
122#define VCTR_USE_GCEM 1
124#define VCTR_USE_GCEM 0
137#ifndef VCTR_ALIGNED_ARRAY
138#define VCTR_ALIGNED_ARRAY 1
168class X64InstructionSets
171 struct CPUFeatureFinder
175 std::array<int, 4> cpui;
176 std::vector<std::array<int, 4>> data, extdata;
180 __cpuid (cpui.data(), 0);
183 for (
int i = 0; i <= nIds; ++i)
185 __cpuidex (cpui.data(), i, 0);
186 data.push_back (cpui);
192 f_1_ECX_ = data[1][2];
193 f_1_EDX_ = data[1][3];
199 f_7_EBX_ = data[7][1];
200 f_7_ECX_ = data[7][2];
205 __cpuid (cpui.data(), 0x80000000);
206 auto nExIds = cpui[0];
208 for (
int i = 0x80000000; i <= nExIds; ++i)
210 __cpuidex (cpui.data(), i, 0);
211 extdata.push_back (cpui);
215 if (nExIds >= 0x80000001)
217 f_81_ECX_ = extdata[1][2];
218 f_81_EDX_ = extdata[1][3];
222 std::bitset<32> f_1_ECX_;
223 std::bitset<32> f_1_EDX_;
224 std::bitset<32> f_7_EBX_;
225 std::bitset<32> f_7_ECX_;
226 std::bitset<32> f_81_ECX_;
227 std::bitset<32> f_81_EDX_;
230 inline static const CPUFeatureFinder cpuFeatures;
233 static bool hasFMA() {
return cpuFeatures.f_1_ECX_[12]; }
234 static bool hasSSE41() {
return cpuFeatures.f_1_ECX_[19]; }
235 static bool hasSSE42() {
return cpuFeatures.f_1_ECX_[20]; }
236 static bool hasAVX() {
return cpuFeatures.f_1_ECX_[28]; }
237 static bool hasAVX2() {
return cpuFeatures.f_7_EBX_[5]; }
238 static bool hasAVX512F() {
return cpuFeatures.f_7_EBX_[16]; }
244inline CPUInstructionSets getSupportedCPUInstructionSets()
247 .sse4_1 = detail::X64InstructionSets::hasSSE41(),
248 .avx = detail::X64InstructionSets::hasAVX(),
249 .avx2 = detail::X64InstructionSets::hasAVX2(),
250 .fma = detail::X64InstructionSets::hasFMA(),
257constexpr CPUInstructionSets getSupportedCPUInstructionSets()
270inline CPUInstructionSets getSupportedCPUInstructionSets()
272 __builtin_cpu_init();
275 .sse4_1 = __builtin_cpu_supports (
"sse4.1"),
276 .avx = __builtin_cpu_supports (
"avx"),
277 .avx2 = __builtin_cpu_supports (
"avx2"),
278 .fma = __builtin_cpu_supports (
"fma"),
289template <
bool... settings>
290consteval size_t trueCount()
292 return (
size_t (settings) + ...);
299 static const inline auto supportedCPUInstructionSets = getSupportedCPUInstructionSets();
304 static constexpr bool platformWindows = VCTR_WINDOWS;
306 static constexpr bool platformApple = VCTR_APPLE;
308 static constexpr bool platformLinux = VCTR_LINUX;
310 static constexpr bool platformWasm = VCTR_WASM;
312 static_assert (detail::trueCount<platformWindows, platformApple, platformLinux, platformWasm>() == 1,
"Unsupported platform or platform detection error");
317 static constexpr bool archARM = VCTR_ARM;
319 static constexpr bool archX64 = VCTR_X64;
321 static_assert (detail::trueCount<archARM, archX64>() == 1,
"Unsupported architecture or architecture detection error");
326 static constexpr bool compilerClang = VCTR_CLANG;
328 static constexpr bool compilerGCC = VCTR_GCC;
330 static constexpr bool compilerMSVC = VCTR_MSVC;
332 static_assert (detail::trueCount<compilerClang, compilerGCC, compilerMSVC>() == 1,
"Unsupported compiler or compiler detection error");
337 static constexpr bool hasIPP = VCTR_USE_IPP && archX64;
339 static constexpr bool alignedArray = VCTR_ALIGNED_ARRAY;
344 static constexpr size_t maxSIMDRegisterSize = VCTR_MAX_SIMD_REGISTER_SIZE;
The main namespace of the VCTR project.
Definition: Array.h:24