26#if defined(_WIN32) || defined(_WIN64)
28#elif defined(LINUX) || defined(__linux__)
30#elif defined(__APPLE_CPP__) || defined(__APPLE_CC__)
32#elif defined(__wasm__)
55#if defined(__aarch64__) || defined(_M_ARM64)
68#define VCTR_COMPILER_NAME clang
69#elif defined(__GNUC__)
71#define VCTR_COMPILER_NAME GCC
72#elif defined(_MSC_VER)
74#define VCTR_COMPILER_NAME MSVC
92#if defined(DEBUG) || defined(_DEBUG) || ! (defined(NDEBUG) || defined(_NDEBUG))
107#if __has_include(<ipp.h>)
108#define VCTR_USE_IPP 1
110#define VCTR_USE_IPP 0
119#if __has_include(<gcem.hpp>)
120#define VCTR_USE_GCEM 1
122#define VCTR_USE_GCEM 0
135#ifndef VCTR_ALIGNED_ARRAY
136#define VCTR_ALIGNED_ARRAY 1
142enum class CPUInstructionSet
160class X64InstructionSets
163 struct CPUFeatureFinder
167 std::array<int, 4> cpui;
168 std::vector<std::array<int, 4>> data, extdata;
172 __cpuid (cpui.data(), 0);
175 for (
int i = 0; i <= nIds; ++i)
177 __cpuidex (cpui.data(), i, 0);
178 data.push_back (cpui);
184 f_1_ECX_ = data[1][2];
185 f_1_EDX_ = data[1][3];
191 f_7_EBX_ = data[7][1];
192 f_7_ECX_ = data[7][2];
197 __cpuid (cpui.data(), 0x80000000);
198 auto nExIds = cpui[0];
200 for (
int i = 0x80000000; i <= nExIds; ++i)
202 __cpuidex (cpui.data(), i, 0);
203 extdata.push_back (cpui);
207 if (nExIds >= 0x80000001)
209 f_81_ECX_ = extdata[1][2];
210 f_81_EDX_ = extdata[1][3];
214 std::bitset<32> f_1_ECX_;
215 std::bitset<32> f_1_EDX_;
216 std::bitset<32> f_7_EBX_;
217 std::bitset<32> f_7_ECX_;
218 std::bitset<32> f_81_ECX_;
219 std::bitset<32> f_81_EDX_;
222 inline static const CPUFeatureFinder cpuFeatures;
225 static bool hasFMA() {
return cpuFeatures.f_1_ECX_[12]; }
226 static bool hasSSE41() {
return cpuFeatures.f_1_ECX_[19]; }
227 static bool hasSSE42() {
return cpuFeatures.f_1_ECX_[20]; }
228 static bool hasAVX() {
return cpuFeatures.f_1_ECX_[28]; }
229 static bool hasAVX2() {
return cpuFeatures.f_7_EBX_[5]; }
230 static bool hasAVX512F() {
return cpuFeatures.f_7_EBX_[16]; }
236inline CPUInstructionSet getHighestSupportedCPUInstructionSet()
238 if (detail::X64InstructionSets::hasAVX2())
239 return CPUInstructionSet::avx2;
241 if (detail::X64InstructionSets::hasAVX())
242 return CPUInstructionSet::avx;
244 if (detail::X64InstructionSets::hasSSE41())
245 return CPUInstructionSet::sse4_1;
247 return CPUInstructionSet::fallback;
252inline CPUInstructionSet getHighestSupportedCPUInstructionSet()
254 return CPUInstructionSet::neon;
259inline CPUInstructionSet getHighestSupportedCPUInstructionSet()
261 __builtin_cpu_init();
263 if (__builtin_cpu_supports (
"avx2"))
264 return CPUInstructionSet::avx2;
266 if (__builtin_cpu_supports (
"avx"))
267 return CPUInstructionSet::avx;
269 if (__builtin_cpu_supports (
"sse4.1"))
270 return CPUInstructionSet::sse4_1;
272 return CPUInstructionSet::fallback;
/** Counts how many of the boolean template arguments are true.

    Evaluated at compile time. An empty argument list yields 0.
 */
template <bool... settings>
consteval size_t trueCount()
{
    // Binary fold seeded with 0: a unary fold over + is ill-formed for an empty pack.
    return (size_t (0) + ... + size_t (settings));
}
291 static const inline auto highestSupportedCPUInstructionSet = getHighestSupportedCPUInstructionSet();
293 static const inline auto supportsAVX2 = highestSupportedCPUInstructionSet == CPUInstructionSet::avx2;
295 static const inline auto supportsAVX = highestSupportedCPUInstructionSet == CPUInstructionSet::avx2 || highestSupportedCPUInstructionSet == CPUInstructionSet::avx;
300 static constexpr bool platformWindows = VCTR_WINDOWS;
302 static constexpr bool platformApple = VCTR_APPLE;
304 static constexpr bool platformLinux = VCTR_LINUX;
306 static constexpr bool platformWasm = VCTR_WASM;
308 static_assert (detail::trueCount<platformWindows, platformApple, platformLinux, platformWasm>() == 1,
"Unsupported platform or platform detection error");
313 static constexpr bool archARM = VCTR_ARM;
315 static constexpr bool archX64 = VCTR_X64;
317 static_assert (detail::trueCount<archARM, archX64>() == 1,
"Unsupported architecture or architecture detection error");
322 static constexpr bool compilerClang = VCTR_CLANG;
324 static constexpr bool compilerGCC = VCTR_GCC;
326 static constexpr bool compilerMSVC = VCTR_MSVC;
328 static_assert (detail::trueCount<compilerClang, compilerGCC, compilerMSVC>() == 1,
"Unsupported compiler or compiler detection error");
333 static constexpr bool hasIPP = VCTR_USE_IPP && archX64;
335 static constexpr bool alignedArray = VCTR_ALIGNED_ARRAY;
340 static constexpr size_t maxSIMDRegisterSize = archX64 ? 32 : 16;
The main namespace of the VCTR project.
Definition: Array.h:24