aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib/utils/cpuid.cpp
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2017-09-18 10:50:29 -0400
committerJack Lloyd <[email protected]>2017-09-18 10:50:29 -0400
commit3da7f2d709a7b8045f531d1c0c4a3de8f4125eb1 (patch)
treef132e1f28f2f2e2f82da6587ac7679400d3f6940 /src/lib/utils/cpuid.cpp
parent12653256d4084b644b5ac9e534d03ad7d58632a5 (diff)
parenteaa19ec15fc1eeefa2d11f28a11ae85a63c0befa (diff)
Merge GH #1213 Improve ARM processor detection for systems without getauxval
Diffstat (limited to 'src/lib/utils/cpuid.cpp')
-rw-r--r--src/lib/utils/cpuid.cpp501
1 files changed, 0 insertions, 501 deletions
diff --git a/src/lib/utils/cpuid.cpp b/src/lib/utils/cpuid.cpp
deleted file mode 100644
index 82654b9b4..000000000
--- a/src/lib/utils/cpuid.cpp
+++ /dev/null
@@ -1,501 +0,0 @@
-/*
-* Runtime CPU detection
-* (C) 2009,2010,2013,2017 Jack Lloyd
-*
-* Botan is released under the Simplified BSD License (see license.txt)
-*/
-
-#include <botan/cpuid.h>
-#include <botan/types.h>
-#include <botan/loadstor.h>
-#include <botan/exceptn.h>
-#include <botan/mem_ops.h>
-#include <botan/parsing.h>
-#include <botan/internal/os_utils.h>
-#include <ostream>
-
-#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
-
-/*
-* On Darwin and OpenBSD ppc, use sysctl to detect AltiVec
-*/
-#if defined(BOTAN_TARGET_OS_IS_DARWIN)
- #include <sys/sysctl.h>
-#elif defined(BOTAN_TARGET_OS_IS_OPENBSD)
- #include <sys/param.h>
- #include <sys/sysctl.h>
- #include <machine/cpu.h>
-#endif
-
-#elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
-
-/*
-* On ARM, use getauxval if available, otherwise fall back to
-* running probe functions with a SIGILL handler.
-*/
-#if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL)
- #include <sys/auxv.h>
-#endif
-
-#elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
-
-/*
-* On x86, use CPUID instruction
-*/
-
-#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
- #include <intrin.h>
-#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
- #include <ia32intrin.h>
-#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
- #include <cpuid.h>
-#endif
-
-#endif
-
-namespace Botan {
-
-uint64_t CPUID::g_processor_features = 0;
-size_t CPUID::g_cache_line_size = BOTAN_TARGET_CPU_DEFAULT_CACHE_LINE_SIZE;
-CPUID::Endian_status CPUID::g_endian_status = ENDIAN_UNKNOWN;
-
-namespace {
-
-#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
-
-/*
-* PowerPC specific block: check for AltiVec using either
-* sysctl or by reading processor version number register.
-*/
-uint64_t detect_cpu_features(size_t* cache_line_size)
- {
-#if defined(BOTAN_TARGET_OS_IS_DARWIN) || defined(BOTAN_TARGET_OS_IS_OPENBSD)
- // On Darwin/OS X and OpenBSD, use sysctl
-
- int sels[2] = {
-#if defined(BOTAN_TARGET_OS_IS_OPENBSD)
- CTL_MACHDEP, CPU_ALTIVEC
-#else
- CTL_HW, HW_VECTORUNIT
-#endif
- };
-
- int vector_type = 0;
- size_t length = sizeof(vector_type);
- int error = sysctl(sels, 2, &vector_type, &length, NULL, 0);
-
- if(error == 0 && vector_type > 0)
- return CPUID::CPUID_ALTIVEC_BIT;
-
-#else
-
- /*
- On PowerPC, MSR 287 is PVR, the Processor Version Number
- Normally it is only accessible to ring 0, but Linux and NetBSD
- (others, too, maybe?) will trap and emulate it for us.
- */
-
- int pvr = OS::run_cpu_instruction_probe([]() -> int {
- uint32_t pvr = 0;
- asm volatile("mfspr %0, 287" : "=r" (pvr));
- // Top 16 bits suffice to identify the model
- return static_cast<int>(pvr >> 16);
- });
-
- if(pvr > 0)
- {
- const uint16_t ALTIVEC_PVR[] = {
- 0x003E, // IBM POWER6,
- 0x003F, // IBM POWER7,
- 0x004B, // IBM POWER8,
- 0x000C, // G4-7400
- 0x0039, // G5 970
- 0x003C, // G5 970FX
- 0x0044, // G5 970MP
- 0x0070, // Cell PPU
- 0, // end
- };
-
- for(size_t i = 0; ALTIVEC_PVR[i]; ++i)
- {
- if(pvr == ALTIVEC_PVR[i])
- return CPUID::CPUID_ALTIVEC_BIT;
- }
-
- return 0;
- }
-
- // TODO try direct instruction probing
-
-#endif
-
- return 0;
- }
-
-#elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
-
-uint64_t detect_cpu_features(size_t* cache_line_size)
- {
- uint64_t detected_features = 0;
-
-#if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL)
- /*
- * On systems with getauxval these bits should normally be defined
- * in bits/auxv.h but some buggy? glibc installs seem to miss them.
- * These following values are all fixed, for the Linux ELF format,
- * so we just hardcode them in ARM_hwcap_bit enum.
- */
-
- enum ARM_hwcap_bit {
-#if defined(BOTAN_TARGET_ARCH_IS_ARM32)
- NEON_bit = (1 << 12),
- AES_bit = (1 << 0),
- PMULL_bit = (1 << 1),
- SHA1_bit = (1 << 2),
- SHA2_bit = (1 << 3),
-
- ARCH_hwcap_neon = 16, // AT_HWCAP
- ARCH_hwcap_crypto = 26, // AT_HWCAP2
-#elif defined(BOTAN_TARGET_ARCH_IS_ARM64)
- NEON_bit = (1 << 1),
- AES_bit = (1 << 3),
- PMULL_bit = (1 << 4),
- SHA1_bit = (1 << 5),
- SHA2_bit = (1 << 6),
-
- ARCH_hwcap_neon = 16, // AT_HWCAP
- ARCH_hwcap_crypto = 16, // AT_HWCAP
-#endif
- };
-
- const unsigned long hwcap_neon = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_neon);
- if(hwcap_neon & ARM_hwcap_bit::NEON_bit)
- detected_features |= CPUID::CPUID_ARM_NEON_BIT;
-
- /*
- On aarch64 this ends up calling getauxval twice with AT_HWCAP
- It doesn't seem worth optimizing this out, since getauxval is
- just reading a field in the ELF header.
- */
- const unsigned long hwcap_crypto = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_crypto);
- if(hwcap_crypto & ARM_hwcap_bit::AES_bit)
- detected_features |= CPUID::CPUID_ARM_AES_BIT;
- if(hwcap_crypto & ARM_hwcap_bit::PMULL_bit)
- detected_features |= CPUID::CPUID_ARM_PMULL_BIT;
- if(hwcap_crypto & ARM_hwcap_bit::SHA1_bit)
- detected_features |= CPUID::CPUID_ARM_SHA1_BIT;
- if(hwcap_crypto & ARM_hwcap_bit::SHA2_bit)
- detected_features |= CPUID::CPUID_ARM_SHA2_BIT;
-
-#if defined(AT_DCACHEBSIZE)
- const unsigned long dcache_line = ::getauxval(AT_DCACHEBSIZE);
-
- // plausibility check
- if(dcache_line == 32 || dcache_line == 64 || dcache_line == 128)
- *cache_line_size = static_cast<size_t>(dcache_line);
-#endif
-
-#else
- // No getauxval API available, fall back on probe functions
-
- // TODO: probe functions
-
-#endif
-
- return detected_features;
- }
-
-#elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
-
-uint64_t detect_cpu_features(size_t* cache_line_size)
- {
-#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
- #define X86_CPUID(type, out) do { __cpuid((int*)out, type); } while(0)
- #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
-
-#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
- #define X86_CPUID(type, out) do { __cpuid(out, type); } while(0)
- #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
-
-#elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && defined(BOTAN_USE_GCC_INLINE_ASM)
- #define X86_CPUID(type, out) \
- asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
- : "0" (type))
-
- #define X86_CPUID_SUBLEVEL(type, level, out) \
- asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
- : "0" (type), "2" (level))
-
-#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
- #define X86_CPUID(type, out) do { __get_cpuid(type, out, out+1, out+2, out+3); } while(0)
-
- #define X86_CPUID_SUBLEVEL(type, level, out) \
- do { __cpuid_count(type, level, out[0], out[1], out[2], out[3]); } while(0)
-#else
- #warning "No way of calling x86 cpuid instruction for this compiler"
- #define X86_CPUID(type, out) do { clear_mem(out, 4); } while(0)
- #define X86_CPUID_SUBLEVEL(type, level, out) do { clear_mem(out, 4); } while(0)
-#endif
-
- uint64_t features_detected = 0;
- uint32_t cpuid[4] = { 0 };
-
- // CPUID 0: vendor identification, max sublevel
- X86_CPUID(0, cpuid);
-
- const uint32_t max_supported_sublevel = cpuid[0];
-
- const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 };
- const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 };
- const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3);
- const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3);
-
- if(max_supported_sublevel >= 1)
- {
- // CPUID 1: feature bits
- X86_CPUID(1, cpuid);
- const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
-
- enum x86_CPUID_1_bits : uint64_t {
- RDTSC = (1ULL << 4),
- SSE2 = (1ULL << 26),
- CLMUL = (1ULL << 33),
- SSSE3 = (1ULL << 41),
- SSE41 = (1ULL << 51),
- SSE42 = (1ULL << 52),
- AESNI = (1ULL << 57),
- RDRAND = (1ULL << 62)
- };
-
- if(flags0 & x86_CPUID_1_bits::RDTSC)
- features_detected |= CPUID::CPUID_RDTSC_BIT;
- if(flags0 & x86_CPUID_1_bits::SSE2)
- features_detected |= CPUID::CPUID_SSE2_BIT;
- if(flags0 & x86_CPUID_1_bits::CLMUL)
- features_detected |= CPUID::CPUID_CLMUL_BIT;
- if(flags0 & x86_CPUID_1_bits::SSSE3)
- features_detected |= CPUID::CPUID_SSSE3_BIT;
- if(flags0 & x86_CPUID_1_bits::SSE41)
- features_detected |= CPUID::CPUID_SSE41_BIT;
- if(flags0 & x86_CPUID_1_bits::SSE42)
- features_detected |= CPUID::CPUID_SSE42_BIT;
- if(flags0 & x86_CPUID_1_bits::AESNI)
- features_detected |= CPUID::CPUID_AESNI_BIT;
- if(flags0 & x86_CPUID_1_bits::RDRAND)
- features_detected |= CPUID::CPUID_RDRAND_BIT;
- }
-
- if(is_intel)
- {
- // Intel cache line size is in cpuid(1) output
- *cache_line_size = 8 * get_byte(2, cpuid[1]);
- }
- else if(is_amd)
- {
- // AMD puts it in vendor zone
- X86_CPUID(0x80000005, cpuid);
- *cache_line_size = get_byte(3, cpuid[2]);
- }
-
- if(max_supported_sublevel >= 7)
- {
- clear_mem(cpuid, 4);
- X86_CPUID_SUBLEVEL(7, 0, cpuid);
-
- enum x86_CPUID_7_bits : uint64_t {
- AVX2 = (1ULL << 5),
- BMI2 = (1ULL << 8),
- AVX512F = (1ULL << 16),
- RDSEED = (1ULL << 18),
- ADX = (1ULL << 19),
- SHA = (1ULL << 29),
- };
- uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
-
- if(flags7 & x86_CPUID_7_bits::AVX2)
- features_detected |= CPUID::CPUID_AVX2_BIT;
- if(flags7 & x86_CPUID_7_bits::BMI2)
- features_detected |= CPUID::CPUID_BMI2_BIT;
- if(flags7 & x86_CPUID_7_bits::AVX512F)
- features_detected |= CPUID::CPUID_AVX512F_BIT;
- if(flags7 & x86_CPUID_7_bits::RDSEED)
- features_detected |= CPUID::CPUID_RDSEED_BIT;
- if(flags7 & x86_CPUID_7_bits::ADX)
- features_detected |= CPUID::CPUID_ADX_BIT;
- if(flags7 & x86_CPUID_7_bits::SHA)
- features_detected |= CPUID::CPUID_SHA_BIT;
- }
-
-#undef X86_CPUID
-#undef X86_CPUID_SUBLEVEL
-
- /*
- * If we don't have access to CPUID, we can still safely assume that
- * any x86-64 processor has SSE2 and RDTSC
- */
-#if defined(BOTAN_TARGET_ARCH_IS_X86_64)
- if(features_detected == 0)
- {
- features_detected |= CPUID::CPUID_SSE2_BIT;
- features_detected |= CPUID::CPUID_RDTSC_BIT;
- }
-#endif
-
- return features_detected;
- }
-
-#endif
-
-}
-
-bool CPUID::has_simd_32()
- {
-#if defined(BOTAN_TARGET_SUPPORTS_SSE2)
- return CPUID::has_sse2();
-#elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC)
- return CPUID::has_altivec();
-#elif defined(BOTAN_TARGET_SUPPORTS_NEON)
- return CPUID::has_neon();
-#else
- return true;
-#endif
- }
-
-//static
-std::string CPUID::to_string()
- {
- std::vector<std::string> flags;
-
-#define CPUID_PRINT(flag) do { if(has_##flag()) { flags.push_back(#flag); } } while(0)
-
-#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
- CPUID_PRINT(sse2);
- CPUID_PRINT(ssse3);
- CPUID_PRINT(sse41);
- CPUID_PRINT(sse42);
- CPUID_PRINT(avx2);
- CPUID_PRINT(avx512f);
-
- CPUID_PRINT(rdtsc);
- CPUID_PRINT(bmi2);
- CPUID_PRINT(adx);
-
- CPUID_PRINT(aes_ni);
- CPUID_PRINT(clmul);
- CPUID_PRINT(rdrand);
- CPUID_PRINT(rdseed);
- CPUID_PRINT(intel_sha);
-#endif
-
-#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
- CPUID_PRINT(altivec);
-#endif
-
-#if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
- CPUID_PRINT(neon);
- CPUID_PRINT(arm_sha1);
- CPUID_PRINT(arm_sha2);
- CPUID_PRINT(arm_aes);
- CPUID_PRINT(arm_pmull);
-#endif
-
-#undef CPUID_PRINT
-
- return string_join(flags, ' ');
- }
-
-//static
-void CPUID::print(std::ostream& o)
- {
- o << "CPUID flags: " << CPUID::to_string() << "\n";
- }
-
-//static
-void CPUID::initialize()
- {
- g_processor_features = 0;
-
-#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) || \
- defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) || \
- defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
-
- g_processor_features = detect_cpu_features(&g_cache_line_size);
-
-#endif
-
- g_processor_features |= CPUID::CPUID_INITIALIZED_BIT;
- }
-
-//static
-CPUID::Endian_status CPUID::runtime_check_endian()
- {
- // Check runtime endian
- const uint32_t endian32 = 0x01234567;
- const uint8_t* e8 = reinterpret_cast<const uint8_t*>(&endian32);
-
- Endian_status endian = ENDIAN_UNKNOWN;
-
- if(e8[0] == 0x01 && e8[1] == 0x23 && e8[2] == 0x45 && e8[3] == 0x67)
- {
- endian = ENDIAN_BIG;
- }
- else if(e8[0] == 0x67 && e8[1] == 0x45 && e8[2] == 0x23 && e8[3] == 0x01)
- {
- endian = ENDIAN_LITTLE;
- }
- else
- {
- throw Internal_Error("Unexpected endian at runtime, neither big nor little");
- }
-
- // If we were compiled with a known endian, verify it matches at runtime
-#if defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN)
- BOTAN_ASSERT(endian == ENDIAN_LITTLE, "Build and runtime endian match");
-#elif defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN)
- BOTAN_ASSERT(endian == ENDIAN_BIG, "Build and runtime endian match");
-#endif
-
- return endian;
- }
-
-std::vector<Botan::CPUID::CPUID_bits>
-CPUID::bit_from_string(const std::string& tok)
- {
-#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
- if(tok == "sse2" || tok == "simd")
- return {Botan::CPUID::CPUID_SSE2_BIT};
- if(tok == "ssse3")
- return {Botan::CPUID::CPUID_SSSE3_BIT};
- if(tok == "aesni")
- return {Botan::CPUID::CPUID_AESNI_BIT};
- if(tok == "clmul")
- return {Botan::CPUID::CPUID_CLMUL_BIT};
- if(tok == "avx2")
- return {Botan::CPUID::CPUID_AVX2_BIT};
- if(tok == "sha")
- return {Botan::CPUID::CPUID_SHA_BIT};
-
-#elif defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
- if(tok == "altivec" || tok == "simd")
- return {Botan::CPUID::CPUID_ALTIVEC_BIT};
-
-#elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
- if(tok == "neon" || tok == "simd")
- return {Botan::CPUID::CPUID_ARM_NEON_BIT};
- if(tok == "armv8sha1")
- return {Botan::CPUID::CPUID_ARM_SHA1_BIT};
- if(tok == "armv8sha2")
- return {Botan::CPUID::CPUID_ARM_SHA2_BIT};
- if(tok == "armv8aes")
- return {Botan::CPUID::CPUID_ARM_AES_BIT};
- if(tok == "armv8pmull")
- return {Botan::CPUID::CPUID_ARM_PMULL_BIT};
-
-#else
- BOTAN_UNUSED(tok);
-#endif
-
- return {};
- }
-
-}