diff options
author | Jack Lloyd <[email protected]> | 2017-09-18 10:50:29 -0400 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2017-09-18 10:50:29 -0400 |
commit | 3da7f2d709a7b8045f531d1c0c4a3de8f4125eb1 (patch) | |
tree | f132e1f28f2f2e2f82da6587ac7679400d3f6940 /src/lib/utils/cpuid.cpp | |
parent | 12653256d4084b644b5ac9e534d03ad7d58632a5 (diff) | |
parent | eaa19ec15fc1eeefa2d11f28a11ae85a63c0befa (diff) |
Merge GH #1213 Improve ARM processor detection for systems without getauxval
Diffstat (limited to 'src/lib/utils/cpuid.cpp')
-rw-r--r-- | src/lib/utils/cpuid.cpp | 501 |
1 files changed, 0 insertions, 501 deletions
diff --git a/src/lib/utils/cpuid.cpp b/src/lib/utils/cpuid.cpp deleted file mode 100644 index 82654b9b4..000000000 --- a/src/lib/utils/cpuid.cpp +++ /dev/null @@ -1,501 +0,0 @@ -/* -* Runtime CPU detection -* (C) 2009,2010,2013,2017 Jack Lloyd -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#include <botan/cpuid.h> -#include <botan/types.h> -#include <botan/loadstor.h> -#include <botan/exceptn.h> -#include <botan/mem_ops.h> -#include <botan/parsing.h> -#include <botan/internal/os_utils.h> -#include <ostream> - -#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) - -/* -* On Darwin and OpenBSD ppc, use sysctl to detect AltiVec -*/ -#if defined(BOTAN_TARGET_OS_IS_DARWIN) - #include <sys/sysctl.h> -#elif defined(BOTAN_TARGET_OS_IS_OPENBSD) - #include <sys/param.h> - #include <sys/sysctl.h> - #include <machine/cpu.h> -#endif - -#elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) - -/* -* On ARM, use getauxval if available, otherwise fall back to -* running probe functions with a SIGILL handler. -*/ -#if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL) - #include <sys/auxv.h> -#endif - -#elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) - -/* -* On x86, use CPUID instruction -*/ - -#if defined(BOTAN_BUILD_COMPILER_IS_MSVC) - #include <intrin.h> -#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL) - #include <ia32intrin.h> -#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG) - #include <cpuid.h> -#endif - -#endif - -namespace Botan { - -uint64_t CPUID::g_processor_features = 0; -size_t CPUID::g_cache_line_size = BOTAN_TARGET_CPU_DEFAULT_CACHE_LINE_SIZE; -CPUID::Endian_status CPUID::g_endian_status = ENDIAN_UNKNOWN; - -namespace { - -#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) - -/* -* PowerPC specific block: check for AltiVec using either -* sysctl or by reading processor version number register. -*/ -uint64_t detect_cpu_features(size_t* cache_line_size) - { -#if defined(BOTAN_TARGET_OS_IS_DARWIN) || defined(BOTAN_TARGET_OS_IS_OPENBSD) - // On Darwin/OS X and OpenBSD, use sysctl - - int sels[2] = { -#if defined(BOTAN_TARGET_OS_IS_OPENBSD) - CTL_MACHDEP, CPU_ALTIVEC -#else - CTL_HW, HW_VECTORUNIT -#endif - }; - - int vector_type = 0; - size_t length = sizeof(vector_type); - int error = sysctl(sels, 2, &vector_type, &length, NULL, 0); - - if(error == 0 && vector_type > 0) - return CPUID::CPUID_ALTIVEC_BIT; - -#else - - /* - On PowerPC, MSR 287 is PVR, the Processor Version Number - Normally it is only accessible to ring 0, but Linux and NetBSD - (others, too, maybe?) will trap and emulate it for us. - */ - - int pvr = OS::run_cpu_instruction_probe([]() -> int { - uint32_t pvr = 0; - asm volatile("mfspr %0, 287" : "=r" (pvr)); - // Top 16 bits suffice to identify the model - return static_cast<int>(pvr >> 16); - }); - - if(pvr > 0) - { - const uint16_t ALTIVEC_PVR[] = { - 0x003E, // IBM POWER6, - 0x003F, // IBM POWER7, - 0x004B, // IBM POWER8, - 0x000C, // G4-7400 - 0x0039, // G5 970 - 0x003C, // G5 970FX - 0x0044, // G5 970MP - 0x0070, // Cell PPU - 0, // end - }; - - for(size_t i = 0; ALTIVEC_PVR[i]; ++i) - { - if(pvr == ALTIVEC_PVR[i]) - return CPUID::CPUID_ALTIVEC_BIT; - } - - return 0; - } - - // TODO try direct instruction probing - -#endif - - return 0; - } - -#elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) - -uint64_t detect_cpu_features(size_t* cache_line_size) - { - uint64_t detected_features = 0; - -#if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL) - /* - * On systems with getauxval these bits should normally be defined - * in bits/auxv.h but some buggy? glibc installs seem to miss them. - * These following values are all fixed, for the Linux ELF format, - * so we just hardcode them in ARM_hwcap_bit enum. - */ - - enum ARM_hwcap_bit { -#if defined(BOTAN_TARGET_ARCH_IS_ARM32) - NEON_bit = (1 << 12), - AES_bit = (1 << 0), - PMULL_bit = (1 << 1), - SHA1_bit = (1 << 2), - SHA2_bit = (1 << 3), - - ARCH_hwcap_neon = 16, // AT_HWCAP - ARCH_hwcap_crypto = 26, // AT_HWCAP2 -#elif defined(BOTAN_TARGET_ARCH_IS_ARM64) - NEON_bit = (1 << 1), - AES_bit = (1 << 3), - PMULL_bit = (1 << 4), - SHA1_bit = (1 << 5), - SHA2_bit = (1 << 6), - - ARCH_hwcap_neon = 16, // AT_HWCAP - ARCH_hwcap_crypto = 16, // AT_HWCAP -#endif - }; - - const unsigned long hwcap_neon = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_neon); - if(hwcap_neon & ARM_hwcap_bit::NEON_bit) - detected_features |= CPUID::CPUID_ARM_NEON_BIT; - - /* - On aarch64 this ends up calling getauxval twice with AT_HWCAP - It doesn't seem worth optimizing this out, since getauxval is - just reading a field in the ELF header. - */ - const unsigned long hwcap_crypto = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_crypto); - if(hwcap_crypto & ARM_hwcap_bit::AES_bit) - detected_features |= CPUID::CPUID_ARM_AES_BIT; - if(hwcap_crypto & ARM_hwcap_bit::PMULL_bit) - detected_features |= CPUID::CPUID_ARM_PMULL_BIT; - if(hwcap_crypto & ARM_hwcap_bit::SHA1_bit) - detected_features |= CPUID::CPUID_ARM_SHA1_BIT; - if(hwcap_crypto & ARM_hwcap_bit::SHA2_bit) - detected_features |= CPUID::CPUID_ARM_SHA2_BIT; - -#if defined(AT_DCACHEBSIZE) - const unsigned long dcache_line = ::getauxval(AT_DCACHEBSIZE); - - // plausibility check - if(dcache_line == 32 || dcache_line == 64 || dcache_line == 128) - *cache_line_size = static_cast<size_t>(dcache_line); -#endif - -#else - // No getauxval API available, fall back on probe functions - - // TODO: probe functions - -#endif - - return detected_features; - } - -#elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) - -uint64_t detect_cpu_features(size_t* cache_line_size) - { -#if defined(BOTAN_BUILD_COMPILER_IS_MSVC) - #define X86_CPUID(type, out) do { __cpuid((int*)out, type); } while(0) - #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0) - -#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL) - #define X86_CPUID(type, out) do { __cpuid(out, type); } while(0) - #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0) - -#elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && defined(BOTAN_USE_GCC_INLINE_ASM) - #define X86_CPUID(type, out) \ - asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \ - : "0" (type)) - - #define X86_CPUID_SUBLEVEL(type, level, out) \ - asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \ - : "0" (type), "2" (level)) - -#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG) - #define X86_CPUID(type, out) do { __get_cpuid(type, out, out+1, out+2, out+3); } while(0) - - #define X86_CPUID_SUBLEVEL(type, level, out) \ - do { __cpuid_count(type, level, out[0], out[1], out[2], out[3]); } while(0) -#else - #warning "No way of calling x86 cpuid instruction for this compiler" - #define X86_CPUID(type, out) do { clear_mem(out, 4); } while(0) - #define X86_CPUID_SUBLEVEL(type, level, out) do { clear_mem(out, 4); } while(0) -#endif - - uint64_t features_detected = 0; - uint32_t cpuid[4] = { 0 }; - - // CPUID 0: vendor identification, max sublevel - X86_CPUID(0, cpuid); - - const uint32_t max_supported_sublevel = cpuid[0]; - - const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 }; - const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 }; - const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3); - const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3); - - if(max_supported_sublevel >= 1) - { - // CPUID 1: feature bits - X86_CPUID(1, cpuid); - const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3]; - - enum x86_CPUID_1_bits : uint64_t { - RDTSC = (1ULL << 4), - SSE2 = (1ULL << 26), - CLMUL = (1ULL << 33), - SSSE3 = (1ULL << 41), - SSE41 = (1ULL << 51), - SSE42 = (1ULL << 52), - AESNI = (1ULL << 57), - RDRAND = (1ULL << 62) - }; - - if(flags0 & x86_CPUID_1_bits::RDTSC) - features_detected |= CPUID::CPUID_RDTSC_BIT; - if(flags0 & x86_CPUID_1_bits::SSE2) - features_detected |= CPUID::CPUID_SSE2_BIT; - if(flags0 & x86_CPUID_1_bits::CLMUL) - features_detected |= CPUID::CPUID_CLMUL_BIT; - if(flags0 & x86_CPUID_1_bits::SSSE3) - features_detected |= CPUID::CPUID_SSSE3_BIT; - if(flags0 & x86_CPUID_1_bits::SSE41) - features_detected |= CPUID::CPUID_SSE41_BIT; - if(flags0 & x86_CPUID_1_bits::SSE42) - features_detected |= CPUID::CPUID_SSE42_BIT; - if(flags0 & x86_CPUID_1_bits::AESNI) - features_detected |= CPUID::CPUID_AESNI_BIT; - if(flags0 & x86_CPUID_1_bits::RDRAND) - features_detected |= CPUID::CPUID_RDRAND_BIT; - } - - if(is_intel) - { - // Intel cache line size is in cpuid(1) output - *cache_line_size = 8 * get_byte(2, cpuid[1]); - } - else if(is_amd) - { - // AMD puts it in vendor zone - X86_CPUID(0x80000005, cpuid); - *cache_line_size = get_byte(3, cpuid[2]); - } - - if(max_supported_sublevel >= 7) - { - clear_mem(cpuid, 4); - X86_CPUID_SUBLEVEL(7, 0, cpuid); - - enum x86_CPUID_7_bits : uint64_t { - AVX2 = (1ULL << 5), - BMI2 = (1ULL << 8), - AVX512F = (1ULL << 16), - RDSEED = (1ULL << 18), - ADX = (1ULL << 19), - SHA = (1ULL << 29), - }; - uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1]; - - if(flags7 & x86_CPUID_7_bits::AVX2) - features_detected |= CPUID::CPUID_AVX2_BIT; - if(flags7 & x86_CPUID_7_bits::BMI2) - features_detected |= CPUID::CPUID_BMI2_BIT; - if(flags7 & x86_CPUID_7_bits::AVX512F) - features_detected |= CPUID::CPUID_AVX512F_BIT; - if(flags7 & x86_CPUID_7_bits::RDSEED) - features_detected |= CPUID::CPUID_RDSEED_BIT; - if(flags7 & x86_CPUID_7_bits::ADX) - features_detected |= CPUID::CPUID_ADX_BIT; - if(flags7 & x86_CPUID_7_bits::SHA) - features_detected |= CPUID::CPUID_SHA_BIT; - } - -#undef X86_CPUID -#undef X86_CPUID_SUBLEVEL - - /* - * If we don't have access to CPUID, we can still safely assume that - * any x86-64 processor has SSE2 and RDTSC - */ -#if defined(BOTAN_TARGET_ARCH_IS_X86_64) - if(features_detected == 0) - { - features_detected |= CPUID::CPUID_SSE2_BIT; - features_detected |= CPUID::CPUID_RDTSC_BIT; - } -#endif - - return features_detected; - } - -#endif - -} - -bool CPUID::has_simd_32() - { -#if defined(BOTAN_TARGET_SUPPORTS_SSE2) - return CPUID::has_sse2(); -#elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC) - return CPUID::has_altivec(); -#elif defined(BOTAN_TARGET_SUPPORTS_NEON) - return CPUID::has_neon(); -#else - return true; -#endif - } - -//static -std::string CPUID::to_string() - { - std::vector<std::string> flags; - -#define CPUID_PRINT(flag) do { if(has_##flag()) { flags.push_back(#flag); } } while(0) - -#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) - CPUID_PRINT(sse2); - CPUID_PRINT(ssse3); - CPUID_PRINT(sse41); - CPUID_PRINT(sse42); - CPUID_PRINT(avx2); - CPUID_PRINT(avx512f); - - CPUID_PRINT(rdtsc); - CPUID_PRINT(bmi2); - CPUID_PRINT(adx); - - CPUID_PRINT(aes_ni); - CPUID_PRINT(clmul); - CPUID_PRINT(rdrand); - CPUID_PRINT(rdseed); - CPUID_PRINT(intel_sha); -#endif - -#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) - CPUID_PRINT(altivec); -#endif - -#if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) - CPUID_PRINT(neon); - CPUID_PRINT(arm_sha1); - CPUID_PRINT(arm_sha2); - CPUID_PRINT(arm_aes); - CPUID_PRINT(arm_pmull); -#endif - -#undef CPUID_PRINT - - return string_join(flags, ' '); - } - -//static -void CPUID::print(std::ostream& o) - { - o << "CPUID flags: " << CPUID::to_string() << "\n"; - } - -//static -void CPUID::initialize() - { - g_processor_features = 0; - -#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) || \ - defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) || \ - defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) - - g_processor_features = detect_cpu_features(&g_cache_line_size); - -#endif - - g_processor_features |= CPUID::CPUID_INITIALIZED_BIT; - } - -//static -CPUID::Endian_status CPUID::runtime_check_endian() - { - // Check runtime endian - const uint32_t endian32 = 0x01234567; - const uint8_t* e8 = reinterpret_cast<const uint8_t*>(&endian32); - - Endian_status endian = ENDIAN_UNKNOWN; - - if(e8[0] == 0x01 && e8[1] == 0x23 && e8[2] == 0x45 && e8[3] == 0x67) - { - endian = ENDIAN_BIG; - } - else if(e8[0] == 0x67 && e8[1] == 0x45 && e8[2] == 0x23 && e8[3] == 0x01) - { - endian = ENDIAN_LITTLE; - } - else - { - throw Internal_Error("Unexpected endian at runtime, neither big nor little"); - } - - // If we were compiled with a known endian, verify it matches at runtime -#if defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN) - BOTAN_ASSERT(endian == ENDIAN_LITTLE, "Build and runtime endian match"); -#elif defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN) - BOTAN_ASSERT(endian == ENDIAN_BIG, "Build and runtime endian match"); -#endif - - return endian; - } - -std::vector<Botan::CPUID::CPUID_bits> -CPUID::bit_from_string(const std::string& tok) - { -#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) - if(tok == "sse2" || tok == "simd") - return {Botan::CPUID::CPUID_SSE2_BIT}; - if(tok == "ssse3") - return {Botan::CPUID::CPUID_SSSE3_BIT}; - if(tok == "aesni") - return {Botan::CPUID::CPUID_AESNI_BIT}; - if(tok == "clmul") - return {Botan::CPUID::CPUID_CLMUL_BIT}; - if(tok == "avx2") - return {Botan::CPUID::CPUID_AVX2_BIT}; - if(tok == "sha") - return {Botan::CPUID::CPUID_SHA_BIT}; - -#elif defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) - if(tok == "altivec" || tok == "simd") - return {Botan::CPUID::CPUID_ALTIVEC_BIT}; - -#elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) - if(tok == "neon" || tok == "simd") - return {Botan::CPUID::CPUID_ARM_NEON_BIT}; - if(tok == "armv8sha1") - return {Botan::CPUID::CPUID_ARM_SHA1_BIT}; - if(tok == "armv8sha2") - return {Botan::CPUID::CPUID_ARM_SHA2_BIT}; - if(tok == "armv8aes") - return {Botan::CPUID::CPUID_ARM_AES_BIT}; - if(tok == "armv8pmull") - return {Botan::CPUID::CPUID_ARM_PMULL_BIT}; - -#else - BOTAN_UNUSED(tok); -#endif - - return {}; - } - -} |