diff options
author | Jack Lloyd <[email protected]> | 2017-01-18 23:53:21 -0500 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2017-01-18 23:53:21 -0500 |
commit | 59fe7e7a9559eb3624d8ef310325bbb917f89033 (patch) | |
tree | 138fa33873d84c41725d1cd15f43f81d6b975258 | |
parent | 4242fabe65b5710ace9799f692b462aa8b6ee37a (diff) | |
parent | 7880efc3c8d714088c82f053b8123b81ad569737 (diff) |
Merge GH #843 Refactor CPUID impl, add ARM feature detection
-rw-r--r-- | src/build-data/os/android.txt | 1 | ||||
-rw-r--r-- | src/build-data/os/linux.txt | 1 | ||||
-rw-r--r-- | src/lib/utils/cpuid.cpp | 374 | ||||
-rw-r--r-- | src/lib/utils/cpuid.h | 125 | ||||
-rw-r--r-- | src/lib/utils/os_utils.cpp | 70 | ||||
-rw-r--r-- | src/lib/utils/os_utils.h | 51 |
6 files changed, 463 insertions, 159 deletions
diff --git a/src/build-data/os/android.txt b/src/build-data/os/android.txt index 50d97fd8f..db06bab1d 100644 --- a/src/build-data/os/android.txt +++ b/src/build-data/os/android.txt @@ -7,6 +7,7 @@ clock_gettime gettimeofday posix_mlock gmtime_r +getauxval dlopen readdir threads diff --git a/src/build-data/os/linux.txt b/src/build-data/os/linux.txt index 6bd81f7f2..59f995fc2 100644 --- a/src/build-data/os/linux.txt +++ b/src/build-data/os/linux.txt @@ -8,6 +8,7 @@ gettimeofday posix_mlock gmtime_r dlopen +getauxval readdir timegm sockets diff --git a/src/lib/utils/cpuid.cpp b/src/lib/utils/cpuid.cpp index 428ca2715..325421d71 100644 --- a/src/lib/utils/cpuid.cpp +++ b/src/lib/utils/cpuid.cpp @@ -1,6 +1,6 @@ /* * Runtime CPU detection -* (C) 2009-2010,2013 Jack Lloyd +* (C) 2009,2010,2013,2017 Jack Lloyd * * Botan is released under the Simplified BSD License (see license.txt) */ @@ -14,59 +14,66 @@ #if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) +/* +* On Darwin and OpenBSD ppc, use sysctl to detect AltiVec +*/ #if defined(BOTAN_TARGET_OS_IS_DARWIN) #include <sys/sysctl.h> -#endif - -#if defined(BOTAN_TARGET_OS_IS_OPENBSD) +#elif defined(BOTAN_TARGET_OS_IS_OPENBSD) #include <sys/param.h> #include <sys/sysctl.h> #include <machine/cpu.h> #endif +#elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) + +/* +* On ARM, use getauxval if available, otherwise fall back to +* running probe functions with a SIGILL handler. +*/ +#if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL) + #include <sys/auxv.h> #endif -#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) +#include <botan/internal/os_utils.h> -#if defined(BOTAN_BUILD_COMPILER_IS_MSVC) +#elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) -#include <intrin.h> +#if defined(BOTAN_BUILD_COMPILER_IS_MSVC) -#define X86_CPUID(type, out) do { __cpuid((int*)out, type); } while(0) -#define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0) + #include <intrin.h> + #define X86_CPUID(type, out) do { __cpuid((int*)out, type); } while(0) + #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0) #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL) -#include <ia32intrin.h> - -#define X86_CPUID(type, out) do { __cpuid(out, type); } while(0) -#define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0) + #include <ia32intrin.h> + #define X86_CPUID(type, out) do { __cpuid(out, type); } while(0) + #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0) #elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && defined(BOTAN_USE_GCC_INLINE_ASM) -#define X86_CPUID(type, out) \ - asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \ - : "0" (type)) + #define X86_CPUID(type, out) \ + asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \ + : "0" (type)) -#define X86_CPUID_SUBLEVEL(type, level, out) \ - asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \ - : "0" (type), "2" (level)) + #define X86_CPUID_SUBLEVEL(type, level, out) \ + asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \ + : "0" (type), "2" (level)) #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG) -#include <cpuid.h> + #include <cpuid.h> -#define X86_CPUID(type, out) do { __get_cpuid(type, out, out+1, out+2, out+3); } while(0) + #define X86_CPUID(type, out) do { __get_cpuid(type, out, out+1, out+2, out+3); } while(0) -#define X86_CPUID_SUBLEVEL(type, level, out) \ - do { __cpuid_count(type, level, out[0], out[1], out[2], out[3]); } while(0) + #define X86_CPUID_SUBLEVEL(type, level, out) \ + do { __cpuid_count(type, level, out[0], out[1], out[2], out[3]); } while(0) #else - -#warning "No way of calling cpuid for this compiler" - -#define X86_CPUID(type, out) do { clear_mem(out, 4); } while(0) -#define X86_CPUID_SUBLEVEL(type, level, out) do { clear_mem(out, 4); } while(0) + #warning "No way of calling cpuid for this compiler" + #define X86_CPUID(type, out) do { clear_mem(out, 4); } while(0) + #define X86_CPUID_SUBLEVEL(type, level, out) do { clear_mem(out, 4); } while(0) #endif @@ -74,18 +81,22 @@ namespace Botan { -uint64_t CPUID::g_processor_flags[2] = { 0, 0 }; +uint64_t CPUID::g_processor_features = 0; size_t CPUID::g_cache_line_size = BOTAN_TARGET_CPU_DEFAULT_CACHE_LINE_SIZE; -bool CPUID::g_initialized = false; bool CPUID::g_little_endian = false; namespace { #if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) -bool altivec_check_sysctl() +/* +* PowerPC specific block: check for AltiVec using either +* sysctl or by reading processor version number register. +*/ +uint64_t powerpc_detect_cpu_featutures() { #if defined(BOTAN_TARGET_OS_IS_DARWIN) || defined(BOTAN_TARGET_OS_IS_OPENBSD) + // On Darwin/OS X and OpenBSD, use sysctl #if defined(BOTAN_TARGET_OS_IS_OPENBSD) int sels[2] = { CTL_MACHDEP, CPU_ALTIVEC }; @@ -98,18 +109,9 @@ bool altivec_check_sysctl() int error = sysctl(sels, 2, &vector_type, &length, NULL, 0); if(error == 0 && vector_type > 0) - return true; -#endif - - return false; - } - -bool altivec_check_pvr_emul() - { - bool altivec_capable = false; - -#if defined(BOTAN_TARGET_OS_IS_LINUX) || defined(BOTAN_TARGET_OS_IS_NETBSD) + return (1ULL << CPUID::CPUID_ALTIVEC_BIT); +#elif defined(BOTAN_TARGET_OS_IS_LINUX) || defined(BOTAN_TARGET_OS_IS_NETBSD) /* On PowerPC, MSR 287 is PVR, the Processor Version Number Normally it is only accessible to ring 0, but Linux and NetBSD @@ -119,6 +121,14 @@ bool altivec_check_pvr_emul() PearPC and Linux sources, mostly. */ + uint32_t pvr = 0; + + // TODO: we could run inside SIGILL handler block + asm volatile("mfspr %0, 287" : "=r" (pvr)); + + // Top 16 bit suffice to identify model + pvr >>= 16; + const uint16_t PVR_G4_7400 = 0x000C; const uint16_t PVR_G5_970 = 0x0039; const uint16_t PVR_G5_970FX = 0x003C; @@ -129,29 +139,197 @@ bool altivec_check_pvr_emul() const uint16_t PVR_POWER8 = 0x004B; const uint16_t PVR_CELL_PPU = 0x0070; - // Motorola produced G4s with PVR 0x800[0123C] (at least) - const uint16_t PVR_G4_74xx_24 = 0x800; + if(pvr == PVR_G4_7400 || + pvr == PVR_G5_970 || pvr == PVR_G5_970FX || + pvr == PVR_G5_970MP || pvr == PVR_G5_970GX || + pvr == PVR_POWER6 || pvr == PVR_POWER7 || pvr == PVR_POWER8 || + pvr == PVR_CELL_PPU) + { + return (1ULL << CPUID::CPUID_ALTIVEC_BIT); + } +#else + #warning "No PowerPC feature detection available for this platform" +#endif - uint32_t pvr = 0; + return 0; + } - asm volatile("mfspr %0, 287" : "=r" (pvr)); +#elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) - // Top 16 bit suffice to identify model - pvr >>= 16; +uint64_t arm_detect_cpu_features(size_t* cache_line_size) + { + uint64_t detected_features = 0; + *cache_line_size = BOTAN_TARGET_CPU_DEFAULT_CACHE_LINE_SIZE; + +#if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL) + errno = 0; + + /* + * On systems with getauxval these bits should normally be defined + * in bits/auxv.h but some buggy? glibc installs seem to miss them. + * These following values are all fixed, for the Linux ELF format, + * so we just hardcode them in ARM_hwcap_bit enum. + */ + + enum ARM_hwcap_bit { +#if defined(BOTAN_TARGET_ARCH_IS_ARM32) + NEON_bit = (1 << 12), + AES_bit = (1 << 0), + PMULL_bit = (1 << 1), + SHA1_bit = (1 << 2), + SHA2_bit = (1 << 3), + + ARCH_hwcap_neon = 16, // AT_HWCAP + ARCH_hwcap_crypto = 26, // AT_HWCAP2 +#elif defined(BOTAN_TARGET_ARCH_IS_ARM64) + NEON_bit = (1 << 1), + AES_bit = (1 << 3), + PMULL_bit = (1 << 4), + SHA1_bit = (1 << 5), + SHA2_bit = (1 << 6), + + ARCH_hwcap_neon = 16, // AT_HWCAP + ARCH_hwcap_crypto = 16, // AT_HWCAP +#endif + }; + + const unsigned long hwcap_neon = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_neon); + if(hwcap_neon & ARM_hwcap_bit::NEON_bit) + detected_features |= CPUID::CPUID_ARM_NEON_BIT; + + /* + On aarch64 this ends up calling getauxval twice with AT_HWCAP + This doesn't seem worth optimizing this out, since getauxval is + just reading a field in the ELF header. + */ + const unsigned long hwcap_crypto = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_crypto); + if(hwcap_crypto & ARM_hwcap_bit::AES_bit) + detected_features |= CPUID::CPUID_ARM_AES_BIT; + if(hwcap_crypto & ARM_hwcap_bit::PMULL_bit) + detected_features |= CPUID::CPUID_ARM_PMULL_BIT; + if(hwcap_crypto & ARM_hwcap_bit::SHA1_bit) + detected_features |= CPUID::CPUID_ARM_SHA1_BIT; + if(hwcap_crypto & ARM_hwcap_bit::SHA2_bit) + detected_features |= CPUID::CPUID_ARM_SHA2_BIT; + + const unsigned long dcache_line = ::getauxval(AT_DCACHEBSIZE); + + // plausibility check + if(dcache_line == 32 || dcache_line == 64 || dcache_line == 128) + *cache_line_size = static_cast<size_t>(dcache_line); +#endif + + // TODO: probe functions + + return detected_features; + } + +#elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) + +uint64_t x86_detect_cpu_features(size_t* cache_line_size) + { + uint64_t features_detected = 0; + uint32_t cpuid[4] = { 0 }; + + // CPUID 0: vendor identification, max sublevel + X86_CPUID(0, cpuid); + + const uint32_t max_supported_sublevel = cpuid[0]; + + const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 }; + const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 }; + const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3); + const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3); + + if(max_supported_sublevel >= 1) + { + // CPUID 1: feature bits + X86_CPUID(1, cpuid); + const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3]; + + enum x86_CPUID_1_bits : uint64_t { + RDTSC = (1ULL << 4), + SSE2 = (1ULL << 26), + CLMUL = (1ULL << 33), + SSSE3 = (1ULL << 41), + SSE41 = (1ULL << 51), + SSE42 = (1ULL << 52), + AESNI = (1ULL << 57), + RDRAND = (1ULL << 62) + }; + + if(flags0 & x86_CPUID_1_bits::RDTSC) + features_detected |= CPUID::CPUID_RDTSC_BIT; + if(flags0 & x86_CPUID_1_bits::SSE2) + features_detected |= CPUID::CPUID_SSE2_BIT; + if(flags0 & x86_CPUID_1_bits::CLMUL) + features_detected |= CPUID::CPUID_CLMUL_BIT; + if(flags0 & x86_CPUID_1_bits::SSSE3) + features_detected |= CPUID::CPUID_SSSE3_BIT; + if(flags0 & x86_CPUID_1_bits::SSE41) + features_detected |= CPUID::CPUID_SSE41_BIT; + if(flags0 & x86_CPUID_1_bits::SSE42) + features_detected |= CPUID::CPUID_SSE42_BIT; + if(flags0 & x86_CPUID_1_bits::AESNI) + features_detected |= CPUID::CPUID_AESNI_BIT; + if(flags0 & x86_CPUID_1_bits::RDRAND) + features_detected |= CPUID::CPUID_RDRAND_BIT; + } + + if(is_intel) + { + // Intel cache line size is in cpuid(1) output + *cache_line_size = 8 * get_byte(2, cpuid[1]); + } + else if(is_amd) + { + // AMD puts it in vendor zone + X86_CPUID(0x80000005, cpuid); + *cache_line_size = get_byte(3, cpuid[2]); + } - altivec_capable |= (pvr == PVR_G4_7400); - altivec_capable |= ((pvr >> 4) == PVR_G4_74xx_24); - altivec_capable |= (pvr == PVR_G5_970); - altivec_capable |= (pvr == PVR_G5_970FX); - altivec_capable |= (pvr == PVR_G5_970MP); - altivec_capable |= (pvr == PVR_G5_970GX); - altivec_capable |= (pvr == PVR_POWER6); - altivec_capable |= (pvr == PVR_POWER7); - altivec_capable |= (pvr == PVR_POWER8); - altivec_capable |= (pvr == PVR_CELL_PPU); + if(max_supported_sublevel >= 7) + { + clear_mem(cpuid, 4); + X86_CPUID_SUBLEVEL(7, 0, cpuid); + + enum x86_CPUID_7_bits : uint64_t { + AVX2 = (1ULL << 5), + BMI2 = (1ULL << 8), + AVX512F = (1ULL << 16), + RDSEED = (1ULL << 18), + ADX = (1ULL << 19), + SHA = (1ULL << 29), + }; + uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1]; + + if(flags7 & x86_CPUID_7_bits::AVX2) + features_detected |= CPUID::CPUID_AVX2_BIT; + if(flags7 & x86_CPUID_7_bits::BMI2) + features_detected |= CPUID::CPUID_BMI2_BIT; + if(flags7 & x86_CPUID_7_bits::AVX512F) + features_detected |= CPUID::CPUID_AVX512F_BIT; + if(flags7 & x86_CPUID_7_bits::RDSEED) + features_detected |= CPUID::CPUID_RDSEED_BIT; + if(flags7 & x86_CPUID_7_bits::ADX) + features_detected |= CPUID::CPUID_ADX_BIT; + if(flags7 & x86_CPUID_7_bits::SHA) + features_detected |= CPUID::CPUID_SHA_BIT; + } + + /* + * If we don't have access to CPUID, we can still safely assume that + * any x86-64 processor has SSE2 and RDTSC + */ +#if defined(BOTAN_TARGET_ARCH_IS_X86_64) + if(features_detected == 0) + { + features_detected |= CPUID::CPUID_SSE2_BIT; + features_detected |= CPUID::CPUID_RDTSC_BIT; + } #endif - return altivec_capable; + return features_detected; } #endif @@ -185,79 +363,46 @@ void CPUID::print(std::ostream& o) CPUID_PRINT(rdtsc); CPUID_PRINT(bmi2); - CPUID_PRINT(clmul); + CPUID_PRINT(adx); + CPUID_PRINT(aes_ni); + CPUID_PRINT(clmul); CPUID_PRINT(rdrand); CPUID_PRINT(rdseed); CPUID_PRINT(intel_sha); - CPUID_PRINT(adx); #endif #if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) CPUID_PRINT(altivec); #endif +#if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) + CPUID_PRINT(neon); + CPUID_PRINT(arm_sha1); + CPUID_PRINT(arm_sha2); + CPUID_PRINT(arm_aes); + CPUID_PRINT(arm_pmull); +#endif + #undef CPUID_PRINT o << "\n"; } void CPUID::initialize() { - clear_mem(g_processor_flags, 2); + g_processor_features = 0; #if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) - if(altivec_check_sysctl() || altivec_check_pvr_emul()) - { - g_processor_flags[0] |= CPUID_ALTIVEC_BIT; - } + g_processor_features = powerpc_detect_cpu_featutures(); +#elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) + g_processor_features = arm_detect_cpu_features(&g_cache_line_size); +#elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) + g_processor_features = x86_detect_cpu_features(&g_cache_line_size); #endif -#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) - const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 }; - const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 }; - - uint32_t cpuid[4] = { 0 }; - X86_CPUID(0, cpuid); - - const uint32_t max_supported_sublevel = cpuid[0]; - - if(max_supported_sublevel == 0) - return; - - const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3); - const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3); - - X86_CPUID(1, cpuid); - - g_processor_flags[0] = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3]; - - if(is_intel) - g_cache_line_size = 8 * get_byte(2, cpuid[1]); - - if(max_supported_sublevel >= 7) - { - clear_mem(cpuid, 4); - X86_CPUID_SUBLEVEL(7, 0, cpuid); - g_processor_flags[1] = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1]; - } - - if(is_amd) - { - X86_CPUID(0x80000005, cpuid); - g_cache_line_size = get_byte(3, cpuid[2]); - } - -#endif - -#if defined(BOTAN_TARGET_ARCH_IS_X86_64) - /* - * If we don't have access to CPUID, we can still safely assume that - * any x86-64 processor has SSE2 and RDTSC - */ - if(g_processor_flags[0] == 0) - g_processor_flags[0] = (1 << CPUID_SSE2_BIT) | (1 << CPUID_RDTSC_BIT); -#endif + g_processor_features |= CPUID::CPUID_INITIALIZED_BIT; + // Check runtime endian const uint32_t endian32 = 0x01234567; const uint8_t* e8 = reinterpret_cast<const uint8_t*>(&endian32); @@ -274,14 +419,13 @@ void CPUID::initialize() throw Internal_Error("Unexpected endian at runtime, neither big nor little"); } - // If we were compiled with a known endian, verify if matches at runtime + // If we were compiled with a known endian, verify it matches at runtime #if defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN) - BOTAN_ASSERT(g_little_endian, "Little-endian build but big-endian at runtime"); + BOTAN_ASSERT(g_little_endian == true, "Build and runtime endian match"); #elif defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN) - BOTAN_ASSERT(!g_little_endian, "Big-endian build but little-endian at runtime"); + BOTAN_ASSERT(g_little_endian == false, "Build and runtime endian match"); #endif - g_initialized = true; } } diff --git a/src/lib/utils/cpuid.h b/src/lib/utils/cpuid.h index 634305aa1..2bb5a8301 100644 --- a/src/lib/utils/cpuid.h +++ b/src/lib/utils/cpuid.h @@ -1,6 +1,6 @@ /* * Runtime CPU detection -* (C) 2009-2010,2013 Jack Lloyd +* (C) 2009,2010,2013,2017 Jack Lloyd * * Botan is released under the Simplified BSD License (see license.txt) */ @@ -14,9 +14,22 @@ namespace Botan { /** -* A class handling runtime CPU feature detection +* A class handling runtime CPU feature detection. It is limited to +* just the features necessary to implement CPU specific code in Botan, +* rather than being a general purpose utility. * -* Currently this class supports only x86 (via CPUID) and PowerPC (AltiVec detection) +* This class supports: +* +* - x86 features using CPUID. x86 is also the only processor with +* accurate cache line detection currently. +* +* - PowerPC AltiVec detection on Linux, NetBSD, OpenBSD, and Darwin +* +* - ARM NEON and crypto extensions detection. On Linux and Android +* systems which support getauxval, that is used to access CPU +* feature information. Otherwise a relatively portable but +* thread-unsafe mechanism involving executing probe functions which +* catching SIGILL signal is used. */ class BOTAN_DLL CPUID { @@ -35,7 +48,7 @@ class BOTAN_DLL CPUID */ static size_t cache_line_size() { - if(!g_initialized) + if(g_processor_features == 0) { initialize(); } @@ -44,38 +57,51 @@ class BOTAN_DLL CPUID static bool is_little_endian() { - if(!g_initialized) + if(g_processor_features == 0) { initialize(); } return g_little_endian; } - enum CPUID_bits { + enum CPUID_bits : uint64_t { #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) - // This matches the layout of cpuid(1) - CPUID_RDTSC_BIT = 4, - CPUID_SSE2_BIT = 26, - CPUID_CLMUL_BIT = 33, - CPUID_SSSE3_BIT = 41, - CPUID_SSE41_BIT = 51, - CPUID_SSE42_BIT = 52, - CPUID_AESNI_BIT = 57, - CPUID_RDRAND_BIT = 62, - - CPUID_AVX2_BIT = 64+5, - CPUID_BMI2_BIT = 64+8, - CPUID_AVX512F_BIT = 64+16, - CPUID_RDSEED_BIT = 64+18, - CPUID_ADX_BIT = 64+19, - CPUID_SHA_BIT = 64+29, + // These values have no relation to cpuid bitfields + + // SIMD instruction sets + CPUID_SSE2_BIT = (1ULL << 0), + CPUID_SSSE3_BIT = (1ULL << 1), + CPUID_SSE41_BIT = (1ULL << 2), + CPUID_SSE42_BIT = (1ULL << 3), + CPUID_AVX2_BIT = (1ULL << 4), + CPUID_AVX512F_BIT = (1ULL << 5), + + // Misc useful instructions + CPUID_RDTSC_BIT = (1ULL << 10), + CPUID_BMI2_BIT = (1ULL << 11), + CPUID_ADX_BIT = (1ULL << 12), + + // Crypto-specific ISAs + CPUID_AESNI_BIT = (1ULL << 16), + CPUID_CLMUL_BIT = (1ULL << 17), + CPUID_RDRAND_BIT = (1ULL << 18), + CPUID_RDSEED_BIT = (1ULL << 19), + CPUID_SHA_BIT = (1ULL << 20), #endif #if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) - CPUID_ALTIVEC_BIT = 0 + CPUID_ALTIVEC_BIT = (1ULL << 0), #endif - // TODO: ARMv8 feature detection +#if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) + CPUID_ARM_NEON_BIT = (1ULL << 0), + CPUID_ARM_AES_BIT = (1ULL << 16), + CPUID_ARM_PMULL_BIT = (1ULL << 17), + CPUID_ARM_SHA1_BIT = (1ULL << 18), + CPUID_ARM_SHA2_BIT = (1ULL << 19), +#endif + + CPUID_INITIALIZED_BIT = (1ULL << 63) }; #if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) @@ -86,6 +112,38 @@ class BOTAN_DLL CPUID { return has_cpuid_bit(CPUID_ALTIVEC_BIT); } #endif +#if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) + /** + * Check if the processor supports NEON SIMD + */ + static bool has_neon() + { return has_cpuid_bit(CPUID_ARM_NEON_BIT); } + + /** + * Check if the processor supports ARMv8 SHA1 + */ + static bool has_arm_sha1() + { return has_cpuid_bit(CPUID_ARM_SHA1_BIT); } + + /** + * Check if the processor supports ARMv8 SHA2 + */ + static bool has_arm_sha2() + { return has_cpuid_bit(CPUID_ARM_SHA2_BIT); } + + /** + * Check if the processor supports ARMv8 AES + */ + static bool has_arm_aes() + { return has_cpuid_bit(CPUID_ARM_AES_BIT); } + + /** + * Check if the processor supports ARMv8 PMULL + */ + static bool has_arm_pmull() + { return has_cpuid_bit(CPUID_ARM_PMULL_BIT); } +#endif + #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) /** @@ -176,26 +234,31 @@ class BOTAN_DLL CPUID /* * Clear a CPUID bit * Call CPUID::initialize to reset + * + * This is only exposed for testing, don't use unless you know + * what you are doing. */ static void clear_cpuid_bit(CPUID_bits bit) { - const uint64_t mask = ~(static_cast<uint64_t>(1) << (bit % 64)); - g_processor_flags[bit/64] &= mask; + const uint64_t mask = ~(static_cast<uint64_t>(bit)); + g_processor_features &= mask; } + /* + * Don't call this function, use CPUID::has_xxx above + * It should have been private. + */ static bool has_cpuid_bit(CPUID_bits elem) { - if(!g_initialized) + if(g_processor_features == 0) initialize(); - const size_t bit = static_cast<size_t>(elem); - return ((g_processor_flags[bit/64] >> (bit % 64)) & 1); + return ((g_processor_features & static_cast<uint64_t>(elem)) != 0); } private: - static bool g_initialized; static bool g_little_endian; static size_t g_cache_line_size; - static uint64_t g_processor_flags[2]; + static uint64_t g_processor_features; }; } diff --git a/src/lib/utils/os_utils.cpp b/src/lib/utils/os_utils.cpp index 46ce2a056..4dac4f9af 100644 --- a/src/lib/utils/os_utils.cpp +++ b/src/lib/utils/os_utils.cpp @@ -17,6 +17,8 @@ #include <sys/mman.h> #include <sys/resource.h> #include <unistd.h> + #include <signal.h> + #include <setjmp.h> #endif #if defined(BOTAN_TARGET_OS_IS_WINDOWS) || defined(BOTAN_TARGET_OS_IS_MINGW) @@ -298,6 +300,74 @@ void free_locked_pages(void* ptr, size_t length) #endif } +#if defined(BOTAN_TARGET_OS_TYPE_IS_UNIX) +namespace { + +static ::sigjmp_buf g_sigill_jmp_buf; + +void botan_sigill_handler(int) + { + ::siglongjmp(g_sigill_jmp_buf, /*non-zero return value*/1); + } + +} +#endif + +int run_cpu_instruction_probe(std::function<int ()> probe_fn) + { +#if defined(BOTAN_TARGET_OS_TYPE_IS_UNIX) + struct sigaction old_sigaction; + struct sigaction sigaction; + + sigaction.sa_handler = botan_sigill_handler; + sigemptyset(&sigaction.sa_mask); + sigaction.sa_flags = 0; + + int rc = ::sigaction(SIGILL, &sigaction, &old_sigaction); + + if(rc != 0) + throw Exception("run_cpu_instruction_probe sigaction failed"); + + /* + There doesn't seem to be any way for probe_result to not be initialized + by some code path below, but this initializer is left as error just in case. + */ + int probe_result = -3; + + try + { + rc = ::sigsetjmp(g_sigill_jmp_buf, /*save sigs*/1); + + if(rc == 0) + { + // first call to sigsetjmp + probe_result = probe_fn(); + } + else if(rc == 1) + { + // non-local return from siglongjmp in signal handler: return error + probe_result = -1; + } + else + throw Exception("run_cpu_instruction_probe unexpected sigsetjmp return value"); + } + catch(...) + { + probe_result = -2; + } + + rc = ::sigaction(SIGILL, &old_sigaction, nullptr); + if(rc != 0) + throw Exception("run_cpu_instruction_probe sigaction restore failed"); + + return probe_result; +#else + // TODO: Windows support + return -9; // not supported +#endif + } + + } } diff --git a/src/lib/utils/os_utils.h b/src/lib/utils/os_utils.h index 213c5982b..b74debc46 100644 --- a/src/lib/utils/os_utils.h +++ b/src/lib/utils/os_utils.h @@ -1,6 +1,6 @@ /* * OS specific utility functions -* (C) 2015,2016 Jack Lloyd +* (C) 2015,2016,2017 Jack Lloyd * * Botan is released under the Simplified BSD License (see license.txt) */ @@ -9,24 +9,28 @@ #define BOTAN_OS_UTILS_H__ #include <botan/types.h> +#include <functional> namespace Botan { namespace OS { /** -* Returns the OS assigned process ID, if available. Otherwise throws. +* @return process ID assigned by the operating system. +* On Unix and Windows systems, this always returns a result +* On IncludeOS it returns 0 since there is no process ID to speak of +* in a unikernel. */ uint32_t get_process_id(); /** -* Return the highest resolution clock available on the system. +* @return highest resolution clock available on the system. * * The epoch and update rate of this clock is arbitrary and depending * on the hardware it may not tick at a constant rate. * -* Returns the value of the hardware cycle counter, if available. -* On Windows calls QueryPerformanceCounter. +* Uses hardware cycle counter, if available. +* On Windows, always calls QueryPerformanceCounter. * Under GCC or Clang on supported platforms the hardware cycle counter is queried: * x86, PPC, Alpha, SPARC, IA-64, S/390x, and HP-PA * On other platforms clock_gettime is used with some monotonic timer, if available. @@ -35,30 +39,51 @@ uint32_t get_process_id(); uint64_t get_processor_timestamp(); /** -* Returns the value of the system clock with best resolution available, -* normalized to nanoseconds resolution. +* @return system clock with best resolution available, normalized to +* nanoseconds resolution. */ uint64_t get_system_timestamp_ns(); -/* -* Returns the maximum amount of memory (in bytes) we could/should -* hyptothetically allocate. Reads "BOTAN_MLOCK_POOL_SIZE" from -* environment which can be set to zero. +/** +* @return maximum amount of memory (in bytes) Botan could/should +* hyptothetically allocate for the memory poool. Reads environment +* variable "BOTAN_MLOCK_POOL_SIZE", set to "0" to disable pool. */ size_t get_memory_locking_limit(); -/* +/** * Request so many bytes of page-aligned RAM locked into memory using * mlock, VirtualLock, or similar. Returns null on failure. The memory * returned is zeroed. Free it with free_locked_pages. +* @param length requested allocation in bytes */ void* allocate_locked_pages(size_t length); -/* +/** * Free memory allocated by allocate_locked_pages +* @param ptr a pointer returned by allocate_locked_pages +* @param length length passed to allocate_locked_pages */ void free_locked_pages(void* ptr, size_t length); +/** +* Run a probe instruction to test for support for a CPU instruction. +* Runs in system-specific env that catches illegal instructions; this +* function always fails if the OS doesn't provide this. +* Returns value of probe_fn, if it could run. +* If error occurs, returns negative number. +* This allows probe_fn to indicate errors of its own, if it wants. +* For example the instruction might not only be only available on some +* CPUs, but also buggy on some subset of these - the probe function +* can test to make sure the instruction works properly before +* indicating that the instruction is available. +* +* Return codes: +* -1 illegal instruction detected +* -2 exception thrown +*/ +int run_cpu_instruction_probe(std::function<int ()> probe_fn); + } } |