aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2017-01-18 23:53:21 -0500
committerJack Lloyd <[email protected]>2017-01-18 23:53:21 -0500
commit59fe7e7a9559eb3624d8ef310325bbb917f89033 (patch)
tree138fa33873d84c41725d1cd15f43f81d6b975258
parent4242fabe65b5710ace9799f692b462aa8b6ee37a (diff)
parent7880efc3c8d714088c82f053b8123b81ad569737 (diff)
Merge GH #843 Refactor CPUID impl, add ARM feature detection
-rw-r--r--src/build-data/os/android.txt1
-rw-r--r--src/build-data/os/linux.txt1
-rw-r--r--src/lib/utils/cpuid.cpp374
-rw-r--r--src/lib/utils/cpuid.h125
-rw-r--r--src/lib/utils/os_utils.cpp70
-rw-r--r--src/lib/utils/os_utils.h51
6 files changed, 463 insertions, 159 deletions
diff --git a/src/build-data/os/android.txt b/src/build-data/os/android.txt
index 50d97fd8f..db06bab1d 100644
--- a/src/build-data/os/android.txt
+++ b/src/build-data/os/android.txt
@@ -7,6 +7,7 @@ clock_gettime
gettimeofday
posix_mlock
gmtime_r
+getauxval
dlopen
readdir
threads
diff --git a/src/build-data/os/linux.txt b/src/build-data/os/linux.txt
index 6bd81f7f2..59f995fc2 100644
--- a/src/build-data/os/linux.txt
+++ b/src/build-data/os/linux.txt
@@ -8,6 +8,7 @@ gettimeofday
posix_mlock
gmtime_r
dlopen
+getauxval
readdir
timegm
sockets
diff --git a/src/lib/utils/cpuid.cpp b/src/lib/utils/cpuid.cpp
index 428ca2715..325421d71 100644
--- a/src/lib/utils/cpuid.cpp
+++ b/src/lib/utils/cpuid.cpp
@@ -1,6 +1,6 @@
/*
* Runtime CPU detection
-* (C) 2009-2010,2013 Jack Lloyd
+* (C) 2009,2010,2013,2017 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/
@@ -14,59 +14,66 @@
#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
+/*
+* On Darwin and OpenBSD ppc, use sysctl to detect AltiVec
+*/
#if defined(BOTAN_TARGET_OS_IS_DARWIN)
#include <sys/sysctl.h>
-#endif
-
-#if defined(BOTAN_TARGET_OS_IS_OPENBSD)
+#elif defined(BOTAN_TARGET_OS_IS_OPENBSD)
#include <sys/param.h>
#include <sys/sysctl.h>
#include <machine/cpu.h>
#endif
+#elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
+
+/*
+* On ARM, use getauxval if available, otherwise fall back to
+* running probe functions with a SIGILL handler.
+*/
+#if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL)
+ #include <sys/auxv.h>
#endif
-#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
+#include <botan/internal/os_utils.h>
-#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
+#elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
-#include <intrin.h>
+#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
-#define X86_CPUID(type, out) do { __cpuid((int*)out, type); } while(0)
-#define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
+ #include <intrin.h>
+ #define X86_CPUID(type, out) do { __cpuid((int*)out, type); } while(0)
+ #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
-#include <ia32intrin.h>
-
-#define X86_CPUID(type, out) do { __cpuid(out, type); } while(0)
-#define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
+ #include <ia32intrin.h>
+ #define X86_CPUID(type, out) do { __cpuid(out, type); } while(0)
+ #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
#elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && defined(BOTAN_USE_GCC_INLINE_ASM)
-#define X86_CPUID(type, out) \
- asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
- : "0" (type))
+ #define X86_CPUID(type, out) \
+ asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
+ : "0" (type))
-#define X86_CPUID_SUBLEVEL(type, level, out) \
- asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
- : "0" (type), "2" (level))
+ #define X86_CPUID_SUBLEVEL(type, level, out) \
+ asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
+ : "0" (type), "2" (level))
#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
-#include <cpuid.h>
+ #include <cpuid.h>
-#define X86_CPUID(type, out) do { __get_cpuid(type, out, out+1, out+2, out+3); } while(0)
+ #define X86_CPUID(type, out) do { __get_cpuid(type, out, out+1, out+2, out+3); } while(0)
-#define X86_CPUID_SUBLEVEL(type, level, out) \
- do { __cpuid_count(type, level, out[0], out[1], out[2], out[3]); } while(0)
+ #define X86_CPUID_SUBLEVEL(type, level, out) \
+ do { __cpuid_count(type, level, out[0], out[1], out[2], out[3]); } while(0)
#else
-
-#warning "No way of calling cpuid for this compiler"
-
-#define X86_CPUID(type, out) do { clear_mem(out, 4); } while(0)
-#define X86_CPUID_SUBLEVEL(type, level, out) do { clear_mem(out, 4); } while(0)
+ #warning "No way of calling cpuid for this compiler"
+ #define X86_CPUID(type, out) do { clear_mem(out, 4); } while(0)
+ #define X86_CPUID_SUBLEVEL(type, level, out) do { clear_mem(out, 4); } while(0)
#endif
@@ -74,18 +81,22 @@
namespace Botan {
-uint64_t CPUID::g_processor_flags[2] = { 0, 0 };
+uint64_t CPUID::g_processor_features = 0;
size_t CPUID::g_cache_line_size = BOTAN_TARGET_CPU_DEFAULT_CACHE_LINE_SIZE;
-bool CPUID::g_initialized = false;
bool CPUID::g_little_endian = false;
namespace {
#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
-bool altivec_check_sysctl()
+/*
+* PowerPC specific block: check for AltiVec using either
+* sysctl or by reading processor version number register.
+*/
+uint64_t powerpc_detect_cpu_featutures()
{
#if defined(BOTAN_TARGET_OS_IS_DARWIN) || defined(BOTAN_TARGET_OS_IS_OPENBSD)
+ // On Darwin/OS X and OpenBSD, use sysctl
#if defined(BOTAN_TARGET_OS_IS_OPENBSD)
int sels[2] = { CTL_MACHDEP, CPU_ALTIVEC };
@@ -98,18 +109,9 @@ bool altivec_check_sysctl()
int error = sysctl(sels, 2, &vector_type, &length, NULL, 0);
if(error == 0 && vector_type > 0)
- return true;
-#endif
-
- return false;
- }
-
-bool altivec_check_pvr_emul()
- {
- bool altivec_capable = false;
-
-#if defined(BOTAN_TARGET_OS_IS_LINUX) || defined(BOTAN_TARGET_OS_IS_NETBSD)
+ return (1ULL << CPUID::CPUID_ALTIVEC_BIT);
+#elif defined(BOTAN_TARGET_OS_IS_LINUX) || defined(BOTAN_TARGET_OS_IS_NETBSD)
/*
On PowerPC, MSR 287 is PVR, the Processor Version Number
Normally it is only accessible to ring 0, but Linux and NetBSD
@@ -119,6 +121,14 @@ bool altivec_check_pvr_emul()
PearPC and Linux sources, mostly.
*/
+ uint32_t pvr = 0;
+
+ // TODO: we could run inside SIGILL handler block
+ asm volatile("mfspr %0, 287" : "=r" (pvr));
+
+ // Top 16 bit suffice to identify model
+ pvr >>= 16;
+
const uint16_t PVR_G4_7400 = 0x000C;
const uint16_t PVR_G5_970 = 0x0039;
const uint16_t PVR_G5_970FX = 0x003C;
@@ -129,29 +139,197 @@ bool altivec_check_pvr_emul()
const uint16_t PVR_POWER8 = 0x004B;
const uint16_t PVR_CELL_PPU = 0x0070;
- // Motorola produced G4s with PVR 0x800[0123C] (at least)
- const uint16_t PVR_G4_74xx_24 = 0x800;
+ if(pvr == PVR_G4_7400 ||
+ pvr == PVR_G5_970 || pvr == PVR_G5_970FX ||
+ pvr == PVR_G5_970MP || pvr == PVR_G5_970GX ||
+ pvr == PVR_POWER6 || pvr == PVR_POWER7 || pvr == PVR_POWER8 ||
+ pvr == PVR_CELL_PPU)
+ {
+ return (1ULL << CPUID::CPUID_ALTIVEC_BIT);
+ }
+#else
+ #warning "No PowerPC feature detection available for this platform"
+#endif
- uint32_t pvr = 0;
+ return 0;
+ }
- asm volatile("mfspr %0, 287" : "=r" (pvr));
+#elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
- // Top 16 bit suffice to identify model
- pvr >>= 16;
+uint64_t arm_detect_cpu_features(size_t* cache_line_size)
+ {
+ uint64_t detected_features = 0;
+ *cache_line_size = BOTAN_TARGET_CPU_DEFAULT_CACHE_LINE_SIZE;
+
+#if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL)
+ errno = 0;
+
+ /*
+ * On systems with getauxval these bits should normally be defined
+ * in bits/auxv.h but some buggy? glibc installs seem to miss them.
+ * These following values are all fixed, for the Linux ELF format,
+ * so we just hardcode them in ARM_hwcap_bit enum.
+ */
+
+ enum ARM_hwcap_bit {
+#if defined(BOTAN_TARGET_ARCH_IS_ARM32)
+ NEON_bit = (1 << 12),
+ AES_bit = (1 << 0),
+ PMULL_bit = (1 << 1),
+ SHA1_bit = (1 << 2),
+ SHA2_bit = (1 << 3),
+
+ ARCH_hwcap_neon = 16, // AT_HWCAP
+ ARCH_hwcap_crypto = 26, // AT_HWCAP2
+#elif defined(BOTAN_TARGET_ARCH_IS_ARM64)
+ NEON_bit = (1 << 1),
+ AES_bit = (1 << 3),
+ PMULL_bit = (1 << 4),
+ SHA1_bit = (1 << 5),
+ SHA2_bit = (1 << 6),
+
+ ARCH_hwcap_neon = 16, // AT_HWCAP
+ ARCH_hwcap_crypto = 16, // AT_HWCAP
+#endif
+ };
+
+ const unsigned long hwcap_neon = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_neon);
+ if(hwcap_neon & ARM_hwcap_bit::NEON_bit)
+ detected_features |= CPUID::CPUID_ARM_NEON_BIT;
+
+ /*
+ On aarch64 this ends up calling getauxval twice with AT_HWCAP
+ This doesn't seem worth optimizing this out, since getauxval is
+ just reading a field in the ELF header.
+ */
+ const unsigned long hwcap_crypto = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_crypto);
+ if(hwcap_crypto & ARM_hwcap_bit::AES_bit)
+ detected_features |= CPUID::CPUID_ARM_AES_BIT;
+ if(hwcap_crypto & ARM_hwcap_bit::PMULL_bit)
+ detected_features |= CPUID::CPUID_ARM_PMULL_BIT;
+ if(hwcap_crypto & ARM_hwcap_bit::SHA1_bit)
+ detected_features |= CPUID::CPUID_ARM_SHA1_BIT;
+ if(hwcap_crypto & ARM_hwcap_bit::SHA2_bit)
+ detected_features |= CPUID::CPUID_ARM_SHA2_BIT;
+
+ const unsigned long dcache_line = ::getauxval(AT_DCACHEBSIZE);
+
+ // plausibility check
+ if(dcache_line == 32 || dcache_line == 64 || dcache_line == 128)
+ *cache_line_size = static_cast<size_t>(dcache_line);
+#endif
+
+ // TODO: probe functions
+
+ return detected_features;
+ }
+
+#elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
+
+uint64_t x86_detect_cpu_features(size_t* cache_line_size)
+ {
+ uint64_t features_detected = 0;
+ uint32_t cpuid[4] = { 0 };
+
+ // CPUID 0: vendor identification, max sublevel
+ X86_CPUID(0, cpuid);
+
+ const uint32_t max_supported_sublevel = cpuid[0];
+
+ const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 };
+ const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 };
+ const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3);
+ const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3);
+
+ if(max_supported_sublevel >= 1)
+ {
+ // CPUID 1: feature bits
+ X86_CPUID(1, cpuid);
+ const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
+
+ enum x86_CPUID_1_bits : uint64_t {
+ RDTSC = (1ULL << 4),
+ SSE2 = (1ULL << 26),
+ CLMUL = (1ULL << 33),
+ SSSE3 = (1ULL << 41),
+ SSE41 = (1ULL << 51),
+ SSE42 = (1ULL << 52),
+ AESNI = (1ULL << 57),
+ RDRAND = (1ULL << 62)
+ };
+
+ if(flags0 & x86_CPUID_1_bits::RDTSC)
+ features_detected |= CPUID::CPUID_RDTSC_BIT;
+ if(flags0 & x86_CPUID_1_bits::SSE2)
+ features_detected |= CPUID::CPUID_SSE2_BIT;
+ if(flags0 & x86_CPUID_1_bits::CLMUL)
+ features_detected |= CPUID::CPUID_CLMUL_BIT;
+ if(flags0 & x86_CPUID_1_bits::SSSE3)
+ features_detected |= CPUID::CPUID_SSSE3_BIT;
+ if(flags0 & x86_CPUID_1_bits::SSE41)
+ features_detected |= CPUID::CPUID_SSE41_BIT;
+ if(flags0 & x86_CPUID_1_bits::SSE42)
+ features_detected |= CPUID::CPUID_SSE42_BIT;
+ if(flags0 & x86_CPUID_1_bits::AESNI)
+ features_detected |= CPUID::CPUID_AESNI_BIT;
+ if(flags0 & x86_CPUID_1_bits::RDRAND)
+ features_detected |= CPUID::CPUID_RDRAND_BIT;
+ }
+
+ if(is_intel)
+ {
+ // Intel cache line size is in cpuid(1) output
+ *cache_line_size = 8 * get_byte(2, cpuid[1]);
+ }
+ else if(is_amd)
+ {
+ // AMD puts it in vendor zone
+ X86_CPUID(0x80000005, cpuid);
+ *cache_line_size = get_byte(3, cpuid[2]);
+ }
- altivec_capable |= (pvr == PVR_G4_7400);
- altivec_capable |= ((pvr >> 4) == PVR_G4_74xx_24);
- altivec_capable |= (pvr == PVR_G5_970);
- altivec_capable |= (pvr == PVR_G5_970FX);
- altivec_capable |= (pvr == PVR_G5_970MP);
- altivec_capable |= (pvr == PVR_G5_970GX);
- altivec_capable |= (pvr == PVR_POWER6);
- altivec_capable |= (pvr == PVR_POWER7);
- altivec_capable |= (pvr == PVR_POWER8);
- altivec_capable |= (pvr == PVR_CELL_PPU);
+ if(max_supported_sublevel >= 7)
+ {
+ clear_mem(cpuid, 4);
+ X86_CPUID_SUBLEVEL(7, 0, cpuid);
+
+ enum x86_CPUID_7_bits : uint64_t {
+ AVX2 = (1ULL << 5),
+ BMI2 = (1ULL << 8),
+ AVX512F = (1ULL << 16),
+ RDSEED = (1ULL << 18),
+ ADX = (1ULL << 19),
+ SHA = (1ULL << 29),
+ };
+ uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
+
+ if(flags7 & x86_CPUID_7_bits::AVX2)
+ features_detected |= CPUID::CPUID_AVX2_BIT;
+ if(flags7 & x86_CPUID_7_bits::BMI2)
+ features_detected |= CPUID::CPUID_BMI2_BIT;
+ if(flags7 & x86_CPUID_7_bits::AVX512F)
+ features_detected |= CPUID::CPUID_AVX512F_BIT;
+ if(flags7 & x86_CPUID_7_bits::RDSEED)
+ features_detected |= CPUID::CPUID_RDSEED_BIT;
+ if(flags7 & x86_CPUID_7_bits::ADX)
+ features_detected |= CPUID::CPUID_ADX_BIT;
+ if(flags7 & x86_CPUID_7_bits::SHA)
+ features_detected |= CPUID::CPUID_SHA_BIT;
+ }
+
+ /*
+ * If we don't have access to CPUID, we can still safely assume that
+ * any x86-64 processor has SSE2 and RDTSC
+ */
+#if defined(BOTAN_TARGET_ARCH_IS_X86_64)
+ if(features_detected == 0)
+ {
+ features_detected |= CPUID::CPUID_SSE2_BIT;
+ features_detected |= CPUID::CPUID_RDTSC_BIT;
+ }
#endif
- return altivec_capable;
+ return features_detected;
}
#endif
@@ -185,79 +363,46 @@ void CPUID::print(std::ostream& o)
CPUID_PRINT(rdtsc);
CPUID_PRINT(bmi2);
- CPUID_PRINT(clmul);
+ CPUID_PRINT(adx);
+
CPUID_PRINT(aes_ni);
+ CPUID_PRINT(clmul);
CPUID_PRINT(rdrand);
CPUID_PRINT(rdseed);
CPUID_PRINT(intel_sha);
- CPUID_PRINT(adx);
#endif
#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
CPUID_PRINT(altivec);
#endif
+#if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
+ CPUID_PRINT(neon);
+ CPUID_PRINT(arm_sha1);
+ CPUID_PRINT(arm_sha2);
+ CPUID_PRINT(arm_aes);
+ CPUID_PRINT(arm_pmull);
+#endif
+
#undef CPUID_PRINT
o << "\n";
}
void CPUID::initialize()
{
- clear_mem(g_processor_flags, 2);
+ g_processor_features = 0;
#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
- if(altivec_check_sysctl() || altivec_check_pvr_emul())
- {
- g_processor_flags[0] |= CPUID_ALTIVEC_BIT;
- }
+ g_processor_features = powerpc_detect_cpu_featutures();
+#elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
+ g_processor_features = arm_detect_cpu_features(&g_cache_line_size);
+#elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
+ g_processor_features = x86_detect_cpu_features(&g_cache_line_size);
#endif
-#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
- const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 };
- const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 };
-
- uint32_t cpuid[4] = { 0 };
- X86_CPUID(0, cpuid);
-
- const uint32_t max_supported_sublevel = cpuid[0];
-
- if(max_supported_sublevel == 0)
- return;
-
- const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3);
- const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3);
-
- X86_CPUID(1, cpuid);
-
- g_processor_flags[0] = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
-
- if(is_intel)
- g_cache_line_size = 8 * get_byte(2, cpuid[1]);
-
- if(max_supported_sublevel >= 7)
- {
- clear_mem(cpuid, 4);
- X86_CPUID_SUBLEVEL(7, 0, cpuid);
- g_processor_flags[1] = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
- }
-
- if(is_amd)
- {
- X86_CPUID(0x80000005, cpuid);
- g_cache_line_size = get_byte(3, cpuid[2]);
- }
-
-#endif
-
-#if defined(BOTAN_TARGET_ARCH_IS_X86_64)
- /*
- * If we don't have access to CPUID, we can still safely assume that
- * any x86-64 processor has SSE2 and RDTSC
- */
- if(g_processor_flags[0] == 0)
- g_processor_flags[0] = (1 << CPUID_SSE2_BIT) | (1 << CPUID_RDTSC_BIT);
-#endif
+ g_processor_features |= CPUID::CPUID_INITIALIZED_BIT;
+ // Check runtime endian
const uint32_t endian32 = 0x01234567;
const uint8_t* e8 = reinterpret_cast<const uint8_t*>(&endian32);
@@ -274,14 +419,13 @@ void CPUID::initialize()
throw Internal_Error("Unexpected endian at runtime, neither big nor little");
}
- // If we were compiled with a known endian, verify if matches at runtime
+ // If we were compiled with a known endian, verify it matches at runtime
#if defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN)
- BOTAN_ASSERT(g_little_endian, "Little-endian build but big-endian at runtime");
+ BOTAN_ASSERT(g_little_endian == true, "Build and runtime endian match");
#elif defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN)
- BOTAN_ASSERT(!g_little_endian, "Big-endian build but little-endian at runtime");
+ BOTAN_ASSERT(g_little_endian == false, "Build and runtime endian match");
#endif
- g_initialized = true;
}
}
diff --git a/src/lib/utils/cpuid.h b/src/lib/utils/cpuid.h
index 634305aa1..2bb5a8301 100644
--- a/src/lib/utils/cpuid.h
+++ b/src/lib/utils/cpuid.h
@@ -1,6 +1,6 @@
/*
* Runtime CPU detection
-* (C) 2009-2010,2013 Jack Lloyd
+* (C) 2009,2010,2013,2017 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/
@@ -14,9 +14,22 @@
namespace Botan {
/**
-* A class handling runtime CPU feature detection
+* A class handling runtime CPU feature detection. It is limited to
+* just the features necessary to implement CPU specific code in Botan,
+* rather than being a general purpose utility.
*
-* Currently this class supports only x86 (via CPUID) and PowerPC (AltiVec detection)
+* This class supports:
+*
+* - x86 features using CPUID. x86 is also the only processor with
+* accurate cache line detection currently.
+*
+* - PowerPC AltiVec detection on Linux, NetBSD, OpenBSD, and Darwin
+*
+* - ARM NEON and crypto extensions detection. On Linux and Android
+* systems which support getauxval, that is used to access CPU
+* feature information. Otherwise a relatively portable but
+* thread-unsafe mechanism involving executing probe functions which
+* catching SIGILL signal is used.
*/
class BOTAN_DLL CPUID
{
@@ -35,7 +48,7 @@ class BOTAN_DLL CPUID
*/
static size_t cache_line_size()
{
- if(!g_initialized)
+ if(g_processor_features == 0)
{
initialize();
}
@@ -44,38 +57,51 @@ class BOTAN_DLL CPUID
static bool is_little_endian()
{
- if(!g_initialized)
+ if(g_processor_features == 0)
{
initialize();
}
return g_little_endian;
}
- enum CPUID_bits {
+ enum CPUID_bits : uint64_t {
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
- // This matches the layout of cpuid(1)
- CPUID_RDTSC_BIT = 4,
- CPUID_SSE2_BIT = 26,
- CPUID_CLMUL_BIT = 33,
- CPUID_SSSE3_BIT = 41,
- CPUID_SSE41_BIT = 51,
- CPUID_SSE42_BIT = 52,
- CPUID_AESNI_BIT = 57,
- CPUID_RDRAND_BIT = 62,
-
- CPUID_AVX2_BIT = 64+5,
- CPUID_BMI2_BIT = 64+8,
- CPUID_AVX512F_BIT = 64+16,
- CPUID_RDSEED_BIT = 64+18,
- CPUID_ADX_BIT = 64+19,
- CPUID_SHA_BIT = 64+29,
+ // These values have no relation to cpuid bitfields
+
+ // SIMD instruction sets
+ CPUID_SSE2_BIT = (1ULL << 0),
+ CPUID_SSSE3_BIT = (1ULL << 1),
+ CPUID_SSE41_BIT = (1ULL << 2),
+ CPUID_SSE42_BIT = (1ULL << 3),
+ CPUID_AVX2_BIT = (1ULL << 4),
+ CPUID_AVX512F_BIT = (1ULL << 5),
+
+ // Misc useful instructions
+ CPUID_RDTSC_BIT = (1ULL << 10),
+ CPUID_BMI2_BIT = (1ULL << 11),
+ CPUID_ADX_BIT = (1ULL << 12),
+
+ // Crypto-specific ISAs
+ CPUID_AESNI_BIT = (1ULL << 16),
+ CPUID_CLMUL_BIT = (1ULL << 17),
+ CPUID_RDRAND_BIT = (1ULL << 18),
+ CPUID_RDSEED_BIT = (1ULL << 19),
+ CPUID_SHA_BIT = (1ULL << 20),
#endif
#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
- CPUID_ALTIVEC_BIT = 0
+ CPUID_ALTIVEC_BIT = (1ULL << 0),
#endif
- // TODO: ARMv8 feature detection
+#if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
+ CPUID_ARM_NEON_BIT = (1ULL << 0),
+ CPUID_ARM_AES_BIT = (1ULL << 16),
+ CPUID_ARM_PMULL_BIT = (1ULL << 17),
+ CPUID_ARM_SHA1_BIT = (1ULL << 18),
+ CPUID_ARM_SHA2_BIT = (1ULL << 19),
+#endif
+
+ CPUID_INITIALIZED_BIT = (1ULL << 63)
};
#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
@@ -86,6 +112,38 @@ class BOTAN_DLL CPUID
{ return has_cpuid_bit(CPUID_ALTIVEC_BIT); }
#endif
+#if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
+ /**
+ * Check if the processor supports NEON SIMD
+ */
+ static bool has_neon()
+ { return has_cpuid_bit(CPUID_ARM_NEON_BIT); }
+
+ /**
+ * Check if the processor supports ARMv8 SHA1
+ */
+ static bool has_arm_sha1()
+ { return has_cpuid_bit(CPUID_ARM_SHA1_BIT); }
+
+ /**
+ * Check if the processor supports ARMv8 SHA2
+ */
+ static bool has_arm_sha2()
+ { return has_cpuid_bit(CPUID_ARM_SHA2_BIT); }
+
+ /**
+ * Check if the processor supports ARMv8 AES
+ */
+ static bool has_arm_aes()
+ { return has_cpuid_bit(CPUID_ARM_AES_BIT); }
+
+ /**
+ * Check if the processor supports ARMv8 PMULL
+ */
+ static bool has_arm_pmull()
+ { return has_cpuid_bit(CPUID_ARM_PMULL_BIT); }
+#endif
+
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
/**
@@ -176,26 +234,31 @@ class BOTAN_DLL CPUID
/*
* Clear a CPUID bit
* Call CPUID::initialize to reset
+ *
+ * This is only exposed for testing, don't use unless you know
+ * what you are doing.
*/
static void clear_cpuid_bit(CPUID_bits bit)
{
- const uint64_t mask = ~(static_cast<uint64_t>(1) << (bit % 64));
- g_processor_flags[bit/64] &= mask;
+ const uint64_t mask = ~(static_cast<uint64_t>(bit));
+ g_processor_features &= mask;
}
+ /*
+ * Don't call this function, use CPUID::has_xxx above
+ * It should have been private.
+ */
static bool has_cpuid_bit(CPUID_bits elem)
{
- if(!g_initialized)
+ if(g_processor_features == 0)
initialize();
- const size_t bit = static_cast<size_t>(elem);
- return ((g_processor_flags[bit/64] >> (bit % 64)) & 1);
+ return ((g_processor_features & static_cast<uint64_t>(elem)) != 0);
}
private:
- static bool g_initialized;
static bool g_little_endian;
static size_t g_cache_line_size;
- static uint64_t g_processor_flags[2];
+ static uint64_t g_processor_features;
};
}
diff --git a/src/lib/utils/os_utils.cpp b/src/lib/utils/os_utils.cpp
index 46ce2a056..4dac4f9af 100644
--- a/src/lib/utils/os_utils.cpp
+++ b/src/lib/utils/os_utils.cpp
@@ -17,6 +17,8 @@
#include <sys/mman.h>
#include <sys/resource.h>
#include <unistd.h>
+ #include <signal.h>
+ #include <setjmp.h>
#endif
#if defined(BOTAN_TARGET_OS_IS_WINDOWS) || defined(BOTAN_TARGET_OS_IS_MINGW)
@@ -298,6 +300,74 @@ void free_locked_pages(void* ptr, size_t length)
#endif
}
+#if defined(BOTAN_TARGET_OS_TYPE_IS_UNIX)
+namespace {
+
+static ::sigjmp_buf g_sigill_jmp_buf;
+
+void botan_sigill_handler(int)
+ {
+ ::siglongjmp(g_sigill_jmp_buf, /*non-zero return value*/1);
+ }
+
+}
+#endif
+
+int run_cpu_instruction_probe(std::function<int ()> probe_fn)
+ {
+#if defined(BOTAN_TARGET_OS_TYPE_IS_UNIX)
+ struct sigaction old_sigaction;
+ struct sigaction sigaction;
+
+ sigaction.sa_handler = botan_sigill_handler;
+ sigemptyset(&sigaction.sa_mask);
+ sigaction.sa_flags = 0;
+
+ int rc = ::sigaction(SIGILL, &sigaction, &old_sigaction);
+
+ if(rc != 0)
+ throw Exception("run_cpu_instruction_probe sigaction failed");
+
+ /*
+ There doesn't seem to be any way for probe_result to not be initialized
+ by some code path below, but this initializer is left as error just in case.
+ */
+ int probe_result = -3;
+
+ try
+ {
+ rc = ::sigsetjmp(g_sigill_jmp_buf, /*save sigs*/1);
+
+ if(rc == 0)
+ {
+ // first call to sigsetjmp
+ probe_result = probe_fn();
+ }
+ else if(rc == 1)
+ {
+ // non-local return from siglongjmp in signal handler: return error
+ probe_result = -1;
+ }
+ else
+ throw Exception("run_cpu_instruction_probe unexpected sigsetjmp return value");
+ }
+ catch(...)
+ {
+ probe_result = -2;
+ }
+
+ rc = ::sigaction(SIGILL, &old_sigaction, nullptr);
+ if(rc != 0)
+ throw Exception("run_cpu_instruction_probe sigaction restore failed");
+
+ return probe_result;
+#else
+ // TODO: Windows support
+ return -9; // not supported
+#endif
+ }
+
+
}
}
diff --git a/src/lib/utils/os_utils.h b/src/lib/utils/os_utils.h
index 213c5982b..b74debc46 100644
--- a/src/lib/utils/os_utils.h
+++ b/src/lib/utils/os_utils.h
@@ -1,6 +1,6 @@
/*
* OS specific utility functions
-* (C) 2015,2016 Jack Lloyd
+* (C) 2015,2016,2017 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/
@@ -9,24 +9,28 @@
#define BOTAN_OS_UTILS_H__
#include <botan/types.h>
+#include <functional>
namespace Botan {
namespace OS {
/**
-* Returns the OS assigned process ID, if available. Otherwise throws.
+* @return process ID assigned by the operating system.
+* On Unix and Windows systems, this always returns a result
+* On IncludeOS it returns 0 since there is no process ID to speak of
+* in a unikernel.
*/
uint32_t get_process_id();
/**
-* Return the highest resolution clock available on the system.
+* @return highest resolution clock available on the system.
*
* The epoch and update rate of this clock is arbitrary and depending
* on the hardware it may not tick at a constant rate.
*
-* Returns the value of the hardware cycle counter, if available.
-* On Windows calls QueryPerformanceCounter.
+* Uses hardware cycle counter, if available.
+* On Windows, always calls QueryPerformanceCounter.
* Under GCC or Clang on supported platforms the hardware cycle counter is queried:
* x86, PPC, Alpha, SPARC, IA-64, S/390x, and HP-PA
* On other platforms clock_gettime is used with some monotonic timer, if available.
@@ -35,30 +39,51 @@ uint32_t get_process_id();
uint64_t get_processor_timestamp();
/**
-* Returns the value of the system clock with best resolution available,
-* normalized to nanoseconds resolution.
+* @return system clock with best resolution available, normalized to
+* nanoseconds resolution.
*/
uint64_t get_system_timestamp_ns();
-/*
-* Returns the maximum amount of memory (in bytes) we could/should
-* hyptothetically allocate. Reads "BOTAN_MLOCK_POOL_SIZE" from
-* environment which can be set to zero.
+/**
+* @return maximum amount of memory (in bytes) Botan could/should
+* hyptothetically allocate for the memory poool. Reads environment
+* variable "BOTAN_MLOCK_POOL_SIZE", set to "0" to disable pool.
*/
size_t get_memory_locking_limit();
-/*
+/**
* Request so many bytes of page-aligned RAM locked into memory using
* mlock, VirtualLock, or similar. Returns null on failure. The memory
* returned is zeroed. Free it with free_locked_pages.
+* @param length requested allocation in bytes
*/
void* allocate_locked_pages(size_t length);
-/*
+/**
* Free memory allocated by allocate_locked_pages
+* @param ptr a pointer returned by allocate_locked_pages
+* @param length length passed to allocate_locked_pages
*/
void free_locked_pages(void* ptr, size_t length);
+/**
+* Run a probe instruction to test for support for a CPU instruction.
+* Runs in system-specific env that catches illegal instructions; this
+* function always fails if the OS doesn't provide this.
+* Returns value of probe_fn, if it could run.
+* If error occurs, returns negative number.
+* This allows probe_fn to indicate errors of its own, if it wants.
+* For example the instruction might not only be only available on some
+* CPUs, but also buggy on some subset of these - the probe function
+* can test to make sure the instruction works properly before
+* indicating that the instruction is available.
+*
+* Return codes:
+* -1 illegal instruction detected
+* -2 exception thrown
+*/
+int run_cpu_instruction_probe(std::function<int ()> probe_fn);
+
}
}