aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2017-09-18 10:50:29 -0400
committerJack Lloyd <[email protected]>2017-09-18 10:50:29 -0400
commit3da7f2d709a7b8045f531d1c0c4a3de8f4125eb1 (patch)
treef132e1f28f2f2e2f82da6587ac7679400d3f6940 /src
parent12653256d4084b644b5ac9e534d03ad7d58632a5 (diff)
parenteaa19ec15fc1eeefa2d11f28a11ae85a63c0befa (diff)
Merge GH #1213 Improve ARM processor detection for systems without getauxval
Diffstat (limited to 'src')
-rw-r--r--src/lib/utils/cpuid.cpp501
-rw-r--r--src/lib/utils/cpuid/cpuid.cpp169
-rw-r--r--src/lib/utils/cpuid/cpuid.h (renamed from src/lib/utils/cpuid.h)8
-rw-r--r--src/lib/utils/cpuid/cpuid_arm.cpp214
-rw-r--r--src/lib/utils/cpuid/cpuid_ppc.cpp101
-rw-r--r--src/lib/utils/cpuid/cpuid_x86.cpp167
-rw-r--r--src/lib/utils/cpuid/info.txt7
-rw-r--r--src/lib/utils/info.txt5
8 files changed, 670 insertions, 502 deletions
diff --git a/src/lib/utils/cpuid.cpp b/src/lib/utils/cpuid.cpp
deleted file mode 100644
index 82654b9b4..000000000
--- a/src/lib/utils/cpuid.cpp
+++ /dev/null
@@ -1,501 +0,0 @@
-/*
-* Runtime CPU detection
-* (C) 2009,2010,2013,2017 Jack Lloyd
-*
-* Botan is released under the Simplified BSD License (see license.txt)
-*/
-
-#include <botan/cpuid.h>
-#include <botan/types.h>
-#include <botan/loadstor.h>
-#include <botan/exceptn.h>
-#include <botan/mem_ops.h>
-#include <botan/parsing.h>
-#include <botan/internal/os_utils.h>
-#include <ostream>
-
-#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
-
-/*
-* On Darwin and OpenBSD ppc, use sysctl to detect AltiVec
-*/
-#if defined(BOTAN_TARGET_OS_IS_DARWIN)
- #include <sys/sysctl.h>
-#elif defined(BOTAN_TARGET_OS_IS_OPENBSD)
- #include <sys/param.h>
- #include <sys/sysctl.h>
- #include <machine/cpu.h>
-#endif
-
-#elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
-
-/*
-* On ARM, use getauxval if available, otherwise fall back to
-* running probe functions with a SIGILL handler.
-*/
-#if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL)
- #include <sys/auxv.h>
-#endif
-
-#elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
-
-/*
-* On x86, use CPUID instruction
-*/
-
-#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
- #include <intrin.h>
-#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
- #include <ia32intrin.h>
-#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
- #include <cpuid.h>
-#endif
-
-#endif
-
-namespace Botan {
-
-uint64_t CPUID::g_processor_features = 0;
-size_t CPUID::g_cache_line_size = BOTAN_TARGET_CPU_DEFAULT_CACHE_LINE_SIZE;
-CPUID::Endian_status CPUID::g_endian_status = ENDIAN_UNKNOWN;
-
-namespace {
-
-#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
-
-/*
-* PowerPC specific block: check for AltiVec using either
-* sysctl or by reading processor version number register.
-*/
-uint64_t detect_cpu_features(size_t* cache_line_size)
- {
-#if defined(BOTAN_TARGET_OS_IS_DARWIN) || defined(BOTAN_TARGET_OS_IS_OPENBSD)
- // On Darwin/OS X and OpenBSD, use sysctl
-
- int sels[2] = {
-#if defined(BOTAN_TARGET_OS_IS_OPENBSD)
- CTL_MACHDEP, CPU_ALTIVEC
-#else
- CTL_HW, HW_VECTORUNIT
-#endif
- };
-
- int vector_type = 0;
- size_t length = sizeof(vector_type);
- int error = sysctl(sels, 2, &vector_type, &length, NULL, 0);
-
- if(error == 0 && vector_type > 0)
- return CPUID::CPUID_ALTIVEC_BIT;
-
-#else
-
- /*
- On PowerPC, MSR 287 is PVR, the Processor Version Number
- Normally it is only accessible to ring 0, but Linux and NetBSD
- (others, too, maybe?) will trap and emulate it for us.
- */
-
- int pvr = OS::run_cpu_instruction_probe([]() -> int {
- uint32_t pvr = 0;
- asm volatile("mfspr %0, 287" : "=r" (pvr));
- // Top 16 bits suffice to identify the model
- return static_cast<int>(pvr >> 16);
- });
-
- if(pvr > 0)
- {
- const uint16_t ALTIVEC_PVR[] = {
- 0x003E, // IBM POWER6,
- 0x003F, // IBM POWER7,
- 0x004B, // IBM POWER8,
- 0x000C, // G4-7400
- 0x0039, // G5 970
- 0x003C, // G5 970FX
- 0x0044, // G5 970MP
- 0x0070, // Cell PPU
- 0, // end
- };
-
- for(size_t i = 0; ALTIVEC_PVR[i]; ++i)
- {
- if(pvr == ALTIVEC_PVR[i])
- return CPUID::CPUID_ALTIVEC_BIT;
- }
-
- return 0;
- }
-
- // TODO try direct instruction probing
-
-#endif
-
- return 0;
- }
-
-#elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
-
-uint64_t detect_cpu_features(size_t* cache_line_size)
- {
- uint64_t detected_features = 0;
-
-#if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL)
- /*
- * On systems with getauxval these bits should normally be defined
- * in bits/auxv.h but some buggy? glibc installs seem to miss them.
- * These following values are all fixed, for the Linux ELF format,
- * so we just hardcode them in ARM_hwcap_bit enum.
- */
-
- enum ARM_hwcap_bit {
-#if defined(BOTAN_TARGET_ARCH_IS_ARM32)
- NEON_bit = (1 << 12),
- AES_bit = (1 << 0),
- PMULL_bit = (1 << 1),
- SHA1_bit = (1 << 2),
- SHA2_bit = (1 << 3),
-
- ARCH_hwcap_neon = 16, // AT_HWCAP
- ARCH_hwcap_crypto = 26, // AT_HWCAP2
-#elif defined(BOTAN_TARGET_ARCH_IS_ARM64)
- NEON_bit = (1 << 1),
- AES_bit = (1 << 3),
- PMULL_bit = (1 << 4),
- SHA1_bit = (1 << 5),
- SHA2_bit = (1 << 6),
-
- ARCH_hwcap_neon = 16, // AT_HWCAP
- ARCH_hwcap_crypto = 16, // AT_HWCAP
-#endif
- };
-
- const unsigned long hwcap_neon = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_neon);
- if(hwcap_neon & ARM_hwcap_bit::NEON_bit)
- detected_features |= CPUID::CPUID_ARM_NEON_BIT;
-
- /*
- On aarch64 this ends up calling getauxval twice with AT_HWCAP
- It doesn't seem worth optimizing this out, since getauxval is
- just reading a field in the ELF header.
- */
- const unsigned long hwcap_crypto = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_crypto);
- if(hwcap_crypto & ARM_hwcap_bit::AES_bit)
- detected_features |= CPUID::CPUID_ARM_AES_BIT;
- if(hwcap_crypto & ARM_hwcap_bit::PMULL_bit)
- detected_features |= CPUID::CPUID_ARM_PMULL_BIT;
- if(hwcap_crypto & ARM_hwcap_bit::SHA1_bit)
- detected_features |= CPUID::CPUID_ARM_SHA1_BIT;
- if(hwcap_crypto & ARM_hwcap_bit::SHA2_bit)
- detected_features |= CPUID::CPUID_ARM_SHA2_BIT;
-
-#if defined(AT_DCACHEBSIZE)
- const unsigned long dcache_line = ::getauxval(AT_DCACHEBSIZE);
-
- // plausibility check
- if(dcache_line == 32 || dcache_line == 64 || dcache_line == 128)
- *cache_line_size = static_cast<size_t>(dcache_line);
-#endif
-
-#else
- // No getauxval API available, fall back on probe functions
-
- // TODO: probe functions
-
-#endif
-
- return detected_features;
- }
-
-#elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
-
-uint64_t detect_cpu_features(size_t* cache_line_size)
- {
-#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
- #define X86_CPUID(type, out) do { __cpuid((int*)out, type); } while(0)
- #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
-
-#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
- #define X86_CPUID(type, out) do { __cpuid(out, type); } while(0)
- #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
-
-#elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && defined(BOTAN_USE_GCC_INLINE_ASM)
- #define X86_CPUID(type, out) \
- asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
- : "0" (type))
-
- #define X86_CPUID_SUBLEVEL(type, level, out) \
- asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
- : "0" (type), "2" (level))
-
-#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
- #define X86_CPUID(type, out) do { __get_cpuid(type, out, out+1, out+2, out+3); } while(0)
-
- #define X86_CPUID_SUBLEVEL(type, level, out) \
- do { __cpuid_count(type, level, out[0], out[1], out[2], out[3]); } while(0)
-#else
- #warning "No way of calling x86 cpuid instruction for this compiler"
- #define X86_CPUID(type, out) do { clear_mem(out, 4); } while(0)
- #define X86_CPUID_SUBLEVEL(type, level, out) do { clear_mem(out, 4); } while(0)
-#endif
-
- uint64_t features_detected = 0;
- uint32_t cpuid[4] = { 0 };
-
- // CPUID 0: vendor identification, max sublevel
- X86_CPUID(0, cpuid);
-
- const uint32_t max_supported_sublevel = cpuid[0];
-
- const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 };
- const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 };
- const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3);
- const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3);
-
- if(max_supported_sublevel >= 1)
- {
- // CPUID 1: feature bits
- X86_CPUID(1, cpuid);
- const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
-
- enum x86_CPUID_1_bits : uint64_t {
- RDTSC = (1ULL << 4),
- SSE2 = (1ULL << 26),
- CLMUL = (1ULL << 33),
- SSSE3 = (1ULL << 41),
- SSE41 = (1ULL << 51),
- SSE42 = (1ULL << 52),
- AESNI = (1ULL << 57),
- RDRAND = (1ULL << 62)
- };
-
- if(flags0 & x86_CPUID_1_bits::RDTSC)
- features_detected |= CPUID::CPUID_RDTSC_BIT;
- if(flags0 & x86_CPUID_1_bits::SSE2)
- features_detected |= CPUID::CPUID_SSE2_BIT;
- if(flags0 & x86_CPUID_1_bits::CLMUL)
- features_detected |= CPUID::CPUID_CLMUL_BIT;
- if(flags0 & x86_CPUID_1_bits::SSSE3)
- features_detected |= CPUID::CPUID_SSSE3_BIT;
- if(flags0 & x86_CPUID_1_bits::SSE41)
- features_detected |= CPUID::CPUID_SSE41_BIT;
- if(flags0 & x86_CPUID_1_bits::SSE42)
- features_detected |= CPUID::CPUID_SSE42_BIT;
- if(flags0 & x86_CPUID_1_bits::AESNI)
- features_detected |= CPUID::CPUID_AESNI_BIT;
- if(flags0 & x86_CPUID_1_bits::RDRAND)
- features_detected |= CPUID::CPUID_RDRAND_BIT;
- }
-
- if(is_intel)
- {
- // Intel cache line size is in cpuid(1) output
- *cache_line_size = 8 * get_byte(2, cpuid[1]);
- }
- else if(is_amd)
- {
- // AMD puts it in vendor zone
- X86_CPUID(0x80000005, cpuid);
- *cache_line_size = get_byte(3, cpuid[2]);
- }
-
- if(max_supported_sublevel >= 7)
- {
- clear_mem(cpuid, 4);
- X86_CPUID_SUBLEVEL(7, 0, cpuid);
-
- enum x86_CPUID_7_bits : uint64_t {
- AVX2 = (1ULL << 5),
- BMI2 = (1ULL << 8),
- AVX512F = (1ULL << 16),
- RDSEED = (1ULL << 18),
- ADX = (1ULL << 19),
- SHA = (1ULL << 29),
- };
- uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
-
- if(flags7 & x86_CPUID_7_bits::AVX2)
- features_detected |= CPUID::CPUID_AVX2_BIT;
- if(flags7 & x86_CPUID_7_bits::BMI2)
- features_detected |= CPUID::CPUID_BMI2_BIT;
- if(flags7 & x86_CPUID_7_bits::AVX512F)
- features_detected |= CPUID::CPUID_AVX512F_BIT;
- if(flags7 & x86_CPUID_7_bits::RDSEED)
- features_detected |= CPUID::CPUID_RDSEED_BIT;
- if(flags7 & x86_CPUID_7_bits::ADX)
- features_detected |= CPUID::CPUID_ADX_BIT;
- if(flags7 & x86_CPUID_7_bits::SHA)
- features_detected |= CPUID::CPUID_SHA_BIT;
- }
-
-#undef X86_CPUID
-#undef X86_CPUID_SUBLEVEL
-
- /*
- * If we don't have access to CPUID, we can still safely assume that
- * any x86-64 processor has SSE2 and RDTSC
- */
-#if defined(BOTAN_TARGET_ARCH_IS_X86_64)
- if(features_detected == 0)
- {
- features_detected |= CPUID::CPUID_SSE2_BIT;
- features_detected |= CPUID::CPUID_RDTSC_BIT;
- }
-#endif
-
- return features_detected;
- }
-
-#endif
-
-}
-
-bool CPUID::has_simd_32()
- {
-#if defined(BOTAN_TARGET_SUPPORTS_SSE2)
- return CPUID::has_sse2();
-#elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC)
- return CPUID::has_altivec();
-#elif defined(BOTAN_TARGET_SUPPORTS_NEON)
- return CPUID::has_neon();
-#else
- return true;
-#endif
- }
-
-//static
-std::string CPUID::to_string()
- {
- std::vector<std::string> flags;
-
-#define CPUID_PRINT(flag) do { if(has_##flag()) { flags.push_back(#flag); } } while(0)
-
-#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
- CPUID_PRINT(sse2);
- CPUID_PRINT(ssse3);
- CPUID_PRINT(sse41);
- CPUID_PRINT(sse42);
- CPUID_PRINT(avx2);
- CPUID_PRINT(avx512f);
-
- CPUID_PRINT(rdtsc);
- CPUID_PRINT(bmi2);
- CPUID_PRINT(adx);
-
- CPUID_PRINT(aes_ni);
- CPUID_PRINT(clmul);
- CPUID_PRINT(rdrand);
- CPUID_PRINT(rdseed);
- CPUID_PRINT(intel_sha);
-#endif
-
-#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
- CPUID_PRINT(altivec);
-#endif
-
-#if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
- CPUID_PRINT(neon);
- CPUID_PRINT(arm_sha1);
- CPUID_PRINT(arm_sha2);
- CPUID_PRINT(arm_aes);
- CPUID_PRINT(arm_pmull);
-#endif
-
-#undef CPUID_PRINT
-
- return string_join(flags, ' ');
- }
-
-//static
-void CPUID::print(std::ostream& o)
- {
- o << "CPUID flags: " << CPUID::to_string() << "\n";
- }
-
-//static
-void CPUID::initialize()
- {
- g_processor_features = 0;
-
-#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) || \
- defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) || \
- defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
-
- g_processor_features = detect_cpu_features(&g_cache_line_size);
-
-#endif
-
- g_processor_features |= CPUID::CPUID_INITIALIZED_BIT;
- }
-
-//static
-CPUID::Endian_status CPUID::runtime_check_endian()
- {
- // Check runtime endian
- const uint32_t endian32 = 0x01234567;
- const uint8_t* e8 = reinterpret_cast<const uint8_t*>(&endian32);
-
- Endian_status endian = ENDIAN_UNKNOWN;
-
- if(e8[0] == 0x01 && e8[1] == 0x23 && e8[2] == 0x45 && e8[3] == 0x67)
- {
- endian = ENDIAN_BIG;
- }
- else if(e8[0] == 0x67 && e8[1] == 0x45 && e8[2] == 0x23 && e8[3] == 0x01)
- {
- endian = ENDIAN_LITTLE;
- }
- else
- {
- throw Internal_Error("Unexpected endian at runtime, neither big nor little");
- }
-
- // If we were compiled with a known endian, verify it matches at runtime
-#if defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN)
- BOTAN_ASSERT(endian == ENDIAN_LITTLE, "Build and runtime endian match");
-#elif defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN)
- BOTAN_ASSERT(endian == ENDIAN_BIG, "Build and runtime endian match");
-#endif
-
- return endian;
- }
-
-std::vector<Botan::CPUID::CPUID_bits>
-CPUID::bit_from_string(const std::string& tok)
- {
-#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
- if(tok == "sse2" || tok == "simd")
- return {Botan::CPUID::CPUID_SSE2_BIT};
- if(tok == "ssse3")
- return {Botan::CPUID::CPUID_SSSE3_BIT};
- if(tok == "aesni")
- return {Botan::CPUID::CPUID_AESNI_BIT};
- if(tok == "clmul")
- return {Botan::CPUID::CPUID_CLMUL_BIT};
- if(tok == "avx2")
- return {Botan::CPUID::CPUID_AVX2_BIT};
- if(tok == "sha")
- return {Botan::CPUID::CPUID_SHA_BIT};
-
-#elif defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
- if(tok == "altivec" || tok == "simd")
- return {Botan::CPUID::CPUID_ALTIVEC_BIT};
-
-#elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
- if(tok == "neon" || tok == "simd")
- return {Botan::CPUID::CPUID_ARM_NEON_BIT};
- if(tok == "armv8sha1")
- return {Botan::CPUID::CPUID_ARM_SHA1_BIT};
- if(tok == "armv8sha2")
- return {Botan::CPUID::CPUID_ARM_SHA2_BIT};
- if(tok == "armv8aes")
- return {Botan::CPUID::CPUID_ARM_AES_BIT};
- if(tok == "armv8pmull")
- return {Botan::CPUID::CPUID_ARM_PMULL_BIT};
-
-#else
- BOTAN_UNUSED(tok);
-#endif
-
- return {};
- }
-
-}
diff --git a/src/lib/utils/cpuid/cpuid.cpp b/src/lib/utils/cpuid/cpuid.cpp
new file mode 100644
index 000000000..3948853ac
--- /dev/null
+++ b/src/lib/utils/cpuid/cpuid.cpp
@@ -0,0 +1,169 @@
+/*
+* Runtime CPU detection
+* (C) 2009,2010,2013,2017 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/cpuid.h>
+#include <botan/types.h>
+#include <botan/exceptn.h>
+#include <ostream>
+
+namespace Botan {
+
+uint64_t CPUID::g_processor_features = 0;
+size_t CPUID::g_cache_line_size = BOTAN_TARGET_CPU_DEFAULT_CACHE_LINE_SIZE;
+CPUID::Endian_status CPUID::g_endian_status = ENDIAN_UNKNOWN;
+
+bool CPUID::has_simd_32()
+ {
+#if defined(BOTAN_TARGET_SUPPORTS_SSE2)
+ return CPUID::has_sse2();
+#elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC)
+ return CPUID::has_altivec();
+#elif defined(BOTAN_TARGET_SUPPORTS_NEON)
+ return CPUID::has_neon();
+#else
+ return true;
+#endif
+ }
+
+//static
+std::string CPUID::to_string()
+ {
+ std::vector<std::string> flags;
+
+#define CPUID_PRINT(flag) do { if(has_##flag()) { flags.push_back(#flag); } } while(0)
+
+#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
+ CPUID_PRINT(sse2);
+ CPUID_PRINT(ssse3);
+ CPUID_PRINT(sse41);
+ CPUID_PRINT(sse42);
+ CPUID_PRINT(avx2);
+ CPUID_PRINT(avx512f);
+
+ CPUID_PRINT(rdtsc);
+ CPUID_PRINT(bmi2);
+ CPUID_PRINT(adx);
+
+ CPUID_PRINT(aes_ni);
+ CPUID_PRINT(clmul);
+ CPUID_PRINT(rdrand);
+ CPUID_PRINT(rdseed);
+ CPUID_PRINT(intel_sha);
+#endif
+
+#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
+ CPUID_PRINT(altivec);
+#endif
+
+#if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
+ CPUID_PRINT(neon);
+ CPUID_PRINT(arm_sha1);
+ CPUID_PRINT(arm_sha2);
+ CPUID_PRINT(arm_aes);
+ CPUID_PRINT(arm_pmull);
+#endif
+
+#undef CPUID_PRINT
+
+ return string_join(flags, ' ');
+ }
+
+//static
+void CPUID::print(std::ostream& o)
+ {
+ o << "CPUID flags: " << CPUID::to_string() << "\n";
+ }
+
+//static
+void CPUID::initialize()
+ {
+ g_processor_features = 0;
+
+#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) || \
+ defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) || \
+ defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
+
+ g_processor_features = CPUID::detect_cpu_features(&g_cache_line_size);
+
+#endif
+
+ g_processor_features |= CPUID::CPUID_INITIALIZED_BIT;
+ }
+
+//static
+CPUID::Endian_status CPUID::runtime_check_endian()
+ {
+ // Check runtime endian
+ const uint32_t endian32 = 0x01234567;
+ const uint8_t* e8 = reinterpret_cast<const uint8_t*>(&endian32);
+
+ Endian_status endian = ENDIAN_UNKNOWN;
+
+ if(e8[0] == 0x01 && e8[1] == 0x23 && e8[2] == 0x45 && e8[3] == 0x67)
+ {
+ endian = ENDIAN_BIG;
+ }
+ else if(e8[0] == 0x67 && e8[1] == 0x45 && e8[2] == 0x23 && e8[3] == 0x01)
+ {
+ endian = ENDIAN_LITTLE;
+ }
+ else
+ {
+ throw Internal_Error("Unexpected endian at runtime, neither big nor little");
+ }
+
+ // If we were compiled with a known endian, verify it matches at runtime
+#if defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN)
+ BOTAN_ASSERT(endian == ENDIAN_LITTLE, "Build and runtime endian match");
+#elif defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN)
+ BOTAN_ASSERT(endian == ENDIAN_BIG, "Build and runtime endian match");
+#endif
+
+ return endian;
+ }
+
+std::vector<Botan::CPUID::CPUID_bits>
+CPUID::bit_from_string(const std::string& tok)
+ {
+#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
+ if(tok == "sse2" || tok == "simd")
+ return {Botan::CPUID::CPUID_SSE2_BIT};
+ if(tok == "ssse3")
+ return {Botan::CPUID::CPUID_SSSE3_BIT};
+ if(tok == "aesni")
+ return {Botan::CPUID::CPUID_AESNI_BIT};
+ if(tok == "clmul")
+ return {Botan::CPUID::CPUID_CLMUL_BIT};
+ if(tok == "avx2")
+ return {Botan::CPUID::CPUID_AVX2_BIT};
+ if(tok == "sha")
+ return {Botan::CPUID::CPUID_SHA_BIT};
+
+#elif defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
+ if(tok == "altivec" || tok == "simd")
+ return {Botan::CPUID::CPUID_ALTIVEC_BIT};
+
+#elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
+ if(tok == "neon" || tok == "simd")
+ return {Botan::CPUID::CPUID_ARM_NEON_BIT};
+ if(tok == "armv8sha1")
+ return {Botan::CPUID::CPUID_ARM_SHA1_BIT};
+ if(tok == "armv8sha2")
+ return {Botan::CPUID::CPUID_ARM_SHA2_BIT};
+ if(tok == "armv8aes")
+ return {Botan::CPUID::CPUID_ARM_AES_BIT};
+ if(tok == "armv8pmull")
+ return {Botan::CPUID::CPUID_ARM_PMULL_BIT};
+
+#else
+ BOTAN_UNUSED(tok);
+#endif
+
+ return {};
+ }
+
+}
diff --git a/src/lib/utils/cpuid.h b/src/lib/utils/cpuid/cpuid.h
index 119395c0d..b32d040ea 100644
--- a/src/lib/utils/cpuid.h
+++ b/src/lib/utils/cpuid/cpuid.h
@@ -281,6 +281,14 @@ class BOTAN_DLL CPUID
ENDIAN_LITTLE = 0x67452301,
};
+#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) || \
+ defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) || \
+ defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
+
+ static uint64_t detect_cpu_features(size_t* cache_line_size);
+
+#endif
+
static Endian_status runtime_check_endian();
static Endian_status endian_status()
diff --git a/src/lib/utils/cpuid/cpuid_arm.cpp b/src/lib/utils/cpuid/cpuid_arm.cpp
new file mode 100644
index 000000000..39b6db652
--- /dev/null
+++ b/src/lib/utils/cpuid/cpuid_arm.cpp
@@ -0,0 +1,214 @@
+/*
+* Runtime CPU detection for ARM
+* (C) 2009,2010,2013,2017 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/cpuid.h>
+
+#if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
+
+#if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL)
+ #include <sys/auxv.h>
+
+#elif defined(BOTAN_TARGET_OS_IS_IOS)
+ #include <sys/types.h>
+ #include <sys/sysctl.h>
+
+#else
+ #include <botan/internal/os_utils.h>
+
+#endif
+
+#endif
+
+namespace Botan {
+
+#if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
+
+#if defined(BOTAN_TARGET_OS_IS_IOS)
+
+namespace {
+
+uint64_t flags_by_ios_machine_type(const std::string& machine)
+ {
+ /*
+ * This relies on a map of known machine names to features. This
+ * will quickly grow out of date as new products are introduced, but
+ * is apparently the best we can do for iOS.
+ */
+
+ struct version_info {
+ std::string name;
+ size_t min_version_neon;
+ size_t min_version_armv8;
+ };
+
+ static const version_info min_versions[] = {
+ { "iPhone", 2, 6 },
+ { "iPad", 1, 4 },
+ { "iPod", 4, 7 },
+ { "AppleTV", 2, 5 },
+ };
+
+ if(machine.size() < 3)
+ return 0;
+
+ auto comma = machine.find(',');
+
+ // Simulator, or something we don't know about
+ if(comma == std::string::npos)
+ return 0;
+
+ std::string product = machine.substr(0, comma);
+
+ size_t version = 0;
+ size_t place = 1;
+ while(product.size() > 1 && ::isdigit(product.back()))
+ {
+ const size_t digit = product.back() - '0';
+ version += digit * place;
+ place *= 10;
+ product.pop_back();
+ }
+
+ if(version == 0)
+ return 0;
+
+ for(const version_info& info : min_versions)
+ {
+ if(info.name != product)
+ continue;
+
+ if(version >= info.min_version_armv8)
+ {
+ return CPUID::CPUID_ARM_AES_BIT |
+ CPUID::CPUID_ARM_PMULL_BIT |
+ CPUID::CPUID_ARM_SHA1_BIT |
+ CPUID::CPUID_ARM_SHA2_BIT |
+ CPUID::CPUID_ARM_NEON_BIT;
+ }
+
+ if(version >= info.min_version_neon)
+ return CPUID::CPUID_ARM_NEON_BIT;
+ }
+
+ // Some other product we don't know about
+ return 0;
+ }
+
+}
+
+#endif
+
+uint64_t CPUID::detect_cpu_features(size_t* cache_line_size)
+ {
+ uint64_t detected_features = 0;
+
+#if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL)
+ /*
+ * On systems with getauxval these bits should normally be defined
+ * in bits/auxv.h but some buggy? glibc installs seem to miss them.
+ * These following values are all fixed, for the Linux ELF format,
+ * so we just hardcode them in ARM_hwcap_bit enum.
+ */
+
+ enum ARM_hwcap_bit {
+#if defined(BOTAN_TARGET_ARCH_IS_ARM32)
+ NEON_bit = (1 << 12),
+ AES_bit = (1 << 0),
+ PMULL_bit = (1 << 1),
+ SHA1_bit = (1 << 2),
+ SHA2_bit = (1 << 3),
+
+ ARCH_hwcap_neon = 16, // AT_HWCAP
+ ARCH_hwcap_crypto = 26, // AT_HWCAP2
+#elif defined(BOTAN_TARGET_ARCH_IS_ARM64)
+ NEON_bit = (1 << 1),
+ AES_bit = (1 << 3),
+ PMULL_bit = (1 << 4),
+ SHA1_bit = (1 << 5),
+ SHA2_bit = (1 << 6),
+
+ ARCH_hwcap_neon = 16, // AT_HWCAP
+ ARCH_hwcap_crypto = 16, // AT_HWCAP
+#endif
+ };
+
+#if defined(AT_DCACHEBSIZE)
+ const unsigned long dcache_line = ::getauxval(AT_DCACHEBSIZE);
+
+ // plausibility check
+ if(dcache_line == 32 || dcache_line == 64 || dcache_line == 128)
+ *cache_line_size = static_cast<size_t>(dcache_line);
+#endif
+
+ const unsigned long hwcap_neon = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_neon);
+ if(hwcap_neon & ARM_hwcap_bit::NEON_bit)
+ detected_features |= CPUID::CPUID_ARM_NEON_BIT;
+
+ /*
+ On aarch64 this ends up calling getauxval twice with AT_HWCAP
+ It doesn't seem worth optimizing this out, since getauxval is
+ just reading a field in the ELF header.
+ */
+ const unsigned long hwcap_crypto = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_crypto);
+ if(hwcap_crypto & ARM_hwcap_bit::AES_bit)
+ detected_features |= CPUID::CPUID_ARM_AES_BIT;
+ if(hwcap_crypto & ARM_hwcap_bit::PMULL_bit)
+ detected_features |= CPUID::CPUID_ARM_PMULL_BIT;
+ if(hwcap_crypto & ARM_hwcap_bit::SHA1_bit)
+ detected_features |= CPUID::CPUID_ARM_SHA1_BIT;
+ if(hwcap_crypto & ARM_hwcap_bit::SHA2_bit)
+ detected_features |= CPUID::CPUID_ARM_SHA2_BIT;
+
+#elif defined(BOTAN_TARGET_OS_IS_IOS)
+
+ char machine[64] = { 0 };
+ size_t size = sizeof(machine) - 1;
+ ::sysctlbyname("hw.machine", machine, &size, nullptr, 0);
+
+ detected_features = flags_by_ios_machine_type(machine);
+
+#elif defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_ARM64)
+
+ /*
+ No getauxval API available, fall back on probe functions. We only
+ bother with Aarch64 here to simplify the code and because going to
+ extreme contortions to support detect NEON on devices that probably
+ don't support it doesn't seem worthwhile.
+
+ NEON registers v0-v7 are caller saved in Aarch64
+ */
+
+ auto neon_probe = []() -> int { asm("and v0.16b, v0.16b, v0.16b"); return 1; };
+ auto aes_probe = []() -> int { asm(".word 0x4e284800"); return 1; };
+ auto pmull_probe = []() -> int { asm(".word 0x0ee0e000"); return 1; };
+ auto sha1_probe = []() -> int { asm(".word 0x5e280800"); return 1; };
+ auto sha2_probe = []() -> int { asm(".word 0x5e282800"); return 1; };
+
+ // Only bother running the crypto detection if we found NEON
+
+ if(OS::run_cpu_instruction_probe(neon_probe) == 1)
+ {
+ detected_features |= CPUID::CPUID_ARM_NEON_BIT;
+
+ if(OS::run_cpu_instruction_probe(aes_probe) == 1)
+ detected_features |= CPUID::CPUID_ARM_AES_BIT;
+ if(OS::run_cpu_instruction_probe(pmull_probe) == 1)
+ detected_features |= CPUID::CPUID_ARM_PMULL_BIT;
+ if(OS::run_cpu_instruction_probe(sha1_probe) == 1)
+ detected_features |= CPUID::CPUID_ARM_SHA1_BIT;
+ if(OS::run_cpu_instruction_probe(sha2_probe) == 1)
+ detected_features |= CPUID::CPUID_ARM_SHA2_BIT;
+ }
+
+#endif
+
+ return detected_features;
+ }
+
+#endif
+
+}
diff --git a/src/lib/utils/cpuid/cpuid_ppc.cpp b/src/lib/utils/cpuid/cpuid_ppc.cpp
new file mode 100644
index 000000000..647672a71
--- /dev/null
+++ b/src/lib/utils/cpuid/cpuid_ppc.cpp
@@ -0,0 +1,101 @@
+/*
+* Runtime CPU detection for POWER/PowerPC
+* (C) 2009,2010,2013,2017 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/cpuid.h>
+#include <botan/internal/os_utils.h>
+
+#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
+
+/*
+* On Darwin and OpenBSD ppc, use sysctl to detect AltiVec
+*/
+#if defined(BOTAN_TARGET_OS_IS_DARWIN)
+ #include <sys/sysctl.h>
+#elif defined(BOTAN_TARGET_OS_IS_OPENBSD)
+ #include <sys/param.h>
+ #include <sys/sysctl.h>
+ #include <machine/cpu.h>
+#endif
+
+#endif
+
+namespace Botan {
+
+#if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
+
+/*
+* PowerPC specific block: check for AltiVec using either
+* sysctl or by reading processor version number register.
+*/
+uint64_t CPUID::detect_cpu_features(size_t* cache_line_size)
+ {
+#if defined(BOTAN_TARGET_OS_IS_DARWIN) || defined(BOTAN_TARGET_OS_IS_OPENBSD)
+ // On Darwin/OS X and OpenBSD, use sysctl
+
+ int sels[2] = {
+#if defined(BOTAN_TARGET_OS_IS_OPENBSD)
+ CTL_MACHDEP, CPU_ALTIVEC
+#else
+ CTL_HW, HW_VECTORUNIT
+#endif
+ };
+
+ int vector_type = 0;
+ size_t length = sizeof(vector_type);
+ int error = ::sysctl(sels, 2, &vector_type, &length, NULL, 0);
+
+ if(error == 0 && vector_type > 0)
+ return CPUID::CPUID_ALTIVEC_BIT;
+
+#else
+
+ /*
+ On PowerPC, MSR 287 is PVR, the Processor Version Number
+ Normally it is only accessible to ring 0, but Linux and NetBSD
+ (others, too, maybe?) will trap and emulate it for us.
+ */
+
+ int pvr = OS::run_cpu_instruction_probe([]() -> int {
+ uint32_t pvr = 0;
+ asm volatile("mfspr %0, 287" : "=r" (pvr));
+ // Top 16 bits suffice to identify the model
+ return static_cast<int>(pvr >> 16);
+ });
+
+ if(pvr > 0)
+ {
+ const uint16_t ALTIVEC_PVR[] = {
+ 0x003E, // IBM POWER6,
+ 0x003F, // IBM POWER7,
+ 0x004B, // IBM POWER8,
+ 0x000C, // G4-7400
+ 0x0039, // G5 970
+ 0x003C, // G5 970FX
+ 0x0044, // G5 970MP
+ 0x0070, // Cell PPU
+ 0, // end
+ };
+
+ for(size_t i = 0; ALTIVEC_PVR[i]; ++i)
+ {
+ if(pvr == ALTIVEC_PVR[i])
+ return CPUID::CPUID_ALTIVEC_BIT;
+ }
+
+ return 0;
+ }
+
+ // TODO try direct instruction probing
+
+#endif
+
+ return 0;
+ }
+
+#endif
+
+}
diff --git a/src/lib/utils/cpuid/cpuid_x86.cpp b/src/lib/utils/cpuid/cpuid_x86.cpp
new file mode 100644
index 000000000..be6c75a55
--- /dev/null
+++ b/src/lib/utils/cpuid/cpuid_x86.cpp
@@ -0,0 +1,167 @@
+/*
+* Runtime CPU detection for x86
+* (C) 2009,2010,2013,2017 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/cpuid.h>
+#include <botan/mem_ops.h>
+#include <botan/loadstor.h>
+
+#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
+
+#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
+ #include <intrin.h>
+#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
+ #include <ia32intrin.h>
+#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
+ #include <cpuid.h>
+#endif
+
+#endif
+
+namespace Botan {
+
+#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
+
+uint64_t CPUID::detect_cpu_features(size_t* cache_line_size)
+ {
+#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
+ #define X86_CPUID(type, out) do { __cpuid((int*)out, type); } while(0)
+ #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
+
+#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
+ #define X86_CPUID(type, out) do { __cpuid(out, type); } while(0)
+ #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
+
+#elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && defined(BOTAN_USE_GCC_INLINE_ASM)
+ #define X86_CPUID(type, out) \
+ asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
+ : "0" (type))
+
+ #define X86_CPUID_SUBLEVEL(type, level, out) \
+ asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
+ : "0" (type), "2" (level))
+
+#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
+ #define X86_CPUID(type, out) do { __get_cpuid(type, out, out+1, out+2, out+3); } while(0)
+
+ #define X86_CPUID_SUBLEVEL(type, level, out) \
+ do { __cpuid_count(type, level, out[0], out[1], out[2], out[3]); } while(0)
+#else
+ #warning "No way of calling x86 cpuid instruction for this compiler"
+ #define X86_CPUID(type, out) do { clear_mem(out, 4); } while(0)
+ #define X86_CPUID_SUBLEVEL(type, level, out) do { clear_mem(out, 4); } while(0)
+#endif
+
+ uint64_t features_detected = 0;
+ uint32_t cpuid[4] = { 0 };
+
+ // CPUID 0: vendor identification, max sublevel
+ X86_CPUID(0, cpuid);
+
+ const uint32_t max_supported_sublevel = cpuid[0];
+
+ const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 };
+ const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 };
+ const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3);
+ const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3);
+
+ if(max_supported_sublevel >= 1)
+ {
+ // CPUID 1: feature bits
+ X86_CPUID(1, cpuid);
+ const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
+
+ enum x86_CPUID_1_bits : uint64_t {
+ RDTSC = (1ULL << 4),
+ SSE2 = (1ULL << 26),
+ CLMUL = (1ULL << 33),
+ SSSE3 = (1ULL << 41),
+ SSE41 = (1ULL << 51),
+ SSE42 = (1ULL << 52),
+ AESNI = (1ULL << 57),
+ RDRAND = (1ULL << 62)
+ };
+
+ if(flags0 & x86_CPUID_1_bits::RDTSC)
+ features_detected |= CPUID::CPUID_RDTSC_BIT;
+ if(flags0 & x86_CPUID_1_bits::SSE2)
+ features_detected |= CPUID::CPUID_SSE2_BIT;
+ if(flags0 & x86_CPUID_1_bits::CLMUL)
+ features_detected |= CPUID::CPUID_CLMUL_BIT;
+ if(flags0 & x86_CPUID_1_bits::SSSE3)
+ features_detected |= CPUID::CPUID_SSSE3_BIT;
+ if(flags0 & x86_CPUID_1_bits::SSE41)
+ features_detected |= CPUID::CPUID_SSE41_BIT;
+ if(flags0 & x86_CPUID_1_bits::SSE42)
+ features_detected |= CPUID::CPUID_SSE42_BIT;
+ if(flags0 & x86_CPUID_1_bits::AESNI)
+ features_detected |= CPUID::CPUID_AESNI_BIT;
+ if(flags0 & x86_CPUID_1_bits::RDRAND)
+ features_detected |= CPUID::CPUID_RDRAND_BIT;
+ }
+
+ if(is_intel)
+ {
+ // Intel cache line size is in cpuid(1) output
+ *cache_line_size = 8 * get_byte(2, cpuid[1]);
+ }
+ else if(is_amd)
+ {
+ // AMD puts it in vendor zone
+ X86_CPUID(0x80000005, cpuid);
+ *cache_line_size = get_byte(3, cpuid[2]);
+ }
+
+ if(max_supported_sublevel >= 7)
+ {
+ clear_mem(cpuid, 4);
+ X86_CPUID_SUBLEVEL(7, 0, cpuid);
+
+ enum x86_CPUID_7_bits : uint64_t {
+ AVX2 = (1ULL << 5),
+ BMI2 = (1ULL << 8),
+ AVX512F = (1ULL << 16),
+ RDSEED = (1ULL << 18),
+ ADX = (1ULL << 19),
+ SHA = (1ULL << 29),
+ };
+ uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
+
+ if(flags7 & x86_CPUID_7_bits::AVX2)
+ features_detected |= CPUID::CPUID_AVX2_BIT;
+ if(flags7 & x86_CPUID_7_bits::BMI2)
+ features_detected |= CPUID::CPUID_BMI2_BIT;
+ if(flags7 & x86_CPUID_7_bits::AVX512F)
+ features_detected |= CPUID::CPUID_AVX512F_BIT;
+ if(flags7 & x86_CPUID_7_bits::RDSEED)
+ features_detected |= CPUID::CPUID_RDSEED_BIT;
+ if(flags7 & x86_CPUID_7_bits::ADX)
+ features_detected |= CPUID::CPUID_ADX_BIT;
+ if(flags7 & x86_CPUID_7_bits::SHA)
+ features_detected |= CPUID::CPUID_SHA_BIT;
+ }
+
+#undef X86_CPUID
+#undef X86_CPUID_SUBLEVEL
+
+ /*
+ * If we don't have access to CPUID, we can still safely assume that
+ * any x86-64 processor has SSE2 and RDTSC
+ */
+#if defined(BOTAN_TARGET_ARCH_IS_X86_64)
+ if(features_detected == 0)
+ {
+ features_detected |= CPUID::CPUID_SSE2_BIT;
+ features_detected |= CPUID::CPUID_RDTSC_BIT;
+ }
+#endif
+
+ return features_detected;
+ }
+
+#endif
+
+}
diff --git a/src/lib/utils/cpuid/info.txt b/src/lib/utils/cpuid/info.txt
new file mode 100644
index 000000000..987d7eae4
--- /dev/null
+++ b/src/lib/utils/cpuid/info.txt
@@ -0,0 +1,7 @@
+<defines>
+CPUID -> 20170917
+</defines>
+
+<header:public>
+cpuid.h
+</header:public>
diff --git a/src/lib/utils/info.txt b/src/lib/utils/info.txt
index 8bbe2925b..644bff901 100644
--- a/src/lib/utils/info.txt
+++ b/src/lib/utils/info.txt
@@ -9,7 +9,6 @@ assert.h
bswap.h
calendar.h
charset.h
-cpuid.h
compiler.h
data_src.h
database.h
@@ -45,3 +44,7 @@ linux -> rt
mingw -> ws2_32
windows -> ws2_32.lib
</libs>
+
+<requires>
+cpuid
+</requires>