diff options
Diffstat (limited to 'src/lib/utils/cpuid/cpuid_arm.cpp')
-rw-r--r-- | src/lib/utils/cpuid/cpuid_arm.cpp | 214 |
1 files changed, 214 insertions, 0 deletions
diff --git a/src/lib/utils/cpuid/cpuid_arm.cpp b/src/lib/utils/cpuid/cpuid_arm.cpp new file mode 100644 index 000000000..39b6db652 --- /dev/null +++ b/src/lib/utils/cpuid/cpuid_arm.cpp @@ -0,0 +1,214 @@ +/* +* Runtime CPU detection for ARM +* (C) 2009,2010,2013,2017 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/cpuid.h> + +#if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) + +#if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL) + #include <sys/auxv.h> + +#elif defined(BOTAN_TARGET_OS_IS_IOS) + #include <sys/types.h> + #include <sys/sysctl.h> + +#else + #include <botan/internal/os_utils.h> + +#endif + +#endif + +namespace Botan { + +#if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) + +#if defined(BOTAN_TARGET_OS_IS_IOS) + +namespace { + +uint64_t flags_by_ios_machine_type(const std::string& machine) + { + /* + * This relies on a map of known machine names to features. This + * will quickly grow out of date as new products are introduced, but + * is apparently the best we can do for iOS. + */ + + struct version_info { + std::string name; + size_t min_version_neon; + size_t min_version_armv8; + }; + + static const version_info min_versions[] = { + { "iPhone", 2, 6 }, + { "iPad", 1, 4 }, + { "iPod", 4, 7 }, + { "AppleTV", 2, 5 }, + }; + + if(machine.size() < 3) + return 0; + + auto comma = machine.find(','); + + // Simulator, or something we don't know about + if(comma == std::string::npos) + return 0; + + std::string product = machine.substr(0, comma); + + size_t version = 0; + size_t place = 1; + while(product.size() > 1 && ::isdigit(product.back())) + { + const size_t digit = product.back() - '0'; + version += digit * place; + place *= 10; + product.pop_back(); + } + + if(version == 0) + return 0; + + for(const version_info& info : min_versions) + { + if(info.name != product) + continue; + + if(version >= info.min_version_armv8) + { + return CPUID::CPUID_ARM_AES_BIT | + CPUID::CPUID_ARM_PMULL_BIT | + CPUID::CPUID_ARM_SHA1_BIT | + CPUID::CPUID_ARM_SHA2_BIT | + CPUID::CPUID_ARM_NEON_BIT; + } + + if(version >= info.min_version_neon) + return CPUID::CPUID_ARM_NEON_BIT; + } + + // Some other product we don't know about + return 0; + } + +} + +#endif + +uint64_t CPUID::detect_cpu_features(size_t* cache_line_size) + { + uint64_t detected_features = 0; + +#if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL) + /* + * On systems with getauxval these bits should normally be defined + * in bits/auxv.h but some buggy? glibc installs seem to miss them. + * These following values are all fixed, for the Linux ELF format, + * so we just hardcode them in ARM_hwcap_bit enum. + */ + + enum ARM_hwcap_bit { +#if defined(BOTAN_TARGET_ARCH_IS_ARM32) + NEON_bit = (1 << 12), + AES_bit = (1 << 0), + PMULL_bit = (1 << 1), + SHA1_bit = (1 << 2), + SHA2_bit = (1 << 3), + + ARCH_hwcap_neon = 16, // AT_HWCAP + ARCH_hwcap_crypto = 26, // AT_HWCAP2 +#elif defined(BOTAN_TARGET_ARCH_IS_ARM64) + NEON_bit = (1 << 1), + AES_bit = (1 << 3), + PMULL_bit = (1 << 4), + SHA1_bit = (1 << 5), + SHA2_bit = (1 << 6), + + ARCH_hwcap_neon = 16, // AT_HWCAP + ARCH_hwcap_crypto = 16, // AT_HWCAP +#endif + }; + +#if defined(AT_DCACHEBSIZE) + const unsigned long dcache_line = ::getauxval(AT_DCACHEBSIZE); + + // plausibility check + if(dcache_line == 32 || dcache_line == 64 || dcache_line == 128) + *cache_line_size = static_cast<size_t>(dcache_line); +#endif + + const unsigned long hwcap_neon = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_neon); + if(hwcap_neon & ARM_hwcap_bit::NEON_bit) + detected_features |= CPUID::CPUID_ARM_NEON_BIT; + + /* + On aarch64 this ends up calling getauxval twice with AT_HWCAP + It doesn't seem worth optimizing this out, since getauxval is + just reading a field in the ELF header. + */ + const unsigned long hwcap_crypto = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_crypto); + if(hwcap_crypto & ARM_hwcap_bit::AES_bit) + detected_features |= CPUID::CPUID_ARM_AES_BIT; + if(hwcap_crypto & ARM_hwcap_bit::PMULL_bit) + detected_features |= CPUID::CPUID_ARM_PMULL_BIT; + if(hwcap_crypto & ARM_hwcap_bit::SHA1_bit) + detected_features |= CPUID::CPUID_ARM_SHA1_BIT; + if(hwcap_crypto & ARM_hwcap_bit::SHA2_bit) + detected_features |= CPUID::CPUID_ARM_SHA2_BIT; + +#elif defined(BOTAN_TARGET_OS_IS_IOS) + + char machine[64] = { 0 }; + size_t size = sizeof(machine) - 1; + ::sysctlbyname("hw.machine", machine, &size, nullptr, 0); + + detected_features = flags_by_ios_machine_type(machine); + +#elif defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_ARM64) + + /* + No getauxval API available, fall back on probe functions. We only + bother with Aarch64 here to simplify the code and because going to + extreme contortions to support detect NEON on devices that probably + don't support it doesn't seem worthwhile. + + NEON registers v0-v7 are caller saved in Aarch64 + */ + + auto neon_probe = []() -> int { asm("and v0.16b, v0.16b, v0.16b"); return 1; }; + auto aes_probe = []() -> int { asm(".word 0x4e284800"); return 1; }; + auto pmull_probe = []() -> int { asm(".word 0x0ee0e000"); return 1; }; + auto sha1_probe = []() -> int { asm(".word 0x5e280800"); return 1; }; + auto sha2_probe = []() -> int { asm(".word 0x5e282800"); return 1; }; + + // Only bother running the crypto detection if we found NEON + + if(OS::run_cpu_instruction_probe(neon_probe) == 1) + { + detected_features |= CPUID::CPUID_ARM_NEON_BIT; + + if(OS::run_cpu_instruction_probe(aes_probe) == 1) + detected_features |= CPUID::CPUID_ARM_AES_BIT; + if(OS::run_cpu_instruction_probe(pmull_probe) == 1) + detected_features |= CPUID::CPUID_ARM_PMULL_BIT; + if(OS::run_cpu_instruction_probe(sha1_probe) == 1) + detected_features |= CPUID::CPUID_ARM_SHA1_BIT; + if(OS::run_cpu_instruction_probe(sha2_probe) == 1) + detected_features |= CPUID::CPUID_ARM_SHA2_BIT; + } + +#endif + + return detected_features; + } + +#endif + +} |