/* * Runtime CPU detection for ARM * (C) 2009,2010,2013,2017 Jack Lloyd * * Botan is released under the Simplified BSD License (see license.txt) */ #include #if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) #if defined(BOTAN_TARGET_OS_IS_IOS) #include #include #else #include #endif #endif namespace Botan { #if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) #if defined(BOTAN_TARGET_OS_IS_IOS) namespace { uint64_t flags_by_ios_machine_type(const std::string& machine) { /* * This relies on a map of known machine names to features. This * will quickly grow out of date as new products are introduced, but * is apparently the best we can do for iOS. */ struct version_info { std::string name; size_t min_version_neon; size_t min_version_armv8; }; static const version_info min_versions[] = { { "iPhone", 2, 6 }, { "iPad", 1, 4 }, { "iPod", 4, 7 }, { "AppleTV", 2, 5 }, }; if(machine.size() < 3) return 0; auto comma = machine.find(','); // Simulator, or something we don't know about if(comma == std::string::npos) return 0; std::string product = machine.substr(0, comma); size_t version = 0; size_t place = 1; while(product.size() > 1 && ::isdigit(product.back())) { const size_t digit = product.back() - '0'; version += digit * place; place *= 10; product.pop_back(); } if(version == 0) return 0; for(const version_info& info : min_versions) { if(info.name != product) continue; if(version >= info.min_version_armv8) { return CPUID::CPUID_ARM_AES_BIT | CPUID::CPUID_ARM_PMULL_BIT | CPUID::CPUID_ARM_SHA1_BIT | CPUID::CPUID_ARM_SHA2_BIT | CPUID::CPUID_ARM_NEON_BIT; } if(version >= info.min_version_neon) return CPUID::CPUID_ARM_NEON_BIT; } // Some other product we don't know about return 0; } } #endif uint64_t CPUID::CPUID_Data::detect_cpu_features(size_t* cache_line_size) { uint64_t detected_features = 0; #if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL) || defined(BOTAN_TARGET_OS_HAS_ELF_AUX_INFO) /* * On systems with getauxval these bits should normally be defined * in bits/auxv.h but some buggy? glibc installs seem to miss them. * These following values are all fixed, for the Linux ELF format, * so we just hardcode them in ARM_hwcap_bit enum. */ enum ARM_hwcap_bit { #if defined(BOTAN_TARGET_ARCH_IS_ARM32) NEON_bit = (1 << 12), AES_bit = (1 << 0), PMULL_bit = (1 << 1), SHA1_bit = (1 << 2), SHA2_bit = (1 << 3), ARCH_hwcap_neon = 16, // AT_HWCAP ARCH_hwcap_crypto = 26, // AT_HWCAP2 #elif defined(BOTAN_TARGET_ARCH_IS_ARM64) NEON_bit = (1 << 1), AES_bit = (1 << 3), PMULL_bit = (1 << 4), SHA1_bit = (1 << 5), SHA2_bit = (1 << 6), SHA3_bit = (1 << 17), SM3_bit = (1 << 18), SM4_bit = (1 << 19), SHA2_512_bit = (1 << 21), SVE_bit = (1 << 22), ARCH_hwcap_neon = 16, // AT_HWCAP ARCH_hwcap_crypto = 16, // AT_HWCAP #endif }; #if defined(AT_DCACHEBSIZE) // Exists only on Linux const unsigned long dcache_line = ::getauxval(AT_DCACHEBSIZE); // plausibility check if(dcache_line == 32 || dcache_line == 64 || dcache_line == 128) *cache_line_size = static_cast(dcache_line); #endif const unsigned long hwcap_neon = OS::get_auxval(ARM_hwcap_bit::ARCH_hwcap_neon); if(hwcap_neon & ARM_hwcap_bit::NEON_bit) detected_features |= CPUID::CPUID_ARM_NEON_BIT; /* On aarch64 this ends up calling getauxval twice with AT_HWCAP It doesn't seem worth optimizing this out, since getauxval is just reading a field in the ELF header. */ const unsigned long hwcap_crypto = OS::get_auxval(ARM_hwcap_bit::ARCH_hwcap_crypto); if(hwcap_crypto & ARM_hwcap_bit::AES_bit) detected_features |= CPUID::CPUID_ARM_AES_BIT; if(hwcap_crypto & ARM_hwcap_bit::PMULL_bit) detected_features |= CPUID::CPUID_ARM_PMULL_BIT; if(hwcap_crypto & ARM_hwcap_bit::SHA1_bit) detected_features |= CPUID::CPUID_ARM_SHA1_BIT; if(hwcap_crypto & ARM_hwcap_bit::SHA2_bit) detected_features |= CPUID::CPUID_ARM_SHA2_BIT; #if defined(BOTAN_TARGET_ARCH_IS_ARM64) if(hwcap_crypto & ARM_hwcap_bit::SHA3_bit) detected_features |= CPUID::CPUID_ARM_SHA3_BIT; if(hwcap_crypto & ARM_hwcap_bit::SM3_bit) detected_features |= CPUID::CPUID_ARM_SM3_BIT; if(hwcap_crypto & ARM_hwcap_bit::SM4_bit) detected_features |= CPUID::CPUID_ARM_SM4_BIT; if(hwcap_crypto & ARM_hwcap_bit::SHA2_512_bit) detected_features |= CPUID::CPUID_ARM_SHA2_512_BIT; if(hwcap_crypto & ARM_hwcap_bit::SVE_bit) detected_features |= CPUID::CPUID_ARM_SVE_BIT; #endif #elif defined(BOTAN_TARGET_OS_IS_IOS) char machine[64] = { 0 }; size_t size = sizeof(machine) - 1; ::sysctlbyname("hw.machine", machine, &size, nullptr, 0); detected_features = flags_by_ios_machine_type(machine); #elif defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_ARM64) /* No getauxval API available, fall back on probe functions. We only bother with Aarch64 here to simplify the code and because going to extreme contortions to support detect NEON on devices that probably don't support it doesn't seem worthwhile. NEON registers v0-v7 are caller saved in Aarch64 */ auto neon_probe = []() -> int { asm("and v0.16b, v0.16b, v0.16b"); return 1; }; auto aes_probe = []() -> int { asm(".word 0x4e284800"); return 1; }; auto pmull_probe = []() -> int { asm(".word 0x0ee0e000"); return 1; }; auto sha1_probe = []() -> int { asm(".word 0x5e280800"); return 1; }; auto sha2_probe = []() -> int { asm(".word 0x5e282800"); return 1; }; // Only bother running the crypto detection if we found NEON if(OS::run_cpu_instruction_probe(neon_probe) == 1) { detected_features |= CPUID::CPUID_ARM_NEON_BIT; if(OS::run_cpu_instruction_probe(aes_probe) == 1) detected_features |= CPUID::CPUID_ARM_AES_BIT; if(OS::run_cpu_instruction_probe(pmull_probe) == 1) detected_features |= CPUID::CPUID_ARM_PMULL_BIT; if(OS::run_cpu_instruction_probe(sha1_probe) == 1) detected_features |= CPUID::CPUID_ARM_SHA1_BIT; if(OS::run_cpu_instruction_probe(sha2_probe) == 1) detected_features |= CPUID::CPUID_ARM_SHA2_BIT; } #endif return detected_features; } #endif }