/* * Runtime CPU detection * (C) 2009,2010,2013,2017 Jack Lloyd * * Botan is released under the Simplified BSD License (see license.txt) */ #include #include #include #include #include #include #include #include #if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) /* * On Darwin and OpenBSD ppc, use sysctl to detect AltiVec */ #if defined(BOTAN_TARGET_OS_IS_DARWIN) #include #elif defined(BOTAN_TARGET_OS_IS_OPENBSD) #include #include #include #endif #elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) /* * On ARM, use getauxval if available, otherwise fall back to * running probe functions with a SIGILL handler. */ #if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL) #include #endif #elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) /* * On x86, use CPUID instruction */ #if defined(BOTAN_BUILD_COMPILER_IS_MSVC) #include #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL) #include #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG) #include #endif #endif namespace Botan { uint64_t CPUID::g_processor_features = 0; size_t CPUID::g_cache_line_size = BOTAN_TARGET_CPU_DEFAULT_CACHE_LINE_SIZE; CPUID::Endian_status CPUID::g_endian_status = ENDIAN_UNKNOWN; namespace { #if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) /* * PowerPC specific block: check for AltiVec using either * sysctl or by reading processor version number register. */ uint64_t detect_cpu_features(size_t* cache_line_size) { #if defined(BOTAN_TARGET_OS_IS_DARWIN) || defined(BOTAN_TARGET_OS_IS_OPENBSD) // On Darwin/OS X and OpenBSD, use sysctl int sels[2] = { #if defined(BOTAN_TARGET_OS_IS_OPENBSD) CTL_MACHDEP, CPU_ALTIVEC #else CTL_HW, HW_VECTORUNIT #endif }; int vector_type = 0; size_t length = sizeof(vector_type); int error = sysctl(sels, 2, &vector_type, &length, NULL, 0); if(error == 0 && vector_type > 0) return CPUID::CPUID_ALTIVEC_BIT; #else /* On PowerPC, MSR 287 is PVR, the Processor Version Number Normally it is only accessible to ring 0, but Linux and NetBSD (others, too, maybe?) will trap and emulate it for us. */ int pvr = OS::run_cpu_instruction_probe([]() -> int { uint32_t pvr = 0; asm volatile("mfspr %0, 287" : "=r" (pvr)); // Top 16 bits suffice to identify the model return static_cast(pvr >> 16); }); if(pvr > 0) { const uint16_t ALTIVEC_PVR[] = { 0x003E, // IBM POWER6, 0x003F, // IBM POWER7, 0x004B, // IBM POWER8, 0x000C, // G4-7400 0x0039, // G5 970 0x003C, // G5 970FX 0x0044, // G5 970MP 0x0070, // Cell PPU 0, // end }; for(size_t i = 0; ALTIVEC_PVR[i]; ++i) { if(pvr == ALTIVEC_PVR[i]) return CPUID::CPUID_ALTIVEC_BIT; } return 0; } // TODO try direct instruction probing #endif return 0; } #elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) uint64_t detect_cpu_features(size_t* cache_line_size) { uint64_t detected_features = 0; #if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL) /* * On systems with getauxval these bits should normally be defined * in bits/auxv.h but some buggy? glibc installs seem to miss them. * These following values are all fixed, for the Linux ELF format, * so we just hardcode them in ARM_hwcap_bit enum. */ enum ARM_hwcap_bit { #if defined(BOTAN_TARGET_ARCH_IS_ARM32) NEON_bit = (1 << 12), AES_bit = (1 << 0), PMULL_bit = (1 << 1), SHA1_bit = (1 << 2), SHA2_bit = (1 << 3), ARCH_hwcap_neon = 16, // AT_HWCAP ARCH_hwcap_crypto = 26, // AT_HWCAP2 #elif defined(BOTAN_TARGET_ARCH_IS_ARM64) NEON_bit = (1 << 1), AES_bit = (1 << 3), PMULL_bit = (1 << 4), SHA1_bit = (1 << 5), SHA2_bit = (1 << 6), ARCH_hwcap_neon = 16, // AT_HWCAP ARCH_hwcap_crypto = 16, // AT_HWCAP #endif }; const unsigned long hwcap_neon = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_neon); if(hwcap_neon & ARM_hwcap_bit::NEON_bit) detected_features |= CPUID::CPUID_ARM_NEON_BIT; /* On aarch64 this ends up calling getauxval twice with AT_HWCAP It doesn't seem worth optimizing this out, since getauxval is just reading a field in the ELF header. */ const unsigned long hwcap_crypto = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_crypto); if(hwcap_crypto & ARM_hwcap_bit::AES_bit) detected_features |= CPUID::CPUID_ARM_AES_BIT; if(hwcap_crypto & ARM_hwcap_bit::PMULL_bit) detected_features |= CPUID::CPUID_ARM_PMULL_BIT; if(hwcap_crypto & ARM_hwcap_bit::SHA1_bit) detected_features |= CPUID::CPUID_ARM_SHA1_BIT; if(hwcap_crypto & ARM_hwcap_bit::SHA2_bit) detected_features |= CPUID::CPUID_ARM_SHA2_BIT; #if defined(AT_DCACHEBSIZE) const unsigned long dcache_line = ::getauxval(AT_DCACHEBSIZE); // plausibility check if(dcache_line == 32 || dcache_line == 64 || dcache_line == 128) *cache_line_size = static_cast(dcache_line); #endif #else // No getauxval API available, fall back on probe functions // TODO: probe functions #endif return detected_features; } #elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) uint64_t detect_cpu_features(size_t* cache_line_size) { #if defined(BOTAN_BUILD_COMPILER_IS_MSVC) #define X86_CPUID(type, out) do { __cpuid((int*)out, type); } while(0) #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0) #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL) #define X86_CPUID(type, out) do { __cpuid(out, type); } while(0) #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0) #elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && defined(BOTAN_USE_GCC_INLINE_ASM) #define X86_CPUID(type, out) \ asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \ : "0" (type)) #define X86_CPUID_SUBLEVEL(type, level, out) \ asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \ : "0" (type), "2" (level)) #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG) #define X86_CPUID(type, out) do { __get_cpuid(type, out, out+1, out+2, out+3); } while(0) #define X86_CPUID_SUBLEVEL(type, level, out) \ do { __cpuid_count(type, level, out[0], out[1], out[2], out[3]); } while(0) #else #warning "No way of calling x86 cpuid instruction for this compiler" #define X86_CPUID(type, out) do { clear_mem(out, 4); } while(0) #define X86_CPUID_SUBLEVEL(type, level, out) do { clear_mem(out, 4); } while(0) #endif uint64_t features_detected = 0; uint32_t cpuid[4] = { 0 }; // CPUID 0: vendor identification, max sublevel X86_CPUID(0, cpuid); const uint32_t max_supported_sublevel = cpuid[0]; const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 }; const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 }; const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3); const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3); if(max_supported_sublevel >= 1) { // CPUID 1: feature bits X86_CPUID(1, cpuid); const uint64_t flags0 = (static_cast(cpuid[2]) << 32) | cpuid[3]; enum x86_CPUID_1_bits : uint64_t { RDTSC = (1ULL << 4), SSE2 = (1ULL << 26), CLMUL = (1ULL << 33), SSSE3 = (1ULL << 41), SSE41 = (1ULL << 51), SSE42 = (1ULL << 52), AESNI = (1ULL << 57), RDRAND = (1ULL << 62) }; if(flags0 & x86_CPUID_1_bits::RDTSC) features_detected |= CPUID::CPUID_RDTSC_BIT; if(flags0 & x86_CPUID_1_bits::SSE2) features_detected |= CPUID::CPUID_SSE2_BIT; if(flags0 & x86_CPUID_1_bits::CLMUL) features_detected |= CPUID::CPUID_CLMUL_BIT; if(flags0 & x86_CPUID_1_bits::SSSE3) features_detected |= CPUID::CPUID_SSSE3_BIT; if(flags0 & x86_CPUID_1_bits::SSE41) features_detected |= CPUID::CPUID_SSE41_BIT; if(flags0 & x86_CPUID_1_bits::SSE42) features_detected |= CPUID::CPUID_SSE42_BIT; if(flags0 & x86_CPUID_1_bits::AESNI) features_detected |= CPUID::CPUID_AESNI_BIT; if(flags0 & x86_CPUID_1_bits::RDRAND) features_detected |= CPUID::CPUID_RDRAND_BIT; } if(is_intel) { // Intel cache line size is in cpuid(1) output *cache_line_size = 8 * get_byte(2, cpuid[1]); } else if(is_amd) { // AMD puts it in vendor zone X86_CPUID(0x80000005, cpuid); *cache_line_size = get_byte(3, cpuid[2]); } if(max_supported_sublevel >= 7) { clear_mem(cpuid, 4); X86_CPUID_SUBLEVEL(7, 0, cpuid); enum x86_CPUID_7_bits : uint64_t { AVX2 = (1ULL << 5), BMI2 = (1ULL << 8), AVX512F = (1ULL << 16), RDSEED = (1ULL << 18), ADX = (1ULL << 19), SHA = (1ULL << 29), }; uint64_t flags7 = (static_cast(cpuid[2]) << 32) | cpuid[1]; if(flags7 & x86_CPUID_7_bits::AVX2) features_detected |= CPUID::CPUID_AVX2_BIT; if(flags7 & x86_CPUID_7_bits::BMI2) features_detected |= CPUID::CPUID_BMI2_BIT; if(flags7 & x86_CPUID_7_bits::AVX512F) features_detected |= CPUID::CPUID_AVX512F_BIT; if(flags7 & x86_CPUID_7_bits::RDSEED) features_detected |= CPUID::CPUID_RDSEED_BIT; if(flags7 & x86_CPUID_7_bits::ADX) features_detected |= CPUID::CPUID_ADX_BIT; if(flags7 & x86_CPUID_7_bits::SHA) features_detected |= CPUID::CPUID_SHA_BIT; } #undef X86_CPUID #undef X86_CPUID_SUBLEVEL /* * If we don't have access to CPUID, we can still safely assume that * any x86-64 processor has SSE2 and RDTSC */ #if defined(BOTAN_TARGET_ARCH_IS_X86_64) if(features_detected == 0) { features_detected |= CPUID::CPUID_SSE2_BIT; features_detected |= CPUID::CPUID_RDTSC_BIT; } #endif return features_detected; } #endif } bool CPUID::has_simd_32() { #if defined(BOTAN_TARGET_SUPPORTS_SSE2) return CPUID::has_sse2(); #elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC) return CPUID::has_altivec(); #elif defined(BOTAN_TARGET_SUPPORTS_NEON) return CPUID::has_neon(); #else return true; #endif } //static std::string CPUID::to_string() { std::vector flags; #define CPUID_PRINT(flag) do { if(has_##flag()) { flags.push_back(#flag); } } while(0) #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) CPUID_PRINT(sse2); CPUID_PRINT(ssse3); CPUID_PRINT(sse41); CPUID_PRINT(sse42); CPUID_PRINT(avx2); CPUID_PRINT(avx512f); CPUID_PRINT(rdtsc); CPUID_PRINT(bmi2); CPUID_PRINT(adx); CPUID_PRINT(aes_ni); CPUID_PRINT(clmul); CPUID_PRINT(rdrand); CPUID_PRINT(rdseed); CPUID_PRINT(intel_sha); #endif #if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) CPUID_PRINT(altivec); #endif #if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) CPUID_PRINT(neon); CPUID_PRINT(arm_sha1); CPUID_PRINT(arm_sha2); CPUID_PRINT(arm_aes); CPUID_PRINT(arm_pmull); #endif #undef CPUID_PRINT return string_join(flags, ' '); } //static void CPUID::print(std::ostream& o) { o << "CPUID flags: " << CPUID::to_string() << "\n"; } //static void CPUID::initialize() { g_processor_features = 0; #if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) || \ defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) || \ defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) g_processor_features = detect_cpu_features(&g_cache_line_size); #endif g_processor_features |= CPUID::CPUID_INITIALIZED_BIT; } //static CPUID::Endian_status CPUID::runtime_check_endian() { // Check runtime endian const uint32_t endian32 = 0x01234567; const uint8_t* e8 = reinterpret_cast(&endian32); Endian_status endian = ENDIAN_UNKNOWN; if(e8[0] == 0x01 && e8[1] == 0x23 && e8[2] == 0x45 && e8[3] == 0x67) { endian = ENDIAN_BIG; } else if(e8[0] == 0x67 && e8[1] == 0x45 && e8[2] == 0x23 && e8[3] == 0x01) { endian = ENDIAN_LITTLE; } else { throw Internal_Error("Unexpected endian at runtime, neither big nor little"); } // If we were compiled with a known endian, verify it matches at runtime #if defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN) BOTAN_ASSERT(endian == ENDIAN_LITTLE, "Build and runtime endian match"); #elif defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN) BOTAN_ASSERT(endian == ENDIAN_BIG, "Build and runtime endian match"); #endif return endian; } std::vector CPUID::bit_from_string(const std::string& tok) { #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) if(tok == "sse2" || tok == "simd") return {Botan::CPUID::CPUID_SSE2_BIT}; if(tok == "ssse3") return {Botan::CPUID::CPUID_SSSE3_BIT}; if(tok == "aesni") return {Botan::CPUID::CPUID_AESNI_BIT}; if(tok == "clmul") return {Botan::CPUID::CPUID_CLMUL_BIT}; if(tok == "avx2") return {Botan::CPUID::CPUID_AVX2_BIT}; if(tok == "sha") return {Botan::CPUID::CPUID_SHA_BIT}; #elif defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) if(tok == "altivec" || tok == "simd") return {Botan::CPUID::CPUID_ALTIVEC_BIT}; #elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) if(tok == "neon" || tok == "simd") return {Botan::CPUID::CPUID_ARM_NEON_BIT}; if(tok == "armv8sha1") return {Botan::CPUID::CPUID_ARM_SHA1_BIT}; if(tok == "armv8sha2") return {Botan::CPUID::CPUID_ARM_SHA2_BIT}; if(tok == "armv8aes") return {Botan::CPUID::CPUID_ARM_AES_BIT}; if(tok == "armv8pmull") return {Botan::CPUID::CPUID_ARM_PMULL_BIT}; #else BOTAN_UNUSED(tok); #endif return {}; } }