/* * Runtime CPU detection * (C) 2009,2010,2013,2017 Jack Lloyd * * Botan is released under the Simplified BSD License (see license.txt) */ #include #include #include #include #include #include #include #if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) /* * On Darwin and OpenBSD ppc, use sysctl to detect AltiVec */ #if defined(BOTAN_TARGET_OS_IS_DARWIN) #include #elif defined(BOTAN_TARGET_OS_IS_OPENBSD) #include #include #include #endif #elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) /* * On ARM, use getauxval if available, otherwise fall back to * running probe functions with a SIGILL handler. */ #if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL) #include #else #include #endif #elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) /* * On x86, use CPUID instruction */ #if defined(BOTAN_BUILD_COMPILER_IS_MSVC) #include #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL) #include #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG) #include #endif #endif namespace Botan { uint64_t CPUID::g_processor_features = 0; size_t CPUID::g_cache_line_size = BOTAN_TARGET_CPU_DEFAULT_CACHE_LINE_SIZE; bool CPUID::g_little_endian = false; namespace { #if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) /* * PowerPC specific block: check for AltiVec using either * sysctl or by reading processor version number register. */ uint64_t powerpc_detect_cpu_featutures() { #if defined(BOTAN_TARGET_OS_IS_DARWIN) || defined(BOTAN_TARGET_OS_IS_OPENBSD) // On Darwin/OS X and OpenBSD, use sysctl #if defined(BOTAN_TARGET_OS_IS_OPENBSD) int sels[2] = { CTL_MACHDEP, CPU_ALTIVEC }; #else // From Apple's docs int sels[2] = { CTL_HW, HW_VECTORUNIT }; #endif int vector_type = 0; size_t length = sizeof(vector_type); int error = sysctl(sels, 2, &vector_type, &length, NULL, 0); if(error == 0 && vector_type > 0) return (1ULL << CPUID::CPUID_ALTIVEC_BIT); #elif defined(BOTAN_TARGET_OS_IS_LINUX) || defined(BOTAN_TARGET_OS_IS_NETBSD) /* On PowerPC, MSR 287 is PVR, the Processor Version Number Normally it is only accessible to ring 0, but Linux and NetBSD (others, too, maybe?) will trap and emulate it for us. PVR identifiers for various AltiVec enabled CPUs. Taken from PearPC and Linux sources, mostly. */ uint32_t pvr = 0; // TODO: we could run inside SIGILL handler block asm volatile("mfspr %0, 287" : "=r" (pvr)); // Top 16 bit suffice to identify model pvr >>= 16; const uint16_t PVR_G4_7400 = 0x000C; const uint16_t PVR_G5_970 = 0x0039; const uint16_t PVR_G5_970FX = 0x003C; const uint16_t PVR_G5_970MP = 0x0044; const uint16_t PVR_G5_970GX = 0x0045; const uint16_t PVR_POWER6 = 0x003E; const uint16_t PVR_POWER7 = 0x003F; const uint16_t PVR_POWER8 = 0x004B; const uint16_t PVR_CELL_PPU = 0x0070; if(pvr == PVR_G4_7400 || pvr == PVR_G5_970 || pvr == PVR_G5_970FX || pvr == PVR_G5_970MP || pvr == PVR_G5_970GX || pvr == PVR_POWER6 || pvr == PVR_POWER7 || pvr == PVR_POWER8 || pvr == PVR_CELL_PPU) { return (1ULL << CPUID::CPUID_ALTIVEC_BIT); } #else #warning "No PowerPC feature detection available for this platform" #endif return 0; } #elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) uint64_t arm_detect_cpu_features(size_t* cache_line_size) { uint64_t detected_features = 0; *cache_line_size = BOTAN_TARGET_CPU_DEFAULT_CACHE_LINE_SIZE; #if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL) errno = 0; /* * On systems with getauxval these bits should normally be defined * in bits/auxv.h but some buggy? glibc installs seem to miss them. * These following values are all fixed, for the Linux ELF format, * so we just hardcode them in ARM_hwcap_bit enum. */ enum ARM_hwcap_bit { #if defined(BOTAN_TARGET_ARCH_IS_ARM32) NEON_bit = (1 << 12), AES_bit = (1 << 0), PMULL_bit = (1 << 1), SHA1_bit = (1 << 2), SHA2_bit = (1 << 3), ARCH_hwcap_neon = 16, // AT_HWCAP ARCH_hwcap_crypto = 26, // AT_HWCAP2 #elif defined(BOTAN_TARGET_ARCH_IS_ARM64) NEON_bit = (1 << 1), AES_bit = (1 << 3), PMULL_bit = (1 << 4), SHA1_bit = (1 << 5), SHA2_bit = (1 << 6), ARCH_hwcap_neon = 16, // AT_HWCAP ARCH_hwcap_crypto = 16, // AT_HWCAP #endif }; const unsigned long hwcap_neon = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_neon); if(hwcap_neon & ARM_hwcap_bit::NEON_bit) detected_features |= CPUID::CPUID_ARM_NEON_BIT; /* On aarch64 this ends up calling getauxval twice with AT_HWCAP It doesn't seem worth optimizing this out, since getauxval is just reading a field in the ELF header. */ const unsigned long hwcap_crypto = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_crypto); if(hwcap_crypto & ARM_hwcap_bit::AES_bit) detected_features |= CPUID::CPUID_ARM_AES_BIT; if(hwcap_crypto & ARM_hwcap_bit::PMULL_bit) detected_features |= CPUID::CPUID_ARM_PMULL_BIT; if(hwcap_crypto & ARM_hwcap_bit::SHA1_bit) detected_features |= CPUID::CPUID_ARM_SHA1_BIT; if(hwcap_crypto & ARM_hwcap_bit::SHA2_bit) detected_features |= CPUID::CPUID_ARM_SHA2_BIT; #if defined(AT_DCACHEBSIZE) const unsigned long dcache_line = ::getauxval(AT_DCACHEBSIZE); // plausibility check if(dcache_line == 32 || dcache_line == 64 || dcache_line == 128) *cache_line_size = static_cast(dcache_line); #endif #else // No getauxval API available, fall back on probe functions // TODO: probe functions #endif return detected_features; } #elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) uint64_t x86_detect_cpu_features(size_t* cache_line_size) { #if defined(BOTAN_BUILD_COMPILER_IS_MSVC) #define X86_CPUID(type, out) do { __cpuid((int*)out, type); } while(0) #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0) #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL) #define X86_CPUID(type, out) do { __cpuid(out, type); } while(0) #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0) #elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && defined(BOTAN_USE_GCC_INLINE_ASM) #define X86_CPUID(type, out) \ asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \ : "0" (type)) #define X86_CPUID_SUBLEVEL(type, level, out) \ asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \ : "0" (type), "2" (level)) #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG) #define X86_CPUID(type, out) do { __get_cpuid(type, out, out+1, out+2, out+3); } while(0) #define X86_CPUID_SUBLEVEL(type, level, out) \ do { __cpuid_count(type, level, out[0], out[1], out[2], out[3]); } while(0) #else #warning "No way of calling x86 cpuid instruction for this compiler" #define X86_CPUID(type, out) do { clear_mem(out, 4); } while(0) #define X86_CPUID_SUBLEVEL(type, level, out) do { clear_mem(out, 4); } while(0) #endif uint64_t features_detected = 0; uint32_t cpuid[4] = { 0 }; // CPUID 0: vendor identification, max sublevel X86_CPUID(0, cpuid); const uint32_t max_supported_sublevel = cpuid[0]; const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 }; const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 }; const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3); const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3); if(max_supported_sublevel >= 1) { // CPUID 1: feature bits X86_CPUID(1, cpuid); const uint64_t flags0 = (static_cast(cpuid[2]) << 32) | cpuid[3]; enum x86_CPUID_1_bits : uint64_t { RDTSC = (1ULL << 4), SSE2 = (1ULL << 26), CLMUL = (1ULL << 33), SSSE3 = (1ULL << 41), SSE41 = (1ULL << 51), SSE42 = (1ULL << 52), AESNI = (1ULL << 57), RDRAND = (1ULL << 62) }; if(flags0 & x86_CPUID_1_bits::RDTSC) features_detected |= CPUID::CPUID_RDTSC_BIT; if(flags0 & x86_CPUID_1_bits::SSE2) features_detected |= CPUID::CPUID_SSE2_BIT; if(flags0 & x86_CPUID_1_bits::CLMUL) features_detected |= CPUID::CPUID_CLMUL_BIT; if(flags0 & x86_CPUID_1_bits::SSSE3) features_detected |= CPUID::CPUID_SSSE3_BIT; if(flags0 & x86_CPUID_1_bits::SSE41) features_detected |= CPUID::CPUID_SSE41_BIT; if(flags0 & x86_CPUID_1_bits::SSE42) features_detected |= CPUID::CPUID_SSE42_BIT; if(flags0 & x86_CPUID_1_bits::AESNI) features_detected |= CPUID::CPUID_AESNI_BIT; if(flags0 & x86_CPUID_1_bits::RDRAND) features_detected |= CPUID::CPUID_RDRAND_BIT; } if(is_intel) { // Intel cache line size is in cpuid(1) output *cache_line_size = 8 * get_byte(2, cpuid[1]); } else if(is_amd) { // AMD puts it in vendor zone X86_CPUID(0x80000005, cpuid); *cache_line_size = get_byte(3, cpuid[2]); } if(max_supported_sublevel >= 7) { clear_mem(cpuid, 4); X86_CPUID_SUBLEVEL(7, 0, cpuid); enum x86_CPUID_7_bits : uint64_t { AVX2 = (1ULL << 5), BMI2 = (1ULL << 8), AVX512F = (1ULL << 16), RDSEED = (1ULL << 18), ADX = (1ULL << 19), SHA = (1ULL << 29), }; uint64_t flags7 = (static_cast(cpuid[2]) << 32) | cpuid[1]; if(flags7 & x86_CPUID_7_bits::AVX2) features_detected |= CPUID::CPUID_AVX2_BIT; if(flags7 & x86_CPUID_7_bits::BMI2) features_detected |= CPUID::CPUID_BMI2_BIT; if(flags7 & x86_CPUID_7_bits::AVX512F) features_detected |= CPUID::CPUID_AVX512F_BIT; if(flags7 & x86_CPUID_7_bits::RDSEED) features_detected |= CPUID::CPUID_RDSEED_BIT; if(flags7 & x86_CPUID_7_bits::ADX) features_detected |= CPUID::CPUID_ADX_BIT; if(flags7 & x86_CPUID_7_bits::SHA) features_detected |= CPUID::CPUID_SHA_BIT; } #undef X86_CPUID #undef X86_CPUID_SUBLEVEL /* * If we don't have access to CPUID, we can still safely assume that * any x86-64 processor has SSE2 and RDTSC */ #if defined(BOTAN_TARGET_ARCH_IS_X86_64) if(features_detected == 0) { features_detected |= CPUID::CPUID_SSE2_BIT; features_detected |= CPUID::CPUID_RDTSC_BIT; } #endif return features_detected; } #endif } bool CPUID::has_simd_32() { #if defined(BOTAN_TARGET_SUPPORTS_SSE2) return CPUID::has_sse2(); #elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC) return CPUID::has_altivec(); #elif defined(BOTAN_TARGET_SUPPORTS_NEON) return CPUID::has_neon(); #else return true; #endif } //static std::string CPUID::to_string() { std::vector flags; #define CPUID_PRINT(flag) do { if(has_##flag()) { flags.push_back(#flag); } } while(0) #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) CPUID_PRINT(sse2); CPUID_PRINT(ssse3); CPUID_PRINT(sse41); CPUID_PRINT(sse42); CPUID_PRINT(avx2); CPUID_PRINT(avx512f); CPUID_PRINT(rdtsc); CPUID_PRINT(bmi2); CPUID_PRINT(adx); CPUID_PRINT(aes_ni); CPUID_PRINT(clmul); CPUID_PRINT(rdrand); CPUID_PRINT(rdseed); CPUID_PRINT(intel_sha); #endif #if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) CPUID_PRINT(altivec); #endif #if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) CPUID_PRINT(neon); CPUID_PRINT(arm_sha1); CPUID_PRINT(arm_sha2); CPUID_PRINT(arm_aes); CPUID_PRINT(arm_pmull); #endif #undef CPUID_PRINT return string_join(flags, ' '); } //static void CPUID::print(std::ostream& o) { o << "CPUID flags: " << CPUID::to_string() << "\n"; } void CPUID::initialize() { g_processor_features = 0; #if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY) g_processor_features = powerpc_detect_cpu_featutures(); #elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) g_processor_features = arm_detect_cpu_features(&g_cache_line_size); #elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) g_processor_features = x86_detect_cpu_features(&g_cache_line_size); #endif g_processor_features |= CPUID::CPUID_INITIALIZED_BIT; // Check runtime endian const uint32_t endian32 = 0x01234567; const uint8_t* e8 = reinterpret_cast(&endian32); if(e8[0] == 0x01 && e8[1] == 0x23 && e8[2] == 0x45 && e8[3] == 0x67) { g_little_endian = false; } else if(e8[0] == 0x67 && e8[1] == 0x45 && e8[2] == 0x23 && e8[3] == 0x01) { g_little_endian = true; } else { throw Internal_Error("Unexpected endian at runtime, neither big nor little"); } // If we were compiled with a known endian, verify it matches at runtime #if defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN) BOTAN_ASSERT(g_little_endian == true, "Build and runtime endian match"); #elif defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN) BOTAN_ASSERT(g_little_endian == false, "Build and runtime endian match"); #endif } }