diff options
author | Jack Lloyd <[email protected]> | 2017-10-24 20:56:54 -0400 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2017-10-24 20:56:54 -0400 |
commit | 710daea322295bd8901ec3d2a03447985a89f39d (patch) | |
tree | d1b3b8dc2e4af8a4b592b21719a36ec1399f9ec2 | |
parent | 4bff25a393c771618f0d14428adbd40e37625d2e (diff) |
Skip ARM32-specific byteswap code, enable MSVC byteswap intrinsics
While older versions of GCC did very badly with __builtin_bswap on
ARM, I checked GCC 4.8 and it behaves correctly, emitting either rev
or else the same optimal sequence as was used in the inline asm
(depending on whether ARMv7 is enabled).
Enable MSVC byteswap intrinsics, which (hopefully) work on all platforms.
Drop the x86-32-specific Visual C++ inline asm for byteswap (the GCC-style x86 inline asm is kept as a fallback).
-rw-r--r-- | src/lib/utils/bit_ops.h | 2 | ||||
-rw-r--r-- | src/lib/utils/bswap.h | 47 |
2 files changed, 15 insertions, 34 deletions
diff --git a/src/lib/utils/bit_ops.h b/src/lib/utils/bit_ops.h index 2da0e55fb..aa41db391 100644 --- a/src/lib/utils/bit_ops.h +++ b/src/lib/utils/bit_ops.h @@ -102,7 +102,7 @@ inline size_t ctz(T n) return 8*sizeof(T); } -#if defined(BOTAN_BUILD_COMPILER_IS_GCC) +#if defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG) template<> inline size_t ctz(uint32_t n) diff --git a/src/lib/utils/bswap.h b/src/lib/utils/bswap.h index c1aa8b594..8d5731f1b 100644 --- a/src/lib/utils/bswap.h +++ b/src/lib/utils/bswap.h @@ -12,6 +12,10 @@ #include <botan/types.h> #include <botan/rotate.h> +#if defined(BOTAN_BUILD_COMPILER_IS_MSVC) + #include <stdlib.h> +#endif + namespace Botan { /** @@ -27,45 +31,22 @@ inline uint16_t reverse_bytes(uint16_t val) */ inline uint32_t reverse_bytes(uint32_t val) { -#if BOTAN_GCC_VERSION >= 430 && !defined(BOTAN_TARGET_ARCH_IS_ARM32) - /* - GCC intrinsic added in 4.3, works for a number of CPUs - - However avoid under ARM, as it branches to a function in libgcc - instead of generating inline asm, so slower even than the generic - rotate version below. 
- */ +#if defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG) return __builtin_bswap32(val); +#elif defined(BOTAN_BUILD_COMPILER_IS_MSVC) + return _byteswap_ulong(val); + #elif defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) // GCC-style inline assembly for x86 or x86-64 asm("bswapl %0" : "=r" (val) : "0" (val)); return val; -#elif defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_ARM32) - - asm ("eor r3, %1, %1, ror #16\n\t" - "bic r3, r3, #0x00FF0000\n\t" - "mov %0, %1, ror #8\n\t" - "eor %0, %0, r3, lsr #8" - : "=r" (val) - : "0" (val) - : "r3", "cc"); - - return val; - -#elif defined(_MSC_VER) && defined(BOTAN_TARGET_ARCH_IS_X86_32) - - // Visual C++ inline asm for 32-bit x86, by Yves Jerschow - __asm mov eax, val; - __asm bswap eax; - #else // Generic implementation - return (rotr<8>(val) & 0xFF00FF00) | - (rotl<8>(val) & 0x00FF00FF); + return (rotr<8>(val) & 0xFF00FF00) | (rotl<8>(val) & 0x00FF00FF); #endif } @@ -75,11 +56,12 @@ inline uint32_t reverse_bytes(uint32_t val) */ inline uint64_t reverse_bytes(uint64_t val) { -#if BOTAN_GCC_VERSION >= 430 - - // GCC intrinsic added in 4.3, works for a number of CPUs +#if defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG) return __builtin_bswap64(val); +#elif defined(BOTAN_BUILD_COMPILER_IS_MSVC) + return _byteswap_uint64(val); + #elif defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_X86_64) // GCC-style inline assembly for x86-64 asm("bswapq %0" : "=r" (val) : "0" (val)); @@ -87,8 +69,7 @@ inline uint64_t reverse_bytes(uint64_t val) #else /* Generic implementation. Defined in terms of 32-bit bswap so any - * optimizations in that version can help here (particularly - * useful for 32-bit x86). + * optimizations in that version can help. */ uint32_t hi = static_cast<uint32_t>(val >> 32); |