author    Jack Lloyd <[email protected]>    2017-10-24 20:56:54 -0400
committer Jack Lloyd <[email protected]>    2017-10-24 20:56:54 -0400
commit    710daea322295bd8901ec3d2a03447985a89f39d (patch)
tree      d1b3b8dc2e4af8a4b592b21719a36ec1399f9ec2
parent    4bff25a393c771618f0d14428adbd40e37625d2e (diff)
Skip ARM32 specific byteswap code, enable MSVC byteswap intrinsics
While older versions of GCC did very badly with __builtin_bswap on ARM, I checked GCC 4.8 and it behaves correctly, emitting either rev or else the same optimal sequence as was used in the inline asm (depending on whether ARMv7 is enabled or not).

Enable MSVC byteswap intrinsics, which (hopefully) work on all platforms.

Drop the x86-32 specific asm for byteswap.
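For reference, a minimal standalone sketch of the dispatch this change moves to: the GCC/Clang builtin first, the MSVC intrinsic second, and a rotate-based fallback otherwise. This is only an illustration, not the Botan source; the function name bswap32_sketch is made up here.

    #include <stdint.h>
    #include <stdlib.h>   // MSVC declares _byteswap_ulong here

    inline uint32_t bswap32_sketch(uint32_t val)
       {
    #if defined(__GNUC__) || defined(__clang__)
       // GCC >= 4.8 and Clang lower this to rev (ARM) or bswap (x86)
       return __builtin_bswap32(val);
    #elif defined(_MSC_VER)
       return _byteswap_ulong(val);
    #else
       // Generic fallback: rotate right/left by 8 and mask, the same idea
       // as the rotr<8>/rotl<8> form in bswap.h
       const uint32_t ror8 = (val >> 8) | (val << 24);
       const uint32_t rol8 = (val << 8) | (val >> 24);
       return (ror8 & 0xFF00FF00) | (rol8 & 0x00FF00FF);
    #endif
       }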
-rw-r--r--   src/lib/utils/bit_ops.h    2
-rw-r--r--   src/lib/utils/bswap.h     47
2 files changed, 15 insertions, 34 deletions
diff --git a/src/lib/utils/bit_ops.h b/src/lib/utils/bit_ops.h
index 2da0e55fb..aa41db391 100644
--- a/src/lib/utils/bit_ops.h
+++ b/src/lib/utils/bit_ops.h
@@ -102,7 +102,7 @@ inline size_t ctz(T n)
return 8*sizeof(T);
}
-#if defined(BOTAN_BUILD_COMPILER_IS_GCC)
+#if defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
template<>
inline size_t ctz(uint32_t n)
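The body of this specialization is cut off in the hunk; purely as an illustration of what the widened #if guard enables, a ctz specialization of this shape typically uses the GCC/Clang builtin (which is undefined for zero, hence the explicit check). This sketch is hypothetical, not the exact Botan body.

    // Illustrative sketch only
    template<>
    inline size_t ctz(uint32_t n)
       {
       if(n == 0)
          return 8*sizeof(uint32_t);
       return __builtin_ctz(n);
       }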
diff --git a/src/lib/utils/bswap.h b/src/lib/utils/bswap.h
index c1aa8b594..8d5731f1b 100644
--- a/src/lib/utils/bswap.h
+++ b/src/lib/utils/bswap.h
@@ -12,6 +12,10 @@
#include <botan/types.h>
#include <botan/rotate.h>
+#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
+ #include <stdlib.h>
+#endif
+
namespace Botan {
/**
@@ -27,45 +31,22 @@ inline uint16_t reverse_bytes(uint16_t val)
*/
inline uint32_t reverse_bytes(uint32_t val)
{
-#if BOTAN_GCC_VERSION >= 430 && !defined(BOTAN_TARGET_ARCH_IS_ARM32)
- /*
- GCC intrinsic added in 4.3, works for a number of CPUs
-
- However avoid under ARM, as it branches to a function in libgcc
- instead of generating inline asm, so slower even than the generic
- rotate version below.
- */
+#if defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
return __builtin_bswap32(val);
+#elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
+ return _byteswap_ulong(val);
+
#elif defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
// GCC-style inline assembly for x86 or x86-64
asm("bswapl %0" : "=r" (val) : "0" (val));
return val;
-#elif defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_ARM32)
-
- asm ("eor r3, %1, %1, ror #16\n\t"
- "bic r3, r3, #0x00FF0000\n\t"
- "mov %0, %1, ror #8\n\t"
- "eor %0, %0, r3, lsr #8"
- : "=r" (val)
- : "0" (val)
- : "r3", "cc");
-
- return val;
-
-#elif defined(_MSC_VER) && defined(BOTAN_TARGET_ARCH_IS_X86_32)
-
- // Visual C++ inline asm for 32-bit x86, by Yves Jerschow
- __asm mov eax, val;
- __asm bswap eax;
-
#else
// Generic implementation
- return (rotr<8>(val) & 0xFF00FF00) |
- (rotl<8>(val) & 0x00FF00FF);
+ return (rotr<8>(val) & 0xFF00FF00) | (rotl<8>(val) & 0x00FF00FF);
#endif
}
@@ -75,11 +56,12 @@ inline uint32_t reverse_bytes(uint32_t val)
*/
inline uint64_t reverse_bytes(uint64_t val)
{
-#if BOTAN_GCC_VERSION >= 430
-
- // GCC intrinsic added in 4.3, works for a number of CPUs
+#if defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
return __builtin_bswap64(val);
+#elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
+ return _byteswap_uint64(val);
+
#elif defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_X86_64)
// GCC-style inline assembly for x86-64
asm("bswapq %0" : "=r" (val) : "0" (val));
@@ -87,8 +69,7 @@ inline uint64_t reverse_bytes(uint64_t val)
#else
/* Generic implementation. Defined in terms of 32-bit bswap so any
- * optimizations in that version can help here (particularly
- * useful for 32-bit x86).
+ * optimizations in that version can help.
*/
uint32_t hi = static_cast<uint32_t>(val >> 32);
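The hunk is cut off here. As an illustration of the approach the comment describes (not the literal continuation of the diff), a 64-bit swap built from a 32-bit one reverses each half and then exchanges the halves; using the hypothetical bswap32_sketch from earlier:

    // Illustrative sketch: 64-bit byteswap via two 32-bit swaps
    inline uint64_t bswap64_sketch(uint64_t val)
       {
       uint32_t hi = static_cast<uint32_t>(val >> 32);
       uint32_t lo = static_cast<uint32_t>(val);

       hi = bswap32_sketch(hi);   // swap bytes within each half...
       lo = bswap32_sketch(lo);

       // ...then exchange the halves: the old low word becomes the new high word
       return (static_cast<uint64_t>(lo) << 32) | hi;
       }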