Diffstat (limited to 'src/lib/utils/bswap.h')
-rw-r--r--  src/lib/utils/bswap.h  142
1 file changed, 142 insertions(+), 0 deletions(-)
diff --git a/src/lib/utils/bswap.h b/src/lib/utils/bswap.h
new file mode 100644
index 000000000..9d2c9bc28
--- /dev/null
+++ b/src/lib/utils/bswap.h
@@ -0,0 +1,142 @@
+/*
+* Byte Swapping Operations
+* (C) 1999-2011 Jack Lloyd
+* (C) 2007 Yves Jerschow
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_BYTE_SWAP_H__
+#define BOTAN_BYTE_SWAP_H__
+
+#include <botan/types.h>
+#include <botan/rotate.h>
+
+#if defined(BOTAN_TARGET_CPU_HAS_SSE2) && !defined(BOTAN_NO_SSE_INTRINSICS)
+ #include <emmintrin.h>
+#endif
+
+namespace Botan {
+
+/**
+* Swap a 16-bit integer
+*/
+inline u16bit reverse_bytes(u16bit val)
+ {
+ return rotate_left(val, 8);
+ }
+
+/**
+* Swap a 32-bit integer
+*/
+inline u32bit reverse_bytes(u32bit val)
+ {
+#if BOTAN_GCC_VERSION >= 430 && !defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
+ /*
+ GCC intrinsic added in 4.3, works for a number of CPUs
+
+   However, avoid it under ARM, where it branches to a function in
+   libgcc instead of generating inline asm, making it slower than
+   even the generic rotate version below.
+ */
+ return __builtin_bswap32(val);
+
+#elif BOTAN_USE_GCC_INLINE_ASM && defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
+
+ // GCC-style inline assembly for x86 or x86-64
+ asm("bswapl %0" : "=r" (val) : "0" (val));
+ return val;
+
+#elif BOTAN_USE_GCC_INLINE_ASM && defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
+
+ asm ("eor r3, %1, %1, ror #16\n\t"
+ "bic r3, r3, #0x00FF0000\n\t"
+ "mov %0, %1, ror #8\n\t"
+ "eor %0, %0, r3, lsr #8"
+ : "=r" (val)
+ : "0" (val)
+ : "r3", "cc");
+
+ return val;
+
+#elif defined(_MSC_VER) && defined(BOTAN_TARGET_ARCH_IS_X86_32)
+
+   // Visual C++ inline asm for 32-bit x86, by Yves Jerschow (result is returned in EAX)
+   __asm mov eax, val;
+   __asm bswap eax;
+
+#else
+
+ // Generic implementation
+ return (rotate_right(val, 8) & 0xFF00FF00) |
+ (rotate_left (val, 8) & 0x00FF00FF);
+
+#endif
+ }
+
+/**
+* Swap a 64-bit integer
+*/
+inline u64bit reverse_bytes(u64bit val)
+ {
+#if BOTAN_GCC_VERSION >= 430
+
+ // GCC intrinsic added in 4.3, works for a number of CPUs
+ return __builtin_bswap64(val);
+
+#elif BOTAN_USE_GCC_INLINE_ASM && defined(BOTAN_TARGET_ARCH_IS_X86_64)
+ // GCC-style inline assembly for x86-64
+ asm("bswapq %0" : "=r" (val) : "0" (val));
+ return val;
+
+#else
+ /* Generic implementation. Defined in terms of 32-bit bswap so any
+ * optimizations in that version can help here (particularly
+ * useful for 32-bit x86).
+ */
+
+ u32bit hi = static_cast<u32bit>(val >> 32);
+ u32bit lo = static_cast<u32bit>(val);
+
+ hi = reverse_bytes(hi);
+ lo = reverse_bytes(lo);
+
+ return (static_cast<u64bit>(lo) << 32) | hi;
+#endif
+ }
+
+/**
+* Swap 4 Ts in an array
+*/
+template<typename T>
+inline void bswap_4(T x[4])
+ {
+ x[0] = reverse_bytes(x[0]);
+ x[1] = reverse_bytes(x[1]);
+ x[2] = reverse_bytes(x[2]);
+ x[3] = reverse_bytes(x[3]);
+ }
+
+#if defined(BOTAN_TARGET_CPU_HAS_SSE2) && !defined(BOTAN_NO_SSE_INTRINSICS)
+
+/**
+* Swap 4 u32bits in an array using SSE2 shuffle and shift instructions
+*/
+template<>
+inline void bswap_4(u32bit x[4])
+ {
+ __m128i T = _mm_loadu_si128(reinterpret_cast<const __m128i*>(x));
+
+   T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1)); // swap the 16-bit halves
+   T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1)); // of each 32-bit word
+
+   T = _mm_or_si128(_mm_srli_epi16(T, 8), _mm_slli_epi16(T, 8)); // swap bytes in each 16-bit half
+
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(x), T);
+ }
+
+#endif
+
+}
+
+#endif
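
As a sanity check on the generic fallback paths in this header, here is a small
standalone sketch that mirrors them outside the library. It is hypothetical test
code, not part of this commit: it substitutes <cstdint> types for Botan's
u16bit/u32bit/u64bit and inlines minimal rotate helpers in place of
botan/rotate.h, but the swap logic is the same rotate-and-mask construction used
in the generic #else branches above.

// Standalone sketch of the generic byte-swap fallbacks (hypothetical
// harness; <cstdint> types stand in for Botan's u16bit/u32bit/u64bit)
#include <cstdint>
#include <cassert>

inline uint16_t rotl16(uint16_t x, int n)
   { return static_cast<uint16_t>((x << n) | (x >> (16 - n))); }
inline uint32_t rotl32(uint32_t x, int n)
   { return (x << n) | (x >> (32 - n)); }
inline uint32_t rotr32(uint32_t x, int n)
   { return (x >> n) | (x << (32 - n)); }

inline uint16_t reverse_bytes(uint16_t val)
   {
   return rotl16(val, 8); // a single 8-bit rotation swaps both bytes
   }

inline uint32_t reverse_bytes(uint32_t val)
   {
   // Each rotation moves two of the four bytes into their mirrored
   // positions; the masks keep exactly those two bytes from each.
   return (rotr32(val, 8) & 0xFF00FF00) |
          (rotl32(val, 8) & 0x00FF00FF);
   }

inline uint64_t reverse_bytes(uint64_t val)
   {
   // Swap each 32-bit half, then exchange the halves
   const uint32_t hi = static_cast<uint32_t>(val >> 32);
   const uint32_t lo = static_cast<uint32_t>(val);
   return (static_cast<uint64_t>(reverse_bytes(lo)) << 32) | reverse_bytes(hi);
   }

int main()
   {
   assert(reverse_bytes(static_cast<uint16_t>(0x0123)) == 0x2301);
   assert(reverse_bytes(UINT32_C(0x01234567)) == UINT32_C(0x67452301));
   assert(reverse_bytes(UINT64_C(0x0123456789ABCDEF)) ==
          UINT64_C(0xEFCDAB8967452301));
   return 0;
   }

The intrinsic, inline-asm, and SSE2 paths must produce these same values, so the
same expected constants can be used against the real header to confirm that
every branch agrees with the generic one.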