diff options
Diffstat (limited to 'src/lib/utils/bswap.h')
-rw-r--r-- | src/lib/utils/bswap.h | 142 |
1 files changed, 142 insertions, 0 deletions
diff --git a/src/lib/utils/bswap.h b/src/lib/utils/bswap.h new file mode 100644 index 000000000..9d2c9bc28 --- /dev/null +++ b/src/lib/utils/bswap.h @@ -0,0 +1,142 @@ +/* +* Byte Swapping Operations +* (C) 1999-2011 Jack Lloyd +* (C) 2007 Yves Jerschow +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_BYTE_SWAP_H__ +#define BOTAN_BYTE_SWAP_H__ + +#include <botan/types.h> +#include <botan/rotate.h> + +#if defined(BOTAN_TARGET_CPU_HAS_SSE2) && !defined(BOTAN_NO_SSE_INTRINSICS) + #include <emmintrin.h> +#endif + +namespace Botan { + +/** +* Swap a 16 bit integer +*/ +inline u16bit reverse_bytes(u16bit val) + { + return rotate_left(val, 8); + } + +/** +* Swap a 32 bit integer +*/ +inline u32bit reverse_bytes(u32bit val) + { +#if BOTAN_GCC_VERSION >= 430 && !defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) + /* + GCC intrinsic added in 4.3, works for a number of CPUs + + However avoid under ARM, as it branches to a function in libgcc + instead of generating inline asm, so slower even than the generic + rotate version below. + */ + return __builtin_bswap32(val); + +#elif BOTAN_USE_GCC_INLINE_ASM && defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) + + // GCC-style inline assembly for x86 or x86-64 + asm("bswapl %0" : "=r" (val) : "0" (val)); + return val; + +#elif BOTAN_USE_GCC_INLINE_ASM && defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) + + asm ("eor r3, %1, %1, ror #16\n\t" + "bic r3, r3, #0x00FF0000\n\t" + "mov %0, %1, ror #8\n\t" + "eor %0, %0, r3, lsr #8" + : "=r" (val) + : "0" (val) + : "r3", "cc"); + + return val; + +#elif defined(_MSC_VER) && defined(BOTAN_TARGET_ARCH_IS_X86_32) + + // Visual C++ inline asm for 32-bit x86, by Yves Jerschow + __asm mov eax, val; + __asm bswap eax; + +#else + + // Generic implementation + return (rotate_right(val, 8) & 0xFF00FF00) | + (rotate_left (val, 8) & 0x00FF00FF); + +#endif + } + +/** +* Swap a 64 bit integer +*/ +inline u64bit reverse_bytes(u64bit val) + { +#if BOTAN_GCC_VERSION >= 430 + + // GCC intrinsic added in 4.3, works for a number of CPUs + return __builtin_bswap64(val); + +#elif BOTAN_USE_GCC_INLINE_ASM && defined(BOTAN_TARGET_ARCH_IS_X86_64) + // GCC-style inline assembly for x86-64 + asm("bswapq %0" : "=r" (val) : "0" (val)); + return val; + +#else + /* Generic implementation. Defined in terms of 32-bit bswap so any + * optimizations in that version can help here (particularly + * useful for 32-bit x86). + */ + + u32bit hi = static_cast<u32bit>(val >> 32); + u32bit lo = static_cast<u32bit>(val); + + hi = reverse_bytes(hi); + lo = reverse_bytes(lo); + + return (static_cast<u64bit>(lo) << 32) | hi; +#endif + } + +/** +* Swap 4 Ts in an array +*/ +template<typename T> +inline void bswap_4(T x[4]) + { + x[0] = reverse_bytes(x[0]); + x[1] = reverse_bytes(x[1]); + x[2] = reverse_bytes(x[2]); + x[3] = reverse_bytes(x[3]); + } + +#if defined(BOTAN_TARGET_CPU_HAS_SSE2) && !defined(BOTAN_NO_SSE_INTRINSICS) + +/** +* Swap 4 u32bits in an array using SSE2 shuffle instructions +*/ +template<> +inline void bswap_4(u32bit x[4]) + { + __m128i T = _mm_loadu_si128(reinterpret_cast<const __m128i*>(x)); + + T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1)); + T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1)); + + T = _mm_or_si128(_mm_srli_epi16(T, 8), _mm_slli_epi16(T, 8)); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(x), T); + } + +#endif + +} + +#endif |