diff options
author | lloyd <[email protected]> | 2009-12-23 00:14:36 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2009-12-23 00:14:36 +0000 |
commit | db37d31b6878bf0dda2819fe87f373f99263d936 (patch) | |
tree | 904beccaa39a5884460fd54e52de0e79bb57a3dd /src | |
parent | 04ab1fd8c61a4ba6d02b86167c90320e43099f41 (diff) |
Add bswap_4, which byte-swaps four values of type T at once. The generic
version simply calls reverse_bytes on each element, but if SSE2 or SSSE3 is
available, a SIMD specialization byte-swaps four 32-bit values together.
Diffstat (limited to 'src')
-rw-r--r-- | src/utils/bswap.h | 50 |
1 files changed, 50 insertions, 0 deletions
// Patch to src/utils/bswap.h (index 1a5349fd0..96ec4982a 100644).
//
// First hunk (@@ -12,6 +12,14 @@): directly after the existing includes of
// <botan/types.h> and <botan/rotate.h>, and before `namespace Botan {`,
// the SIMD intrinsic headers are pulled in when the build enables them.

#if defined(BOTAN_TARGET_CPU_HAS_SSE2)
  #include <emmintrin.h>
#endif

#if defined(BOTAN_TARGET_CPU_HAS_SSSE3)
  #include <tmmintrin.h>
#endif

namespace Botan {

// Second hunk (@@ -66,6 +74,48 @@): the definitions below are appended after
// the existing u64bit overload of reverse_bytes, which the patch leaves
// unchanged and which is not repeated here.

/*
* Byte-swap four values in place.
*
* Generic version: applies reverse_bytes to each of the four elements of x
* in turn.
*/
template<typename T>
inline void bswap_4(T x[4])
   {
   for(unsigned int i = 0; i != 4; ++i)
      x[i] = reverse_bytes(x[i]);
   }

#if defined(BOTAN_TARGET_CPU_HAS_SSSE3)

/*
* SSSE3 specialization for four 32-bit words: one byte shuffle reverses the
* byte order inside every 32-bit lane of the 128-bit block.
*/
template<>
inline void bswap_4(u32bit x[4])
   {
   __m128i* const block = reinterpret_cast<__m128i*>(x);

   // _mm_set_epi8 lists bytes from most to least significant, so result
   // byte i is taken from source byte (i ^ 3): 0<-3, 1<-2, 2<-1, 3<-0, ...
   const __m128i reverse_each_word = _mm_set_epi8(
      12, 13, 14, 15,
       8,  9, 10, 11,
       4,  5,  6,  7,
       0,  1,  2,  3);

   // Unaligned load/store variants are used for the caller's array
   const __m128i input = _mm_loadu_si128(block);
   _mm_storeu_si128(block, _mm_shuffle_epi8(input, reverse_each_word));
   }

#elif defined(BOTAN_TARGET_CPU_HAS_SSE2)

/*
* SSE2 specialization for four 32-bit words. There is no byte shuffle here,
* so the reversal is done in two steps: swap the 16-bit halves of every
* 32-bit word, then swap the two bytes inside every 16-bit half.
*/
template<>
inline void bswap_4(u32bit x[4])
   {
   __m128i* const block = reinterpret_cast<__m128i*>(x);

   __m128i words = _mm_loadu_si128(block);

   // Step 1: exchange adjacent 16-bit elements (low and high 64 bits are
   // handled by separate instructions; the two shuffles are independent)
   words = _mm_shufflelo_epi16(words, _MM_SHUFFLE(2, 3, 0, 1));
   words = _mm_shufflehi_epi16(words, _MM_SHUFFLE(2, 3, 0, 1));

   // Step 2: exchange the two bytes within each 16-bit element
   const __m128i high_to_low = _mm_srli_epi16(words, 8);
   const __m128i low_to_high = _mm_slli_epi16(words, 8);

   _mm_storeu_si128(block, _mm_or_si128(high_to_low, low_to_high));
   }

#endif

}

// The header's closing include-guard #endif follows here, unchanged.