aboutsummaryrefslogtreecommitdiffstats
path: root/src/utils
diff options
context:
space:
mode:
author: lloyd <[email protected]> 2009-12-23 00:14:36 +0000
committer: lloyd <[email protected]> 2009-12-23 00:14:36 +0000
commit: db37d31b6878bf0dda2819fe87f373f99263d936 (patch)
tree: 904beccaa39a5884460fd54e52de0e79bb57a3dd /src/utils
parent: 04ab1fd8c61a4ba6d02b86167c90320e43099f41 (diff)
Add bswap_4, which byte-swaps four values of type T in place. The generic version just calls
reverse_bytes on each element, but if SSE2 or SSSE3 is available a SIMD specialization swaps four 32-bit values at once.
Diffstat (limited to 'src/utils')
-rw-r--r-- src/utils/bswap.h | 50
1 file changed, 50 insertions, 0 deletions
diff --git a/src/utils/bswap.h b/src/utils/bswap.h
index 1a5349fd0..96ec4982a 100644
--- a/src/utils/bswap.h
+++ b/src/utils/bswap.h
@@ -12,6 +12,14 @@
#include <botan/types.h>
#include <botan/rotate.h>
+#if defined(BOTAN_TARGET_CPU_HAS_SSE2)
+ #include <emmintrin.h>
+#endif
+
+#if defined(BOTAN_TARGET_CPU_HAS_SSSE3)
+ #include <tmmintrin.h>
+#endif
+
namespace Botan {
/*
@@ -66,6 +74,48 @@ inline u64bit reverse_bytes(u64bit input)
#endif
}
+template<typename T> // Generic bswap_4: applies reverse_bytes to each of x[0]..x[3] in place
+inline void bswap_4(T x[4]) // x: four values of type T; a reverse_bytes overload accepting T must be visible
+ {
+ x[0] = reverse_bytes(x[0]);
+ x[1] = reverse_bytes(x[1]);
+ x[2] = reverse_bytes(x[2]);
+ x[3] = reverse_bytes(x[3]); // elements are handled independently; nothing is returned, x is overwritten
+ }
+
+#if defined(BOTAN_TARGET_CPU_HAS_SSSE3)
+ // SSSE3 specialization for u32bit: a single 16-byte shuffle reorders the bytes of all four words in place
+template<>
+inline void bswap_4(u32bit x[4])
+ {
+ const __m128i bswap_mask = _mm_set_epi8( // _mm_set_epi8 takes its arguments from byte 15 down to byte 0
+ 12, 13, 14, 15, // result bytes 15..12 come from source bytes 12..15
+ 8, 9, 10, 11, // the same reversal is repeated for every group of four bytes
+ 4, 5, 6, 7,
+ 0, 1, 2, 3);
+ // 128-bit unaligned load: reads all four elements of x (assumes u32bit is 32 bits wide)
+ __m128i T = _mm_loadu_si128((const __m128i*)x);
+ T = _mm_shuffle_epi8(T, bswap_mask); // each result byte is the source byte selected by the matching mask entry
+ _mm_storeu_si128((__m128i*)x, T); // unaligned store back over the input
+ }
+
+#elif defined(BOTAN_TARGET_CPU_HAS_SSE2)
+ // SSE2 specialization for u32bit, compiled only when the SSSE3 macro is not defined: uses 16-bit shuffles and shifts
+template<>
+inline void bswap_4(u32bit x[4])
+ {
+ __m128i T = _mm_loadu_si128((const __m128i*)x); // 128-bit unaligned load of all four elements
+ // Step 1: exchange neighbouring 16-bit elements (0<->1, 2<->3) in the upper 64 bits, then in the lower 64 bits
+ T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
+ T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
+ // Step 2: exchange the two bytes inside every 16-bit element by OR-ing a right shift by 8 with a left shift by 8
+ T = _mm_or_si128(_mm_srli_epi16(T, 8), _mm_slli_epi16(T, 8));
+ // Steps 1 and 2 together reverse the four bytes of each 32-bit group; write the result back over the input
+ _mm_storeu_si128((__m128i*)x, T);
+ }
+
+#endif
+
}
#endif