aboutsummaryrefslogtreecommitdiffstats
path: root/src/block/serpent_sse2
diff options
context:
space:
mode:
Diffstat (limited to 'src/block/serpent_sse2')
-rw-r--r--src/block/serpent_sse2/info.txt27
-rw-r--r--src/block/serpent_sse2/serp_sse2.cpp170
-rw-r--r--src/block/serpent_sse2/serp_sse2.h29
3 files changed, 226 insertions, 0 deletions
diff --git a/src/block/serpent_sse2/info.txt b/src/block/serpent_sse2/info.txt
new file mode 100644
index 000000000..ad8323f53
--- /dev/null
+++ b/src/block/serpent_sse2/info.txt
@@ -0,0 +1,27 @@
+realname "Serpent (SSE2)"
+
+define SERPENT_SSE2
+
+load_on auto
+
+<add>
+serp_sse2.cpp
+serp_sse2.h
+</add>
+
+<arch>
+pentium-m
+pentium4
+prescott
+amd64
+</arch>
+
+<cc>
+gcc
+icc
+</cc>
+
+<requires>
+serpent
+sse2_eng
+</requires>
diff --git a/src/block/serpent_sse2/serp_sse2.cpp b/src/block/serpent_sse2/serp_sse2.cpp
new file mode 100644
index 000000000..759f3e1d6
--- /dev/null
+++ b/src/block/serpent_sse2/serp_sse2.cpp
@@ -0,0 +1,170 @@
+/*
+* Serpent (SSE2)
+* (C) 2009 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#include <botan/serp_sse2.h>
+#include <botan/loadstor.h>
+#include <emmintrin.h>
+
+namespace Botan {
+
+namespace {
+
+#define SBoxE1(b0, b1, b2, b3, b4) \
+ do { \
+ b3 = _mm_xor_si128(b3, b0); \
+ b4 = b1; \
+ b1 = _mm_and_si128(b1, b3); \
+ b4 = _mm_xor_si128(b4, b2); \
+ b1 = _mm_xor_si128(b1, b0); \
+ b0 = _mm_or_si128(b0, b3); \
+ b0 = _mm_xor_si128(b0, b4); \
+ b4 = _mm_xor_si128(b4, b3); \
+ b3 = _mm_xor_si128(b3, b2); \
+ b2 = _mm_or_si128(b2, b1); \
+ b2 = _mm_xor_si128(b2, b4); \
+ b4 = _mm_andnot_si128(b4, all_ones); \
+ b4 = _mm_or_si128(b4, b1); \
+ b1 = _mm_xor_si128(b1, b3); \
+ b1 = _mm_xor_si128(b1, b4); \
+ b3 = _mm_or_si128(b3, b0); \
+ b1 = _mm_xor_si128(b1, b3); \
+ b4 = _mm_xor_si128(b4, b3); \
+ b3 = b0; b0 = b1; b1 = b4; \
+ } while(0);
+
+#define rotate_left_m128(vec, rot) \
+ _mm_or_si128(_mm_slli_epi32(vec, rot), _mm_srli_epi32(vec, 32-rot))
+
+#define key_xor(round, b0, b1, b2, b3) \
+ do { \
+ __m128i key = _mm_loadu_si128(keys + round); \
+ b0 = _mm_xor_si128(b0, _mm_shuffle_epi32(key, _MM_SHUFFLE(0,0,0,0))); \
+ b1 = _mm_xor_si128(b1, _mm_shuffle_epi32(key, _MM_SHUFFLE(1,1,1,1))); \
+ b2 = _mm_xor_si128(b2, _mm_shuffle_epi32(key, _MM_SHUFFLE(2,2,2,2))); \
+ b3 = _mm_xor_si128(b3, _mm_shuffle_epi32(key, _MM_SHUFFLE(3,3,3,3))); \
+ } while(0);
+
+#define transform(b0, b1, b2, b3) \
+ do \
+ { \
+ b0 = rotate_left_m128(b0, 13); \
+ b2 = rotate_left_m128(b2, 3); \
+ b1 = _mm_xor_si128(b1, _mm_xor_si128(b0, b2)); \
+ b3 = _mm_xor_si128(b3, _mm_xor_si128(b2, _mm_slli_epi32(b0, 3))); \
+ b1 = rotate_left_m128(b1, 1); \
+ b3 = rotate_left_m128(b3, 7); \
+ b0 = _mm_xor_si128(b0, _mm_xor_si128(b1, b3)); \
+ b2 = _mm_xor_si128(b2, _mm_xor_si128(b3, _mm_slli_epi32(b1, 7))); \
+ b0 = rotate_left_m128(b0, 5); \
+ b2 = rotate_left_m128(b2, 22); \
+ } while(0);
+
+void print_simd(const char* name, __m128i vec)
+ {
+ union { __m128i v; int32_t ints[4]; } u = { vec };
+
+ printf("%s: ", name);
+ for(u32bit i = 0; i != 4; ++i)
+ printf("%08X ", u.ints[i]);
+ printf("\n");
+ }
+
+void serpent_encrypt_4(const byte in[64],
+ byte out[64],
+ const u32bit keys_32[132])
+ {
+ const __m128i* keys = (const __m128i*)(keys_32);
+
+ /*
+ FIXME: figure out a fast way to do this with 4 loads with
+ _mm_loadu_si128 plus shuffle/interleave ops
+ */
+ union { __m128i v; u32bit u32[4]; } convert;
+
+ convert.u32[0] = load_le<u32bit>(in, 0);
+ convert.u32[1] = load_le<u32bit>(in, 4);
+ convert.u32[2] = load_le<u32bit>(in, 8);
+ convert.u32[3] = load_le<u32bit>(in, 12);
+
+ __m128i b0 = convert.v;
+
+ convert.u32[0] = load_le<u32bit>(in, 1);
+ convert.u32[1] = load_le<u32bit>(in, 5);
+ convert.u32[2] = load_le<u32bit>(in, 9);
+ convert.u32[3] = load_le<u32bit>(in, 13);
+
+ __m128i b1 = convert.v;
+
+ convert.u32[0] = load_le<u32bit>(in, 2);
+ convert.u32[1] = load_le<u32bit>(in, 6);
+ convert.u32[2] = load_le<u32bit>(in, 10);
+ convert.u32[3] = load_le<u32bit>(in, 14);
+
+ __m128i b2 = convert.v;
+
+ convert.u32[0] = load_le<u32bit>(in, 3);
+ convert.u32[1] = load_le<u32bit>(in, 7);
+ convert.u32[2] = load_le<u32bit>(in, 11);
+ convert.u32[3] = load_le<u32bit>(in, 15);
+
+
+ __m128i b3 = convert.v;
+
+ __m128i b4; // temp
+
+ const __m128i all_ones = _mm_set1_epi8(0xFF);
+
+ key_xor(0, b0, b1, b2, b3);
+ SBoxE1(b0, b1, b2, b3, b4);
+ transform(b0, b1, b2, b3);
+
+ key_xor(b0, b1, b2, b3, 1);
+
+ print_simd("b0", b0);
+ print_simd("b1", b1);
+ print_simd("b2", b2);
+ print_simd("b3", b3);
+ }
+
+}
+
+/*
+* Serpent Encryption
+*/
+void Serpent_SSE2::encrypt_n(const byte in[], byte out[], u32bit blocks) const
+ {
+ while(blocks >= 4)
+ {
+ serpent_encrypt_4(in, out, this->round_key);
+ //Serpent::encrypt_n(in, out, 4);
+ in += 4 * BLOCK_SIZE;
+ out += 4 * BLOCK_SIZE;
+ blocks -= 4;
+ }
+
+ for(u32bit i = 0; i != blocks; ++i)
+ {
+ Serpent::encrypt_n(in, out, 1);
+ in += BLOCK_SIZE;
+ out += BLOCK_SIZE;
+ }
+ }
+
+/*
+* Serpent Decryption
+*/
+void Serpent_SSE2::decrypt_n(const byte in[], byte out[], u32bit blocks) const
+ {
+ for(u32bit i = 0; i != blocks; ++i)
+ {
+ Serpent::decrypt_n(in, out, 1);
+ in += BLOCK_SIZE;
+ out += BLOCK_SIZE;
+ }
+ }
+
+}
diff --git a/src/block/serpent_sse2/serp_sse2.h b/src/block/serpent_sse2/serp_sse2.h
new file mode 100644
index 000000000..f1e5c2028
--- /dev/null
+++ b/src/block/serpent_sse2/serp_sse2.h
@@ -0,0 +1,29 @@
+/*
+* Serpent (SSE2)
+* (C) 2009 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_SERPENT_SSE2_H__
+#define BOTAN_SERPENT_SSE2_H__
+
+#include <botan/serpent.h>
+
+namespace Botan {
+
+/*
+* Serpent
+*/
+class BOTAN_DLL Serpent_SSE2 : public Serpent
+ {
+ public:
+ void encrypt_n(const byte in[], byte out[], u32bit blocks) const;
+ void decrypt_n(const byte in[], byte out[], u32bit blocks) const;
+
+ BlockCipher* clone() const { return new Serpent_SSE2; }
+ };
+
+}
+
+#endif