aboutsummaryrefslogtreecommitdiffstats
path: root/src/block/noekeon_simd
diff options
context:
space:
mode:
author: lloyd <lloyd@randombit.net> 2010-01-12 16:15:29 +0000
committer: lloyd <lloyd@randombit.net> 2010-01-12 16:15:29 +0000
commit: 118e42c730001fb3ef2f30a295b099c73bc99668 (patch)
tree: b4689813d0054dbc09703df60cd1e23e0649337f /src/block/noekeon_simd
parent: 3f9791ba4d118143c03a613280f53a29a6e1e993 (diff)
Add SIMD version of Noekeon. On a Core2, about 2.7x faster using SIMD_SSE2
and 1.6x faster using SIMD_Scalar.
Diffstat (limited to 'src/block/noekeon_simd')
-rw-r--r-- src/block/noekeon_simd/info.txt 7
-rw-r--r-- src/block/noekeon_simd/noekeon_simd.cpp 161
-rw-r--r-- src/block/noekeon_simd/noekeon_simd.h 29
3 files changed, 197 insertions, 0 deletions
diff --git a/src/block/noekeon_simd/info.txt b/src/block/noekeon_simd/info.txt
new file mode 100644
index 000000000..b73954cff
--- /dev/null
+++ b/src/block/noekeon_simd/info.txt
@@ -0,0 +1,7 @@
+define NOEKEON_SIMD
+
+<requires>
+noekeon
+simd_32
+simd_engine
+</requires>
diff --git a/src/block/noekeon_simd/noekeon_simd.cpp b/src/block/noekeon_simd/noekeon_simd.cpp
new file mode 100644
index 000000000..f44104901
--- /dev/null
+++ b/src/block/noekeon_simd/noekeon_simd.cpp
@@ -0,0 +1,161 @@
+/*
+* Noekeon in SIMD
+* (C) 2010 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#include <botan/noekeon_simd.h>
+#include <botan/internal/simd_32.h>
+
+namespace Botan {
+
+/*
+* Noekeon's Theta Operation (statement macro, updates A0..A3 in place): folds T ^ rotl(T,8) ^ rotr(T,8) for T = A0^A2 into A1 and A3, XORs K0..K3 into A0..A3, then folds the same expression for T = A1^A3 into A0 and A2
+*/
+#define NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3) \
+ do { \
+ SIMD_32 T = A0 ^ A2; \
+ T ^= rotate_left(T, 8) ^ rotate_right(T, 8); \
+ A1 ^= T; \
+ A3 ^= T; \
+ \
+ A0 ^= K0; \
+ A1 ^= K1; \
+ A2 ^= K2; \
+ A3 ^= K3; \
+ \
+ T = A1 ^ A3; \
+ T ^= rotate_left(T, 8) ^ rotate_right(T, 8); \
+ A0 ^= T; \
+ A2 ^= T; \
+ } while(0)
+
+/*
+* Noekeon's Gamma S-Box Layer (statement macro, updates A0..A3 in place): applies the A1/A0 update, swaps A0 and A3 through a temporary, XORs A0^A1^A3 into A2, then applies the same A1/A0 update again
+*/
+#define NOK_SIMD_GAMMA(A0, A1, A2, A3) \
+ do \
+ { \
+ A1 ^= A3.andc(~A2); \
+ A0 ^= A2 & A1; \
+ \
+ SIMD_32 T = A3; \
+ A3 = A0; \
+ A0 = T; \
+ \
+ A2 ^= A0 ^ A1 ^ A3; \
+ \
+ A1 ^= A3.andc(~A2); \
+ A0 ^= A2 & A1; \
+ } while(0)
+
+/*
+* Noekeon Encryption: handles 4 blocks (64 bytes) per loop pass with SIMD_32, then hands any remaining blocks to Noekeon::encrypt_n
+*/
+void Noekeon_SIMD::encrypt_n(const byte in[], byte out[], u32bit blocks) const
+ {
+ SIMD_32 K0 = SIMD_32(EK[0]);
+ SIMD_32 K1 = SIMD_32(EK[1]);
+ SIMD_32 K2 = SIMD_32(EK[2]);
+ SIMD_32 K3 = SIMD_32(EK[3]);
+
+ while(blocks >= 4) // each pass consumes 64 input bytes and produces 64 output bytes
+ {
+ SIMD_32 A0 = SIMD_32::load_be(in );
+ SIMD_32 A1 = SIMD_32::load_be(in + 16);
+ SIMD_32 A2 = SIMD_32::load_be(in + 32);
+ SIMD_32 A3 = SIMD_32::load_be(in + 48);
+
+ SIMD_32::transpose(A0, A1, A2, A3); // NOTE(review): presumably regroups lanes so each register holds one word of all 4 blocks - confirm in SIMD_32
+
+ for(u32bit i = 0; i != 16; ++i) // 16 rounds, using RC[0] .. RC[15]
+ {
+ A0 ^= SIMD_32(RC[i]);
+
+ NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3);
+
+ A1.rotate_left(1);
+ A2.rotate_left(5);
+ A3.rotate_left(2);
+
+ NOK_SIMD_GAMMA(A0, A1, A2, A3);
+
+ A1.rotate_right(1); // undo the pre-gamma rotations by the same amounts
+ A2.rotate_right(5);
+ A3.rotate_right(2);
+ }
+
+ A0 ^= SIMD_32(RC[16]); // last constant, followed by one final theta with no gamma
+ NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3);
+
+ SIMD_32::transpose(A0, A1, A2, A3); // NOTE(review): presumably reverses the earlier transpose before storing - confirm
+
+ A0.store_be(out);
+ A1.store_be(out + 16);
+ A2.store_be(out + 32);
+ A3.store_be(out + 48);
+
+ in += 64;
+ out += 64;
+ blocks -= 4;
+ }
+
+ Noekeon::encrypt_n(in, out, blocks); // fewer than 4 blocks remain here; the base class handles them
+ }
+
+/*
+* Noekeon Decryption: handles 4 blocks (64 bytes) per loop pass with SIMD_32, then hands any remaining blocks to Noekeon::decrypt_n
+*/
+void Noekeon_SIMD::decrypt_n(const byte in[], byte out[], u32bit blocks) const
+ {
+ SIMD_32 K0 = SIMD_32(DK[0]);
+ SIMD_32 K1 = SIMD_32(DK[1]);
+ SIMD_32 K2 = SIMD_32(DK[2]);
+ SIMD_32 K3 = SIMD_32(DK[3]);
+
+ while(blocks >= 4) // each pass consumes 64 input bytes and produces 64 output bytes
+ {
+ SIMD_32 A0 = SIMD_32::load_be(in );
+ SIMD_32 A1 = SIMD_32::load_be(in + 16);
+ SIMD_32 A2 = SIMD_32::load_be(in + 32);
+ SIMD_32 A3 = SIMD_32::load_be(in + 48);
+
+ SIMD_32::transpose(A0, A1, A2, A3); // NOTE(review): presumably regroups lanes so each register holds one word of all 4 blocks - confirm in SIMD_32
+
+ for(u32bit i = 0; i != 16; ++i) // 16 rounds
+ {
+ NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3);
+
+ A0 ^= SIMD_32(RC[16-i]); // constants taken in reverse: RC[16] down to RC[1], applied after theta
+
+ A1.rotate_left(1);
+ A2.rotate_left(5);
+ A3.rotate_left(2);
+
+ NOK_SIMD_GAMMA(A0, A1, A2, A3);
+
+ A1.rotate_right(1); // undo the pre-gamma rotations by the same amounts
+ A2.rotate_right(5);
+ A3.rotate_right(2);
+ }
+
+ NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3);
+ A0 ^= SIMD_32(RC[0]); // remaining constant RC[0], applied after the final theta
+
+ SIMD_32::transpose(A0, A1, A2, A3); // NOTE(review): presumably reverses the earlier transpose before storing - confirm
+
+ A0.store_be(out);
+ A1.store_be(out + 16);
+ A2.store_be(out + 32);
+ A3.store_be(out + 48);
+
+ in += 64;
+ out += 64;
+ blocks -= 4;
+ }
+
+ Noekeon::decrypt_n(in, out, blocks); // fewer than 4 blocks remain here; the base class handles them
+ }
+
+}
diff --git a/src/block/noekeon_simd/noekeon_simd.h b/src/block/noekeon_simd/noekeon_simd.h
new file mode 100644
index 000000000..466c4b741
--- /dev/null
+++ b/src/block/noekeon_simd/noekeon_simd.h
@@ -0,0 +1,29 @@
+/*
+* Noekeon in SIMD
+* (C) 2010 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_NOEKEON_SIMD_H__
+#define BOTAN_NOEKEON_SIMD_H__
+
+#include <botan/noekeon.h>
+
+namespace Botan {
+
+/*
+* Noekeon in SIMD: subclass of Noekeon that declares its own encrypt_n/decrypt_n (defined in noekeon_simd.cpp)
+*/
+class BOTAN_DLL Noekeon_SIMD : public Noekeon
+ {
+ public:
+ void encrypt_n(const byte in[], byte out[], u32bit blocks) const;
+ void decrypt_n(const byte in[], byte out[], u32bit blocks) const;
+
+ BlockCipher* clone() const { return new Noekeon_SIMD; } // fresh default-constructed object; state of *this is not copied
+ };
+
+}
+
+#endif