diff options
author | lloyd <[email protected]> | 2010-01-12 16:15:29 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2010-01-12 16:15:29 +0000 |
commit | 118e42c730001fb3ef2f30a295b099c73bc99668 (patch) | |
tree | b4689813d0054dbc09703df60cd1e23e0649337f /src/block | |
parent | 3f9791ba4d118143c03a613280f53a29a6e1e993 (diff) |
Add SIMD version of Noekeon. On a Core2, about 2.7x faster using SIMD_SSE2
and 1.6x faster using SIMD_Scalar.
Diffstat (limited to 'src/block')
-rw-r--r-- | src/block/noekeon/noekeon.h | 2 | ||||
-rw-r--r-- | src/block/noekeon_simd/info.txt | 7 | ||||
-rw-r--r-- | src/block/noekeon_simd/noekeon_simd.cpp | 161 | ||||
-rw-r--r-- | src/block/noekeon_simd/noekeon_simd.h | 29 |
4 files changed, 198 insertions, 1 deletions
diff --git a/src/block/noekeon/noekeon.h b/src/block/noekeon/noekeon.h index 4532c1be2..22ef65342 100644 --- a/src/block/noekeon/noekeon.h +++ b/src/block/noekeon/noekeon.h @@ -26,7 +26,7 @@ class BOTAN_DLL Noekeon : public BlockCipher BlockCipher* clone() const { return new Noekeon; } Noekeon() : BlockCipher(16, 16) {} - private: + protected: void key_schedule(const byte[], u32bit); static const byte RC[17]; diff --git a/src/block/noekeon_simd/info.txt b/src/block/noekeon_simd/info.txt new file mode 100644 index 000000000..b73954cff --- /dev/null +++ b/src/block/noekeon_simd/info.txt @@ -0,0 +1,7 @@ +define NOEKEON_SIMD + +<requires> +noekeon +simd_32 +simd_engine +</requires> diff --git a/src/block/noekeon_simd/noekeon_simd.cpp b/src/block/noekeon_simd/noekeon_simd.cpp new file mode 100644 index 000000000..f44104901 --- /dev/null +++ b/src/block/noekeon_simd/noekeon_simd.cpp @@ -0,0 +1,161 @@ +/* +* Noekeon in SIMD +* (C) 2010 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#include <botan/noekeon_simd.h> +#include <botan/internal/simd_32.h> + +namespace Botan { + +/* +* Noekeon's Theta Operation +*/ +#define NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3) \ + do { \ + SIMD_32 T = A0 ^ A2; \ + T ^= rotate_left(T, 8) ^ rotate_right(T, 8); \ + A1 ^= T; \ + A3 ^= T; \ + \ + A0 ^= K0; \ + A1 ^= K1; \ + A2 ^= K2; \ + A3 ^= K3; \ + \ + T = A1 ^ A3; \ + T ^= rotate_left(T, 8) ^ rotate_right(T, 8); \ + A0 ^= T; \ + A2 ^= T; \ + } while(0) + +/* +* Noekeon's Gamma S-Box Layer +*/ +#define NOK_SIMD_GAMMA(A0, A1, A2, A3) \ + do \ + { \ + A1 ^= A3.andc(~A2); \ + A0 ^= A2 & A1; \ + \ + SIMD_32 T = A3; \ + A3 = A0; \ + A0 = T; \ + \ + A2 ^= A0 ^ A1 ^ A3; \ + \ + A1 ^= A3.andc(~A2); \ + A0 ^= A2 & A1; \ + } while(0) + +/* +* Noekeon Encryption: four 16-byte blocks per loop iteration; fewer than four remaining blocks are passed to Noekeon::encrypt_n +*/ +void Noekeon_SIMD::encrypt_n(const byte in[], byte out[], u32bit blocks) const + { + SIMD_32 K0 = SIMD_32(EK[0]); + SIMD_32 K1 = SIMD_32(EK[1]); + SIMD_32 K2 = SIMD_32(EK[2]); + SIMD_32 K3 = SIMD_32(EK[3]); + 
+ while(blocks >= 4) + { + SIMD_32 A0 = SIMD_32::load_be(in ); + SIMD_32 A1 = SIMD_32::load_be(in + 16); + SIMD_32 A2 = SIMD_32::load_be(in + 32); + SIMD_32 A3 = SIMD_32::load_be(in + 48); + + SIMD_32::transpose(A0, A1, A2, A3); + + for(u32bit i = 0; i != 16; ++i) + { + A0 ^= SIMD_32(RC[i]); + + NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3); + + A1.rotate_left(1); + A2.rotate_left(5); + A3.rotate_left(2); + + NOK_SIMD_GAMMA(A0, A1, A2, A3); + + A1.rotate_right(1); + A2.rotate_right(5); + A3.rotate_right(2); + } + + A0 ^= SIMD_32(RC[16]); + NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3); + + SIMD_32::transpose(A0, A1, A2, A3); + + A0.store_be(out); + A1.store_be(out + 16); + A2.store_be(out + 32); + A3.store_be(out + 48); + + in += 64; + out += 64; + blocks -= 4; + } + + Noekeon::encrypt_n(in, out, blocks); + } + +/* +* Noekeon Decryption: four 16-byte blocks per loop iteration; fewer than four remaining blocks are passed to Noekeon::decrypt_n +*/ +void Noekeon_SIMD::decrypt_n(const byte in[], byte out[], u32bit blocks) const + { + SIMD_32 K0 = SIMD_32(DK[0]); + SIMD_32 K1 = SIMD_32(DK[1]); + SIMD_32 K2 = SIMD_32(DK[2]); + SIMD_32 K3 = SIMD_32(DK[3]); + + while(blocks >= 4) + { + SIMD_32 A0 = SIMD_32::load_be(in ); + SIMD_32 A1 = SIMD_32::load_be(in + 16); + SIMD_32 A2 = SIMD_32::load_be(in + 32); + SIMD_32 A3 = SIMD_32::load_be(in + 48); + + SIMD_32::transpose(A0, A1, A2, A3); + + for(u32bit i = 0; i != 16; ++i) + { + NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3); + + A0 ^= SIMD_32(RC[16-i]); + + A1.rotate_left(1); + A2.rotate_left(5); + A3.rotate_left(2); + + NOK_SIMD_GAMMA(A0, A1, A2, A3); + + A1.rotate_right(1); + A2.rotate_right(5); + A3.rotate_right(2); + } + + NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3); + A0 ^= SIMD_32(RC[0]); + + SIMD_32::transpose(A0, A1, A2, A3); + + A0.store_be(out); + A1.store_be(out + 16); + A2.store_be(out + 32); + A3.store_be(out + 48); + + in += 64; + out += 64; + blocks -= 4; + } + + Noekeon::decrypt_n(in, out, blocks); + } + +} diff --git a/src/block/noekeon_simd/noekeon_simd.h b/src/block/noekeon_simd/noekeon_simd.h new file mode 
100644 index 000000000..466c4b741 --- /dev/null +++ b/src/block/noekeon_simd/noekeon_simd.h @@ -0,0 +1,29 @@ +/* +* Noekeon in SIMD +* (C) 2010 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_NOEKEON_SIMD_H__ +#define BOTAN_NOEKEON_SIMD_H__ + +#include <botan/noekeon.h> + +namespace Botan { + +/* +* Noekeon implementation using SIMD operations (overrides encrypt_n/decrypt_n of Noekeon) +*/ +class BOTAN_DLL Noekeon_SIMD : public Noekeon + { + public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + + BlockCipher* clone() const { return new Noekeon_SIMD; } + }; + +} + +#endif |