diff options
author | lloyd <[email protected]> | 2009-10-29 01:40:55 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2009-10-29 01:40:55 +0000 |
commit | 78cb47ae1b43c51d6e91531f701ccf03fa8ea2c6 (patch) | |
tree | c71ff6ff187cc7ef15cf3c233ea7bdb43ef6f6df /src/block | |
parent | ee169027c6cd30923aa30a735eb801836ee593d2 (diff) | |
parent | 1bc4d2fb37c8f1e6e94a65ec67062826393dda7f (diff) |
propagate from branch 'net.randombit.botan' (head 8fb69dd1c599ada1008c4cab2a6d502cbcc468e0)
to branch 'net.randombit.botan.general-simd' (head c05c9a6d398659891fb8cca170ed514ea7e6476d)
Diffstat (limited to 'src/block')
-rw-r--r-- | src/block/serpent_simd/info.txt | 8 | ||||
-rw-r--r-- | src/block/serpent_simd/serp_simd.cpp (renamed from src/block/serpent_sse2/serp_sse2.cpp) | 154 | ||||
-rw-r--r-- | src/block/serpent_simd/serp_simd.h (renamed from src/block/serpent_sse2/serp_sse2.h) | 10 | ||||
-rw-r--r-- | src/block/serpent_simd/serp_simd_sbox.h | 426 | ||||
-rw-r--r-- | src/block/serpent_sse2/info.txt | 6 | ||||
-rw-r--r-- | src/block/serpent_sse2/serp_sse2_sbox.h | 434 | ||||
-rw-r--r-- | src/block/xtea/xtea.h | 2 | ||||
-rw-r--r-- | src/block/xtea_simd/info.txt | 16 | ||||
-rw-r--r-- | src/block/xtea_simd/xtea_simd.cpp | 124 | ||||
-rw-r--r-- | src/block/xtea_simd/xtea_simd.h | 28 |
10 files changed, 668 insertions, 540 deletions
diff --git a/src/block/serpent_simd/info.txt b/src/block/serpent_simd/info.txt new file mode 100644 index 000000000..5d9115a4d --- /dev/null +++ b/src/block/serpent_simd/info.txt @@ -0,0 +1,8 @@ +realname "Serpent (SIMD)" +define SERPENT_SIMD + +<requires> +serpent +simd_32 +simd_engine +</requires> diff --git a/src/block/serpent_sse2/serp_sse2.cpp b/src/block/serpent_simd/serp_simd.cpp index c51bb69ab..b394b0c26 100644 --- a/src/block/serpent_sse2/serp_sse2.cpp +++ b/src/block/serpent_simd/serp_simd.cpp @@ -1,99 +1,71 @@ /* -* Serpent (SSE2) +* Serpent (SIMD) * (C) 2009 Jack Lloyd * * Distributed under the terms of the Botan license */ -#include <botan/serp_sse2.h> -#include <botan/serp_sse2_sbox.h> +#include <botan/serp_simd.h> +#include <botan/serp_simd_sbox.h> +#include <botan/simd_32.h> #include <botan/loadstor.h> -#include <emmintrin.h> namespace Botan { namespace { -#define key_xor(round, B0, B1, B2, B3) \ - do { \ - __m128i key = _mm_loadu_si128(keys + round); \ - B0 = _mm_xor_si128(B0, _mm_shuffle_epi32(key, _MM_SHUFFLE(0,0,0,0))); \ - B1 = _mm_xor_si128(B1, _mm_shuffle_epi32(key, _MM_SHUFFLE(1,1,1,1))); \ - B2 = _mm_xor_si128(B2, _mm_shuffle_epi32(key, _MM_SHUFFLE(2,2,2,2))); \ - B3 = _mm_xor_si128(B3, _mm_shuffle_epi32(key, _MM_SHUFFLE(3,3,3,3))); \ +#define key_xor(round, B0, B1, B2, B3) \ + do { \ + B0 ^= SIMD_32(keys[4*round ]); \ + B1 ^= SIMD_32(keys[4*round+1]); \ + B2 ^= SIMD_32(keys[4*round+2]); \ + B3 ^= SIMD_32(keys[4*round+3]); \ } while(0); /* * Serpent's linear transformations */ -#define rotate_left_m128(vec, rot) \ - _mm_or_si128(_mm_slli_epi32(vec, rot), _mm_srli_epi32(vec, 32-rot)) - -#define rotate_right_m128(vec, rot) \ - _mm_or_si128(_mm_srli_epi32(vec, rot), _mm_slli_epi32(vec, 32-rot)) - -#define transform(B0, B1, B2, B3) \ - do { \ - B0 = rotate_left_m128(B0, 13); \ - B2 = rotate_left_m128(B2, 3); \ - B1 = _mm_xor_si128(B1, _mm_xor_si128(B0, B2)); \ - B3 = _mm_xor_si128(B3, _mm_xor_si128(B2, _mm_slli_epi32(B0, 3))); \ - B1 = rotate_left_m128(B1, 1); \ - B3 = rotate_left_m128(B3, 7); \ - B0 = _mm_xor_si128(B0, _mm_xor_si128(B1, B3)); \ - B2 = _mm_xor_si128(B2, _mm_xor_si128(B3, _mm_slli_epi32(B1, 7))); \ - B0 = rotate_left_m128(B0, 5); \ - B2 = rotate_left_m128(B2, 22); \ +#define transform(B0, B1, B2, B3) \ + do { \ + B0.rotate_left(13); \ + B2.rotate_left(3); \ + B1 ^= B0 ^ B2; \ + B3 ^= B2 ^ (B0 << 3); \ + B1.rotate_left(1); \ + B3.rotate_left(7); \ + B0 ^= B1 ^ B3; \ + B2 ^= B3 ^ (B1 << 7); \ + B0.rotate_left(5); \ + B2.rotate_left(22); \ } while(0); -#define i_transform(B0, B1, B2, B3) \ - do { \ - B2 = rotate_right_m128(B2, 22); \ - B0 = rotate_right_m128(B0, 5); \ - B2 = _mm_xor_si128(B2, _mm_xor_si128(B3, _mm_slli_epi32(B1, 7))); \ - B0 = _mm_xor_si128(B0, _mm_xor_si128(B1, B3)); \ - B3 = rotate_right_m128(B3, 7); \ - B1 = rotate_right_m128(B1, 1); \ - B3 = _mm_xor_si128(B3, _mm_xor_si128(B2, _mm_slli_epi32(B0, 3))); \ - B1 = _mm_xor_si128(B1, _mm_xor_si128(B0, B2)); \ - B2 = rotate_right_m128(B2, 3); \ - B0 = rotate_right_m128(B0, 13); \ +#define i_transform(B0, B1, B2, B3) \ + do { \ + B2.rotate_right(22); \ + B0.rotate_right(5); \ + B2 ^= B3 ^ (B1 << 7); \ + B0 ^= B1 ^ B3; \ + B3.rotate_right(7); \ + B1.rotate_right(1); \ + B3 ^= B2 ^ (B0 << 3); \ + B1 ^= B0 ^ B2; \ + B2.rotate_right(3); \ + B0.rotate_right(13); \ } while(0); /* -* 4x4 SSE2 integer matrix transpose -*/ -#define transpose(B0, B1, B2, B3) \ - do { \ - __m128i T0 = _mm_unpacklo_epi32(B0, B1); \ - __m128i T1 = _mm_unpacklo_epi32(B2, B3); \ - __m128i T2 = _mm_unpackhi_epi32(B0, B1); \ - __m128i T3 = _mm_unpackhi_epi32(B2, B3); \ - B0 = _mm_unpacklo_epi64(T0, T1); \ - B1 = _mm_unpackhi_epi64(T0, T1); \ - B2 = _mm_unpacklo_epi64(T2, T3); \ - B3 = _mm_unpackhi_epi64(T2, T3); \ - } while(0); - -/* -* SSE2 Serpent Encryption of 4 blocks in parallel +* SIMD Serpent Encryption of 4 blocks in parallel */ void serpent_encrypt_4(const byte in[64], byte out[64], - const u32bit keys_32[132]) + const u32bit keys[132]) { - const __m128i all_ones = _mm_set1_epi8(0xFF); + SIMD_32 B0 = SIMD_32::load_le(in); + SIMD_32 B1 = SIMD_32::load_le(in + 16); + SIMD_32 B2 = SIMD_32::load_le(in + 32); + SIMD_32 B3 = SIMD_32::load_le(in + 48); - const __m128i* keys = (const __m128i*)(keys_32); - __m128i* out_mm = (__m128i*)(out); - __m128i* in_mm = (__m128i*)(in); - - __m128i B0 = _mm_loadu_si128(in_mm); - __m128i B1 = _mm_loadu_si128(in_mm + 1); - __m128i B2 = _mm_loadu_si128(in_mm + 2); - __m128i B3 = _mm_loadu_si128(in_mm + 3); - - transpose(B0, B1, B2, B3); + SIMD_32::transpose(B0, B1, B2, B3); key_xor( 0,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); @@ -131,33 +103,27 @@ void serpent_encrypt_4(const byte in[64], key_xor(30,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3); - transpose(B0, B1, B2, B3); + SIMD_32::transpose(B0, B1, B2, B3); - _mm_storeu_si128(out_mm , B0); - _mm_storeu_si128(out_mm + 1, B1); - _mm_storeu_si128(out_mm + 2, B2); - _mm_storeu_si128(out_mm + 3, B3); + B0.store_le(out); + B1.store_le(out + 16); + B2.store_le(out + 32); + B3.store_le(out + 48); } /* -* SSE2 Serpent Decryption of 4 blocks in parallel +* SIMD Serpent Decryption of 4 blocks in parallel */ void serpent_decrypt_4(const byte in[64], byte out[64], - const u32bit keys_32[132]) + const u32bit keys[132]) { - const __m128i all_ones = _mm_set1_epi8(0xFF); - - const __m128i* keys = (const __m128i*)(keys_32); - __m128i* out_mm = (__m128i*)(out); - __m128i* in_mm = (__m128i*)(in); - - __m128i B0 = _mm_loadu_si128(in_mm); - __m128i B1 = _mm_loadu_si128(in_mm + 1); - __m128i B2 = _mm_loadu_si128(in_mm + 2); - __m128i B3 = _mm_loadu_si128(in_mm + 3); + SIMD_32 B0 = SIMD_32::load_le(in); + SIMD_32 B1 = SIMD_32::load_le(in + 16); + SIMD_32 B2 = SIMD_32::load_le(in + 32); + SIMD_32 B3 = SIMD_32::load_le(in + 48); - transpose(B0, B1, B2, B3); + SIMD_32::transpose(B0, B1, B2, B3); key_xor(32,B0,B1,B2,B3); SBoxD8(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3); @@ -195,12 +161,12 @@ void serpent_decrypt_4(const byte in[64], i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3); i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3); - transpose(B0, B1, B2, B3); + SIMD_32::transpose(B0, B1, B2, B3); - _mm_storeu_si128(out_mm , B0); - _mm_storeu_si128(out_mm + 1, B1); - _mm_storeu_si128(out_mm + 2, B2); - _mm_storeu_si128(out_mm + 3, B3); + B0.store_le(out); + B1.store_le(out + 16); + B2.store_le(out + 32); + B3.store_le(out + 48); } } @@ -208,7 +174,7 @@ void serpent_decrypt_4(const byte in[64], /* * Serpent Encryption */ -void Serpent_SSE2::encrypt_n(const byte in[], byte out[], u32bit blocks) const +void Serpent_SIMD::encrypt_n(const byte in[], byte out[], u32bit blocks) const { while(blocks >= 4) { @@ -224,7 +190,7 @@ void Serpent_SSE2::encrypt_n(const byte in[], byte out[], u32bit blocks) const /* * Serpent Decryption */ -void Serpent_SSE2::decrypt_n(const byte in[], byte out[], u32bit blocks) const +void Serpent_SIMD::decrypt_n(const byte in[], byte out[], u32bit blocks) const { while(blocks >= 4) { diff --git a/src/block/serpent_sse2/serp_sse2.h b/src/block/serpent_simd/serp_simd.h index f1e5c2028..1ecb70159 100644 --- a/src/block/serpent_sse2/serp_sse2.h +++ b/src/block/serpent_simd/serp_simd.h @@ -1,12 +1,12 @@ /* -* Serpent (SSE2) +* Serpent (SIMD) * (C) 2009 Jack Lloyd * * Distributed under the terms of the Botan license */ -#ifndef BOTAN_SERPENT_SSE2_H__ -#define BOTAN_SERPENT_SSE2_H__ +#ifndef BOTAN_SERPENT_SIMD_H__ +#define BOTAN_SERPENT_SIMD_H__ #include <botan/serpent.h> @@ -15,13 +15,13 @@ namespace Botan { /* * Serpent */ -class BOTAN_DLL Serpent_SSE2 : public Serpent +class BOTAN_DLL Serpent_SIMD : public Serpent { public: void encrypt_n(const byte in[], byte out[], u32bit blocks) const; void decrypt_n(const byte in[], byte out[], u32bit blocks) const; - BlockCipher* clone() const { return new Serpent_SSE2; } + BlockCipher* clone() const { return new Serpent_SIMD; } }; } diff --git a/src/block/serpent_simd/serp_simd_sbox.h b/src/block/serpent_simd/serp_simd_sbox.h new file mode 100644 index 000000000..6e3da7359 --- /dev/null +++ b/src/block/serpent_simd/serp_simd_sbox.h @@ -0,0 +1,426 @@ +/* +* Serpent Sboxes in SIMD form +* (C) 2009 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#ifndef SERPENT_SIMD_SBOXES_H__ +#define SERPENT_SIMD_SBOXES_H__ + +#define SBoxE1(B0, B1, B2, B3) \ + do { \ + B3 ^= B0; \ + SIMD_32 B4 = B1; \ + B1 &= B3; \ + B4 ^= B2; \ + B1 ^= B0; \ + B0 |= B3; \ + B0 ^= B4; \ + B4 ^= B3; \ + B3 ^= B2; \ + B2 |= B1; \ + B2 ^= B4; \ + B4 = ~B4; \ + B4 |= B1; \ + B1 ^= B3; \ + B1 ^= B4; \ + B3 |= B0; \ + B1 ^= B3; \ + B4 ^= B3; \ + B3 = B0; \ + B0 = B1; \ + B1 = B4; \ + } while(0); + +#define SBoxE2(B0, B1, B2, B3) \ + do { \ + B0 = ~B0; \ + B2 = ~B2; \ + SIMD_32 B4 = B0; \ + B0 &= B1; \ + B2 ^= B0; \ + B0 |= B3; \ + B3 ^= B2; \ + B1 ^= B0; \ + B0 ^= B4; \ + B4 |= B1; \ + B1 ^= B3; \ + B2 |= B0; \ + B2 &= B4; \ + B0 ^= B1; \ + B1 &= B2; \ + B1 ^= B0; \ + B0 &= B2; \ + B4 ^= B0; \ + B0 = B2; \ + B2 = B3; \ + B3 = B1; \ + B1 = B4; \ + } while(0); + +#define SBoxE3(B0, B1, B2, B3) \ + do { \ + SIMD_32 B4 = B0; \ + B0 &= B2; \ + B0 ^= B3; \ + B2 ^= B1; \ + B2 ^= B0; \ + B3 |= B4; \ + B3 ^= B1; \ + B4 ^= B2; \ + B1 = B3; \ + B3 |= B4; \ + B3 ^= B0; \ + B0 &= B1; \ + B4 ^= B0; \ + B1 ^= B3; \ + B1 ^= B4; \ + B4 = ~B4; \ + B0 = B2; \ + B2 = B1; \ + B1 = B3; \ + B3 = B4; \ + } while(0); + +#define SBoxE4(B0, B1, B2, B3) \ + do { \ + SIMD_32 B4 = B0; \ + B0 |= B3; \ + B3 ^= B1; \ + B1 &= B4; \ + B4 ^= B2; \ + B2 ^= B3; \ + B3 &= B0; \ + B4 |= B1; \ + B3 ^= B4; \ + B0 ^= B1; \ + B4 &= B0; \ + B1 ^= B3; \ + B4 ^= B2; \ + B1 |= B0; \ + B1 ^= B2; \ + B0 ^= B3; \ + B2 = B1; \ + B1 |= B3; \ + B0 ^= B1; \ + B1 = B2; \ + B2 = B3; \ + B3 = B4; \ + } while(0); + +#define SBoxE5(B0, B1, B2, B3) \ + do { \ + B1 ^= B3; \ + B3 = ~B3; \ + B2 ^= B3; \ + B3 ^= B0; \ + SIMD_32 B4 = B1; \ + B1 &= B3; \ + B1 ^= B2; \ + B4 ^= B3; \ + B0 ^= B4; \ + B2 &= B4; \ + B2 ^= B0; \ + B0 &= B1; \ + B3 ^= B0; \ + B4 |= B1; \ + B4 ^= B0; \ + B0 |= B3; \ + B0 ^= B2; \ + B2 &= B3; \ + B0 = ~B0; \ + B4 ^= B2; \ + B2 = B0; \ + B0 = B1; \ + B1 = B4; \ + } while(0); + +#define SBoxE6(B0, B1, B2, B3) \ + do { \ + B0 ^= B1; \ + B1 ^= B3; \ + B3 = ~B3; \ + SIMD_32 B4 = B1; \ + B1 &= B0; \ + B2 ^= B3; \ + B1 ^= B2; \ + B2 |= B4; \ + B4 ^= B3; \ + B3 &= B1; \ + B3 ^= B0; \ + B4 ^= B1; \ + B4 ^= B2; \ + B2 ^= B0; \ + B0 &= B3; \ + B2 = ~B2; \ + B0 ^= B4; \ + B4 |= B3; \ + B4 ^= B2; \ + B2 = B0; \ + B0 = B1; \ + B1 = B3; \ + B3 = B4; \ + } while(0); + +#define SBoxE7(B0, B1, B2, B3) \ + do { \ + B2 = ~B2; \ + SIMD_32 B4 = B3; \ + B3 &= B0; \ + B0 ^= B4; \ + B3 ^= B2; \ + B2 |= B4; \ + B1 ^= B3; \ + B2 ^= B0; \ + B0 |= B1; \ + B2 ^= B1; \ + B4 ^= B0; \ + B0 |= B3; \ + B0 ^= B2; \ + B4 ^= B3; \ + B4 ^= B0; \ + B3 = ~B3; \ + B2 &= B4; \ + B3 ^= B2; \ + B2 = B4; \ + } while(0); + +#define SBoxE8(B0, B1, B2, B3) \ + do { \ + SIMD_32 B4 = B1; \ + B1 |= B2; \ + B1 ^= B3; \ + B4 ^= B2; \ + B2 ^= B1; \ + B3 |= B4; \ + B3 &= B0; \ + B4 ^= B2; \ + B3 ^= B1; \ + B1 |= B4; \ + B1 ^= B0; \ + B0 |= B4; \ + B0 ^= B2; \ + B1 ^= B4; \ + B2 ^= B1; \ + B1 &= B0; \ + B1 ^= B4; \ + B2 = ~B2; \ + B2 |= B0; \ + B4 ^= B2; \ + B2 = B1; \ + B1 = B3; \ + B3 = B0; \ + B0 = B4; \ + } while(0); + +#define SBoxD1(B0, B1, B2, B3) \ + do { \ + B2 = ~B2; \ + SIMD_32 B4 = B1; \ + B1 |= B0; \ + B4 = ~B4; \ + B1 ^= B2; \ + B2 |= B4; \ + B1 ^= B3; \ + B0 ^= B4; \ + B2 ^= B0; \ + B0 &= B3; \ + B4 ^= B0; \ + B0 |= B1; \ + B0 ^= B2; \ + B3 ^= B4; \ + B2 ^= B1; \ + B3 ^= B0; \ + B3 ^= B1; \ + B2 &= B3; \ + B4 ^= B2; \ + B2 = B1; \ + B1 = B4; \ + } while(0); + +#define SBoxD2(B0, B1, B2, B3) \ + do { \ + SIMD_32 B4 = B1; \ + B1 ^= B3; \ + B3 &= B1; \ + B4 ^= B2; \ + B3 ^= B0; \ + B0 |= B1; \ + B2 ^= B3; \ + B0 ^= B4; \ + B0 |= B2; \ + B1 ^= B3; \ + B0 ^= B1; \ + B1 |= B3; \ + B1 ^= B0; \ + B4 = ~B4; \ + B4 ^= B1; \ + B1 |= B0; \ + B1 ^= B0; \ + B1 |= B4; \ + B3 ^= B1; \ + B1 = B0; \ + B0 = B4; \ + B4 = B2; \ + B2 = B3; \ + B3 = B4; \ + } while(0); + +#define SBoxD3(B0, B1, B2, B3) \ + do { \ + B2 ^= B3; \ + B3 ^= B0; \ + SIMD_32 B4 = B3; \ + B3 &= B2; \ + B3 ^= B1; \ + B1 |= B2; \ + B1 ^= B4; \ + B4 &= B3; \ + B2 ^= B3; \ + B4 &= B0; \ + B4 ^= B2; \ + B2 &= B1; \ + B2 |= B0; \ + B3 = ~B3; \ + B2 ^= B3; \ + B0 ^= B3; \ + B0 &= B1; \ + B3 ^= B4; \ + B3 ^= B0; \ + B0 = B1; \ + B1 = B4; \ + } while(0); + +#define SBoxD4(B0, B1, B2, B3) \ + do { \ + SIMD_32 B4 = B2; \ + B2 ^= B1; \ + B0 ^= B2; \ + B4 &= B2; \ + B4 ^= B0; \ + B0 &= B1; \ + B1 ^= B3; \ + B3 |= B4; \ + B2 ^= B3; \ + B0 ^= B3; \ + B1 ^= B4; \ + B3 &= B2; \ + B3 ^= B1; \ + B1 ^= B0; \ + B1 |= B2; \ + B0 ^= B3; \ + B1 ^= B4; \ + B0 ^= B1; \ + B4 = B0; \ + B0 = B2; \ + B2 = B3; \ + B3 = B4; \ + } while(0); + +#define SBoxD5(B0, B1, B2, B3) \ + do { \ + SIMD_32 B4 = B2; \ + B2 &= B3; \ + B2 ^= B1; \ + B1 |= B3; \ + B1 &= B0; \ + B4 ^= B2; \ + B4 ^= B1; \ + B1 &= B2; \ + B0 = ~B0; \ + B3 ^= B4; \ + B1 ^= B3; \ + B3 &= B0; \ + B3 ^= B2; \ + B0 ^= B1; \ + B2 &= B0; \ + B3 ^= B0; \ + B2 ^= B4; \ + B2 |= B3; \ + B3 ^= B0; \ + B2 ^= B1; \ + B1 = B3; \ + B3 = B4; \ + } while(0); + +#define SBoxD6(B0, B1, B2, B3) \ + do { \ + B1 = ~B1; \ + SIMD_32 B4 = B3; \ + B2 ^= B1; \ + B3 |= B0; \ + B3 ^= B2; \ + B2 |= B1; \ + B2 &= B0; \ + B4 ^= B3; \ + B2 ^= B4; \ + B4 |= B0; \ + B4 ^= B1; \ + B1 &= B2; \ + B1 ^= B3; \ + B4 ^= B2; \ + B3 &= B4; \ + B4 ^= B1; \ + B3 ^= B4; \ + B4 = ~B4; \ + B3 ^= B0; \ + B0 = B1; \ + B1 = B4; \ + B4 = B3; \ + B3 = B2; \ + B2 = B4; \ + } while(0); + +#define SBoxD7(B0, B1, B2, B3) \ + do { \ + B0 ^= B2; \ + SIMD_32 B4 = B2; \ + B2 &= B0; \ + B4 ^= B3; \ + B2 = ~B2; \ + B3 ^= B1; \ + B2 ^= B3; \ + B4 |= B0; \ + B0 ^= B2; \ + B3 ^= B4; \ + B4 ^= B1; \ + B1 &= B3; \ + B1 ^= B0; \ + B0 ^= B3; \ + B0 |= B2; \ + B3 ^= B1; \ + B4 ^= B0; \ + B0 = B1; \ + B1 = B2; \ + B2 = B4; \ + } while(0); + +#define SBoxD8(B0, B1, B2, B3) \ + do { \ + SIMD_32 B4 = B2; \ + B2 ^= B0; \ + B0 &= B3; \ + B4 |= B3; \ + B2 = ~B2; \ + B3 ^= B1; \ + B1 |= B0; \ + B0 ^= B2; \ + B2 &= B4; \ + B3 &= B4; \ + B1 ^= B2; \ + B2 ^= B0; \ + B0 |= B2; \ + B4 ^= B1; \ + B0 ^= B3; \ + B3 ^= B4; \ + B4 |= B0; \ + B3 ^= B2; \ + B4 ^= B2; \ + B2 = B1; \ + B1 = B0; \ + B0 = B3; \ + B3 = B4; \ + } while(0); + +#endif diff --git a/src/block/serpent_sse2/info.txt b/src/block/serpent_sse2/info.txt deleted file mode 100644 index a4ec561a8..000000000 --- a/src/block/serpent_sse2/info.txt +++ /dev/null @@ -1,6 +0,0 @@ -define SERPENT_SSE2 - -<requires> -serpent -sse2_eng -</requires> diff --git a/src/block/serpent_sse2/serp_sse2_sbox.h b/src/block/serpent_sse2/serp_sse2_sbox.h deleted file mode 100644 index 40c552e87..000000000 --- a/src/block/serpent_sse2/serp_sse2_sbox.h +++ /dev/null @@ -1,434 +0,0 @@ -/* -* Serpent Sboxes in SSE2 form -* (C) 2009 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#ifndef SERPENT_SSE2_SBOXES_H__ -#define SERPENT_SSE2_SBOXES_H__ - -#define SBoxE1(B0, B1, B2, B3) \ - do { \ - B3 = _mm_xor_si128(B3, B0); \ - __m128i B4 = B1; \ - B1 = _mm_and_si128(B1, B3); \ - B4 = _mm_xor_si128(B4, B2); \ - B1 = _mm_xor_si128(B1, B0); \ - B0 = _mm_or_si128(B0, B3); \ - B0 = _mm_xor_si128(B0, B4); \ - B4 = _mm_xor_si128(B4, B3); \ - B3 = _mm_xor_si128(B3, B2); \ - B2 = _mm_or_si128(B2, B1); \ - B2 = _mm_xor_si128(B2, B4); \ - B4 = _mm_xor_si128(B4, all_ones); \ - B4 = _mm_or_si128(B4, B1); \ - B1 = _mm_xor_si128(B1, B3); \ - B1 = _mm_xor_si128(B1, B4); \ - B3 = _mm_or_si128(B3, B0); \ - B1 = _mm_xor_si128(B1, B3); \ - B4 = _mm_xor_si128(B4, B3); \ - B3 = B0; \ - B0 = B1; \ - B1 = B4; \ - } while(0); - -#define SBoxE2(B0, B1, B2, B3) \ - do { \ - B0 = _mm_xor_si128(B0, all_ones); \ - B2 = _mm_xor_si128(B2, all_ones); \ - __m128i B4 = B0; \ - B0 = _mm_and_si128(B0, B1); \ - B2 = _mm_xor_si128(B2, B0); \ - B0 = _mm_or_si128(B0, B3); \ - B3 = _mm_xor_si128(B3, B2); \ - B1 = _mm_xor_si128(B1, B0); \ - B0 = _mm_xor_si128(B0, B4); \ - B4 = _mm_or_si128(B4, B1); \ - B1 = _mm_xor_si128(B1, B3); \ - B2 = _mm_or_si128(B2, B0); \ - B2 = _mm_and_si128(B2, B4); \ - B0 = _mm_xor_si128(B0, B1); \ - B1 = _mm_and_si128(B1, B2); \ - B1 = _mm_xor_si128(B1, B0); \ - B0 = _mm_and_si128(B0, B2); \ - B4 = _mm_xor_si128(B4, B0); \ - B0 = B2; \ - B2 = B3; \ - B3 = B1; \ - B1 = B4; \ - } while(0); - -#define SBoxE3(B0, B1, B2, B3) \ - do { \ - __m128i B4 = B0; \ - B0 = _mm_and_si128(B0, B2); \ - B0 = _mm_xor_si128(B0, B3); \ - B2 = _mm_xor_si128(B2, B1); \ - B2 = _mm_xor_si128(B2, B0); \ - B3 = _mm_or_si128(B3, B4); \ - B3 = _mm_xor_si128(B3, B1); \ - B4 = _mm_xor_si128(B4, B2); \ - B1 = B3; \ - B3 = _mm_or_si128(B3, B4); \ - B3 = _mm_xor_si128(B3, B0); \ - B0 = _mm_and_si128(B0, B1); \ - B4 = _mm_xor_si128(B4, B0); \ - B1 = _mm_xor_si128(B1, B3); \ - B1 = _mm_xor_si128(B1, B4); \ - B4 = _mm_xor_si128(B4, all_ones); \ - B0 = B2; \ - B2 = B1; \ - B1 = B3; \ - B3 = B4; \ - } while(0); - -#define SBoxE4(B0, B1, B2, B3) \ - do { \ - __m128i B4 = B0; \ - B0 = _mm_or_si128(B0, B3); \ - B3 = _mm_xor_si128(B3, B1); \ - B1 = _mm_and_si128(B1, B4); \ - B4 = _mm_xor_si128(B4, B2); \ - B2 = _mm_xor_si128(B2, B3); \ - B3 = _mm_and_si128(B3, B0); \ - B4 = _mm_or_si128(B4, B1); \ - B3 = _mm_xor_si128(B3, B4); \ - B0 = _mm_xor_si128(B0, B1); \ - B4 = _mm_and_si128(B4, B0); \ - B1 = _mm_xor_si128(B1, B3); \ - B4 = _mm_xor_si128(B4, B2); \ - B1 = _mm_or_si128(B1, B0); \ - B1 = _mm_xor_si128(B1, B2); \ - B0 = _mm_xor_si128(B0, B3); \ - B2 = B1; \ - B1 = _mm_or_si128(B1, B3); \ - B0 = _mm_xor_si128(B0, B1); \ - B1 = B2; \ - B2 = B3; \ - B3 = B4; \ - } while(0); - -#define SBoxE5(B0, B1, B2, B3) \ - do { \ - B1 = _mm_xor_si128(B1, B3); \ - B3 = _mm_xor_si128(B3, all_ones); \ - B2 = _mm_xor_si128(B2, B3); \ - B3 = _mm_xor_si128(B3, B0); \ - __m128i B4 = B1; \ - B1 = _mm_and_si128(B1, B3); \ - B1 = _mm_xor_si128(B1, B2); \ - B4 = _mm_xor_si128(B4, B3); \ - B0 = _mm_xor_si128(B0, B4); \ - B2 = _mm_and_si128(B2, B4); \ - B2 = _mm_xor_si128(B2, B0); \ - B0 = _mm_and_si128(B0, B1); \ - B3 = _mm_xor_si128(B3, B0); \ - B4 = _mm_or_si128(B4, B1); \ - B4 = _mm_xor_si128(B4, B0); \ - B0 = _mm_or_si128(B0, B3); \ - B0 = _mm_xor_si128(B0, B2); \ - B2 = _mm_and_si128(B2, B3); \ - B0 = _mm_xor_si128(B0, all_ones); \ - B4 = _mm_xor_si128(B4, B2); \ - B2 = B0; \ - B0 = B1; \ - B1 = B4; \ - } while(0); - -#define SBoxE6(B0, B1, B2, B3) \ - do { \ - B0 = _mm_xor_si128(B0, B1); \ - B1 = _mm_xor_si128(B1, B3); \ - B3 = _mm_xor_si128(B3, all_ones); \ - __m128i B4 = B1; \ - B1 = _mm_and_si128(B1, B0); \ - B2 = _mm_xor_si128(B2, B3); \ - B1 = _mm_xor_si128(B1, B2); \ - B2 = _mm_or_si128(B2, B4); \ - B4 = _mm_xor_si128(B4, B3); \ - B3 = _mm_and_si128(B3, B1); \ - B3 = _mm_xor_si128(B3, B0); \ - B4 = _mm_xor_si128(B4, B1); \ - B4 = _mm_xor_si128(B4, B2); \ - B2 = _mm_xor_si128(B2, B0); \ - B0 = _mm_and_si128(B0, B3); \ - B2 = _mm_xor_si128(B2, all_ones); \ - B0 = _mm_xor_si128(B0, B4); \ - B4 = _mm_or_si128(B4, B3); \ - B4 = _mm_xor_si128(B4, B2); \ - B2 = B0; \ - B0 = B1; \ - B1 = B3; \ - B3 = B4; \ - } while(0); - -#define SBoxE7(B0, B1, B2, B3) \ - do { \ - B2 = _mm_xor_si128(B2, all_ones); \ - __m128i B4 = B3; \ - B3 = _mm_and_si128(B3, B0); \ - B0 = _mm_xor_si128(B0, B4); \ - B3 = _mm_xor_si128(B3, B2); \ - B2 = _mm_or_si128(B2, B4); \ - B1 = _mm_xor_si128(B1, B3); \ - B2 = _mm_xor_si128(B2, B0); \ - B0 = _mm_or_si128(B0, B1); \ - B2 = _mm_xor_si128(B2, B1); \ - B4 = _mm_xor_si128(B4, B0); \ - B0 = _mm_or_si128(B0, B3); \ - B0 = _mm_xor_si128(B0, B2); \ - B4 = _mm_xor_si128(B4, B3); \ - B4 = _mm_xor_si128(B4, B0); \ - B3 = _mm_xor_si128(B3, all_ones); \ - B2 = _mm_and_si128(B2, B4); \ - B3 = _mm_xor_si128(B3, B2); \ - B2 = B4; \ - } while(0); - -#define SBoxE8(B0, B1, B2, B3) \ - do { \ - __m128i B4 = B1; \ - B1 = _mm_or_si128(B1, B2); \ - B1 = _mm_xor_si128(B1, B3); \ - B4 = _mm_xor_si128(B4, B2); \ - B2 = _mm_xor_si128(B2, B1); \ - B3 = _mm_or_si128(B3, B4); \ - B3 = _mm_and_si128(B3, B0); \ - B4 = _mm_xor_si128(B4, B2); \ - B3 = _mm_xor_si128(B3, B1); \ - B1 = _mm_or_si128(B1, B4); \ - B1 = _mm_xor_si128(B1, B0); \ - B0 = _mm_or_si128(B0, B4); \ - B0 = _mm_xor_si128(B0, B2); \ - B1 = _mm_xor_si128(B1, B4); \ - B2 = _mm_xor_si128(B2, B1); \ - B1 = _mm_and_si128(B1, B0); \ - B1 = _mm_xor_si128(B1, B4); \ - B2 = _mm_xor_si128(B2, all_ones); \ - B2 = _mm_or_si128(B2, B0); \ - B4 = _mm_xor_si128(B4, B2); \ - B2 = B1; \ - B1 = B3; \ - B3 = B0; \ - B0 = B4; \ - } while(0); - -#define SBoxD1(B0, B1, B2, B3) \ - do \ - { \ - B2 = _mm_xor_si128(B2, all_ones); \ - __m128i B4 = B1; \ - B1 = _mm_or_si128(B1, B0); \ - B4 = _mm_xor_si128(B4, all_ones); \ - B1 = _mm_xor_si128(B1, B2); \ - B2 = _mm_or_si128(B2, B4); \ - B1 = _mm_xor_si128(B1, B3); \ - B0 = _mm_xor_si128(B0, B4); \ - B2 = _mm_xor_si128(B2, B0); \ - B0 = _mm_and_si128(B0, B3); \ - B4 = _mm_xor_si128(B4, B0); \ - B0 = _mm_or_si128(B0, B1); \ - B0 = _mm_xor_si128(B0, B2); \ - B3 = _mm_xor_si128(B3, B4); \ - B2 = _mm_xor_si128(B2, B1); \ - B3 = _mm_xor_si128(B3, B0); \ - B3 = _mm_xor_si128(B3, B1); \ - B2 = _mm_and_si128(B2, B3); \ - B4 = _mm_xor_si128(B4, B2); \ - B2 = B1; \ - B1 = B4; \ - } while(0); - -#define SBoxD2(B0, B1, B2, B3) \ - do \ - { \ - __m128i B4 = B1; \ - B1 = _mm_xor_si128(B1, B3); \ - B3 = _mm_and_si128(B3, B1); \ - B4 = _mm_xor_si128(B4, B2); \ - B3 = _mm_xor_si128(B3, B0); \ - B0 = _mm_or_si128(B0, B1); \ - B2 = _mm_xor_si128(B2, B3); \ - B0 = _mm_xor_si128(B0, B4); \ - B0 = _mm_or_si128(B0, B2); \ - B1 = _mm_xor_si128(B1, B3); \ - B0 = _mm_xor_si128(B0, B1); \ - B1 = _mm_or_si128(B1, B3); \ - B1 = _mm_xor_si128(B1, B0); \ - B4 = _mm_xor_si128(B4, all_ones); \ - B4 = _mm_xor_si128(B4, B1); \ - B1 = _mm_or_si128(B1, B0); \ - B1 = _mm_xor_si128(B1, B0); \ - B1 = _mm_or_si128(B1, B4); \ - B3 = _mm_xor_si128(B3, B1); \ - B1 = B0; \ - B0 = B4; \ - B4 = B2; \ - B2 = B3; \ - B3 = B4; \ - } while(0); - -#define SBoxD3(B0, B1, B2, B3) \ - do \ - { \ - B2 = _mm_xor_si128(B2, B3); \ - B3 = _mm_xor_si128(B3, B0); \ - __m128i B4 = B3; \ - B3 = _mm_and_si128(B3, B2); \ - B3 = _mm_xor_si128(B3, B1); \ - B1 = _mm_or_si128(B1, B2); \ - B1 = _mm_xor_si128(B1, B4); \ - B4 = _mm_and_si128(B4, B3); \ - B2 = _mm_xor_si128(B2, B3); \ - B4 = _mm_and_si128(B4, B0); \ - B4 = _mm_xor_si128(B4, B2); \ - B2 = _mm_and_si128(B2, B1); \ - B2 = _mm_or_si128(B2, B0); \ - B3 = _mm_xor_si128(B3, all_ones); \ - B2 = _mm_xor_si128(B2, B3); \ - B0 = _mm_xor_si128(B0, B3); \ - B0 = _mm_and_si128(B0, B1); \ - B3 = _mm_xor_si128(B3, B4); \ - B3 = _mm_xor_si128(B3, B0); \ - B0 = B1; \ - B1 = B4; \ - } while(0); - -#define SBoxD4(B0, B1, B2, B3) \ - do \ - { \ - __m128i B4 = B2; \ - B2 = _mm_xor_si128(B2, B1); \ - B0 = _mm_xor_si128(B0, B2); \ - B4 = _mm_and_si128(B4, B2); \ - B4 = _mm_xor_si128(B4, B0); \ - B0 = _mm_and_si128(B0, B1); \ - B1 = _mm_xor_si128(B1, B3); \ - B3 = _mm_or_si128(B3, B4); \ - B2 = _mm_xor_si128(B2, B3); \ - B0 = _mm_xor_si128(B0, B3); \ - B1 = _mm_xor_si128(B1, B4); \ - B3 = _mm_and_si128(B3, B2); \ - B3 = _mm_xor_si128(B3, B1); \ - B1 = _mm_xor_si128(B1, B0); \ - B1 = _mm_or_si128(B1, B2); \ - B0 = _mm_xor_si128(B0, B3); \ - B1 = _mm_xor_si128(B1, B4); \ - B0 = _mm_xor_si128(B0, B1); \ - B4 = B0; \ - B0 = B2; \ - B2 = B3; \ - B3 = B4; \ - } while(0); - -#define SBoxD5(B0, B1, B2, B3) \ - do \ - { \ - __m128i B4 = B2; \ - B2 = _mm_and_si128(B2, B3); \ - B2 = _mm_xor_si128(B2, B1); \ - B1 = _mm_or_si128(B1, B3); \ - B1 = _mm_and_si128(B1, B0); \ - B4 = _mm_xor_si128(B4, B2); \ - B4 = _mm_xor_si128(B4, B1); \ - B1 = _mm_and_si128(B1, B2); \ - B0 = _mm_xor_si128(B0, all_ones); \ - B3 = _mm_xor_si128(B3, B4); \ - B1 = _mm_xor_si128(B1, B3); \ - B3 = _mm_and_si128(B3, B0); \ - B3 = _mm_xor_si128(B3, B2); \ - B0 = _mm_xor_si128(B0, B1); \ - B2 = _mm_and_si128(B2, B0); \ - B3 = _mm_xor_si128(B3, B0); \ - B2 = _mm_xor_si128(B2, B4); \ - B2 = _mm_or_si128(B2, B3); \ - B3 = _mm_xor_si128(B3, B0); \ - B2 = _mm_xor_si128(B2, B1); \ - B1 = B3; \ - B3 = B4; \ - } while(0); - -#define SBoxD6(B0, B1, B2, B3) \ - do \ - { \ - B1 = _mm_xor_si128(B1, all_ones); \ - __m128i B4 = B3; \ - B2 = _mm_xor_si128(B2, B1); \ - B3 = _mm_or_si128(B3, B0); \ - B3 = _mm_xor_si128(B3, B2); \ - B2 = _mm_or_si128(B2, B1); \ - B2 = _mm_and_si128(B2, B0); \ - B4 = _mm_xor_si128(B4, B3); \ - B2 = _mm_xor_si128(B2, B4); \ - B4 = _mm_or_si128(B4, B0); \ - B4 = _mm_xor_si128(B4, B1); \ - B1 = _mm_and_si128(B1, B2); \ - B1 = _mm_xor_si128(B1, B3); \ - B4 = _mm_xor_si128(B4, B2); \ - B3 = _mm_and_si128(B3, B4); \ - B4 = _mm_xor_si128(B4, B1); \ - B3 = _mm_xor_si128(B3, B4); \ - B4 = _mm_xor_si128(B4, all_ones); \ - B3 = _mm_xor_si128(B3, B0); \ - B0 = B1; \ - B1 = B4; \ - B4 = B3; \ - B3 = B2; \ - B2 = B4; \ - } while(0); - -#define SBoxD7(B0, B1, B2, B3) \ - do \ - { \ - B0 = _mm_xor_si128(B0, B2); \ - __m128i B4 = B2; \ - B2 = _mm_and_si128(B2, B0); \ - B4 = _mm_xor_si128(B4, B3); \ - B2 = _mm_xor_si128(B2, all_ones); \ - B3 = _mm_xor_si128(B3, B1); \ - B2 = _mm_xor_si128(B2, B3); \ - B4 = _mm_or_si128(B4, B0); \ - B0 = _mm_xor_si128(B0, B2); \ - B3 = _mm_xor_si128(B3, B4); \ - B4 = _mm_xor_si128(B4, B1); \ - B1 = _mm_and_si128(B1, B3); \ - B1 = _mm_xor_si128(B1, B0); \ - B0 = _mm_xor_si128(B0, B3); \ - B0 = _mm_or_si128(B0, B2); \ - B3 = _mm_xor_si128(B3, B1); \ - B4 = _mm_xor_si128(B4, B0); \ - B0 = B1; \ - B1 = B2; \ - B2 = B4; \ - } while(0); - -#define SBoxD8(B0, B1, B2, B3) \ - do \ - { \ - __m128i B4 = B2; \ - B2 = _mm_xor_si128(B2, B0); \ - B0 = _mm_and_si128(B0, B3); \ - B4 = _mm_or_si128(B4, B3); \ - B2 = _mm_xor_si128(B2, all_ones); \ - B3 = _mm_xor_si128(B3, B1); \ - B1 = _mm_or_si128(B1, B0); \ - B0 = _mm_xor_si128(B0, B2); \ - B2 = _mm_and_si128(B2, B4); \ - B3 = _mm_and_si128(B3, B4); \ - B1 = _mm_xor_si128(B1, B2); \ - B2 = _mm_xor_si128(B2, B0); \ - B0 = _mm_or_si128(B0, B2); \ - B4 = _mm_xor_si128(B4, B1); \ - B0 = _mm_xor_si128(B0, B3); \ - B3 = _mm_xor_si128(B3, B4); \ - B4 = _mm_or_si128(B4, B0); \ - B3 = _mm_xor_si128(B3, B2); \ - B4 = _mm_xor_si128(B4, B2); \ - B2 = B1; \ - B1 = B0; \ - B0 = B3; \ - B3 = B4; \ - } while(0); - -#endif diff --git a/src/block/xtea/xtea.h b/src/block/xtea/xtea.h index f3b554edb..9982d0712 100644 --- a/src/block/xtea/xtea.h +++ b/src/block/xtea/xtea.h @@ -26,7 +26,7 @@ class BOTAN_DLL XTEA : public BlockCipher BlockCipher* clone() const { return new XTEA; } XTEA() : BlockCipher(8, 16) {} - private: + protected: void key_schedule(const byte[], u32bit); SecureBuffer<u32bit, 64> EK; }; diff --git a/src/block/xtea_simd/info.txt b/src/block/xtea_simd/info.txt new file mode 100644 index 000000000..0cdce14ef --- /dev/null +++ b/src/block/xtea_simd/info.txt @@ -0,0 +1,16 @@ +realname "XTEA (SIMD)" + +define XTEA_SIMD + +load_on auto + +<add> +xtea_simd.cpp +xtea_simd.h +</add> + +<requires> +xtea +simd_32 +simd_engine +</requires> diff --git a/src/block/xtea_simd/xtea_simd.cpp b/src/block/xtea_simd/xtea_simd.cpp new file mode 100644 index 000000000..6151c355c --- /dev/null +++ b/src/block/xtea_simd/xtea_simd.cpp @@ -0,0 +1,124 @@ +/* +* XTEA in SIMD +* (C) 2009 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#include <botan/xtea_simd.h> +#include <botan/loadstor.h> +#include <botan/simd_32.h> + +namespace Botan { + +namespace { + +void xtea_encrypt_8(const byte in[64], byte out[64], const u32bit EK[64]) + { + SIMD_32 L0 = SIMD_32::load_be(in ); + SIMD_32 R0 = SIMD_32::load_be(in + 16); + SIMD_32 L1 = SIMD_32::load_be(in + 32); + SIMD_32 R1 = SIMD_32::load_be(in + 48); + + SIMD_32::transpose(L0, R0, L1, R1); + + for(u32bit i = 0; i != 32; i += 2) + { + SIMD_32 K0(EK[2*i ]); + SIMD_32 K1(EK[2*i+1]); + SIMD_32 K2(EK[2*i+2]); + SIMD_32 K3(EK[2*i+3]); + + L0 += (((R0 << 4) ^ (R0 >> 5)) + R0) ^ K0; + L1 += (((R1 << 4) ^ (R1 >> 5)) + R1) ^ K0; + + R0 += (((L0 << 4) ^ (L0 >> 5)) + L0) ^ K1; + R1 += (((L1 << 4) ^ (L1 >> 5)) + L1) ^ K1; + + L0 += (((R0 << 4) ^ (R0 >> 5)) + R0) ^ K2; + L1 += (((R1 << 4) ^ (R1 >> 5)) + R1) ^ K2; + + R0 += (((L0 << 4) ^ (L0 >> 5)) + L0) ^ K3; + R1 += (((L1 << 4) ^ (L1 >> 5)) + L1) ^ K3; + } + + SIMD_32::transpose(L0, R0, L1, R1); + + L0.store_be(out); + R0.store_be(out + 16); + L1.store_be(out + 32); + R1.store_be(out + 48); + } + +void xtea_decrypt_8(const byte in[64], byte out[64], const u32bit EK[64]) + { + SIMD_32 L0 = SIMD_32::load_be(in ); + SIMD_32 R0 = SIMD_32::load_be(in + 16); + SIMD_32 L1 = SIMD_32::load_be(in + 32); + SIMD_32 R1 = SIMD_32::load_be(in + 48); + + SIMD_32::transpose(L0, R0, L1, R1); + + for(u32bit i = 0; i != 32; i += 2) + { + SIMD_32 K0(EK[63 - 2*i]); + SIMD_32 K1(EK[62 - 2*i]); + SIMD_32 K2(EK[61 - 2*i]); + SIMD_32 K3(EK[60 - 2*i]); + + R0 -= (((L0 << 4) ^ (L0 >> 5)) + L0) ^ K0; + R1 -= (((L1 << 4) ^ (L1 >> 5)) + L1) ^ K0; + + L0 -= (((R0 << 4) ^ (R0 >> 5)) + R0) ^ K1; + L1 -= (((R1 << 4) ^ (R1 >> 5)) + R1) ^ K1; + + R0 -= (((L0 << 4) ^ (L0 >> 5)) + L0) ^ K2; + R1 -= (((L1 << 4) ^ (L1 >> 5)) + L1) ^ K2; + + L0 -= (((R0 << 4) ^ (R0 >> 5)) + R0) ^ K3; + L1 -= (((R1 << 4) ^ (R1 >> 5)) + R1) ^ K3; + } + + SIMD_32::transpose(L0, R0, L1, R1); + + L0.store_be(out); + R0.store_be(out + 16); + L1.store_be(out + 32); + R1.store_be(out + 48); + } + +} + +/* +* XTEA Encryption +*/ +void XTEA_SIMD::encrypt_n(const byte in[], byte out[], u32bit blocks) const + { + while(blocks >= 8) + { + xtea_encrypt_8(in, out, this->EK); + in += 8 * BLOCK_SIZE; + out += 8 * BLOCK_SIZE; + blocks -= 8; + } + + XTEA::encrypt_n(in, out, blocks); + } + +/* +* XTEA Decryption +*/ +void XTEA_SIMD::decrypt_n(const byte in[], byte out[], u32bit blocks) const + { + while(blocks >= 8) + { + xtea_decrypt_8(in, out, this->EK); + in += 8 * BLOCK_SIZE; + out += 8 * BLOCK_SIZE; + blocks -= 8; + } + + XTEA::decrypt_n(in, out, blocks); + } + +} diff --git a/src/block/xtea_simd/xtea_simd.h b/src/block/xtea_simd/xtea_simd.h new file mode 100644 index 000000000..e4ce734ed --- /dev/null +++ b/src/block/xtea_simd/xtea_simd.h @@ -0,0 +1,28 @@ +/* +* XTEA in SIMD +* (C) 2009 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_XTEA_SIMD_H__ +#define BOTAN_XTEA_SIMD_H__ + +#include <botan/xtea.h> + +namespace Botan { + +/* +* XTEA (SIMD variant) +*/ +class BOTAN_DLL XTEA_SIMD : public XTEA + { + public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + BlockCipher* clone() const { return new XTEA_SIMD; } + }; + +} + +#endif |