From f51841ba5237952dda3e76df643d3ae13bed3df5 Mon Sep 17 00:00:00 2001 From: lloyd Date: Tue, 11 Aug 2009 02:31:17 +0000 Subject: Change the BlockCipher interface to support multi-block encryption and decryption. Currently only used for counter mode. Doesn't offer much advantage as-is (though might help slightly, in terms of cache effects), but allows for SIMD implementations to process multiple blocks in parallel when possible. Particularly thinking here of Serpent; TEA/XTEA also seem promising in this sense, as is Threefish once that is implemented as a standalone block cipher. --- src/block/aes/aes.cpp | 266 ++++++++++++++++++----------------- src/block/aes/aes.h | 6 +- src/block/block_cipher.h | 20 +-- src/block/blowfish/blowfish.cpp | 70 +++++---- src/block/blowfish/blowfish.h | 6 +- src/block/cast/cast128.cpp | 100 +++++++------ src/block/cast/cast128.h | 4 +- src/block/cast/cast256.cpp | 140 +++++++++--------- src/block/cast/cast256.h | 4 +- src/block/des/des.cpp | 144 +++++++++++-------- src/block/des/des.h | 8 +- src/block/des/desx.cpp | 28 ++-- src/block/des/desx.h | 4 +- src/block/gost_28147/gost_28147.cpp | 67 +++++---- src/block/gost_28147/gost_28147.h | 4 +- src/block/idea/idea.cpp | 132 +++++++++-------- src/block/idea/idea.h | 4 +- src/block/kasumi/kasumi.cpp | 110 ++++++++------- src/block/kasumi/kasumi.h | 4 +- src/block/lion/lion.cpp | 56 +++++--- src/block/lion/lion.h | 4 +- src/block/lubyrack/lubyrack.cpp | 104 ++++++++------ src/block/lubyrack/lubyrack.h | 4 +- src/block/mars/mars.cpp | 138 +++++++++--------- src/block/mars/mars.h | 4 +- src/block/misty1/misty1.cpp | 130 +++++++++-------- src/block/misty1/misty1.h | 4 +- src/block/noekeon/noekeon.cpp | 92 ++++++------ src/block/noekeon/noekeon.h | 4 +- src/block/rc2/rc2.cpp | 104 ++++++++------ src/block/rc2/rc2.h | 4 +- src/block/rc5/rc5.cpp | 72 ++++++---- src/block/rc5/rc5.h | 4 +- src/block/rc6/rc6.cpp | 140 +++++++++--------- src/block/rc6/rc6.h | 4 +- src/block/safer/safer_sk.cpp | 97 ++++++++----- src/block/safer/safer_sk.h | 4 +- src/block/seed/seed.cpp | 116 ++++++++------- src/block/seed/seed.h | 4 +- src/block/serpent/serpent.cpp | 172 +++++++++++----------- src/block/serpent/serpent.h | 4 +- src/block/serpent_ia32/serp_ia32.cpp | 18 ++- src/block/serpent_ia32/serp_ia32.h | 4 +- src/block/skipjack/skipjack.cpp | 68 +++++---- src/block/skipjack/skipjack.h | 4 +- src/block/square/square.cpp | 192 ++++++++++++++----------- src/block/square/square.h | 4 +- src/block/tea/tea.cpp | 52 ++++--- src/block/tea/tea.h | 4 +- src/block/twofish/twofish.cpp | 156 ++++++++++---------- src/block/twofish/twofish.h | 4 +- src/block/xtea/xtea.cpp | 44 +++--- src/block/xtea/xtea.h | 4 +- src/modes/ctr/ctr.cpp | 104 +++++++++++--- src/modes/ctr/ctr.h | 17 ++- 55 files changed, 1740 insertions(+), 1321 deletions(-) (limited to 'src') diff --git a/src/block/aes/aes.cpp b/src/block/aes/aes.cpp index 9072b507b..34698ae7f 100644 --- a/src/block/aes/aes.cpp +++ b/src/block/aes/aes.cpp @@ -1,6 +1,6 @@ /** * AES -* (C) 1999-2007 Jack Lloyd +* (C) 1999-2009 Jack Lloyd * * Distributed under the terms of the Botan license */ @@ -13,163 +13,175 @@ namespace Botan { /** * AES Encryption */ -void AES::enc(const byte in[], byte out[]) const +void AES::encrypt_n(const byte in[], byte out[], u32bit blocks) const { const u32bit* TE0 = TE; const u32bit* TE1 = TE + 256; const u32bit* TE2 = TE + 512; const u32bit* TE3 = TE + 768; - u32bit T0 = load_be(in, 0) ^ EK[0]; - u32bit T1 = load_be(in, 1) ^ EK[1]; - u32bit T2 = load_be(in, 2) ^ EK[2]; - u32bit T3 = load_be(in, 3) ^ EK[3]; - - u32bit B0, B1, B2, B3; - B0 = TE0[get_byte(0, T0)] ^ TE1[get_byte(1, T1)] ^ - TE2[get_byte(2, T2)] ^ TE3[get_byte(3, T3)] ^ EK[4]; - B1 = TE0[get_byte(0, T1)] ^ TE1[get_byte(1, T2)] ^ - TE2[get_byte(2, T3)] ^ TE3[get_byte(3, T0)] ^ EK[5]; - B2 = TE0[get_byte(0, T2)] ^ TE1[get_byte(1, T3)] ^ - TE2[get_byte(2, T0)] ^ TE3[get_byte(3, T1)] ^ EK[6]; - B3 = TE0[get_byte(0, T3)] ^ TE1[get_byte(1, T0)] ^ - TE2[get_byte(2, T1)] ^ TE3[get_byte(3, T2)] ^ EK[7]; - - for(u32bit j = 2; j != ROUNDS; j += 2) + for(u32bit i = 0; i != blocks; ++i) { - const u32bit K0 = EK[4*j]; - const u32bit K1 = EK[4*j+1]; - const u32bit K2 = EK[4*j+2]; - const u32bit K3 = EK[4*j+3]; - - T0 = TE0[get_byte(0, B0)] ^ TE1[get_byte(1, B1)] ^ - TE2[get_byte(2, B2)] ^ TE3[get_byte(3, B3)] ^ K0; - T1 = TE0[get_byte(0, B1)] ^ TE1[get_byte(1, B2)] ^ - TE2[get_byte(2, B3)] ^ TE3[get_byte(3, B0)] ^ K1; - T2 = TE0[get_byte(0, B2)] ^ TE1[get_byte(1, B3)] ^ - TE2[get_byte(2, B0)] ^ TE3[get_byte(3, B1)] ^ K2; - T3 = TE0[get_byte(0, B3)] ^ TE1[get_byte(1, B0)] ^ - TE2[get_byte(2, B1)] ^ TE3[get_byte(3, B2)] ^ K3; - - const u32bit K4 = EK[4*(j+1)+0]; - const u32bit K5 = EK[4*(j+1)+1]; - const u32bit K6 = EK[4*(j+1)+2]; - const u32bit K7 = EK[4*(j+1)+3]; + u32bit T0 = load_be(in, 0) ^ EK[0]; + u32bit T1 = load_be(in, 1) ^ EK[1]; + u32bit T2 = load_be(in, 2) ^ EK[2]; + u32bit T3 = load_be(in, 3) ^ EK[3]; + u32bit B0, B1, B2, B3; B0 = TE0[get_byte(0, T0)] ^ TE1[get_byte(1, T1)] ^ - TE2[get_byte(2, T2)] ^ TE3[get_byte(3, T3)] ^ K4; + TE2[get_byte(2, T2)] ^ TE3[get_byte(3, T3)] ^ EK[4]; B1 = TE0[get_byte(0, T1)] ^ TE1[get_byte(1, T2)] ^ - TE2[get_byte(2, T3)] ^ TE3[get_byte(3, T0)] ^ K5; + TE2[get_byte(2, T3)] ^ TE3[get_byte(3, T0)] ^ EK[5]; B2 = TE0[get_byte(0, T2)] ^ TE1[get_byte(1, T3)] ^ - TE2[get_byte(2, T0)] ^ TE3[get_byte(3, T1)] ^ K6; + TE2[get_byte(2, T0)] ^ TE3[get_byte(3, T1)] ^ EK[6]; B3 = TE0[get_byte(0, T3)] ^ TE1[get_byte(1, T0)] ^ - TE2[get_byte(2, T1)] ^ TE3[get_byte(3, T2)] ^ K7; - } + TE2[get_byte(2, T1)] ^ TE3[get_byte(3, T2)] ^ EK[7]; + + for(u32bit j = 2; j != ROUNDS; j += 2) + { + const u32bit K0 = EK[4*j]; + const u32bit K1 = EK[4*j+1]; + const u32bit K2 = EK[4*j+2]; + const u32bit K3 = EK[4*j+3]; + + T0 = TE0[get_byte(0, B0)] ^ TE1[get_byte(1, B1)] ^ + TE2[get_byte(2, B2)] ^ TE3[get_byte(3, B3)] ^ K0; + T1 = TE0[get_byte(0, B1)] ^ TE1[get_byte(1, B2)] ^ + TE2[get_byte(2, B3)] ^ TE3[get_byte(3, B0)] ^ K1; + T2 = TE0[get_byte(0, B2)] ^ TE1[get_byte(1, B3)] ^ + TE2[get_byte(2, B0)] ^ TE3[get_byte(3, B1)] ^ K2; + T3 = TE0[get_byte(0, B3)] ^ TE1[get_byte(1, B0)] ^ + TE2[get_byte(2, B1)] ^ TE3[get_byte(3, B2)] ^ K3; + + const u32bit K4 = EK[4*(j+1)+0]; + const u32bit K5 = EK[4*(j+1)+1]; + const u32bit K6 = EK[4*(j+1)+2]; + const u32bit K7 = EK[4*(j+1)+3]; - /* - Joseph Bonneau and Ilya Mironov's paper - - Cache-Collision Timing Attacks Against AES describes an attack - that can recover AES keys with as few as 213 samples. - - """In addition to OpenSSL v. 0.9.8.(a), which was used in our - experiments, the AES implementations of Crypto++ 5.2.1 and - LibTomCrypt 1.09 use the original Rijndael C implementation with - very few changes and are highly vulnerable. The AES implementations - in libgcrypt v. 1.2.2 and Botan v. 1.4.2 are also vulnerable, but - use a smaller byte-wide final table which lessens the effectiveness - of the attacks.""" - */ - out[ 0] = SE[get_byte(0, B0)] ^ ME[0]; - out[ 1] = SE[get_byte(1, B1)] ^ ME[1]; - out[ 2] = SE[get_byte(2, B2)] ^ ME[2]; - out[ 3] = SE[get_byte(3, B3)] ^ ME[3]; - out[ 4] = SE[get_byte(0, B1)] ^ ME[4]; - out[ 5] = SE[get_byte(1, B2)] ^ ME[5]; - out[ 6] = SE[get_byte(2, B3)] ^ ME[6]; - out[ 7] = SE[get_byte(3, B0)] ^ ME[7]; - out[ 8] = SE[get_byte(0, B2)] ^ ME[8]; - out[ 9] = SE[get_byte(1, B3)] ^ ME[9]; - out[10] = SE[get_byte(2, B0)] ^ ME[10]; - out[11] = SE[get_byte(3, B1)] ^ ME[11]; - out[12] = SE[get_byte(0, B3)] ^ ME[12]; - out[13] = SE[get_byte(1, B0)] ^ ME[13]; - out[14] = SE[get_byte(2, B1)] ^ ME[14]; - out[15] = SE[get_byte(3, B2)] ^ ME[15]; + B0 = TE0[get_byte(0, T0)] ^ TE1[get_byte(1, T1)] ^ + TE2[get_byte(2, T2)] ^ TE3[get_byte(3, T3)] ^ K4; + B1 = TE0[get_byte(0, T1)] ^ TE1[get_byte(1, T2)] ^ + TE2[get_byte(2, T3)] ^ TE3[get_byte(3, T0)] ^ K5; + B2 = TE0[get_byte(0, T2)] ^ TE1[get_byte(1, T3)] ^ + TE2[get_byte(2, T0)] ^ TE3[get_byte(3, T1)] ^ K6; + B3 = TE0[get_byte(0, T3)] ^ TE1[get_byte(1, T0)] ^ + TE2[get_byte(2, T1)] ^ TE3[get_byte(3, T2)] ^ K7; + } + + /* + Joseph Bonneau and Ilya Mironov's paper + + Cache-Collision Timing Attacks Against AES describes an attack + that can recover AES keys with as few as 213 samples. + + """In addition to OpenSSL v. 0.9.8.(a), which was used in our + experiments, the AES implementations of Crypto++ 5.2.1 and + LibTomCrypt 1.09 use the original Rijndael C implementation with + very few changes and are highly vulnerable. The AES implementations + in libgcrypt v. 1.2.2 and Botan v. 1.4.2 are also vulnerable, but + use a smaller byte-wide final table which lessens the effectiveness + of the attacks.""" + */ + out[ 0] = SE[get_byte(0, B0)] ^ ME[0]; + out[ 1] = SE[get_byte(1, B1)] ^ ME[1]; + out[ 2] = SE[get_byte(2, B2)] ^ ME[2]; + out[ 3] = SE[get_byte(3, B3)] ^ ME[3]; + out[ 4] = SE[get_byte(0, B1)] ^ ME[4]; + out[ 5] = SE[get_byte(1, B2)] ^ ME[5]; + out[ 6] = SE[get_byte(2, B3)] ^ ME[6]; + out[ 7] = SE[get_byte(3, B0)] ^ ME[7]; + out[ 8] = SE[get_byte(0, B2)] ^ ME[8]; + out[ 9] = SE[get_byte(1, B3)] ^ ME[9]; + out[10] = SE[get_byte(2, B0)] ^ ME[10]; + out[11] = SE[get_byte(3, B1)] ^ ME[11]; + out[12] = SE[get_byte(0, B3)] ^ ME[12]; + out[13] = SE[get_byte(1, B0)] ^ ME[13]; + out[14] = SE[get_byte(2, B1)] ^ ME[14]; + out[15] = SE[get_byte(3, B2)] ^ ME[15]; + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /** * AES Decryption */ -void AES::dec(const byte in[], byte out[]) const +void AES::decrypt_n(const byte in[], byte out[], u32bit blocks) const { const u32bit* TD0 = TD; const u32bit* TD1 = TD + 256; const u32bit* TD2 = TD + 512; const u32bit* TD3 = TD + 768; - u32bit T0 = load_be(in, 0) ^ DK[0]; - u32bit T1 = load_be(in, 1) ^ DK[1]; - u32bit T2 = load_be(in, 2) ^ DK[2]; - u32bit T3 = load_be(in, 3) ^ DK[3]; - - u32bit B0, B1, B2, B3; - B0 = TD0[get_byte(0, T0)] ^ TD1[get_byte(1, T3)] ^ - TD2[get_byte(2, T2)] ^ TD3[get_byte(3, T1)] ^ DK[4]; - B1 = TD0[get_byte(0, T1)] ^ TD1[get_byte(1, T0)] ^ - TD2[get_byte(2, T3)] ^ TD3[get_byte(3, T2)] ^ DK[5]; - B2 = TD0[get_byte(0, T2)] ^ TD1[get_byte(1, T1)] ^ - TD2[get_byte(2, T0)] ^ TD3[get_byte(3, T3)] ^ DK[6]; - B3 = TD0[get_byte(0, T3)] ^ TD1[get_byte(1, T2)] ^ - TD2[get_byte(2, T1)] ^ TD3[get_byte(3, T0)] ^ DK[7]; - - for(u32bit j = 2; j != ROUNDS; j += 2) + for(u32bit i = 0; i != blocks; ++i) { - const u32bit K0 = DK[4*j+0]; - const u32bit K1 = DK[4*j+1]; - const u32bit K2 = DK[4*j+2]; - const u32bit K3 = DK[4*j+3]; - - T0 = TD0[get_byte(0, B0)] ^ TD1[get_byte(1, B3)] ^ - TD2[get_byte(2, B2)] ^ TD3[get_byte(3, B1)] ^ K0; - T1 = TD0[get_byte(0, B1)] ^ TD1[get_byte(1, B0)] ^ - TD2[get_byte(2, B3)] ^ TD3[get_byte(3, B2)] ^ K1; - T2 = TD0[get_byte(0, B2)] ^ TD1[get_byte(1, B1)] ^ - TD2[get_byte(2, B0)] ^ TD3[get_byte(3, B3)] ^ K2; - T3 = TD0[get_byte(0, B3)] ^ TD1[get_byte(1, B2)] ^ - TD2[get_byte(2, B1)] ^ TD3[get_byte(3, B0)] ^ K3; - - const u32bit K4 = DK[4*(j+1)+0]; - const u32bit K5 = DK[4*(j+1)+1]; - const u32bit K6 = DK[4*(j+1)+2]; - const u32bit K7 = DK[4*(j+1)+3]; + u32bit T0 = load_be(in, 0) ^ DK[0]; + u32bit T1 = load_be(in, 1) ^ DK[1]; + u32bit T2 = load_be(in, 2) ^ DK[2]; + u32bit T3 = load_be(in, 3) ^ DK[3]; + u32bit B0, B1, B2, B3; B0 = TD0[get_byte(0, T0)] ^ TD1[get_byte(1, T3)] ^ - TD2[get_byte(2, T2)] ^ TD3[get_byte(3, T1)] ^ K4; + TD2[get_byte(2, T2)] ^ TD3[get_byte(3, T1)] ^ DK[4]; B1 = TD0[get_byte(0, T1)] ^ TD1[get_byte(1, T0)] ^ - TD2[get_byte(2, T3)] ^ TD3[get_byte(3, T2)] ^ K5; + TD2[get_byte(2, T3)] ^ TD3[get_byte(3, T2)] ^ DK[5]; B2 = TD0[get_byte(0, T2)] ^ TD1[get_byte(1, T1)] ^ - TD2[get_byte(2, T0)] ^ TD3[get_byte(3, T3)] ^ K6; + TD2[get_byte(2, T0)] ^ TD3[get_byte(3, T3)] ^ DK[6]; B3 = TD0[get_byte(0, T3)] ^ TD1[get_byte(1, T2)] ^ - TD2[get_byte(2, T1)] ^ TD3[get_byte(3, T0)] ^ K7; - } + TD2[get_byte(2, T1)] ^ TD3[get_byte(3, T0)] ^ DK[7]; + + for(u32bit j = 2; j != ROUNDS; j += 2) + { + const u32bit K0 = DK[4*j+0]; + const u32bit K1 = DK[4*j+1]; + const u32bit K2 = DK[4*j+2]; + const u32bit K3 = DK[4*j+3]; + + T0 = TD0[get_byte(0, B0)] ^ TD1[get_byte(1, B3)] ^ + TD2[get_byte(2, B2)] ^ TD3[get_byte(3, B1)] ^ K0; + T1 = TD0[get_byte(0, B1)] ^ TD1[get_byte(1, B0)] ^ + TD2[get_byte(2, B3)] ^ TD3[get_byte(3, B2)] ^ K1; + T2 = TD0[get_byte(0, B2)] ^ TD1[get_byte(1, B1)] ^ + TD2[get_byte(2, B0)] ^ TD3[get_byte(3, B3)] ^ K2; + T3 = TD0[get_byte(0, B3)] ^ TD1[get_byte(1, B2)] ^ + TD2[get_byte(2, B1)] ^ TD3[get_byte(3, B0)] ^ K3; + + const u32bit K4 = DK[4*(j+1)+0]; + const u32bit K5 = DK[4*(j+1)+1]; + const u32bit K6 = DK[4*(j+1)+2]; + const u32bit K7 = DK[4*(j+1)+3]; - out[ 0] = SD[get_byte(0, B0)] ^ MD[0]; - out[ 1] = SD[get_byte(1, B3)] ^ MD[1]; - out[ 2] = SD[get_byte(2, B2)] ^ MD[2]; - out[ 3] = SD[get_byte(3, B1)] ^ MD[3]; - out[ 4] = SD[get_byte(0, B1)] ^ MD[4]; - out[ 5] = SD[get_byte(1, B0)] ^ MD[5]; - out[ 6] = SD[get_byte(2, B3)] ^ MD[6]; - out[ 7] = SD[get_byte(3, B2)] ^ MD[7]; - out[ 8] = SD[get_byte(0, B2)] ^ MD[8]; - out[ 9] = SD[get_byte(1, B1)] ^ MD[9]; - out[10] = SD[get_byte(2, B0)] ^ MD[10]; - out[11] = SD[get_byte(3, B3)] ^ MD[11]; - out[12] = SD[get_byte(0, B3)] ^ MD[12]; - out[13] = SD[get_byte(1, B2)] ^ MD[13]; - out[14] = SD[get_byte(2, B1)] ^ MD[14]; - out[15] = SD[get_byte(3, B0)] ^ MD[15]; + B0 = TD0[get_byte(0, T0)] ^ TD1[get_byte(1, T3)] ^ + TD2[get_byte(2, T2)] ^ TD3[get_byte(3, T1)] ^ K4; + B1 = TD0[get_byte(0, T1)] ^ TD1[get_byte(1, T0)] ^ + TD2[get_byte(2, T3)] ^ TD3[get_byte(3, T2)] ^ K5; + B2 = TD0[get_byte(0, T2)] ^ TD1[get_byte(1, T1)] ^ + TD2[get_byte(2, T0)] ^ TD3[get_byte(3, T3)] ^ K6; + B3 = TD0[get_byte(0, T3)] ^ TD1[get_byte(1, T2)] ^ + TD2[get_byte(2, T1)] ^ TD3[get_byte(3, T0)] ^ K7; + } + + out[ 0] = SD[get_byte(0, B0)] ^ MD[0]; + out[ 1] = SD[get_byte(1, B3)] ^ MD[1]; + out[ 2] = SD[get_byte(2, B2)] ^ MD[2]; + out[ 3] = SD[get_byte(3, B1)] ^ MD[3]; + out[ 4] = SD[get_byte(0, B1)] ^ MD[4]; + out[ 5] = SD[get_byte(1, B0)] ^ MD[5]; + out[ 6] = SD[get_byte(2, B3)] ^ MD[6]; + out[ 7] = SD[get_byte(3, B2)] ^ MD[7]; + out[ 8] = SD[get_byte(0, B2)] ^ MD[8]; + out[ 9] = SD[get_byte(1, B1)] ^ MD[9]; + out[10] = SD[get_byte(2, B0)] ^ MD[10]; + out[11] = SD[get_byte(3, B3)] ^ MD[11]; + out[12] = SD[get_byte(0, B3)] ^ MD[12]; + out[13] = SD[get_byte(1, B2)] ^ MD[13]; + out[14] = SD[get_byte(2, B1)] ^ MD[14]; + out[15] = SD[get_byte(3, B0)] ^ MD[15]; + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /** diff --git a/src/block/aes/aes.h b/src/block/aes/aes.h index 05e2e3123..940e11a48 100644 --- a/src/block/aes/aes.h +++ b/src/block/aes/aes.h @@ -1,6 +1,6 @@ /** * AES -* (C) 1999-2007 Jack Lloyd +* (C) 1999-2009 Jack Lloyd * * Distributed under the terms of the Botan license */ @@ -24,8 +24,8 @@ class BOTAN_DLL AES : public BlockCipher AES() : BlockCipher(16, 16, 32, 8) { ROUNDS = 14; } AES(u32bit); private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); static u32bit S(u32bit); diff --git a/src/block/block_cipher.h b/src/block/block_cipher.h index 01c45af04..a27609171 100644 --- a/src/block/block_cipher.h +++ b/src/block/block_cipher.h @@ -1,6 +1,6 @@ /** * Block Cipher Base Class -* (C) 1999-2007 Jack Lloyd +* (C) 1999-2009 Jack Lloyd * * Distributed under the terms of the Botan license */ @@ -45,7 +45,8 @@ class BOTAN_DLL BlockCipher : public SymmetricAlgorithm * @param out The byte array designated to hold the encrypted block. * Must be of length BLOCK_SIZE. */ - void encrypt(const byte in[], byte out[]) const { enc(in, out); } + void encrypt(const byte in[], byte out[]) const + { encrypt_n(in, out, 1); } /** * Decrypt a block. @@ -54,7 +55,8 @@ class BOTAN_DLL BlockCipher : public SymmetricAlgorithm * @param out The byte array designated to hold the decrypted block. * Must be of length BLOCK_SIZE. */ - void decrypt(const byte in[], byte out[]) const { dec(in, out); } + void decrypt(const byte in[], byte out[]) const + { decrypt_n(in, out, 1); } /** * Encrypt a block. @@ -62,7 +64,7 @@ class BOTAN_DLL BlockCipher : public SymmetricAlgorithm * Must be of length BLOCK_SIZE. Will hold the result when the function * has finished. */ - void encrypt(byte block[]) const { enc(block, block); } + void encrypt(byte block[]) const { encrypt_n(block, block, 1); } /** * Decrypt a block. @@ -70,7 +72,12 @@ class BOTAN_DLL BlockCipher : public SymmetricAlgorithm * Must be of length BLOCK_SIZE. Will hold the result when the function * has finished. */ - void decrypt(byte block[]) const { dec(block, block); } + void decrypt(byte block[]) const { decrypt_n(block, block, 1); } + + virtual void encrypt_n(const byte in[], byte out[], + u32bit blocks) const = 0; + virtual void decrypt_n(const byte in[], byte out[], + u32bit blocks) const = 0; /** * Get a new object representing the same algorithm as *this @@ -90,9 +97,6 @@ class BOTAN_DLL BlockCipher : public SymmetricAlgorithm BLOCK_SIZE(block_size) {} virtual ~BlockCipher() {} - private: - virtual void enc(const byte[], byte[]) const = 0; - virtual void dec(const byte[], byte[]) const = 0; }; } diff --git a/src/block/blowfish/blowfish.cpp b/src/block/blowfish/blowfish.cpp index b0599d6c5..312603c3a 100644 --- a/src/block/blowfish/blowfish.cpp +++ b/src/block/blowfish/blowfish.cpp @@ -1,6 +1,6 @@ /* * Blowfish -* (C) 1999-2007 Jack Lloyd +* (C) 1999-2009 Jack Lloyd * * Distributed under the terms of the Botan license */ @@ -13,59 +13,71 @@ namespace Botan { /* * Blowfish Encryption */ -void Blowfish::enc(const byte in[], byte out[]) const +void Blowfish::encrypt_n(const byte in[], byte out[], u32bit blocks) const { const u32bit* S1 = S + 0; const u32bit* S2 = S + 256; const u32bit* S3 = S + 512; const u32bit* S4 = S + 768; - u32bit L = load_be(in, 0); - u32bit R = load_be(in, 1); - - for(u32bit j = 0; j != 16; j += 2) + for(u32bit i = 0; i != blocks; ++i) { - L ^= P[j]; - R ^= ((S1[get_byte(0, L)] + S2[get_byte(1, L)]) ^ - S3[get_byte(2, L)]) + S4[get_byte(3, L)]; + u32bit L = load_be(in, 0); + u32bit R = load_be(in, 1); - R ^= P[j+1]; - L ^= ((S1[get_byte(0, R)] + S2[get_byte(1, R)]) ^ - S3[get_byte(2, R)]) + S4[get_byte(3, R)]; - } + for(u32bit j = 0; j != 16; j += 2) + { + L ^= P[j]; + R ^= ((S1[get_byte(0, L)] + S2[get_byte(1, L)]) ^ + S3[get_byte(2, L)]) + S4[get_byte(3, L)]; - L ^= P[16]; R ^= P[17]; + R ^= P[j+1]; + L ^= ((S1[get_byte(0, R)] + S2[get_byte(1, R)]) ^ + S3[get_byte(2, R)]) + S4[get_byte(3, R)]; + } + + L ^= P[16]; R ^= P[17]; - store_be(out, R, L); + store_be(out, R, L); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * Blowfish Decryption */ -void Blowfish::dec(const byte in[], byte out[]) const +void Blowfish::decrypt_n(const byte in[], byte out[], u32bit blocks) const { const u32bit* S1 = S + 0; const u32bit* S2 = S + 256; const u32bit* S3 = S + 512; const u32bit* S4 = S + 768; - u32bit L = load_be(in, 0); - u32bit R = load_be(in, 1); - - for(u32bit j = 17; j != 1; j -= 2) + for(u32bit i = 0; i != blocks; ++i) { - L ^= P[j]; - R ^= ((S1[get_byte(0, L)] + S2[get_byte(1, L)]) ^ - S3[get_byte(2, L)]) + S4[get_byte(3, L)]; + u32bit L = load_be(in, 0); + u32bit R = load_be(in, 1); - R ^= P[j-1]; - L ^= ((S1[get_byte(0, R)] + S2[get_byte(1, R)]) ^ - S3[get_byte(2, R)]) + S4[get_byte(3, R)]; - } + for(u32bit j = 17; j != 1; j -= 2) + { + L ^= P[j]; + R ^= ((S1[get_byte(0, L)] + S2[get_byte(1, L)]) ^ + S3[get_byte(2, L)]) + S4[get_byte(3, L)]; - L ^= P[1]; R ^= P[0]; + R ^= P[j-1]; + L ^= ((S1[get_byte(0, R)] + S2[get_byte(1, R)]) ^ + S3[get_byte(2, R)]) + S4[get_byte(3, R)]; + } + + L ^= P[1]; R ^= P[0]; - store_be(out, R, L); + store_be(out, R, L); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/blowfish/blowfish.h b/src/block/blowfish/blowfish.h index f0f26418d..3623c2087 100644 --- a/src/block/blowfish/blowfish.h +++ b/src/block/blowfish/blowfish.h @@ -1,6 +1,6 @@ /* * Blowfish -* (C) 1999-2008 Jack Lloyd +* (C) 1999-2009 Jack Lloyd * * Distributed under the terms of the Botan license */ @@ -23,8 +23,8 @@ class BOTAN_DLL Blowfish : public BlockCipher BlockCipher* clone() const { return new Blowfish; } Blowfish() : BlockCipher(8, 1, 56) {} private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); void generate_sbox(u32bit[], u32bit, u32bit&, u32bit&) const; diff --git a/src/block/cast/cast128.cpp b/src/block/cast/cast128.cpp index 046638ab9..887dcf994 100644 --- a/src/block/cast/cast128.cpp +++ b/src/block/cast/cast128.cpp @@ -48,57 +48,69 @@ inline void R3(u32bit& L, u32bit R, u32bit MK, u32bit RK) /* * CAST-128 Encryption */ -void CAST_128::enc(const byte in[], byte out[]) const +void CAST_128::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit L = load_be(in, 0); - u32bit R = load_be(in, 1); - - R1(L, R, MK[ 0], RK[ 0]); - R2(R, L, MK[ 1], RK[ 1]); - R3(L, R, MK[ 2], RK[ 2]); - R1(R, L, MK[ 3], RK[ 3]); - R2(L, R, MK[ 4], RK[ 4]); - R3(R, L, MK[ 5], RK[ 5]); - R1(L, R, MK[ 6], RK[ 6]); - R2(R, L, MK[ 7], RK[ 7]); - R3(L, R, MK[ 8], RK[ 8]); - R1(R, L, MK[ 9], RK[ 9]); - R2(L, R, MK[10], RK[10]); - R3(R, L, MK[11], RK[11]); - R1(L, R, MK[12], RK[12]); - R2(R, L, MK[13], RK[13]); - R3(L, R, MK[14], RK[14]); - R1(R, L, MK[15], RK[15]); - - store_be(out, R, L); + for(u32bit i = 0; i != blocks; ++i) + { + u32bit L = load_be(in, 0); + u32bit R = load_be(in, 1); + + R1(L, R, MK[ 0], RK[ 0]); + R2(R, L, MK[ 1], RK[ 1]); + R3(L, R, MK[ 2], RK[ 2]); + R1(R, L, MK[ 3], RK[ 3]); + R2(L, R, MK[ 4], RK[ 4]); + R3(R, L, MK[ 5], RK[ 5]); + R1(L, R, MK[ 6], RK[ 6]); + R2(R, L, MK[ 7], RK[ 7]); + R3(L, R, MK[ 8], RK[ 8]); + R1(R, L, MK[ 9], RK[ 9]); + R2(L, R, MK[10], RK[10]); + R3(R, L, MK[11], RK[11]); + R1(L, R, MK[12], RK[12]); + R2(R, L, MK[13], RK[13]); + R3(L, R, MK[14], RK[14]); + R1(R, L, MK[15], RK[15]); + + store_be(out, R, L); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * CAST-128 Decryption */ -void CAST_128::dec(const byte in[], byte out[]) const +void CAST_128::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit L = load_be(in, 0); - u32bit R = load_be(in, 1); - - R1(L, R, MK[15], RK[15]); - R3(R, L, MK[14], RK[14]); - R2(L, R, MK[13], RK[13]); - R1(R, L, MK[12], RK[12]); - R3(L, R, MK[11], RK[11]); - R2(R, L, MK[10], RK[10]); - R1(L, R, MK[ 9], RK[ 9]); - R3(R, L, MK[ 8], RK[ 8]); - R2(L, R, MK[ 7], RK[ 7]); - R1(R, L, MK[ 6], RK[ 6]); - R3(L, R, MK[ 5], RK[ 5]); - R2(R, L, MK[ 4], RK[ 4]); - R1(L, R, MK[ 3], RK[ 3]); - R3(R, L, MK[ 2], RK[ 2]); - R2(L, R, MK[ 1], RK[ 1]); - R1(R, L, MK[ 0], RK[ 0]); - - store_be(out, R, L); + for(u32bit i = 0; i != blocks; ++i) + { + u32bit L = load_be(in, 0); + u32bit R = load_be(in, 1); + + R1(L, R, MK[15], RK[15]); + R3(R, L, MK[14], RK[14]); + R2(L, R, MK[13], RK[13]); + R1(R, L, MK[12], RK[12]); + R3(L, R, MK[11], RK[11]); + R2(R, L, MK[10], RK[10]); + R1(L, R, MK[ 9], RK[ 9]); + R3(R, L, MK[ 8], RK[ 8]); + R2(L, R, MK[ 7], RK[ 7]); + R1(R, L, MK[ 6], RK[ 6]); + R3(L, R, MK[ 5], RK[ 5]); + R2(R, L, MK[ 4], RK[ 4]); + R1(L, R, MK[ 3], RK[ 3]); + R3(R, L, MK[ 2], RK[ 2]); + R2(L, R, MK[ 1], RK[ 1]); + R1(R, L, MK[ 0], RK[ 0]); + + store_be(out, R, L); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/cast/cast128.h b/src/block/cast/cast128.h index 680481482..092ee824b 100644 --- a/src/block/cast/cast128.h +++ b/src/block/cast/cast128.h @@ -23,8 +23,8 @@ class BOTAN_DLL CAST_128 : public BlockCipher BlockCipher* clone() const { return new CAST_128; } CAST_128() : BlockCipher(8, 11, 16) {} private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); static void key_schedule(u32bit[16], u32bit[4]); diff --git a/src/block/cast/cast256.cpp b/src/block/cast/cast256.cpp index 22ff876fa..7a4a4e805 100644 --- a/src/block/cast/cast256.cpp +++ b/src/block/cast/cast256.cpp @@ -48,77 +48,89 @@ void round3(u32bit& out, u32bit in, u32bit mask, u32bit rot) /* * CAST-256 Encryption */ -void CAST_256::enc(const byte in[], byte out[]) const +void CAST_256::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit A = load_be(in, 0); - u32bit B = load_be(in, 1); - u32bit C = load_be(in, 2); - u32bit D = load_be(in, 3); - - round1(C, D, MK[ 0], RK[ 0]); round2(B, C, MK[ 1], RK[ 1]); - round3(A, B, MK[ 2], RK[ 2]); round1(D, A, MK[ 3], RK[ 3]); - round1(C, D, MK[ 4], RK[ 4]); round2(B, C, MK[ 5], RK[ 5]); - round3(A, B, MK[ 6], RK[ 6]); round1(D, A, MK[ 7], RK[ 7]); - round1(C, D, MK[ 8], RK[ 8]); round2(B, C, MK[ 9], RK[ 9]); - round3(A, B, MK[10], RK[10]); round1(D, A, MK[11], RK[11]); - round1(C, D, MK[12], RK[12]); round2(B, C, MK[13], RK[13]); - round3(A, B, MK[14], RK[14]); round1(D, A, MK[15], RK[15]); - round1(C, D, MK[16], RK[16]); round2(B, C, MK[17], RK[17]); - round3(A, B, MK[18], RK[18]); round1(D, A, MK[19], RK[19]); - round1(C, D, MK[20], RK[20]); round2(B, C, MK[21], RK[21]); - round3(A, B, MK[22], RK[22]); round1(D, A, MK[23], RK[23]); - round1(D, A, MK[27], RK[27]); round3(A, B, MK[26], RK[26]); - round2(B, C, MK[25], RK[25]); round1(C, D, MK[24], RK[24]); - round1(D, A, MK[31], RK[31]); round3(A, B, MK[30], RK[30]); - round2(B, C, MK[29], RK[29]); round1(C, D, MK[28], RK[28]); - round1(D, A, MK[35], RK[35]); round3(A, B, MK[34], RK[34]); - round2(B, C, MK[33], RK[33]); round1(C, D, MK[32], RK[32]); - round1(D, A, MK[39], RK[39]); round3(A, B, MK[38], RK[38]); - round2(B, C, MK[37], RK[37]); round1(C, D, MK[36], RK[36]); - round1(D, A, MK[43], RK[43]); round3(A, B, MK[42], RK[42]); - round2(B, C, MK[41], RK[41]); round1(C, D, MK[40], RK[40]); - round1(D, A, MK[47], RK[47]); round3(A, B, MK[46], RK[46]); - round2(B, C, MK[45], RK[45]); round1(C, D, MK[44], RK[44]); - - store_be(out, A, B, C, D); + for(u32bit i = 0; i != blocks; ++i) + { + u32bit A = load_be(in, 0); + u32bit B = load_be(in, 1); + u32bit C = load_be(in, 2); + u32bit D = load_be(in, 3); + + round1(C, D, MK[ 0], RK[ 0]); round2(B, C, MK[ 1], RK[ 1]); + round3(A, B, MK[ 2], RK[ 2]); round1(D, A, MK[ 3], RK[ 3]); + round1(C, D, MK[ 4], RK[ 4]); round2(B, C, MK[ 5], RK[ 5]); + round3(A, B, MK[ 6], RK[ 6]); round1(D, A, MK[ 7], RK[ 7]); + round1(C, D, MK[ 8], RK[ 8]); round2(B, C, MK[ 9], RK[ 9]); + round3(A, B, MK[10], RK[10]); round1(D, A, MK[11], RK[11]); + round1(C, D, MK[12], RK[12]); round2(B, C, MK[13], RK[13]); + round3(A, B, MK[14], RK[14]); round1(D, A, MK[15], RK[15]); + round1(C, D, MK[16], RK[16]); round2(B, C, MK[17], RK[17]); + round3(A, B, MK[18], RK[18]); round1(D, A, MK[19], RK[19]); + round1(C, D, MK[20], RK[20]); round2(B, C, MK[21], RK[21]); + round3(A, B, MK[22], RK[22]); round1(D, A, MK[23], RK[23]); + round1(D, A, MK[27], RK[27]); round3(A, B, MK[26], RK[26]); + round2(B, C, MK[25], RK[25]); round1(C, D, MK[24], RK[24]); + round1(D, A, MK[31], RK[31]); round3(A, B, MK[30], RK[30]); + round2(B, C, MK[29], RK[29]); round1(C, D, MK[28], RK[28]); + round1(D, A, MK[35], RK[35]); round3(A, B, MK[34], RK[34]); + round2(B, C, MK[33], RK[33]); round1(C, D, MK[32], RK[32]); + round1(D, A, MK[39], RK[39]); round3(A, B, MK[38], RK[38]); + round2(B, C, MK[37], RK[37]); round1(C, D, MK[36], RK[36]); + round1(D, A, MK[43], RK[43]); round3(A, B, MK[42], RK[42]); + round2(B, C, MK[41], RK[41]); round1(C, D, MK[40], RK[40]); + round1(D, A, MK[47], RK[47]); round3(A, B, MK[46], RK[46]); + round2(B, C, MK[45], RK[45]); round1(C, D, MK[44], RK[44]); + + store_be(out, A, B, C, D); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * CAST-256 Decryption */ -void CAST_256::dec(const byte in[], byte out[]) const +void CAST_256::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit A = load_be(in, 0); - u32bit B = load_be(in, 1); - u32bit C = load_be(in, 2); - u32bit D = load_be(in, 3); - - round1(C, D, MK[44], RK[44]); round2(B, C, MK[45], RK[45]); - round3(A, B, MK[46], RK[46]); round1(D, A, MK[47], RK[47]); - round1(C, D, MK[40], RK[40]); round2(B, C, MK[41], RK[41]); - round3(A, B, MK[42], RK[42]); round1(D, A, MK[43], RK[43]); - round1(C, D, MK[36], RK[36]); round2(B, C, MK[37], RK[37]); - round3(A, B, MK[38], RK[38]); round1(D, A, MK[39], RK[39]); - round1(C, D, MK[32], RK[32]); round2(B, C, MK[33], RK[33]); - round3(A, B, MK[34], RK[34]); round1(D, A, MK[35], RK[35]); - round1(C, D, MK[28], RK[28]); round2(B, C, MK[29], RK[29]); - round3(A, B, MK[30], RK[30]); round1(D, A, MK[31], RK[31]); - round1(C, D, MK[24], RK[24]); round2(B, C, MK[25], RK[25]); - round3(A, B, MK[26], RK[26]); round1(D, A, MK[27], RK[27]); - round1(D, A, MK[23], RK[23]); round3(A, B, MK[22], RK[22]); - round2(B, C, MK[21], RK[21]); round1(C, D, MK[20], RK[20]); - round1(D, A, MK[19], RK[19]); round3(A, B, MK[18], RK[18]); - round2(B, C, MK[17], RK[17]); round1(C, D, MK[16], RK[16]); - round1(D, A, MK[15], RK[15]); round3(A, B, MK[14], RK[14]); - round2(B, C, MK[13], RK[13]); round1(C, D, MK[12], RK[12]); - round1(D, A, MK[11], RK[11]); round3(A, B, MK[10], RK[10]); - round2(B, C, MK[ 9], RK[ 9]); round1(C, D, MK[ 8], RK[ 8]); - round1(D, A, MK[ 7], RK[ 7]); round3(A, B, MK[ 6], RK[ 6]); - round2(B, C, MK[ 5], RK[ 5]); round1(C, D, MK[ 4], RK[ 4]); - round1(D, A, MK[ 3], RK[ 3]); round3(A, B, MK[ 2], RK[ 2]); - round2(B, C, MK[ 1], RK[ 1]); round1(C, D, MK[ 0], RK[ 0]); - - store_be(out, A, B, C, D); + for(u32bit i = 0; i != blocks; ++i) + { + u32bit A = load_be(in, 0); + u32bit B = load_be(in, 1); + u32bit C = load_be(in, 2); + u32bit D = load_be(in, 3); + + round1(C, D, MK[44], RK[44]); round2(B, C, MK[45], RK[45]); + round3(A, B, MK[46], RK[46]); round1(D, A, MK[47], RK[47]); + round1(C, D, MK[40], RK[40]); round2(B, C, MK[41], RK[41]); + round3(A, B, MK[42], RK[42]); round1(D, A, MK[43], RK[43]); + round1(C, D, MK[36], RK[36]); round2(B, C, MK[37], RK[37]); + round3(A, B, MK[38], RK[38]); round1(D, A, MK[39], RK[39]); + round1(C, D, MK[32], RK[32]); round2(B, C, MK[33], RK[33]); + round3(A, B, MK[34], RK[34]); round1(D, A, MK[35], RK[35]); + round1(C, D, MK[28], RK[28]); round2(B, C, MK[29], RK[29]); + round3(A, B, MK[30], RK[30]); round1(D, A, MK[31], RK[31]); + round1(C, D, MK[24], RK[24]); round2(B, C, MK[25], RK[25]); + round3(A, B, MK[26], RK[26]); round1(D, A, MK[27], RK[27]); + round1(D, A, MK[23], RK[23]); round3(A, B, MK[22], RK[22]); + round2(B, C, MK[21], RK[21]); round1(C, D, MK[20], RK[20]); + round1(D, A, MK[19], RK[19]); round3(A, B, MK[18], RK[18]); + round2(B, C, MK[17], RK[17]); round1(C, D, MK[16], RK[16]); + round1(D, A, MK[15], RK[15]); round3(A, B, MK[14], RK[14]); + round2(B, C, MK[13], RK[13]); round1(C, D, MK[12], RK[12]); + round1(D, A, MK[11], RK[11]); round3(A, B, MK[10], RK[10]); + round2(B, C, MK[ 9], RK[ 9]); round1(C, D, MK[ 8], RK[ 8]); + round1(D, A, MK[ 7], RK[ 7]); round3(A, B, MK[ 6], RK[ 6]); + round2(B, C, MK[ 5], RK[ 5]); round1(C, D, MK[ 4], RK[ 4]); + round1(D, A, MK[ 3], RK[ 3]); round3(A, B, MK[ 2], RK[ 2]); + round2(B, C, MK[ 1], RK[ 1]); round1(C, D, MK[ 0], RK[ 0]); + + store_be(out, A, B, C, D); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/cast/cast256.h b/src/block/cast/cast256.h index cd48edd5e..ea80df65d 100644 --- a/src/block/cast/cast256.h +++ b/src/block/cast/cast256.h @@ -23,8 +23,8 @@ class BOTAN_DLL CAST_256 : public BlockCipher BlockCipher* clone() const { return new CAST_256; } CAST_256() : BlockCipher(16, 4, 32, 4) {} private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); static const u32bit KEY_MASK[192]; diff --git a/src/block/des/des.cpp b/src/block/des/des.cpp index 37520e0fc..1c9d37e6b 100644 --- a/src/block/des/des.cpp +++ b/src/block/des/des.cpp @@ -139,51 +139,63 @@ void des_decrypt(u32bit& L, u32bit& R, /* * DES Encryption */ -void DES::enc(const byte in[], byte out[]) const +void DES::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u64bit T = (DES_IPTAB1[in[0]] ) | (DES_IPTAB1[in[1]] << 1) | - (DES_IPTAB1[in[2]] << 2) | (DES_IPTAB1[in[3]] << 3) | - (DES_IPTAB1[in[4]] << 4) | (DES_IPTAB1[in[5]] << 5) | - (DES_IPTAB1[in[6]] << 6) | (DES_IPTAB2[in[7]] ); + for(u32bit i = 0; i != blocks; ++i) + { + u64bit T = (DES_IPTAB1[in[0]] ) | (DES_IPTAB1[in[1]] << 1) | + (DES_IPTAB1[in[2]] << 2) | (DES_IPTAB1[in[3]] << 3) | + (DES_IPTAB1[in[4]] << 4) | (DES_IPTAB1[in[5]] << 5) | + (DES_IPTAB1[in[6]] << 6) | (DES_IPTAB2[in[7]] ); + + u32bit L = static_cast(T >> 32); + u32bit R = static_cast(T); - u32bit L = static_cast(T >> 32); - u32bit R = static_cast(T); + des_encrypt(L, R, round_key); - des_encrypt(L, R, round_key); + T = (DES_FPTAB1[get_byte(0, L)] << 5) | (DES_FPTAB1[get_byte(1, L)] << 3) | + (DES_FPTAB1[get_byte(2, L)] << 1) | (DES_FPTAB2[get_byte(3, L)] << 1) | + (DES_FPTAB1[get_byte(0, R)] << 4) | (DES_FPTAB1[get_byte(1, R)] << 2) | + (DES_FPTAB1[get_byte(2, R)] ) | (DES_FPTAB2[get_byte(3, R)] ); - T = (DES_FPTAB1[get_byte(0, L)] << 5) | (DES_FPTAB1[get_byte(1, L)] << 3) | - (DES_FPTAB1[get_byte(2, L)] << 1) | (DES_FPTAB2[get_byte(3, L)] << 1) | - (DES_FPTAB1[get_byte(0, R)] << 4) | (DES_FPTAB1[get_byte(1, R)] << 2) | - (DES_FPTAB1[get_byte(2, R)] ) | (DES_FPTAB2[get_byte(3, R)] ); + T = rotate_left(T, 32); - T = rotate_left(T, 32); + store_be(T, out); - store_be(T, out); + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * DES Decryption */ -void DES::dec(const byte in[], byte out[]) const +void DES::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u64bit T = (DES_IPTAB1[in[0]] ) | (DES_IPTAB1[in[1]] << 1) | - (DES_IPTAB1[in[2]] << 2) | (DES_IPTAB1[in[3]] << 3) | - (DES_IPTAB1[in[4]] << 4) | (DES_IPTAB1[in[5]] << 5) | - (DES_IPTAB1[in[6]] << 6) | (DES_IPTAB2[in[7]] ); + for(u32bit i = 0; i != blocks; ++i) + { + u64bit T = (DES_IPTAB1[in[0]] ) | (DES_IPTAB1[in[1]] << 1) | + (DES_IPTAB1[in[2]] << 2) | (DES_IPTAB1[in[3]] << 3) | + (DES_IPTAB1[in[4]] << 4) | (DES_IPTAB1[in[5]] << 5) | + (DES_IPTAB1[in[6]] << 6) | (DES_IPTAB2[in[7]] ); + + u32bit L = static_cast(T >> 32); + u32bit R = static_cast(T); - u32bit L = static_cast(T >> 32); - u32bit R = static_cast(T); + des_decrypt(L, R, round_key); - des_decrypt(L, R, round_key); + T = (DES_FPTAB1[get_byte(0, L)] << 5) | (DES_FPTAB1[get_byte(1, L)] << 3) | + (DES_FPTAB1[get_byte(2, L)] << 1) | (DES_FPTAB2[get_byte(3, L)] << 1) | + (DES_FPTAB1[get_byte(0, R)] << 4) | (DES_FPTAB1[get_byte(1, R)] << 2) | + (DES_FPTAB1[get_byte(2, R)] ) | (DES_FPTAB2[get_byte(3, R)] ); - T = (DES_FPTAB1[get_byte(0, L)] << 5) | (DES_FPTAB1[get_byte(1, L)] << 3) | - (DES_FPTAB1[get_byte(2, L)] << 1) | (DES_FPTAB2[get_byte(3, L)] << 1) | - (DES_FPTAB1[get_byte(0, R)] << 4) | (DES_FPTAB1[get_byte(1, R)] << 2) | - (DES_FPTAB1[get_byte(2, R)] ) | (DES_FPTAB2[get_byte(3, R)] ); + T = rotate_left(T, 32); - T = rotate_left(T, 32); + store_be(T, out); - store_be(T, out); + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* @@ -197,55 +209,67 @@ void DES::key_schedule(const byte key[], u32bit) /* * TripleDES Encryption */ -void TripleDES::enc(const byte in[], byte out[]) const +void TripleDES::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u64bit T = (DES_IPTAB1[in[0]] ) | (DES_IPTAB1[in[1]] << 1) | - (DES_IPTAB1[in[2]] << 2) | (DES_IPTAB1[in[3]] << 3) | - (DES_IPTAB1[in[4]] << 4) | (DES_IPTAB1[in[5]] << 5) | - (DES_IPTAB1[in[6]] << 6) | (DES_IPTAB2[in[7]] ); + for(u32bit i = 0; i != blocks; ++i) + { + u64bit T = (DES_IPTAB1[in[0]] ) | (DES_IPTAB1[in[1]] << 1) | + (DES_IPTAB1[in[2]] << 2) | (DES_IPTAB1[in[3]] << 3) | + (DES_IPTAB1[in[4]] << 4) | (DES_IPTAB1[in[5]] << 5) | + (DES_IPTAB1[in[6]] << 6) | (DES_IPTAB2[in[7]] ); + + u32bit L = static_cast(T >> 32); + u32bit R = static_cast(T); - u32bit L = static_cast(T >> 32); - u32bit R = static_cast(T); + des_encrypt(L, R, round_key); + des_decrypt(R, L, round_key + 32); + des_encrypt(L, R, round_key + 64); - des_encrypt(L, R, round_key); - des_decrypt(R, L, round_key + 32); - des_encrypt(L, R, round_key + 64); + T = (DES_FPTAB1[get_byte(0, L)] << 5) | (DES_FPTAB1[get_byte(1, L)] << 3) | + (DES_FPTAB1[get_byte(2, L)] << 1) | (DES_FPTAB2[get_byte(3, L)] << 1) | + (DES_FPTAB1[get_byte(0, R)] << 4) | (DES_FPTAB1[get_byte(1, R)] << 2) | + (DES_FPTAB1[get_byte(2, R)] ) | (DES_FPTAB2[get_byte(3, R)] ); - T = (DES_FPTAB1[get_byte(0, L)] << 5) | (DES_FPTAB1[get_byte(1, L)] << 3) | - (DES_FPTAB1[get_byte(2, L)] << 1) | (DES_FPTAB2[get_byte(3, L)] << 1) | - (DES_FPTAB1[get_byte(0, R)] << 4) | (DES_FPTAB1[get_byte(1, R)] << 2) | - (DES_FPTAB1[get_byte(2, R)] ) | (DES_FPTAB2[get_byte(3, R)] ); + T = rotate_left(T, 32); - T = rotate_left(T, 32); + store_be(T, out); - store_be(T, out); + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * TripleDES Decryption */ -void TripleDES::dec(const byte in[], byte out[]) const +void TripleDES::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u64bit T = (DES_IPTAB1[in[0]] ) | (DES_IPTAB1[in[1]] << 1) | - (DES_IPTAB1[in[2]] << 2) | (DES_IPTAB1[in[3]] << 3) | - (DES_IPTAB1[in[4]] << 4) | (DES_IPTAB1[in[5]] << 5) | - (DES_IPTAB1[in[6]] << 6) | (DES_IPTAB2[in[7]] ); + for(u32bit i = 0; i != blocks; ++i) + { + u64bit T = (DES_IPTAB1[in[0]] ) | (DES_IPTAB1[in[1]] << 1) | + (DES_IPTAB1[in[2]] << 2) | (DES_IPTAB1[in[3]] << 3) | + (DES_IPTAB1[in[4]] << 4) | (DES_IPTAB1[in[5]] << 5) | + (DES_IPTAB1[in[6]] << 6) | (DES_IPTAB2[in[7]] ); + + u32bit L = static_cast(T >> 32); + u32bit R = static_cast(T); - u32bit L = static_cast(T >> 32); - u32bit R = static_cast(T); + des_decrypt(L, R, round_key + 64); + des_encrypt(R, L, round_key + 32); + des_decrypt(L, R, round_key); - des_decrypt(L, R, round_key + 64); - des_encrypt(R, L, round_key + 32); - des_decrypt(L, R, round_key); + T = (DES_FPTAB1[get_byte(0, L)] << 5) | (DES_FPTAB1[get_byte(1, L)] << 3) | + (DES_FPTAB1[get_byte(2, L)] << 1) | (DES_FPTAB2[get_byte(3, L)] << 1) | + (DES_FPTAB1[get_byte(0, R)] << 4) | (DES_FPTAB1[get_byte(1, R)] << 2) | + (DES_FPTAB1[get_byte(2, R)] ) | (DES_FPTAB2[get_byte(3, R)] ); - T = (DES_FPTAB1[get_byte(0, L)] << 5) | (DES_FPTAB1[get_byte(1, L)] << 3) | - (DES_FPTAB1[get_byte(2, L)] << 1) | (DES_FPTAB2[get_byte(3, L)] << 1) | - (DES_FPTAB1[get_byte(0, R)] << 4) | (DES_FPTAB1[get_byte(1, R)] << 2) | - (DES_FPTAB1[get_byte(2, R)] ) | (DES_FPTAB2[get_byte(3, R)] ); + T = rotate_left(T, 32); - T = rotate_left(T, 32); + store_be(T, out); - store_be(T, out); + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/des/des.h b/src/block/des/des.h index 6fa59de5e..39d1ac404 100644 --- a/src/block/des/des.h +++ b/src/block/des/des.h @@ -23,8 +23,8 @@ class BOTAN_DLL DES : public BlockCipher BlockCipher* clone() const { return new DES; } DES() : BlockCipher(8, 8) {} private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer round_key; @@ -41,8 +41,8 @@ class BOTAN_DLL TripleDES : public BlockCipher BlockCipher* clone() const { return new TripleDES; } TripleDES() : BlockCipher(8, 16, 24, 8) {} private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer round_key; diff --git a/src/block/des/desx.cpp b/src/block/des/desx.cpp index e557901d3..1fc1c47f2 100644 --- a/src/block/des/desx.cpp +++ b/src/block/des/desx.cpp @@ -13,21 +13,33 @@ namespace Botan { /* * DESX Encryption */ -void DESX::enc(const byte in[], byte out[]) const +void DESX::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - xor_buf(out, in, K1.begin(), BLOCK_SIZE); - des.encrypt(out); - xor_buf(out, K2.begin(), BLOCK_SIZE); + for(u32bit i = 0; i != blocks; ++i) + { + xor_buf(out, in, K1.begin(), BLOCK_SIZE); + des.encrypt(out); + xor_buf(out, K2.begin(), BLOCK_SIZE); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * DESX Decryption */ -void DESX::dec(const byte in[], byte out[]) const +void DESX::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - xor_buf(out, in, K2.begin(), BLOCK_SIZE); - des.decrypt(out); - xor_buf(out, K1.begin(), BLOCK_SIZE); + for(u32bit i = 0; i != blocks; ++i) + { + xor_buf(out, in, K2.begin(), BLOCK_SIZE); + des.decrypt(out); + xor_buf(out, K1.begin(), BLOCK_SIZE); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/des/desx.h b/src/block/des/desx.h index 49ecc2421..d71335071 100644 --- a/src/block/des/desx.h +++ b/src/block/des/desx.h @@ -23,8 +23,8 @@ class BOTAN_DLL DESX : public BlockCipher BlockCipher* clone() const { return new DESX; } DESX() : BlockCipher(8, 24) {} private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer K1, K2; DES des; diff --git a/src/block/gost_28147/gost_28147.cpp b/src/block/gost_28147/gost_28147.cpp index bfd092c56..272f1bcab 100644 --- a/src/block/gost_28147/gost_28147.cpp +++ b/src/block/gost_28147/gost_28147.cpp @@ -84,47 +84,58 @@ GOST_28147_89::GOST_28147_89(const GOST_28147_89_Params& param) : /* * GOST Encryption */ -void GOST_28147_89::enc(const byte in[], byte out[]) const +void GOST_28147_89::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit N1 = load_le(in, 0), N2 = load_le(in, 1); - - for(size_t i = 0; i != 3; ++i) + for(u32bit i = 0; i != blocks; ++i) { - GOST_2ROUND(N1, N2, 0, 1); - GOST_2ROUND(N1, N2, 2, 3); - GOST_2ROUND(N1, N2, 4, 5); - GOST_2ROUND(N1, N2, 6, 7); - } + u32bit N1 = load_le(in, 0), N2 = load_le(in, 1); - GOST_2ROUND(N1, N2, 7, 6); - GOST_2ROUND(N1, N2, 5, 4); - GOST_2ROUND(N1, N2, 3, 2); - GOST_2ROUND(N1, N2, 1, 0); + for(size_t j = 0; j != 3; ++j) + { + GOST_2ROUND(N1, N2, 0, 1); + GOST_2ROUND(N1, N2, 2, 3); + GOST_2ROUND(N1, N2, 4, 5); + GOST_2ROUND(N1, N2, 6, 7); + } - store_le(out, N2, N1); + GOST_2ROUND(N1, N2, 7, 6); + GOST_2ROUND(N1, N2, 5, 4); + GOST_2ROUND(N1, N2, 3, 2); + GOST_2ROUND(N1, N2, 1, 0); + + store_le(out, N2, N1); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * GOST Decryption */ -void GOST_28147_89::dec(const byte in[], byte out[]) const +void GOST_28147_89::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit N1 = load_le(in, 0), N2 = load_le(in, 1); + for(u32bit i = 0; i != blocks; ++i) + { + u32bit N1 = load_le(in, 0), N2 = load_le(in, 1); - GOST_2ROUND(N1, N2, 0, 1); - GOST_2ROUND(N1, N2, 2, 3); - GOST_2ROUND(N1, N2, 4, 5); - GOST_2ROUND(N1, N2, 6, 7); + GOST_2ROUND(N1, N2, 0, 1); + GOST_2ROUND(N1, N2, 2, 3); + GOST_2ROUND(N1, N2, 4, 5); + GOST_2ROUND(N1, N2, 6, 7); - for(size_t i = 0; i != 3; ++i) - { - GOST_2ROUND(N1, N2, 7, 6); - GOST_2ROUND(N1, N2, 5, 4); - GOST_2ROUND(N1, N2, 3, 2); - GOST_2ROUND(N1, N2, 1, 0); - } + for(size_t i = 0; i != 3; ++i) + { + GOST_2ROUND(N1, N2, 7, 6); + GOST_2ROUND(N1, N2, 5, 4); + GOST_2ROUND(N1, N2, 3, 2); + GOST_2ROUND(N1, N2, 1, 0); + } - store_le(out, N2, N1); + store_le(out, N2, N1); + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/gost_28147/gost_28147.h b/src/block/gost_28147/gost_28147.h index 96d24c669..825e106a2 100644 --- a/src/block/gost_28147/gost_28147.h +++ b/src/block/gost_28147/gost_28147.h @@ -54,8 +54,8 @@ class BOTAN_DLL GOST_28147_89 : public BlockCipher GOST_28147_89(const SecureBuffer& other_SBOX) : BlockCipher(8, 32), SBOX(other_SBOX) {} - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer SBOX; diff --git a/src/block/idea/idea.cpp b/src/block/idea/idea.cpp index 5bbe47087..fb5fe83f1 100644 --- a/src/block/idea/idea.cpp +++ b/src/block/idea/idea.cpp @@ -60,77 +60,89 @@ u16bit mul_inv(u16bit x) /* * IDEA Encryption */ -void IDEA::enc(const byte in[], byte out[]) const +void IDEA::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u16bit X1 = load_be(in, 0); - u16bit X2 = load_be(in, 1); - u16bit X3 = load_be(in, 2); - u16bit X4 = load_be(in, 3); - - for(u32bit j = 0; j != 8; ++j) + for(u32bit i = 0; i != blocks; ++i) { - X1 = mul(X1, EK[6*j+0]); - X2 += EK[6*j+1]; - X3 += EK[6*j+2]; - X4 = mul(X4, EK[6*j+3]); - - u16bit T0 = X3; - X3 = mul(X3 ^ X1, EK[6*j+4]); - - u16bit T1 = X2; - X2 = mul((X2 ^ X4) + X3, EK[6*j+5]); - X3 += X2; - - X1 ^= X2; - X4 ^= X3; - X2 ^= T0; - X3 ^= T1; + u16bit X1 = load_be(in, 0); + u16bit X2 = load_be(in, 1); + u16bit X3 = load_be(in, 2); + u16bit X4 = load_be(in, 3); + + for(u32bit j = 0; j != 8; ++j) + { + X1 = mul(X1, EK[6*j+0]); + X2 += EK[6*j+1]; + X3 += EK[6*j+2]; + X4 = mul(X4, EK[6*j+3]); + + u16bit T0 = X3; + X3 = mul(X3 ^ X1, EK[6*j+4]); + + u16bit T1 = X2; + X2 = mul((X2 ^ X4) + X3, EK[6*j+5]); + X3 += X2; + + X1 ^= X2; + X4 ^= X3; + X2 ^= T0; + X3 ^= T1; + } + + X1 = mul(X1, EK[48]); + X2 += EK[50]; + X3 += EK[49]; + X4 = mul(X4, EK[51]); + + store_be(out, X1, X3, X2, X4); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; } - - X1 = mul(X1, EK[48]); - X2 += EK[50]; - X3 += EK[49]; - X4 = mul(X4, EK[51]); - - store_be(out, X1, X3, X2, X4); } /* * IDEA Decryption */ -void IDEA::dec(const byte in[], byte out[]) const +void IDEA::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u16bit X1 = load_be(in, 0); - u16bit X2 = load_be(in, 1); - u16bit X3 = load_be(in, 2); - u16bit X4 = load_be(in, 3); - - for(u32bit j = 0; j != 8; ++j) + for(u32bit i = 0; i != blocks; ++i) { - X1 = mul(X1, DK[6*j+0]); - X2 += DK[6*j+1]; - X3 += DK[6*j+2]; - X4 = mul(X4, DK[6*j+3]); - - u16bit T0 = X3; - X3 = mul(X3 ^ X1, DK[6*j+4]); - - u16bit T1 = X2; - X2 = mul((X2 ^ X4) + X3, DK[6*j+5]); - X3 += X2; - - X1 ^= X2; - X4 ^= X3; - X2 ^= T0; - X3 ^= T1; + u16bit X1 = load_be(in, 0); + u16bit X2 = load_be(in, 1); + u16bit X3 = load_be(in, 2); + u16bit X4 = load_be(in, 3); + + for(u32bit j = 0; j != 8; ++j) + { + X1 = mul(X1, DK[6*j+0]); + X2 += DK[6*j+1]; + X3 += DK[6*j+2]; + X4 = mul(X4, DK[6*j+3]); + + u16bit T0 = X3; + X3 = mul(X3 ^ X1, DK[6*j+4]); + + u16bit T1 = X2; + X2 = mul((X2 ^ X4) + X3, DK[6*j+5]); + X3 += X2; + + X1 ^= X2; + X4 ^= X3; + X2 ^= T0; + X3 ^= T1; + } + + X1 = mul(X1, DK[48]); + X2 += DK[50]; + X3 += DK[49]; + X4 = mul(X4, DK[51]); + + store_be(out, X1, X3, X2, X4); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; } - - X1 = mul(X1, DK[48]); - X2 += DK[50]; - X3 += DK[49]; - X4 = mul(X4, DK[51]); - - store_be(out, X1, X3, X2, X4); } /* diff --git a/src/block/idea/idea.h b/src/block/idea/idea.h index 2c53cd0e4..a3384b667 100644 --- a/src/block/idea/idea.h +++ b/src/block/idea/idea.h @@ -23,8 +23,8 @@ class BOTAN_DLL IDEA : public BlockCipher BlockCipher* clone() const { return new IDEA; } IDEA() : BlockCipher(8, 16) {} private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer EK, DK; }; diff --git a/src/block/kasumi/kasumi.cpp b/src/block/kasumi/kasumi.cpp index e051ddefb..dff6db13c 100644 --- a/src/block/kasumi/kasumi.cpp +++ b/src/block/kasumi/kasumi.cpp @@ -109,79 +109,91 @@ u16bit FI(u16bit I, u16bit K) /* * KASUMI Encryption */ -void KASUMI::enc(const byte in[], byte out[]) const +void KASUMI::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u16bit B0 = load_be(in, 0); - u16bit B1 = load_be(in, 1); - u16bit B2 = load_be(in, 2); - u16bit B3 = load_be(in, 3); - - for(u32bit j = 0; j != 8; j += 2) + for(u32bit i = 0; i != blocks; ++i) { - const u16bit* K = EK + 8*j; + u16bit B0 = load_be(in, 0); + u16bit B1 = load_be(in, 1); + u16bit B2 = load_be(in, 2); + u16bit B3 = load_be(in, 3); - u16bit R = B1 ^ (rotate_left(B0, 1) & K[0]); - u16bit L = B0 ^ (rotate_left(R, 1) | K[1]); + for(u32bit j = 0; j != 8; j += 2) + { + const u16bit* K = EK + 8*j; - L = FI(L ^ K[ 2], K[ 3]) ^ R; - R = FI(R ^ K[ 4], K[ 5]) ^ L; - L = FI(L ^ K[ 6], K[ 7]) ^ R; + u16bit R = B1 ^ (rotate_left(B0, 1) & K[0]); + u16bit L = B0 ^ (rotate_left(R, 1) | K[1]); - R = B2 ^= R; - L = B3 ^= L; + L = FI(L ^ K[ 2], K[ 3]) ^ R; + R = FI(R ^ K[ 4], K[ 5]) ^ L; + L = FI(L ^ K[ 6], K[ 7]) ^ R; - R = FI(R ^ K[10], K[11]) ^ L; - L = FI(L ^ K[12], K[13]) ^ R; - R = FI(R ^ K[14], K[15]) ^ L; + R = B2 ^= R; + L = B3 ^= L; - R ^= (rotate_left(L, 1) & K[8]); - L ^= (rotate_left(R, 1) | K[9]); + R = FI(R ^ K[10], K[11]) ^ L; + L = FI(L ^ K[12], K[13]) ^ R; + R = FI(R ^ K[14], K[15]) ^ L; - B0 ^= L; - B1 ^= R; - } + R ^= (rotate_left(L, 1) & K[8]); + L ^= (rotate_left(R, 1) | K[9]); + + B0 ^= L; + B1 ^= R; + } - store_be(out, B0, B1, B2, B3); + store_be(out, B0, B1, B2, B3); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * KASUMI Decryption */ -void KASUMI::dec(const byte in[], byte out[]) const +void KASUMI::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u16bit B0 = load_be(in, 0); - u16bit B1 = load_be(in, 1); - u16bit B2 = load_be(in, 2); - u16bit B3 = load_be(in, 3); - - for(u32bit j = 0; j != 8; j += 2) + for(u32bit i = 0; i != blocks; ++i) { - const u16bit* K = EK + 8*(6-j); + u16bit B0 = load_be(in, 0); + u16bit B1 = load_be(in, 1); + u16bit B2 = load_be(in, 2); + u16bit B3 = load_be(in, 3); - u16bit L = B2, R = B3; + for(u32bit j = 0; j != 8; j += 2) + { + const u16bit* K = EK + 8*(6-j); - L = FI(L ^ K[10], K[11]) ^ R; - R = FI(R ^ K[12], K[13]) ^ L; - L = FI(L ^ K[14], K[15]) ^ R; + u16bit L = B2, R = B3; - L ^= (rotate_left(R, 1) & K[8]); - R ^= (rotate_left(L, 1) | K[9]); + L = FI(L ^ K[10], K[11]) ^ R; + R = FI(R ^ K[12], K[13]) ^ L; + L = FI(L ^ K[14], K[15]) ^ R; - R = B0 ^= R; - L = B1 ^= L; + L ^= (rotate_left(R, 1) & K[8]); + R ^= (rotate_left(L, 1) | K[9]); - L ^= (rotate_left(R, 1) & K[0]); - R ^= (rotate_left(L, 1) | K[1]); + R = B0 ^= R; + L = B1 ^= L; - R = FI(R ^ K[2], K[3]) ^ L; - L = FI(L ^ K[4], K[5]) ^ R; - R = FI(R ^ K[6], K[7]) ^ L; + L ^= (rotate_left(R, 1) & K[0]); + R ^= (rotate_left(L, 1) | K[1]); - B2 ^= L; - B3 ^= R; - } + R = FI(R ^ K[2], K[3]) ^ L; + L = FI(L ^ K[4], K[5]) ^ R; + R = FI(R ^ K[6], K[7]) ^ L; + + B2 ^= L; + B3 ^= R; + } - store_be(out, B0, B1, B2, B3); + store_be(out, B0, B1, B2, B3); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/kasumi/kasumi.h b/src/block/kasumi/kasumi.h index df49fa9eb..3b18e675b 100644 --- a/src/block/kasumi/kasumi.h +++ b/src/block/kasumi/kasumi.h @@ -24,8 +24,8 @@ class BOTAN_DLL KASUMI : public BlockCipher KASUMI() : BlockCipher(8, 16) {} private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer EK; diff --git a/src/block/lion/lion.cpp b/src/block/lion/lion.cpp index c7cdf6d13..bfff64b67 100644 --- a/src/block/lion/lion.cpp +++ b/src/block/lion/lion.cpp @@ -14,41 +14,53 @@ namespace Botan { /* * Lion Encryption */ -void Lion::enc(const byte in[], byte out[]) const +void Lion::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - SecureVector buffer(LEFT_SIZE); + for(u32bit i = 0; i != blocks; ++i) + { + SecureVector buffer(LEFT_SIZE); - xor_buf(buffer, in, key1, LEFT_SIZE); - cipher->set_key(buffer, LEFT_SIZE); - cipher->encrypt(in + LEFT_SIZE, out + LEFT_SIZE, RIGHT_SIZE); + xor_buf(buffer, in, key1, LEFT_SIZE); + cipher->set_key(buffer, LEFT_SIZE); + cipher->encrypt(in + LEFT_SIZE, out + LEFT_SIZE, RIGHT_SIZE); - hash->update(out + LEFT_SIZE, RIGHT_SIZE); - hash->final(buffer); - xor_buf(out, in, buffer, LEFT_SIZE); + hash->update(out + LEFT_SIZE, RIGHT_SIZE); + hash->final(buffer); + xor_buf(out, in, buffer, LEFT_SIZE); - xor_buf(buffer, out, key2, LEFT_SIZE); - cipher->set_key(buffer, LEFT_SIZE); - cipher->encrypt(out + LEFT_SIZE, RIGHT_SIZE); + xor_buf(buffer, out, key2, LEFT_SIZE); + cipher->set_key(buffer, LEFT_SIZE); + cipher->encrypt(out + LEFT_SIZE, RIGHT_SIZE); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * Lion Decryption */ -void Lion::dec(const byte in[], byte out[]) const +void Lion::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - SecureVector buffer(LEFT_SIZE); + for(u32bit i = 0; i != blocks; ++i) + { + SecureVector buffer(LEFT_SIZE); + + xor_buf(buffer, in, key2, LEFT_SIZE); + cipher->set_key(buffer, LEFT_SIZE); + cipher->encrypt(in + LEFT_SIZE, out + LEFT_SIZE, RIGHT_SIZE); - xor_buf(buffer, in, key2, LEFT_SIZE); - cipher->set_key(buffer, LEFT_SIZE); - cipher->encrypt(in + LEFT_SIZE, out + LEFT_SIZE, RIGHT_SIZE); + hash->update(out + LEFT_SIZE, RIGHT_SIZE); + hash->final(buffer); + xor_buf(out, in, buffer, LEFT_SIZE); - hash->update(out + LEFT_SIZE, RIGHT_SIZE); - hash->final(buffer); - xor_buf(out, in, buffer, LEFT_SIZE); + xor_buf(buffer, out, key1, LEFT_SIZE); + cipher->set_key(buffer, LEFT_SIZE); + cipher->encrypt(out + LEFT_SIZE, RIGHT_SIZE); - xor_buf(buffer, out, key1, LEFT_SIZE); - cipher->set_key(buffer, LEFT_SIZE); - cipher->encrypt(out + LEFT_SIZE, RIGHT_SIZE); + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/lion/lion.h b/src/block/lion/lion.h index 5bc4e72c0..d9f933846 100644 --- a/src/block/lion/lion.h +++ b/src/block/lion/lion.h @@ -27,8 +27,8 @@ class BOTAN_DLL Lion : public BlockCipher Lion(HashFunction*, StreamCipher*, u32bit); ~Lion() { delete hash; delete cipher; } private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); const u32bit LEFT_SIZE, RIGHT_SIZE; diff --git a/src/block/lubyrack/lubyrack.cpp b/src/block/lubyrack/lubyrack.cpp index a9d2b1db2..6ad64f2b0 100644 --- a/src/block/lubyrack/lubyrack.cpp +++ b/src/block/lubyrack/lubyrack.cpp @@ -13,59 +13,71 @@ namespace Botan { /* * Luby-Rackoff Encryption */ -void LubyRackoff::enc(const byte in[], byte out[]) const +void LubyRackoff::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - const u32bit len = hash->OUTPUT_LENGTH; - - SecureVector buffer(len); - hash->update(K1); - hash->update(in, len); - hash->final(buffer); - xor_buf(out + len, in + len, buffer, len); - - hash->update(K2); - hash->update(out + len, len); - hash->final(buffer); - xor_buf(out, in, buffer, len); - - hash->update(K1); - hash->update(out, len); - hash->final(buffer); - xor_buf(out + len, buffer, len); - - hash->update(K2); - hash->update(out + len, len); - hash->final(buffer); - xor_buf(out, buffer, len); + for(u32bit i = 0; i != blocks; ++i) + { + const u32bit len = hash->OUTPUT_LENGTH; + + SecureVector buffer(len); + hash->update(K1); + hash->update(in, len); + hash->final(buffer); + xor_buf(out + len, in + len, buffer, len); + + hash->update(K2); + hash->update(out + len, len); + hash->final(buffer); + xor_buf(out, in, buffer, len); + + hash->update(K1); + hash->update(out, len); + hash->final(buffer); + xor_buf(out + len, buffer, len); + + hash->update(K2); + hash->update(out + len, len); + hash->final(buffer); + xor_buf(out, buffer, len); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * Luby-Rackoff Decryption */ -void LubyRackoff::dec(const byte in[], byte out[]) const +void LubyRackoff::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - const u32bit len = hash->OUTPUT_LENGTH; - - SecureVector buffer(len); - hash->update(K2); - hash->update(in + len, len); - hash->final(buffer); - xor_buf(out, in, buffer, len); - - hash->update(K1); - hash->update(out, len); - hash->final(buffer); - xor_buf(out + len, in + len, buffer, len); - - hash->update(K2); - hash->update(out + len, len); - hash->final(buffer); - xor_buf(out, buffer, len); - - hash->update(K1); - hash->update(out, len); - hash->final(buffer); - xor_buf(out + len, buffer, len); + for(u32bit i = 0; i != blocks; ++i) + { + const u32bit len = hash->OUTPUT_LENGTH; + + SecureVector buffer(len); + hash->update(K2); + hash->update(in + len, len); + hash->final(buffer); + xor_buf(out, in, buffer, len); + + hash->update(K1); + hash->update(out, len); + hash->final(buffer); + xor_buf(out + len, in + len, buffer, len); + + hash->update(K2); + hash->update(out + len, len); + hash->final(buffer); + xor_buf(out, buffer, len); + + hash->update(K1); + hash->update(out, len); + hash->final(buffer); + xor_buf(out + len, buffer, len); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/lubyrack/lubyrack.h b/src/block/lubyrack/lubyrack.h index ebde31304..1e83748a6 100644 --- a/src/block/lubyrack/lubyrack.h +++ b/src/block/lubyrack/lubyrack.h @@ -26,8 +26,8 @@ class BOTAN_DLL LubyRackoff : public BlockCipher LubyRackoff(HashFunction* hash); ~LubyRackoff() { delete hash; } private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); HashFunction* hash; SecureVector K1, K2; diff --git a/src/block/mars/mars.cpp b/src/block/mars/mars.cpp index 08c8409c5..69556acb3 100644 --- a/src/block/mars/mars.cpp +++ b/src/block/mars/mars.cpp @@ -1,6 +1,6 @@ /* * MARS -* (C) 1999-2007 Jack Lloyd +* (C) 1999-2009 Jack Lloyd * * Distributed under the terms of the Botan license */ @@ -50,75 +50,87 @@ u32bit gen_mask(u32bit input) /* * MARS Encryption */ -void MARS::enc(const byte in[], byte out[]) const +void MARS::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit A = load_le(in, 0) + EK[0]; - u32bit B = load_le(in, 1) + EK[1]; - u32bit C = load_le(in, 2) + EK[2]; - u32bit D = load_le(in, 3) + EK[3]; - - forward_mix(A, B, C, D); - - encrypt_round(A, B, C, D, 0); - encrypt_round(B, C, D, A, 1); - encrypt_round(C, D, A, B, 2); - encrypt_round(D, A, B, C, 3); - encrypt_round(A, B, C, D, 4); - encrypt_round(B, C, D, A, 5); - encrypt_round(C, D, A, B, 6); - encrypt_round(D, A, B, C, 7); - - encrypt_round(A, D, C, B, 8); - encrypt_round(B, A, D, C, 9); - encrypt_round(C, B, A, D, 10); - encrypt_round(D, C, B, A, 11); - encrypt_round(A, D, C, B, 12); - encrypt_round(B, A, D, C, 13); - encrypt_round(C, B, A, D, 14); - encrypt_round(D, C, B, A, 15); - - reverse_mix(A, B, C, D); - - A -= EK[36]; B -= EK[37]; C -= EK[38]; D -= EK[39]; - - store_le(out, A, B, C, D); + for(u32bit i = 0; i != blocks; ++i) + { + u32bit A = load_le(in, 0) + EK[0]; + u32bit B = load_le(in, 1) + EK[1]; + u32bit C = load_le(in, 2) + EK[2]; + u32bit D = load_le(in, 3) + EK[3]; + + forward_mix(A, B, C, D); + + encrypt_round(A, B, C, D, 0); + encrypt_round(B, C, D, A, 1); + encrypt_round(C, D, A, B, 2); + encrypt_round(D, A, B, C, 3); + encrypt_round(A, B, C, D, 4); + encrypt_round(B, C, D, A, 5); + encrypt_round(C, D, A, B, 6); + encrypt_round(D, A, B, C, 7); + + encrypt_round(A, D, C, B, 8); + encrypt_round(B, A, D, C, 9); + encrypt_round(C, B, A, D, 10); + encrypt_round(D, C, B, A, 11); + encrypt_round(A, D, C, B, 12); + encrypt_round(B, A, D, C, 13); + encrypt_round(C, B, A, D, 14); + encrypt_round(D, C, B, A, 15); + + reverse_mix(A, B, C, D); + + A -= EK[36]; B -= EK[37]; C -= EK[38]; D -= EK[39]; + + store_le(out, A, B, C, D); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * MARS Decryption */ -void MARS::dec(const byte in[], byte out[]) const +void MARS::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit A = load_le(in, 3) + EK[39]; - u32bit B = load_le(in, 2) + EK[38]; - u32bit C = load_le(in, 1) + EK[37]; - u32bit D = load_le(in, 0) + EK[36]; - - forward_mix(A, B, C, D); - - decrypt_round(A, B, C, D, 15); - decrypt_round(B, C, D, A, 14); - decrypt_round(C, D, A, B, 13); - decrypt_round(D, A, B, C, 12); - decrypt_round(A, B, C, D, 11); - decrypt_round(B, C, D, A, 10); - decrypt_round(C, D, A, B, 9); - decrypt_round(D, A, B, C, 8); - - decrypt_round(A, D, C, B, 7); - decrypt_round(B, A, D, C, 6); - decrypt_round(C, B, A, D, 5); - decrypt_round(D, C, B, A, 4); - decrypt_round(A, D, C, B, 3); - decrypt_round(B, A, D, C, 2); - decrypt_round(C, B, A, D, 1); - decrypt_round(D, C, B, A, 0); - - reverse_mix(A, B, C, D); - - A -= EK[3]; B -= EK[2]; C -= EK[1]; D -= EK[0]; - - store_le(out, D, C, B, A); + for(u32bit i = 0; i != blocks; ++i) + { + u32bit A = load_le(in, 3) + EK[39]; + u32bit B = load_le(in, 2) + EK[38]; + u32bit C = load_le(in, 1) + EK[37]; + u32bit D = load_le(in, 0) + EK[36]; + + forward_mix(A, B, C, D); + + decrypt_round(A, B, C, D, 15); + decrypt_round(B, C, D, A, 14); + decrypt_round(C, D, A, B, 13); + decrypt_round(D, A, B, C, 12); + decrypt_round(A, B, C, D, 11); + decrypt_round(B, C, D, A, 10); + decrypt_round(C, D, A, B, 9); + decrypt_round(D, A, B, C, 8); + + decrypt_round(A, D, C, B, 7); + decrypt_round(B, A, D, C, 6); + decrypt_round(C, B, A, D, 5); + decrypt_round(D, C, B, A, 4); + decrypt_round(A, D, C, B, 3); + decrypt_round(B, A, D, C, 2); + decrypt_round(C, B, A, D, 1); + decrypt_round(D, C, B, A, 0); + + reverse_mix(A, B, C, D); + + A -= EK[3]; B -= EK[2]; C -= EK[1]; D -= EK[0]; + + store_le(out, D, C, B, A); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/mars/mars.h b/src/block/mars/mars.h index ca49695af..b3d74b179 100644 --- a/src/block/mars/mars.h +++ b/src/block/mars/mars.h @@ -20,8 +20,8 @@ class BOTAN_DLL MARS : public BlockCipher BlockCipher* clone() const { return new MARS; } MARS() : BlockCipher(16, 16, 32, 4) {} private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); void encrypt_round(u32bit&, u32bit&, u32bit&, u32bit&, u32bit) const; diff --git a/src/block/misty1/misty1.cpp b/src/block/misty1/misty1.cpp index a35ff584d..8a92824cc 100644 --- a/src/block/misty1/misty1.cpp +++ b/src/block/misty1/misty1.cpp @@ -1,6 +1,6 @@ /* * MISTY1 -* (C) 1999-2007 Jack Lloyd +* (C) 1999-2009 Jack Lloyd * * Distributed under the terms of the Botan license */ @@ -102,89 +102,101 @@ u16bit FI(u16bit input, u16bit key7, u16bit key9) /* * MISTY1 Encryption */ -void MISTY1::enc(const byte in[], byte out[]) const +void MISTY1::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u16bit B0 = load_be(in, 0); - u16bit B1 = load_be(in, 1); - u16bit B2 = load_be(in, 2); - u16bit B3 = load_be(in, 3); - - for(u32bit j = 0; j != 12; j += 3) + for(u32bit i = 0; i != blocks; ++i) { - const u16bit* RK = EK + 8 * j; + u16bit B0 = load_be(in, 0); + u16bit B1 = load_be(in, 1); + u16bit B2 = load_be(in, 2); + u16bit B3 = load_be(in, 3); - B1 ^= B0 & RK[0]; - B0 ^= B1 | RK[1]; - B3 ^= B2 & RK[2]; - B2 ^= B3 | RK[3]; + for(u32bit j = 0; j != 12; j += 3) + { + const u16bit* RK = EK + 8 * j; - u32bit T0, T1; + B1 ^= B0 & RK[0]; + B0 ^= B1 | RK[1]; + B3 ^= B2 & RK[2]; + B2 ^= B3 | RK[3]; - T0 = FI(B0 ^ RK[ 4], RK[ 5], RK[ 6]) ^ B1; - T1 = FI(B1 ^ RK[ 7], RK[ 8], RK[ 9]) ^ T0; - T0 = FI(T0 ^ RK[10], RK[11], RK[12]) ^ T1; + u32bit T0, T1; - B2 ^= T1 ^ RK[13]; - B3 ^= T0; + T0 = FI(B0 ^ RK[ 4], RK[ 5], RK[ 6]) ^ B1; + T1 = FI(B1 ^ RK[ 7], RK[ 8], RK[ 9]) ^ T0; + T0 = FI(T0 ^ RK[10], RK[11], RK[12]) ^ T1; - T0 = FI(B2 ^ RK[14], RK[15], RK[16]) ^ B3; - T1 = FI(B3 ^ RK[17], RK[18], RK[19]) ^ T0; - T0 = FI(T0 ^ RK[20], RK[21], RK[22]) ^ T1; + B2 ^= T1 ^ RK[13]; + B3 ^= T0; - B0 ^= T1 ^ RK[23]; - B1 ^= T0; - } + T0 = FI(B2 ^ RK[14], RK[15], RK[16]) ^ B3; + T1 = FI(B3 ^ RK[17], RK[18], RK[19]) ^ T0; + T0 = FI(T0 ^ RK[20], RK[21], RK[22]) ^ T1; + + B0 ^= T1 ^ RK[23]; + B1 ^= T0; + } - B1 ^= B0 & EK[96]; - B0 ^= B1 | EK[97]; - B3 ^= B2 & EK[98]; - B2 ^= B3 | EK[99]; + B1 ^= B0 & EK[96]; + B0 ^= B1 | EK[97]; + B3 ^= B2 & EK[98]; + B2 ^= B3 | EK[99]; - store_be(out, B2, B3, B0, B1); + store_be(out, B2, B3, B0, B1); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * MISTY1 Decryption */ -void MISTY1::dec(const byte in[], byte out[]) const +void MISTY1::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u16bit B0 = load_be(in, 2); - u16bit B1 = load_be(in, 3); - u16bit B2 = load_be(in, 0); - u16bit B3 = load_be(in, 1); - - for(u32bit j = 0; j != 12; j += 3) + for(u32bit i = 0; i != blocks; ++i) { - const u16bit* RK = DK + 8 * j; + u16bit B0 = load_be(in, 2); + u16bit B1 = load_be(in, 3); + u16bit B2 = load_be(in, 0); + u16bit B3 = load_be(in, 1); - B2 ^= B3 | RK[0]; - B3 ^= B2 & RK[1]; - B0 ^= B1 | RK[2]; - B1 ^= B0 & RK[3]; + for(u32bit j = 0; j != 12; j += 3) + { + const u16bit* RK = DK + 8 * j; - u32bit T0, T1; + B2 ^= B3 | RK[0]; + B3 ^= B2 & RK[1]; + B0 ^= B1 | RK[2]; + B1 ^= B0 & RK[3]; - T0 = FI(B2 ^ RK[ 4], RK[ 5], RK[ 6]) ^ B3; - T1 = FI(B3 ^ RK[ 7], RK[ 8], RK[ 9]) ^ T0; - T0 = FI(T0 ^ RK[10], RK[11], RK[12]) ^ T1; + u32bit T0, T1; - B0 ^= T1 ^ RK[13]; - B1 ^= T0; + T0 = FI(B2 ^ RK[ 4], RK[ 5], RK[ 6]) ^ B3; + T1 = FI(B3 ^ RK[ 7], RK[ 8], RK[ 9]) ^ T0; + T0 = FI(T0 ^ RK[10], RK[11], RK[12]) ^ T1; - T0 = FI(B0 ^ RK[14], RK[15], RK[16]) ^ B1; - T1 = FI(B1 ^ RK[17], RK[18], RK[19]) ^ T0; - T0 = FI(T0 ^ RK[20], RK[21], RK[22]) ^ T1; + B0 ^= T1 ^ RK[13]; + B1 ^= T0; - B2 ^= T1 ^ RK[23]; - B3 ^= T0; - } + T0 = FI(B0 ^ RK[14], RK[15], RK[16]) ^ B1; + T1 = FI(B1 ^ RK[17], RK[18], RK[19]) ^ T0; + T0 = FI(T0 ^ RK[20], RK[21], RK[22]) ^ T1; + + B2 ^= T1 ^ RK[23]; + B3 ^= T0; + } - B2 ^= B3 | DK[96]; - B3 ^= B2 & DK[97]; - B0 ^= B1 | DK[98]; - B1 ^= B0 & DK[99]; + B2 ^= B3 | DK[96]; + B3 ^= B2 & DK[97]; + B0 ^= B1 | DK[98]; + B1 ^= B0 & DK[99]; - store_be(out, B0, B1, B2, B3); + store_be(out, B0, B1, B2, B3); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/misty1/misty1.h b/src/block/misty1/misty1.h index 62d4f856f..14b7c8e45 100644 --- a/src/block/misty1/misty1.h +++ b/src/block/misty1/misty1.h @@ -23,8 +23,8 @@ class BOTAN_DLL MISTY1 : public BlockCipher BlockCipher* clone() const { return new MISTY1; } MISTY1(u32bit = 8); private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer EK, DK; diff --git a/src/block/noekeon/noekeon.cpp b/src/block/noekeon/noekeon.cpp index 90eb9ad2b..1b327aa47 100644 --- a/src/block/noekeon/noekeon.cpp +++ b/src/block/noekeon/noekeon.cpp @@ -84,65 +84,77 @@ const byte Noekeon::RC[] = { /* * Noekeon Encryption */ -void Noekeon::enc(const byte in[], byte out[]) const +void Noekeon::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit A0 = load_be(in, 0); - u32bit A1 = load_be(in, 1); - u32bit A2 = load_be(in, 2); - u32bit A3 = load_be(in, 3); - - for(u32bit j = 0; j != 16; ++j) + for(u32bit i = 0; i != blocks; ++i) { - A0 ^= RC[j]; - theta(A0, A1, A2, A3, EK); + u32bit A0 = load_be(in, 0); + u32bit A1 = load_be(in, 1); + u32bit A2 = load_be(in, 2); + u32bit A3 = load_be(in, 3); - A1 = rotate_left(A1, 1); - A2 = rotate_left(A2, 5); - A3 = rotate_left(A3, 2); + for(u32bit j = 0; j != 16; ++j) + { + A0 ^= RC[j]; + theta(A0, A1, A2, A3, EK); - gamma(A0, A1, A2, A3); + A1 = rotate_left(A1, 1); + A2 = rotate_left(A2, 5); + A3 = rotate_left(A3, 2); - A1 = rotate_right(A1, 1); - A2 = rotate_right(A2, 5); - A3 = rotate_right(A3, 2); - } + gamma(A0, A1, A2, A3); - A0 ^= RC[16]; - theta(A0, A1, A2, A3, EK); + A1 = rotate_right(A1, 1); + A2 = rotate_right(A2, 5); + A3 = rotate_right(A3, 2); + } + + A0 ^= RC[16]; + theta(A0, A1, A2, A3, EK); + + store_be(out, A0, A1, A2, A3); - store_be(out, A0, A1, A2, A3); + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * Noekeon Encryption */ -void Noekeon::dec(const byte in[], byte out[]) const +void Noekeon::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit A0 = load_be(in, 0); - u32bit A1 = load_be(in, 1); - u32bit A2 = load_be(in, 2); - u32bit A3 = load_be(in, 3); - - for(u32bit j = 16; j != 0; --j) + for(u32bit i = 0; i != blocks; ++i) { - theta(A0, A1, A2, A3, DK); - A0 ^= RC[j]; + u32bit A0 = load_be(in, 0); + u32bit A1 = load_be(in, 1); + u32bit A2 = load_be(in, 2); + u32bit A3 = load_be(in, 3); - A1 = rotate_left(A1, 1); - A2 = rotate_left(A2, 5); - A3 = rotate_left(A3, 2); + for(u32bit j = 16; j != 0; --j) + { + theta(A0, A1, A2, A3, DK); + A0 ^= RC[j]; - gamma(A0, A1, A2, A3); + A1 = rotate_left(A1, 1); + A2 = rotate_left(A2, 5); + A3 = rotate_left(A3, 2); - A1 = rotate_right(A1, 1); - A2 = rotate_right(A2, 5); - A3 = rotate_right(A3, 2); - } + gamma(A0, A1, A2, A3); - theta(A0, A1, A2, A3, DK); - A0 ^= RC[0]; + A1 = rotate_right(A1, 1); + A2 = rotate_right(A2, 5); + A3 = rotate_right(A3, 2); + } - store_be(out, A0, A1, A2, A3); + theta(A0, A1, A2, A3, DK); + A0 ^= RC[0]; + + store_be(out, A0, A1, A2, A3); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/noekeon/noekeon.h b/src/block/noekeon/noekeon.h index 893892446..b4f3980d7 100644 --- a/src/block/noekeon/noekeon.h +++ b/src/block/noekeon/noekeon.h @@ -23,8 +23,8 @@ class BOTAN_DLL Noekeon : public BlockCipher BlockCipher* clone() const { return new Noekeon; } Noekeon() : BlockCipher(16, 16) {} private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); static const byte RC[17]; diff --git a/src/block/rc2/rc2.cpp b/src/block/rc2/rc2.cpp index 5827bdb68..b5e4a7d50 100644 --- a/src/block/rc2/rc2.cpp +++ b/src/block/rc2/rc2.cpp @@ -14,73 +14,85 @@ namespace Botan { /* * RC2 Encryption */ -void RC2::enc(const byte in[], byte out[]) const +void RC2::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u16bit R0 = load_le(in, 0); - u16bit R1 = load_le(in, 1); - u16bit R2 = load_le(in, 2); - u16bit R3 = load_le(in, 3); - - for(u32bit j = 0; j != 16; ++j) + for(u32bit i = 0; i != blocks; ++i) { - R0 += (R1 & ~R3) + (R2 & R3) + K[4*j]; - R0 = rotate_left(R0, 1); + u16bit R0 = load_le(in, 0); + u16bit R1 = load_le(in, 1); + u16bit R2 = load_le(in, 2); + u16bit R3 = load_le(in, 3); + + for(u32bit j = 0; j != 16; ++j) + { + R0 += (R1 & ~R3) + (R2 & R3) + K[4*j]; + R0 = rotate_left(R0, 1); - R1 += (R2 & ~R0) + (R3 & R0) + K[4*j + 1]; - R1 = rotate_left(R1, 2); + R1 += (R2 & ~R0) + (R3 & R0) + K[4*j + 1]; + R1 = rotate_left(R1, 2); - R2 += (R3 & ~R1) + (R0 & R1) + K[4*j + 2]; - R2 = rotate_left(R2, 3); + R2 += (R3 & ~R1) + (R0 & R1) + K[4*j + 2]; + R2 = rotate_left(R2, 3); - R3 += (R0 & ~R2) + (R1 & R2) + K[4*j + 3]; - R3 = rotate_left(R3, 5); + R3 += (R0 & ~R2) + (R1 & R2) + K[4*j + 3]; + R3 = rotate_left(R3, 5); - if(j == 4 || j == 10) - { - R0 += K[R3 % 64]; - R1 += K[R0 % 64]; - R2 += K[R1 % 64]; - R3 += K[R2 % 64]; + if(j == 4 || j == 10) + { + R0 += K[R3 % 64]; + R1 += K[R0 % 64]; + R2 += K[R1 % 64]; + R3 += K[R2 % 64]; + } } - } - store_le(out, R0, R1, R2, R3); + store_le(out, R0, R1, R2, R3); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * RC2 Decryption */ -void RC2::dec(const byte in[], byte out[]) const +void RC2::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u16bit R0 = load_le(in, 0); - u16bit R1 = load_le(in, 1); - u16bit R2 = load_le(in, 2); - u16bit R3 = load_le(in, 3); - - for(u32bit j = 0; j != 16; ++j) + for(u32bit i = 0; i != blocks; ++i) { - R3 = rotate_right(R3, 5); - R3 -= (R0 & ~R2) + (R1 & R2) + K[63 - (4*j + 0)]; + u16bit R0 = load_le(in, 0); + u16bit R1 = load_le(in, 1); + u16bit R2 = load_le(in, 2); + u16bit R3 = load_le(in, 3); + + for(u32bit j = 0; j != 16; ++j) + { + R3 = rotate_right(R3, 5); + R3 -= (R0 & ~R2) + (R1 & R2) + K[63 - (4*j + 0)]; - R2 = rotate_right(R2, 3); - R2 -= (R3 & ~R1) + (R0 & R1) + K[63 - (4*j + 1)]; + R2 = rotate_right(R2, 3); + R2 -= (R3 & ~R1) + (R0 & R1) + K[63 - (4*j + 1)]; - R1 = rotate_right(R1, 2); - R1 -= (R2 & ~R0) + (R3 & R0) + K[63 - (4*j + 2)]; + R1 = rotate_right(R1, 2); + R1 -= (R2 & ~R0) + (R3 & R0) + K[63 - (4*j + 2)]; - R0 = rotate_right(R0, 1); - R0 -= (R1 & ~R3) + (R2 & R3) + K[63 - (4*j + 3)]; + R0 = rotate_right(R0, 1); + R0 -= (R1 & ~R3) + (R2 & R3) + K[63 - (4*j + 3)]; - if(j == 4 || j == 10) - { - R3 -= K[R2 % 64]; - R2 -= K[R1 % 64]; - R1 -= K[R0 % 64]; - R0 -= K[R3 % 64]; + if(j == 4 || j == 10) + { + R3 -= K[R2 % 64]; + R2 -= K[R1 % 64]; + R1 -= K[R0 % 64]; + R0 -= K[R3 % 64]; + } } - } - store_le(out, R0, R1, R2, R3); + store_le(out, R0, R1, R2, R3); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/rc2/rc2.h b/src/block/rc2/rc2.h index cb6f58f04..0d94d69a2 100644 --- a/src/block/rc2/rc2.h +++ b/src/block/rc2/rc2.h @@ -25,8 +25,8 @@ class BOTAN_DLL RC2 : public BlockCipher BlockCipher* clone() const { return new RC2; } RC2() : BlockCipher(8, 1, 32) {} private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer K; diff --git a/src/block/rc5/rc5.cpp b/src/block/rc5/rc5.cpp index 5d83d5a4e..4bfa27ea0 100644 --- a/src/block/rc5/rc5.cpp +++ b/src/block/rc5/rc5.cpp @@ -16,47 +16,59 @@ namespace Botan { /* * RC5 Encryption */ -void RC5::enc(const byte in[], byte out[]) const +void RC5::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit A = load_le(in, 0), B = load_le(in, 1); - - A += S[0]; B += S[1]; - for(u32bit j = 0; j != ROUNDS; j += 4) + for(u32bit i = 0; i != blocks; ++i) { - A = rotate_left(A ^ B, B % 32) + S[2*j+2]; - B = rotate_left(B ^ A, A % 32) + S[2*j+3]; - A = rotate_left(A ^ B, B % 32) + S[2*j+4]; - B = rotate_left(B ^ A, A % 32) + S[2*j+5]; - A = rotate_left(A ^ B, B % 32) + S[2*j+6]; - B = rotate_left(B ^ A, A % 32) + S[2*j+7]; - A = rotate_left(A ^ B, B % 32) + S[2*j+8]; - B = rotate_left(B ^ A, A % 32) + S[2*j+9]; - } + u32bit A = load_le(in, 0), B = load_le(in, 1); + + A += S[0]; B += S[1]; + for(u32bit j = 0; j != ROUNDS; j += 4) + { + A = rotate_left(A ^ B, B % 32) + S[2*j+2]; + B = rotate_left(B ^ A, A % 32) + S[2*j+3]; + A = rotate_left(A ^ B, B % 32) + S[2*j+4]; + B = rotate_left(B ^ A, A % 32) + S[2*j+5]; + A = rotate_left(A ^ B, B % 32) + S[2*j+6]; + B = rotate_left(B ^ A, A % 32) + S[2*j+7]; + A = rotate_left(A ^ B, B % 32) + S[2*j+8]; + B = rotate_left(B ^ A, A % 32) + S[2*j+9]; + } - store_le(out, A, B); + store_le(out, A, B); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * RC5 Decryption */ -void RC5::dec(const byte in[], byte out[]) const +void RC5::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit A = load_le(in, 0), B = load_le(in, 1); - - for(u32bit j = ROUNDS; j != 0; j -= 4) + for(u32bit i = 0; i != blocks; ++i) { - B = rotate_right(B - S[2*j+1], A % 32) ^ A; - A = rotate_right(A - S[2*j ], B % 32) ^ B; - B = rotate_right(B - S[2*j-1], A % 32) ^ A; - A = rotate_right(A - S[2*j-2], B % 32) ^ B; - B = rotate_right(B - S[2*j-3], A % 32) ^ A; - A = rotate_right(A - S[2*j-4], B % 32) ^ B; - B = rotate_right(B - S[2*j-5], A % 32) ^ A; - A = rotate_right(A - S[2*j-6], B % 32) ^ B; - } - B -= S[1]; A -= S[0]; + u32bit A = load_le(in, 0), B = load_le(in, 1); + + for(u32bit j = ROUNDS; j != 0; j -= 4) + { + B = rotate_right(B - S[2*j+1], A % 32) ^ A; + A = rotate_right(A - S[2*j ], B % 32) ^ B; + B = rotate_right(B - S[2*j-1], A % 32) ^ A; + A = rotate_right(A - S[2*j-2], B % 32) ^ B; + B = rotate_right(B - S[2*j-3], A % 32) ^ A; + A = rotate_right(A - S[2*j-4], B % 32) ^ B; + B = rotate_right(B - S[2*j-5], A % 32) ^ A; + A = rotate_right(A - S[2*j-6], B % 32) ^ B; + } + B -= S[1]; A -= S[0]; - store_le(out, A, B); + store_le(out, A, B); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/rc5/rc5.h b/src/block/rc5/rc5.h index 083224720..1816994dc 100644 --- a/src/block/rc5/rc5.h +++ b/src/block/rc5/rc5.h @@ -23,8 +23,8 @@ class BOTAN_DLL RC5 : public BlockCipher BlockCipher* clone() const { return new RC5(ROUNDS); } RC5(u32bit); private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureVector S; const u32bit ROUNDS; diff --git a/src/block/rc6/rc6.cpp b/src/block/rc6/rc6.cpp index 3b30ea93a..8bda62259 100644 --- a/src/block/rc6/rc6.cpp +++ b/src/block/rc6/rc6.cpp @@ -15,85 +15,97 @@ namespace Botan { /* * RC6 Encryption */ -void RC6::enc(const byte in[], byte out[]) const +void RC6::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit A = load_le(in, 0); - u32bit B = load_le(in, 1); - u32bit C = load_le(in, 2); - u32bit D = load_le(in, 3); + for(u32bit i = 0; i != blocks; ++i) + { + u32bit A = load_le(in, 0); + u32bit B = load_le(in, 1); + u32bit C = load_le(in, 2); + u32bit D = load_le(in, 3); - B += S[0]; D += S[1]; + B += S[0]; D += S[1]; - for(u32bit j = 0; j != 20; j += 4) - { - u32bit T1, T2; - - T1 = rotate_left(B*(2*B+1), 5); - T2 = rotate_left(D*(2*D+1), 5); - A = rotate_left(A ^ T1, T2 % 32) + S[2*j+2]; - C = rotate_left(C ^ T2, T1 % 32) + S[2*j+3]; - - T1 = rotate_left(C*(2*C+1), 5); - T2 = rotate_left(A*(2*A+1), 5); - B = rotate_left(B ^ T1, T2 % 32) + S[2*j+4]; - D = rotate_left(D ^ T2, T1 % 32) + S[2*j+5]; - - T1 = rotate_left(D*(2*D+1), 5); - T2 = rotate_left(B*(2*B+1), 5); - C = rotate_left(C ^ T1, T2 % 32) + S[2*j+6]; - A = rotate_left(A ^ T2, T1 % 32) + S[2*j+7]; - - T1 = rotate_left(A*(2*A+1), 5); - T2 = rotate_left(C*(2*C+1), 5); - D = rotate_left(D ^ T1, T2 % 32) + S[2*j+8]; - B = rotate_left(B ^ T2, T1 % 32) + S[2*j+9]; - } + for(u32bit j = 0; j != 20; j += 4) + { + u32bit T1, T2; + + T1 = rotate_left(B*(2*B+1), 5); + T2 = rotate_left(D*(2*D+1), 5); + A = rotate_left(A ^ T1, T2 % 32) + S[2*j+2]; + C = rotate_left(C ^ T2, T1 % 32) + S[2*j+3]; + + T1 = rotate_left(C*(2*C+1), 5); + T2 = rotate_left(A*(2*A+1), 5); + B = rotate_left(B ^ T1, T2 % 32) + S[2*j+4]; + D = rotate_left(D ^ T2, T1 % 32) + S[2*j+5]; - A += S[42]; C += S[43]; + T1 = rotate_left(D*(2*D+1), 5); + T2 = rotate_left(B*(2*B+1), 5); + C = rotate_left(C ^ T1, T2 % 32) + S[2*j+6]; + A = rotate_left(A ^ T2, T1 % 32) + S[2*j+7]; - store_le(out, A, B, C, D); + T1 = rotate_left(A*(2*A+1), 5); + T2 = rotate_left(C*(2*C+1), 5); + D = rotate_left(D ^ T1, T2 % 32) + S[2*j+8]; + B = rotate_left(B ^ T2, T1 % 32) + S[2*j+9]; + } + + A += S[42]; C += S[43]; + + store_le(out, A, B, C, D); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * RC6 Decryption */ -void RC6::dec(const byte in[], byte out[]) const +void RC6::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit A = load_le(in, 0); - u32bit B = load_le(in, 1); - u32bit C = load_le(in, 2); - u32bit D = load_le(in, 3); + for(u32bit i = 0; i != blocks; ++i) + { + u32bit A = load_le(in, 0); + u32bit B = load_le(in, 1); + u32bit C = load_le(in, 2); + u32bit D = load_le(in, 3); - C -= S[43]; A -= S[42]; + C -= S[43]; A -= S[42]; - for(u32bit j = 0; j != 20; j += 4) - { - u32bit T1, T2; - - T1 = rotate_left(A*(2*A+1), 5); - T2 = rotate_left(C*(2*C+1), 5); - B = rotate_right(B - S[41 - 2*j], T1 % 32) ^ T2; - D = rotate_right(D - S[40 - 2*j], T2 % 32) ^ T1; - - T1 = rotate_left(D*(2*D+1), 5); - T2 = rotate_left(B*(2*B+1), 5); - A = rotate_right(A - S[39 - 2*j], T1 % 32) ^ T2; - C = rotate_right(C - S[38 - 2*j], T2 % 32) ^ T1; - - T1 = rotate_left(C*(2*C+1), 5); - T2 = rotate_left(A*(2*A+1), 5); - D = rotate_right(D - S[37 - 2*j], T1 % 32) ^ T2; - B = rotate_right(B - S[36 - 2*j], T2 % 32) ^ T1; - - T1 = rotate_left(B*(2*B+1), 5); - T2 = rotate_left(D*(2*D+1), 5); - C = rotate_right(C - S[35 - 2*j], T1 % 32) ^ T2; - A = rotate_right(A - S[34 - 2*j], T2 % 32) ^ T1; - } + for(u32bit j = 0; j != 20; j += 4) + { + u32bit T1, T2; + + T1 = rotate_left(A*(2*A+1), 5); + T2 = rotate_left(C*(2*C+1), 5); + B = rotate_right(B - S[41 - 2*j], T1 % 32) ^ T2; + D = rotate_right(D - S[40 - 2*j], T2 % 32) ^ T1; + + T1 = rotate_left(D*(2*D+1), 5); + T2 = rotate_left(B*(2*B+1), 5); + A = rotate_right(A - S[39 - 2*j], T1 % 32) ^ T2; + C = rotate_right(C - S[38 - 2*j], T2 % 32) ^ T1; - D -= S[1]; B -= S[0]; + T1 = rotate_left(C*(2*C+1), 5); + T2 = rotate_left(A*(2*A+1), 5); + D = rotate_right(D - S[37 - 2*j], T1 % 32) ^ T2; + B = rotate_right(B - S[36 - 2*j], T2 % 32) ^ T1; - store_le(out, A, B, C, D); + T1 = rotate_left(B*(2*B+1), 5); + T2 = rotate_left(D*(2*D+1), 5); + C = rotate_right(C - S[35 - 2*j], T1 % 32) ^ T2; + A = rotate_right(A - S[34 - 2*j], T2 % 32) ^ T1; + } + + D -= S[1]; B -= S[0]; + + store_le(out, A, B, C, D); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/rc6/rc6.h b/src/block/rc6/rc6.h index cb2800be7..f634ebcd9 100644 --- a/src/block/rc6/rc6.h +++ b/src/block/rc6/rc6.h @@ -23,8 +23,8 @@ class BOTAN_DLL RC6 : public BlockCipher BlockCipher* clone() const { return new RC6; } RC6() : BlockCipher(16, 1, 32) {} private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer S; diff --git a/src/block/safer/safer_sk.cpp b/src/block/safer/safer_sk.cpp index f72c4773b..eb5c22fc9 100644 --- a/src/block/safer/safer_sk.cpp +++ b/src/block/safer/safer_sk.cpp @@ -1,6 +1,6 @@ /* * SAFER-SK -* (C) 1999-2007 Jack Lloyd +* (C) 1999-2009 Jack Lloyd * * Distributed under the terms of the Botan license */ @@ -15,54 +15,75 @@ namespace Botan { /* * SAFER-SK Encryption */ -void SAFER_SK::enc(const byte in[], byte out[]) const +void SAFER_SK::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - byte A = in[0], B = in[1], C = in[2], D = in[3], - E = in[4], F = in[5], G = in[6], H = in[7], X, Y; - for(u32bit j = 0; j != 16*ROUNDS; j += 16) + for(u32bit i = 0; i != blocks; ++i) { - A = EXP[A ^ EK[j ]]; B = LOG[B + EK[j+1]]; - C = LOG[C + EK[j+2]]; D = EXP[D ^ EK[j+3]]; - E = EXP[E ^ EK[j+4]]; F = LOG[F + EK[j+5]]; - G = LOG[G + EK[j+6]]; H = EXP[H ^ EK[j+7]]; - A += EK[j+ 8]; B ^= EK[j+ 9]; C ^= EK[j+10]; D += EK[j+11]; - E += EK[j+12]; F ^= EK[j+13]; G ^= EK[j+14]; H += EK[j+15]; - B += A; D += C; F += E; H += G; A += B; C += D; E += F; G += H; - C += A; G += E; D += B; H += F; A += C; E += G; B += D; F += H; - H += D; Y = D + H; D = B + F; X = B + D; B = A + E; - A += B; F = C + G; E = C + F; C = X; G = Y; + byte A = in[0], B = in[1], C = in[2], D = in[3], + E = in[4], F = in[5], G = in[6], H = in[7], X, Y; + + for(u32bit j = 0; j != 16*ROUNDS; j += 16) + { + A = EXP[A ^ EK[j ]]; B = LOG[B + EK[j+1]]; + C = LOG[C + EK[j+2]]; D = EXP[D ^ EK[j+3]]; + E = EXP[E ^ EK[j+4]]; F = LOG[F + EK[j+5]]; + G = LOG[G + EK[j+6]]; H = EXP[H ^ EK[j+7]]; + + A += EK[j+ 8]; B ^= EK[j+ 9]; C ^= EK[j+10]; D += EK[j+11]; + E += EK[j+12]; F ^= EK[j+13]; G ^= EK[j+14]; H += EK[j+15]; + + B += A; D += C; F += E; H += G; A += B; C += D; E += F; G += H; + C += A; G += E; D += B; H += F; A += C; E += G; B += D; F += H; + H += D; Y = D + H; D = B + F; X = B + D; B = A + E; + A += B; F = C + G; E = C + F; C = X; G = Y; + } + + out[0] = A ^ EK[16*ROUNDS+0]; out[1] = B + EK[16*ROUNDS+1]; + out[2] = C + EK[16*ROUNDS+2]; out[3] = D ^ EK[16*ROUNDS+3]; + out[4] = E ^ EK[16*ROUNDS+4]; out[5] = F + EK[16*ROUNDS+5]; + out[6] = G + EK[16*ROUNDS+6]; out[7] = H ^ EK[16*ROUNDS+7]; + + in += BLOCK_SIZE; + out += BLOCK_SIZE; } - out[0] = A ^ EK[16*ROUNDS+0]; out[1] = B + EK[16*ROUNDS+1]; - out[2] = C + EK[16*ROUNDS+2]; out[3] = D ^ EK[16*ROUNDS+3]; - out[4] = E ^ EK[16*ROUNDS+4]; out[5] = F + EK[16*ROUNDS+5]; - out[6] = G + EK[16*ROUNDS+6]; out[7] = H ^ EK[16*ROUNDS+7]; } /* * SAFER-SK Decryption */ -void SAFER_SK::dec(const byte in[], byte out[]) const +void SAFER_SK::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - byte A = in[0], B = in[1], C = in[2], D = in[3], - E = in[4], F = in[5], G = in[6], H = in[7]; - A ^= EK[16*ROUNDS+0]; B -= EK[16*ROUNDS+1]; C -= EK[16*ROUNDS+2]; - D ^= EK[16*ROUNDS+3]; E ^= EK[16*ROUNDS+4]; F -= EK[16*ROUNDS+5]; - G -= EK[16*ROUNDS+6]; H ^= EK[16*ROUNDS+7]; - for(s32bit j = 16*(ROUNDS-1); j >= 0; j -= 16) + for(u32bit i = 0; i != blocks; ++i) { - byte T = E; E = B; B = C; C = T; T = F; F = D; D = G; G = T; - A -= E; B -= F; C -= G; D -= H; E -= A; F -= B; G -= C; H -= D; - A -= C; E -= G; B -= D; F -= H; C -= A; G -= E; D -= B; H -= F; - A -= B; C -= D; E -= F; G -= H; B -= A; D -= C; F -= E; H -= G; - A = LOG[A - EK[j+8 ] + 256]; B = EXP[B ^ EK[j+9 ]]; - C = EXP[C ^ EK[j+10]]; D = LOG[D - EK[j+11] + 256]; - E = LOG[E - EK[j+12] + 256]; F = EXP[F ^ EK[j+13]]; - G = EXP[G ^ EK[j+14]]; H = LOG[H - EK[j+15] + 256]; - A ^= EK[j+0]; B -= EK[j+1]; C -= EK[j+2]; D ^= EK[j+3]; - E ^= EK[j+4]; F -= EK[j+5]; G -= EK[j+6]; H ^= EK[j+7]; + byte A = in[0], B = in[1], C = in[2], D = in[3], + E = in[4], F = in[5], G = in[6], H = in[7]; + + A ^= EK[16*ROUNDS+0]; B -= EK[16*ROUNDS+1]; C -= EK[16*ROUNDS+2]; + D ^= EK[16*ROUNDS+3]; E ^= EK[16*ROUNDS+4]; F -= EK[16*ROUNDS+5]; + G -= EK[16*ROUNDS+6]; H ^= EK[16*ROUNDS+7]; + + for(s32bit j = 16*(ROUNDS-1); j >= 0; j -= 16) + { + byte T = E; E = B; B = C; C = T; T = F; F = D; D = G; G = T; + A -= E; B -= F; C -= G; D -= H; E -= A; F -= B; G -= C; H -= D; + A -= C; E -= G; B -= D; F -= H; C -= A; G -= E; D -= B; H -= F; + A -= B; C -= D; E -= F; G -= H; B -= A; D -= C; F -= E; H -= G; + + A = LOG[A - EK[j+8 ] + 256]; B = EXP[B ^ EK[j+9 ]]; + C = EXP[C ^ EK[j+10]]; D = LOG[D - EK[j+11] + 256]; + E = LOG[E - EK[j+12] + 256]; F = EXP[F ^ EK[j+13]]; + G = EXP[G ^ EK[j+14]]; H = LOG[H - EK[j+15] + 256]; + + A ^= EK[j+0]; B -= EK[j+1]; C -= EK[j+2]; D ^= EK[j+3]; + E ^= EK[j+4]; F -= EK[j+5]; G -= EK[j+6]; H ^= EK[j+7]; + } + + out[0] = A; out[1] = B; out[2] = C; out[3] = D; + out[4] = E; out[5] = F; out[6] = G; out[7] = H; + + in += BLOCK_SIZE; + out += BLOCK_SIZE; } - out[0] = A; out[1] = B; out[2] = C; out[3] = D; - out[4] = E; out[5] = F; out[6] = G; out[7] = H; } /* diff --git a/src/block/safer/safer_sk.h b/src/block/safer/safer_sk.h index e52c5837c..ae3d4f9a8 100644 --- a/src/block/safer/safer_sk.h +++ b/src/block/safer/safer_sk.h @@ -23,8 +23,8 @@ class BOTAN_DLL SAFER_SK : public BlockCipher BlockCipher* clone() const; SAFER_SK(u32bit); private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); static const byte EXP[256]; diff --git a/src/block/seed/seed.cpp b/src/block/seed/seed.cpp index b06a7cd77..378be16e4 100644 --- a/src/block/seed/seed.cpp +++ b/src/block/seed/seed.cpp @@ -22,69 +22,81 @@ u32bit SEED::G_FUNC::operator()(u32bit X) const /* * SEED Encryption */ -void SEED::enc(const byte in[], byte out[]) const +void SEED::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit B0 = load_be(in, 0); - u32bit B1 = load_be(in, 1); - u32bit B2 = load_be(in, 2); - u32bit B3 = load_be(in, 3); - - G_FUNC G; - - for(u32bit j = 0; j != 16; j += 2) + for(u32bit i = 0; i != blocks; ++i) { - u32bit T0, T1; - - T0 = B2 ^ K[2*j]; - T1 = G(B2 ^ B3 ^ K[2*j+1]); - T0 = G(T1 + T0); - T1 = G(T1 + T0); - B1 ^= T1; - B0 ^= T0 + T1; - - T0 = B0 ^ K[2*j+2]; - T1 = G(B0 ^ B1 ^ K[2*j+3]); - T0 = G(T1 + T0); - T1 = G(T1 + T0); - B3 ^= T1; - B2 ^= T0 + T1; + u32bit B0 = load_be(in, 0); + u32bit B1 = load_be(in, 1); + u32bit B2 = load_be(in, 2); + u32bit B3 = load_be(in, 3); + + G_FUNC G; + + for(u32bit j = 0; j != 16; j += 2) + { + u32bit T0, T1; + + T0 = B2 ^ K[2*j]; + T1 = G(B2 ^ B3 ^ K[2*j+1]); + T0 = G(T1 + T0); + T1 = G(T1 + T0); + B1 ^= T1; + B0 ^= T0 + T1; + + T0 = B0 ^ K[2*j+2]; + T1 = G(B0 ^ B1 ^ K[2*j+3]); + T0 = G(T1 + T0); + T1 = G(T1 + T0); + B3 ^= T1; + B2 ^= T0 + T1; + } + + store_be(out, B2, B3, B0, B1); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; } - - store_be(out, B2, B3, B0, B1); } /* * SEED Decryption */ -void SEED::dec(const byte in[], byte out[]) const +void SEED::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit B0 = load_be(in, 0); - u32bit B1 = load_be(in, 1); - u32bit B2 = load_be(in, 2); - u32bit B3 = load_be(in, 3); - - G_FUNC G; - - for(u32bit j = 0; j != 16; j += 2) + for(u32bit i = 0; i != blocks; ++i) { - u32bit T0, T1; - - T0 = B2 ^ K[30-2*j]; - T1 = G(B2 ^ B3 ^ K[31-2*j]); - T0 = G(T1 + T0); - T1 = G(T1 + T0); - B1 ^= T1; - B0 ^= T0 + T1; - - T0 = B0 ^ K[28-2*j]; - T1 = G(B0 ^ B1 ^ K[29-2*j]); - T0 = G(T1 + T0); - T1 = G(T1 + T0); - B3 ^= T1; - B2 ^= T0 + T1; + u32bit B0 = load_be(in, 0); + u32bit B1 = load_be(in, 1); + u32bit B2 = load_be(in, 2); + u32bit B3 = load_be(in, 3); + + G_FUNC G; + + for(u32bit j = 0; j != 16; j += 2) + { + u32bit T0, T1; + + T0 = B2 ^ K[30-2*j]; + T1 = G(B2 ^ B3 ^ K[31-2*j]); + T0 = G(T1 + T0); + T1 = G(T1 + T0); + B1 ^= T1; + B0 ^= T0 + T1; + + T0 = B0 ^ K[28-2*j]; + T1 = G(B0 ^ B1 ^ K[29-2*j]); + T0 = G(T1 + T0); + T1 = G(T1 + T0); + B3 ^= T1; + B2 ^= T0 + T1; + } + + store_be(out, B2, B3, B0, B1); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; } - - store_be(out, B2, B3, B0, B1); } /* diff --git a/src/block/seed/seed.h b/src/block/seed/seed.h index 54c25d580..0c9c2c8af 100644 --- a/src/block/seed/seed.h +++ b/src/block/seed/seed.h @@ -23,8 +23,8 @@ class BOTAN_DLL SEED : public BlockCipher BlockCipher* clone() const { return new SEED; } SEED() : BlockCipher(16, 16) {} private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); class G_FUNC diff --git a/src/block/serpent/serpent.cpp b/src/block/serpent/serpent.cpp index df7592fea..2fa27308f 100644 --- a/src/block/serpent/serpent.cpp +++ b/src/block/serpent/serpent.cpp @@ -243,93 +243,105 @@ inline void i_transform(u32bit& B0, u32bit& B1, u32bit& B2, u32bit& B3) /* * Serpent Encryption */ -void Serpent::enc(const byte in[], byte out[]) const +void Serpent::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit B0 = load_le(in, 0); - u32bit B1 = load_le(in, 1); - u32bit B2 = load_le(in, 2); - u32bit B3 = load_le(in, 3); - - key_xor( 0,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor( 1,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor( 2,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor( 3,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor( 4,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor( 5,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor( 6,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor( 7,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor( 8,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor( 9,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(10,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(11,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(12,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(13,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(14,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(15,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(16,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(17,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(18,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(19,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(20,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(21,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(22,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(23,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(24,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(25,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(26,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(27,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(28,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(29,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(30,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(31,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3); - - store_le(out, B0, B1, B2, B3); + for(u32bit i = 0; i != blocks; ++i) + { + u32bit B0 = load_le(in, 0); + u32bit B1 = load_le(in, 1); + u32bit B2 = load_le(in, 2); + u32bit B3 = load_le(in, 3); + + key_xor( 0,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 1,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 2,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 3,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 4,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 5,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 6,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 7,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 8,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 9,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(10,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(11,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(12,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(13,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(14,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(15,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(16,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(17,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(18,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(19,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(20,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(21,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(22,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(23,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(24,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(25,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(26,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(27,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(28,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(29,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(30,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(31,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3); + + store_le(out, B0, B1, B2, B3); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * Serpent Decryption */ -void Serpent::dec(const byte in[], byte out[]) const +void Serpent::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit B0 = load_le(in, 0); - u32bit B1 = load_le(in, 1); - u32bit B2 = load_le(in, 2); - u32bit B3 = load_le(in, 3); - - key_xor(32,B0,B1,B2,B3); SBoxD8(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(28,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(27,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(26,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(25,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(24,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD8(B0,B1,B2,B3); key_xor(23,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(22,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(21,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(20,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(19,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(18,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(17,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(16,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD8(B0,B1,B2,B3); key_xor(15,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(14,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(13,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(12,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(11,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(10,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 9,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 8,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD8(B0,B1,B2,B3); key_xor( 7,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor( 6,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor( 5,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor( 4,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor( 3,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3); - i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3); - - store_le(out, B0, B1, B2, B3); + for(u32bit i = 0; i != blocks; ++i) + { + u32bit B0 = load_le(in, 0); + u32bit B1 = load_le(in, 1); + u32bit B2 = load_le(in, 2); + u32bit B3 = load_le(in, 3); + + key_xor(32,B0,B1,B2,B3); SBoxD8(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(28,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(27,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(26,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(25,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(24,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD8(B0,B1,B2,B3); key_xor(23,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(22,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(21,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(20,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(19,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(18,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(17,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(16,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD8(B0,B1,B2,B3); key_xor(15,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(14,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(13,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(12,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(11,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(10,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 9,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 8,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD8(B0,B1,B2,B3); key_xor( 7,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor( 6,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor( 5,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor( 4,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor( 3,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3); + + store_le(out, B0, B1, B2, B3); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/serpent/serpent.h b/src/block/serpent/serpent.h index 5b9be257f..9c9fb59f2 100644 --- a/src/block/serpent/serpent.h +++ b/src/block/serpent/serpent.h @@ -23,8 +23,8 @@ class BOTAN_DLL Serpent : public BlockCipher BlockCipher* clone() const { return new Serpent; } Serpent() : BlockCipher(16, 16, 32, 8) {} protected: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer round_key; diff --git a/src/block/serpent_ia32/serp_ia32.cpp b/src/block/serpent_ia32/serp_ia32.cpp index 37dd4e637..997bec2fc 100644 --- a/src/block/serpent_ia32/serp_ia32.cpp +++ b/src/block/serpent_ia32/serp_ia32.cpp @@ -21,17 +21,27 @@ void botan_serpent_ia32_key_schedule(u32bit[140]); /* * Serpent Encryption */ -void Serpent_IA32::enc(const byte in[], byte out[]) const +void Serpent_IA32::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - botan_serpent_ia32_encrypt(in, out, round_key); + for(u32bit i = 0; i != blocks; ++i) + { + botan_serpent_ia32_encrypt(in, out, round_key); + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * Serpent Decryption */ -void Serpent_IA32::dec(const byte in[], byte out[]) const +void Serpent_IA32::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - botan_serpent_ia32_decrypt(in, out, round_key); + for(u32bit i = 0; i != blocks; ++i) + { + botan_serpent_ia32_decrypt(in, out, round_key); + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/serpent_ia32/serp_ia32.h b/src/block/serpent_ia32/serp_ia32.h index 565e9889d..2b4a95d3d 100644 --- a/src/block/serpent_ia32/serp_ia32.h +++ b/src/block/serpent_ia32/serp_ia32.h @@ -20,8 +20,8 @@ class BOTAN_DLL Serpent_IA32 : public Serpent public: BlockCipher* clone() const { return new Serpent_IA32; } private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); }; diff --git a/src/block/skipjack/skipjack.cpp b/src/block/skipjack/skipjack.cpp index f5ffc861e..6c308c0f8 100644 --- a/src/block/skipjack/skipjack.cpp +++ b/src/block/skipjack/skipjack.cpp @@ -13,51 +13,63 @@ namespace Botan { /* * Skipjack Encryption */ -void Skipjack::enc(const byte in[], byte out[]) const +void Skipjack::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u16bit W1 = load_le(in, 3); - u16bit W2 = load_le(in, 2); - u16bit W3 = load_le(in, 1); - u16bit W4 = load_le(in, 0); + for(u32bit i = 0; i != blocks; ++i) + { + u16bit W1 = load_le(in, 3); + u16bit W2 = load_le(in, 2); + u16bit W3 = load_le(in, 1); + u16bit W4 = load_le(in, 0); - step_A(W1,W4, 1); step_A(W4,W3, 2); step_A(W3,W2, 3); step_A(W2,W1, 4); - step_A(W1,W4, 5); step_A(W4,W3, 6); step_A(W3,W2, 7); step_A(W2,W1, 8); + step_A(W1,W4, 1); step_A(W4,W3, 2); step_A(W3,W2, 3); step_A(W2,W1, 4); + step_A(W1,W4, 5); step_A(W4,W3, 6); step_A(W3,W2, 7); step_A(W2,W1, 8); - step_B(W1,W2, 9); step_B(W4,W1,10); step_B(W3,W4,11); step_B(W2,W3,12); - step_B(W1,W2,13); step_B(W4,W1,14); step_B(W3,W4,15); step_B(W2,W3,16); + step_B(W1,W2, 9); step_B(W4,W1,10); step_B(W3,W4,11); step_B(W2,W3,12); + step_B(W1,W2,13); step_B(W4,W1,14); step_B(W3,W4,15); step_B(W2,W3,16); - step_A(W1,W4,17); step_A(W4,W3,18); step_A(W3,W2,19); step_A(W2,W1,20); - step_A(W1,W4,21); step_A(W4,W3,22); step_A(W3,W2,23); step_A(W2,W1,24); + step_A(W1,W4,17); step_A(W4,W3,18); step_A(W3,W2,19); step_A(W2,W1,20); + step_A(W1,W4,21); step_A(W4,W3,22); step_A(W3,W2,23); step_A(W2,W1,24); - step_B(W1,W2,25); step_B(W4,W1,26); step_B(W3,W4,27); step_B(W2,W3,28); - step_B(W1,W2,29); step_B(W4,W1,30); step_B(W3,W4,31); step_B(W2,W3,32); + step_B(W1,W2,25); step_B(W4,W1,26); step_B(W3,W4,27); step_B(W2,W3,28); + step_B(W1,W2,29); step_B(W4,W1,30); step_B(W3,W4,31); step_B(W2,W3,32); - store_le(out, W4, W3, W2, W1); + store_le(out, W4, W3, W2, W1); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * Skipjack Decryption */ -void Skipjack::dec(const byte in[], byte out[]) const +void Skipjack::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u16bit W1 = load_le(in, 3); - u16bit W2 = load_le(in, 2); - u16bit W3 = load_le(in, 1); - u16bit W4 = load_le(in, 0); + for(u32bit i = 0; i != blocks; ++i) + { + u16bit W1 = load_le(in, 3); + u16bit W2 = load_le(in, 2); + u16bit W3 = load_le(in, 1); + u16bit W4 = load_le(in, 0); + + step_Bi(W2,W3,32); step_Bi(W3,W4,31); step_Bi(W4,W1,30); step_Bi(W1,W2,29); + step_Bi(W2,W3,28); step_Bi(W3,W4,27); step_Bi(W4,W1,26); step_Bi(W1,W2,25); - step_Bi(W2,W3,32); step_Bi(W3,W4,31); step_Bi(W4,W1,30); step_Bi(W1,W2,29); - step_Bi(W2,W3,28); step_Bi(W3,W4,27); step_Bi(W4,W1,26); step_Bi(W1,W2,25); + step_Ai(W1,W2,24); step_Ai(W2,W3,23); step_Ai(W3,W4,22); step_Ai(W4,W1,21); + step_Ai(W1,W2,20); step_Ai(W2,W3,19); step_Ai(W3,W4,18); step_Ai(W4,W1,17); - step_Ai(W1,W2,24); step_Ai(W2,W3,23); step_Ai(W3,W4,22); step_Ai(W4,W1,21); - step_Ai(W1,W2,20); step_Ai(W2,W3,19); step_Ai(W3,W4,18); step_Ai(W4,W1,17); + step_Bi(W2,W3,16); step_Bi(W3,W4,15); step_Bi(W4,W1,14); step_Bi(W1,W2,13); + step_Bi(W2,W3,12); step_Bi(W3,W4,11); step_Bi(W4,W1,10); step_Bi(W1,W2, 9); - step_Bi(W2,W3,16); step_Bi(W3,W4,15); step_Bi(W4,W1,14); step_Bi(W1,W2,13); - step_Bi(W2,W3,12); step_Bi(W3,W4,11); step_Bi(W4,W1,10); step_Bi(W1,W2, 9); + step_Ai(W1,W2, 8); step_Ai(W2,W3, 7); step_Ai(W3,W4, 6); step_Ai(W4,W1, 5); + step_Ai(W1,W2, 4); step_Ai(W2,W3, 3); step_Ai(W3,W4, 2); step_Ai(W4,W1, 1); - step_Ai(W1,W2, 8); step_Ai(W2,W3, 7); step_Ai(W3,W4, 6); step_Ai(W4,W1, 5); - step_Ai(W1,W2, 4); step_Ai(W2,W3, 3); step_Ai(W3,W4, 2); step_Ai(W4,W1, 1); + store_le(out, W4, W3, W2, W1); - store_le(out, W4, W3, W2, W1); + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/skipjack/skipjack.h b/src/block/skipjack/skipjack.h index 231cd9c87..ec071dfe7 100644 --- a/src/block/skipjack/skipjack.h +++ b/src/block/skipjack/skipjack.h @@ -23,8 +23,8 @@ class BOTAN_DLL Skipjack : public BlockCipher BlockCipher* clone() const { return new Skipjack; } Skipjack() : BlockCipher(8, 10) {} private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); void step_A(u16bit&, u16bit&, u32bit) const; void step_B(u16bit&, u16bit&, u32bit) const; diff --git a/src/block/square/square.cpp b/src/block/square/square.cpp index cb226542d..fdd47d3b2 100644 --- a/src/block/square/square.cpp +++ b/src/block/square/square.cpp @@ -14,103 +14,123 @@ namespace Botan { /* * Square Encryption */ -void Square::enc(const byte in[], byte out[]) const +void Square::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit T0, T1, T2, T3, B0, B1, B2, B3; - B0 = TE0[in[ 0] ^ ME[ 0]] ^ TE1[in[ 4] ^ ME[ 4]] ^ - TE2[in[ 8] ^ ME[ 8]] ^ TE3[in[12] ^ ME[12]] ^ EK[0]; - B1 = TE0[in[ 1] ^ ME[ 1]] ^ TE1[in[ 5] ^ ME[ 5]] ^ - TE2[in[ 9] ^ ME[ 9]] ^ TE3[in[13] ^ ME[13]] ^ EK[1]; - B2 = TE0[in[ 2] ^ ME[ 2]] ^ TE1[in[ 6] ^ ME[ 6]] ^ - TE2[in[10] ^ ME[10]] ^ TE3[in[14] ^ ME[14]] ^ EK[2]; - B3 = TE0[in[ 3] ^ ME[ 3]] ^ TE1[in[ 7] ^ ME[ 7]] ^ - TE2[in[11] ^ ME[11]] ^ TE3[in[15] ^ ME[15]] ^ EK[3]; - for(u32bit j = 1; j != 7; j += 2) + for(u32bit i = 0; i != blocks; ++i) { - T0 = TE0[get_byte(0, B0)] ^ TE1[get_byte(0, B1)] ^ - TE2[get_byte(0, B2)] ^ TE3[get_byte(0, B3)] ^ EK[4*j+0]; - T1 = TE0[get_byte(1, B0)] ^ TE1[get_byte(1, B1)] ^ - TE2[get_byte(1, B2)] ^ TE3[get_byte(1, B3)] ^ EK[4*j+1]; - T2 = TE0[get_byte(2, B0)] ^ TE1[get_byte(2, B1)] ^ - TE2[get_byte(2, B2)] ^ TE3[get_byte(2, B3)] ^ EK[4*j+2]; - T3 = TE0[get_byte(3, B0)] ^ TE1[get_byte(3, B1)] ^ - TE2[get_byte(3, B2)] ^ TE3[get_byte(3, B3)] ^ EK[4*j+3]; - B0 = TE0[get_byte(0, T0)] ^ TE1[get_byte(0, T1)] ^ - TE2[get_byte(0, T2)] ^ TE3[get_byte(0, T3)] ^ EK[4*j+4]; - B1 = TE0[get_byte(1, T0)] ^ TE1[get_byte(1, T1)] ^ - TE2[get_byte(1, T2)] ^ TE3[get_byte(1, T3)] ^ EK[4*j+5]; - B2 = TE0[get_byte(2, T0)] ^ TE1[get_byte(2, T1)] ^ - TE2[get_byte(2, T2)] ^ TE3[get_byte(2, T3)] ^ EK[4*j+6]; - B3 = TE0[get_byte(3, T0)] ^ TE1[get_byte(3, T1)] ^ - TE2[get_byte(3, T2)] ^ TE3[get_byte(3, T3)] ^ EK[4*j+7]; + u32bit T0, T1, T2, T3, B0, B1, B2, B3; + + B0 = TE0[in[ 0] ^ ME[ 0]] ^ TE1[in[ 4] ^ ME[ 4]] ^ + TE2[in[ 8] ^ ME[ 8]] ^ TE3[in[12] ^ ME[12]] ^ EK[0]; + B1 = TE0[in[ 1] ^ ME[ 1]] ^ TE1[in[ 5] ^ ME[ 5]] ^ + TE2[in[ 9] ^ ME[ 9]] ^ TE3[in[13] ^ ME[13]] ^ EK[1]; + B2 = TE0[in[ 2] ^ ME[ 2]] ^ TE1[in[ 6] ^ ME[ 6]] ^ + TE2[in[10] ^ ME[10]] ^ TE3[in[14] ^ ME[14]] ^ EK[2]; + B3 = TE0[in[ 3] ^ ME[ 3]] ^ TE1[in[ 7] ^ ME[ 7]] ^ + TE2[in[11] ^ ME[11]] ^ TE3[in[15] ^ ME[15]] ^ EK[3]; + + for(u32bit j = 1; j != 7; j += 2) + { + T0 = TE0[get_byte(0, B0)] ^ TE1[get_byte(0, B1)] ^ + TE2[get_byte(0, B2)] ^ TE3[get_byte(0, B3)] ^ EK[4*j+0]; + T1 = TE0[get_byte(1, B0)] ^ TE1[get_byte(1, B1)] ^ + TE2[get_byte(1, B2)] ^ TE3[get_byte(1, B3)] ^ EK[4*j+1]; + T2 = TE0[get_byte(2, B0)] ^ TE1[get_byte(2, B1)] ^ + TE2[get_byte(2, B2)] ^ TE3[get_byte(2, B3)] ^ EK[4*j+2]; + T3 = TE0[get_byte(3, B0)] ^ TE1[get_byte(3, B1)] ^ + TE2[get_byte(3, B2)] ^ TE3[get_byte(3, B3)] ^ EK[4*j+3]; + + B0 = TE0[get_byte(0, T0)] ^ TE1[get_byte(0, T1)] ^ + TE2[get_byte(0, T2)] ^ TE3[get_byte(0, T3)] ^ EK[4*j+4]; + B1 = TE0[get_byte(1, T0)] ^ TE1[get_byte(1, T1)] ^ + TE2[get_byte(1, T2)] ^ TE3[get_byte(1, T3)] ^ EK[4*j+5]; + B2 = TE0[get_byte(2, T0)] ^ TE1[get_byte(2, T1)] ^ + TE2[get_byte(2, T2)] ^ TE3[get_byte(2, T3)] ^ EK[4*j+6]; + B3 = TE0[get_byte(3, T0)] ^ TE1[get_byte(3, T1)] ^ + TE2[get_byte(3, T2)] ^ TE3[get_byte(3, T3)] ^ EK[4*j+7]; + } + + out[ 0] = SE[get_byte(0, B0)] ^ ME[16]; + out[ 1] = SE[get_byte(0, B1)] ^ ME[17]; + out[ 2] = SE[get_byte(0, B2)] ^ ME[18]; + out[ 3] = SE[get_byte(0, B3)] ^ ME[19]; + out[ 4] = SE[get_byte(1, B0)] ^ ME[20]; + out[ 5] = SE[get_byte(1, B1)] ^ ME[21]; + out[ 6] = SE[get_byte(1, B2)] ^ ME[22]; + out[ 7] = SE[get_byte(1, B3)] ^ ME[23]; + out[ 8] = SE[get_byte(2, B0)] ^ ME[24]; + out[ 9] = SE[get_byte(2, B1)] ^ ME[25]; + out[10] = SE[get_byte(2, B2)] ^ ME[26]; + out[11] = SE[get_byte(2, B3)] ^ ME[27]; + out[12] = SE[get_byte(3, B0)] ^ ME[28]; + out[13] = SE[get_byte(3, B1)] ^ ME[29]; + out[14] = SE[get_byte(3, B2)] ^ ME[30]; + out[15] = SE[get_byte(3, B3)] ^ ME[31]; + + in += BLOCK_SIZE; + out += BLOCK_SIZE; } - out[ 0] = SE[get_byte(0, B0)] ^ ME[16]; - out[ 1] = SE[get_byte(0, B1)] ^ ME[17]; - out[ 2] = SE[get_byte(0, B2)] ^ ME[18]; - out[ 3] = SE[get_byte(0, B3)] ^ ME[19]; - out[ 4] = SE[get_byte(1, B0)] ^ ME[20]; - out[ 5] = SE[get_byte(1, B1)] ^ ME[21]; - out[ 6] = SE[get_byte(1, B2)] ^ ME[22]; - out[ 7] = SE[get_byte(1, B3)] ^ ME[23]; - out[ 8] = SE[get_byte(2, B0)] ^ ME[24]; - out[ 9] = SE[get_byte(2, B1)] ^ ME[25]; - out[10] = SE[get_byte(2, B2)] ^ ME[26]; - out[11] = SE[get_byte(2, B3)] ^ ME[27]; - out[12] = SE[get_byte(3, B0)] ^ ME[28]; - out[13] = SE[get_byte(3, B1)] ^ ME[29]; - out[14] = SE[get_byte(3, B2)] ^ ME[30]; - out[15] = SE[get_byte(3, B3)] ^ ME[31]; } /* * Square Decryption */ -void Square::dec(const byte in[], byte out[]) const +void Square::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit T0, T1, T2, T3, B0, B1, B2, B3; - B0 = TD0[in[ 0] ^ MD[ 0]] ^ TD1[in[ 4] ^ MD[ 4]] ^ - TD2[in[ 8] ^ MD[ 8]] ^ TD3[in[12] ^ MD[12]] ^ DK[0]; - B1 = TD0[in[ 1] ^ MD[ 1]] ^ TD1[in[ 5] ^ MD[ 5]] ^ - TD2[in[ 9] ^ MD[ 9]] ^ TD3[in[13] ^ MD[13]] ^ DK[1]; - B2 = TD0[in[ 2] ^ MD[ 2]] ^ TD1[in[ 6] ^ MD[ 6]] ^ - TD2[in[10] ^ MD[10]] ^ TD3[in[14] ^ MD[14]] ^ DK[2]; - B3 = TD0[in[ 3] ^ MD[ 3]] ^ TD1[in[ 7] ^ MD[ 7]] ^ - TD2[in[11] ^ MD[11]] ^ TD3[in[15] ^ MD[15]] ^ DK[3]; - for(u32bit j = 1; j != 7; j += 2) + for(u32bit i = 0; i != blocks; ++i) { - T0 = TD0[get_byte(0, B0)] ^ TD1[get_byte(0, B1)] ^ - TD2[get_byte(0, B2)] ^ TD3[get_byte(0, B3)] ^ DK[4*j+0]; - T1 = TD0[get_byte(1, B0)] ^ TD1[get_byte(1, B1)] ^ - TD2[get_byte(1, B2)] ^ TD3[get_byte(1, B3)] ^ DK[4*j+1]; - T2 = TD0[get_byte(2, B0)] ^ TD1[get_byte(2, B1)] ^ - TD2[get_byte(2, B2)] ^ TD3[get_byte(2, B3)] ^ DK[4*j+2]; - T3 = TD0[get_byte(3, B0)] ^ TD1[get_byte(3, B1)] ^ - TD2[get_byte(3, B2)] ^ TD3[get_byte(3, B3)] ^ DK[4*j+3]; - B0 = TD0[get_byte(0, T0)] ^ TD1[get_byte(0, T1)] ^ - TD2[get_byte(0, T2)] ^ TD3[get_byte(0, T3)] ^ DK[4*j+4]; - B1 = TD0[get_byte(1, T0)] ^ TD1[get_byte(1, T1)] ^ - TD2[get_byte(1, T2)] ^ TD3[get_byte(1, T3)] ^ DK[4*j+5]; - B2 = TD0[get_byte(2, T0)] ^ TD1[get_byte(2, T1)] ^ - TD2[get_byte(2, T2)] ^ TD3[get_byte(2, T3)] ^ DK[4*j+6]; - B3 = TD0[get_byte(3, T0)] ^ TD1[get_byte(3, T1)] ^ - TD2[get_byte(3, T2)] ^ TD3[get_byte(3, T3)] ^ DK[4*j+7]; + u32bit T0, T1, T2, T3, B0, B1, B2, B3; + + B0 = TD0[in[ 0] ^ MD[ 0]] ^ TD1[in[ 4] ^ MD[ 4]] ^ + TD2[in[ 8] ^ MD[ 8]] ^ TD3[in[12] ^ MD[12]] ^ DK[0]; + B1 = TD0[in[ 1] ^ MD[ 1]] ^ TD1[in[ 5] ^ MD[ 5]] ^ + TD2[in[ 9] ^ MD[ 9]] ^ TD3[in[13] ^ MD[13]] ^ DK[1]; + B2 = TD0[in[ 2] ^ MD[ 2]] ^ TD1[in[ 6] ^ MD[ 6]] ^ + TD2[in[10] ^ MD[10]] ^ TD3[in[14] ^ MD[14]] ^ DK[2]; + B3 = TD0[in[ 3] ^ MD[ 3]] ^ TD1[in[ 7] ^ MD[ 7]] ^ + TD2[in[11] ^ MD[11]] ^ TD3[in[15] ^ MD[15]] ^ DK[3]; + + for(u32bit j = 1; j != 7; j += 2) + { + T0 = TD0[get_byte(0, B0)] ^ TD1[get_byte(0, B1)] ^ + TD2[get_byte(0, B2)] ^ TD3[get_byte(0, B3)] ^ DK[4*j+0]; + T1 = TD0[get_byte(1, B0)] ^ TD1[get_byte(1, B1)] ^ + TD2[get_byte(1, B2)] ^ TD3[get_byte(1, B3)] ^ DK[4*j+1]; + T2 = TD0[get_byte(2, B0)] ^ TD1[get_byte(2, B1)] ^ + TD2[get_byte(2, B2)] ^ TD3[get_byte(2, B3)] ^ DK[4*j+2]; + T3 = TD0[get_byte(3, B0)] ^ TD1[get_byte(3, B1)] ^ + TD2[get_byte(3, B2)] ^ TD3[get_byte(3, B3)] ^ DK[4*j+3]; + + B0 = TD0[get_byte(0, T0)] ^ TD1[get_byte(0, T1)] ^ + TD2[get_byte(0, T2)] ^ TD3[get_byte(0, T3)] ^ DK[4*j+4]; + B1 = TD0[get_byte(1, T0)] ^ TD1[get_byte(1, T1)] ^ + TD2[get_byte(1, T2)] ^ TD3[get_byte(1, T3)] ^ DK[4*j+5]; + B2 = TD0[get_byte(2, T0)] ^ TD1[get_byte(2, T1)] ^ + TD2[get_byte(2, T2)] ^ TD3[get_byte(2, T3)] ^ DK[4*j+6]; + B3 = TD0[get_byte(3, T0)] ^ TD1[get_byte(3, T1)] ^ + TD2[get_byte(3, T2)] ^ TD3[get_byte(3, T3)] ^ DK[4*j+7]; + } + + out[ 0] = SD[get_byte(0, B0)] ^ MD[16]; + out[ 1] = SD[get_byte(0, B1)] ^ MD[17]; + out[ 2] = SD[get_byte(0, B2)] ^ MD[18]; + out[ 3] = SD[get_byte(0, B3)] ^ MD[19]; + out[ 4] = SD[get_byte(1, B0)] ^ MD[20]; + out[ 5] = SD[get_byte(1, B1)] ^ MD[21]; + out[ 6] = SD[get_byte(1, B2)] ^ MD[22]; + out[ 7] = SD[get_byte(1, B3)] ^ MD[23]; + out[ 8] = SD[get_byte(2, B0)] ^ MD[24]; + out[ 9] = SD[get_byte(2, B1)] ^ MD[25]; + out[10] = SD[get_byte(2, B2)] ^ MD[26]; + out[11] = SD[get_byte(2, B3)] ^ MD[27]; + out[12] = SD[get_byte(3, B0)] ^ MD[28]; + out[13] = SD[get_byte(3, B1)] ^ MD[29]; + out[14] = SD[get_byte(3, B2)] ^ MD[30]; + out[15] = SD[get_byte(3, B3)] ^ MD[31]; + + in += BLOCK_SIZE; + out += BLOCK_SIZE; } - out[ 0] = SD[get_byte(0, B0)] ^ MD[16]; - out[ 1] = SD[get_byte(0, B1)] ^ MD[17]; - out[ 2] = SD[get_byte(0, B2)] ^ MD[18]; - out[ 3] = SD[get_byte(0, B3)] ^ MD[19]; - out[ 4] = SD[get_byte(1, B0)] ^ MD[20]; - out[ 5] = SD[get_byte(1, B1)] ^ MD[21]; - out[ 6] = SD[get_byte(1, B2)] ^ MD[22]; - out[ 7] = SD[get_byte(1, B3)] ^ MD[23]; - out[ 8] = SD[get_byte(2, B0)] ^ MD[24]; - out[ 9] = SD[get_byte(2, B1)] ^ MD[25]; - out[10] = SD[get_byte(2, B2)] ^ MD[26]; - out[11] = SD[get_byte(2, B3)] ^ MD[27]; - out[12] = SD[get_byte(3, B0)] ^ MD[28]; - out[13] = SD[get_byte(3, B1)] ^ MD[29]; - out[14] = SD[get_byte(3, B2)] ^ MD[30]; - out[15] = SD[get_byte(3, B3)] ^ MD[31]; } /* diff --git a/src/block/square/square.h b/src/block/square/square.h index 94a1fc370..0de4c20bd 100644 --- a/src/block/square/square.h +++ b/src/block/square/square.h @@ -23,8 +23,8 @@ class BOTAN_DLL Square : public BlockCipher BlockCipher* clone() const { return new Square; } Square() : BlockCipher(16, 16) {} private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); static void transform(u32bit[4]); diff --git a/src/block/tea/tea.cpp b/src/block/tea/tea.cpp index 2b4212d9c..de30858da 100644 --- a/src/block/tea/tea.cpp +++ b/src/block/tea/tea.cpp @@ -13,37 +13,49 @@ namespace Botan { /* * TEA Encryption */ -void TEA::enc(const byte in[], byte out[]) const +void TEA::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit L = load_be(in, 0), R = load_be(in, 1); - - u32bit S = 0; - for(u32bit j = 0; j != 32; ++j) + for(u32bit i = 0; i != blocks; ++i) { - S += 0x9E3779B9; - L += ((R << 4) + K[0]) ^ (R + S) ^ ((R >> 5) + K[1]); - R += ((L << 4) + K[2]) ^ (L + S) ^ ((L >> 5) + K[3]); - } + u32bit L = load_be(in, 0), R = load_be(in, 1); + + u32bit S = 0; + for(u32bit j = 0; j != 32; ++j) + { + S += 0x9E3779B9; + L += ((R << 4) + K[0]) ^ (R + S) ^ ((R >> 5) + K[1]); + R += ((L << 4) + K[2]) ^ (L + S) ^ ((L >> 5) + K[3]); + } - store_be(out, L, R); + store_be(out, L, R); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * TEA Decryption */ -void TEA::dec(const byte in[], byte out[]) const +void TEA::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit L = load_be(in, 0), R = load_be(in, 1); - - u32bit S = 0xC6EF3720; - for(u32bit j = 0; j != 32; ++j) + for(u32bit i = 0; i != blocks; ++i) { - R -= ((L << 4) + K[2]) ^ (L + S) ^ ((L >> 5) + K[3]); - L -= ((R << 4) + K[0]) ^ (R + S) ^ ((R >> 5) + K[1]); - S -= 0x9E3779B9; - } + u32bit L = load_be(in, 0), R = load_be(in, 1); + + u32bit S = 0xC6EF3720; + for(u32bit j = 0; j != 32; ++j) + { + R -= ((L << 4) + K[2]) ^ (L + S) ^ ((L >> 5) + K[3]); + L -= ((R << 4) + K[0]) ^ (R + S) ^ ((R >> 5) + K[1]); + S -= 0x9E3779B9; + } - store_be(out, L, R); + store_be(out, L, R); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/tea/tea.h b/src/block/tea/tea.h index 8ddf3e330..141899e88 100644 --- a/src/block/tea/tea.h +++ b/src/block/tea/tea.h @@ -23,8 +23,8 @@ class BOTAN_DLL TEA : public BlockCipher BlockCipher* clone() const { return new TEA; } TEA() : BlockCipher(8, 16) {} private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer K; }; diff --git a/src/block/twofish/twofish.cpp b/src/block/twofish/twofish.cpp index 9784b00a2..6a482a8f3 100644 --- a/src/block/twofish/twofish.cpp +++ b/src/block/twofish/twofish.cpp @@ -14,91 +14,103 @@ namespace Botan { /* * Twofish Encryption */ -void Twofish::enc(const byte in[], byte out[]) const +void Twofish::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit A = load_le(in, 0) ^ round_key[0]; - u32bit B = load_le(in, 1) ^ round_key[1]; - u32bit C = load_le(in, 2) ^ round_key[2]; - u32bit D = load_le(in, 3) ^ round_key[3]; - - for(u32bit j = 0; j != 16; j += 2) + for(u32bit i = 0; i != blocks; ++i) { - u32bit X, Y; - - X = SBox0[get_byte(3, A)] ^ SBox1[get_byte(2, A)] ^ - SBox2[get_byte(1, A)] ^ SBox3[get_byte(0, A)]; - Y = SBox0[get_byte(0, B)] ^ SBox1[get_byte(3, B)] ^ - SBox2[get_byte(2, B)] ^ SBox3[get_byte(1, B)]; - X += Y; - Y += X + round_key[2*j + 9]; - X += round_key[2*j + 8]; - - C = rotate_right(C ^ X, 1); - D = rotate_left(D, 1) ^ Y; - - X = SBox0[get_byte(3, C)] ^ SBox1[get_byte(2, C)] ^ - SBox2[get_byte(1, C)] ^ SBox3[get_byte(0, C)]; - Y = SBox0[get_byte(0, D)] ^ SBox1[get_byte(3, D)] ^ - SBox2[get_byte(2, D)] ^ SBox3[get_byte(1, D)]; - X += Y; - Y += X + round_key[2*j + 11]; - X += round_key[2*j + 10]; - - A = rotate_right(A ^ X, 1); - B = rotate_left(B, 1) ^ Y; - } + u32bit A = load_le(in, 0) ^ round_key[0]; + u32bit B = load_le(in, 1) ^ round_key[1]; + u32bit C = load_le(in, 2) ^ round_key[2]; + u32bit D = load_le(in, 3) ^ round_key[3]; - C ^= round_key[4]; - D ^= round_key[5]; - A ^= round_key[6]; - B ^= round_key[7]; + for(u32bit j = 0; j != 16; j += 2) + { + u32bit X, Y; + + X = SBox0[get_byte(3, A)] ^ SBox1[get_byte(2, A)] ^ + SBox2[get_byte(1, A)] ^ SBox3[get_byte(0, A)]; + Y = SBox0[get_byte(0, B)] ^ SBox1[get_byte(3, B)] ^ + SBox2[get_byte(2, B)] ^ SBox3[get_byte(1, B)]; + X += Y; + Y += X + round_key[2*j + 9]; + X += round_key[2*j + 8]; + + C = rotate_right(C ^ X, 1); + D = rotate_left(D, 1) ^ Y; + + X = SBox0[get_byte(3, C)] ^ SBox1[get_byte(2, C)] ^ + SBox2[get_byte(1, C)] ^ SBox3[get_byte(0, C)]; + Y = SBox0[get_byte(0, D)] ^ SBox1[get_byte(3, D)] ^ + SBox2[get_byte(2, D)] ^ SBox3[get_byte(1, D)]; + X += Y; + Y += X + round_key[2*j + 11]; + X += round_key[2*j + 10]; + + A = rotate_right(A ^ X, 1); + B = rotate_left(B, 1) ^ Y; + } - store_le(out, C, D, A, B); + C ^= round_key[4]; + D ^= round_key[5]; + A ^= round_key[6]; + B ^= round_key[7]; + + store_le(out, C, D, A, B); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * Twofish Decryption */ -void Twofish::dec(const byte in[], byte out[]) const +void Twofish::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit A = load_le(in, 0) ^ round_key[4]; - u32bit B = load_le(in, 1) ^ round_key[5]; - u32bit C = load_le(in, 2) ^ round_key[6]; - u32bit D = load_le(in, 3) ^ round_key[7]; - - for(u32bit j = 0; j != 16; j += 2) + for(u32bit i = 0; i != blocks; ++i) { - u32bit X, Y; - - X = SBox0[get_byte(3, A)] ^ SBox1[get_byte(2, A)] ^ - SBox2[get_byte(1, A)] ^ SBox3[get_byte(0, A)]; - Y = SBox0[get_byte(0, B)] ^ SBox1[get_byte(3, B)] ^ - SBox2[get_byte(2, B)] ^ SBox3[get_byte(1, B)]; - X += Y; - Y += X + round_key[39 - 2*j]; - X += round_key[38 - 2*j]; - - C = rotate_left(C, 1) ^ X; - D = rotate_right(D ^ Y, 1); - - X = SBox0[get_byte(3, C)] ^ SBox1[get_byte(2, C)] ^ - SBox2[get_byte(1, C)] ^ SBox3[get_byte(0, C)]; - Y = SBox0[get_byte(0, D)] ^ SBox1[get_byte(3, D)] ^ - SBox2[get_byte(2, D)] ^ SBox3[get_byte(1, D)]; - X += Y; - Y += X + round_key[37 - 2*j]; - X += round_key[36 - 2*j]; - - A = rotate_left(A, 1) ^ X; - B = rotate_right(B ^ Y, 1); - } + u32bit A = load_le(in, 0) ^ round_key[4]; + u32bit B = load_le(in, 1) ^ round_key[5]; + u32bit C = load_le(in, 2) ^ round_key[6]; + u32bit D = load_le(in, 3) ^ round_key[7]; - C ^= round_key[0]; - D ^= round_key[1]; - A ^= round_key[2]; - B ^= round_key[3]; + for(u32bit j = 0; j != 16; j += 2) + { + u32bit X, Y; + + X = SBox0[get_byte(3, A)] ^ SBox1[get_byte(2, A)] ^ + SBox2[get_byte(1, A)] ^ SBox3[get_byte(0, A)]; + Y = SBox0[get_byte(0, B)] ^ SBox1[get_byte(3, B)] ^ + SBox2[get_byte(2, B)] ^ SBox3[get_byte(1, B)]; + X += Y; + Y += X + round_key[39 - 2*j]; + X += round_key[38 - 2*j]; + + C = rotate_left(C, 1) ^ X; + D = rotate_right(D ^ Y, 1); + + X = SBox0[get_byte(3, C)] ^ SBox1[get_byte(2, C)] ^ + SBox2[get_byte(1, C)] ^ SBox3[get_byte(0, C)]; + Y = SBox0[get_byte(0, D)] ^ SBox1[get_byte(3, D)] ^ + SBox2[get_byte(2, D)] ^ SBox3[get_byte(1, D)]; + X += Y; + Y += X + round_key[37 - 2*j]; + X += round_key[36 - 2*j]; + + A = rotate_left(A, 1) ^ X; + B = rotate_right(B ^ Y, 1); + } - store_le(out, C, D, A, B); + C ^= round_key[0]; + D ^= round_key[1]; + A ^= round_key[2]; + B ^= round_key[3]; + + store_le(out, C, D, A, B); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/twofish/twofish.h b/src/block/twofish/twofish.h index 0640e32f8..640fb58ad 100644 --- a/src/block/twofish/twofish.h +++ b/src/block/twofish/twofish.h @@ -23,8 +23,8 @@ class BOTAN_DLL Twofish : public BlockCipher BlockCipher* clone() const { return new Twofish; } Twofish() : BlockCipher(16, 16, 32, 8) {} private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); static void rs_mul(byte[4], byte, u32bit); diff --git a/src/block/xtea/xtea.cpp b/src/block/xtea/xtea.cpp index 5047f6594..0dba5f2be 100644 --- a/src/block/xtea/xtea.cpp +++ b/src/block/xtea/xtea.cpp @@ -14,33 +14,45 @@ namespace Botan { /* * XTEA Encryption */ -void XTEA::enc(const byte in[], byte out[]) const +void XTEA::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit L = load_be(in, 0), R = load_be(in, 1); - - for(u32bit j = 0; j != 32; ++j) + for(u32bit i = 0; i != blocks; ++i) { - L += (((R << 4) ^ (R >> 5)) + R) ^ EK[2*j]; - R += (((L << 4) ^ (L >> 5)) + L) ^ EK[2*j+1]; - } + u32bit L = load_be(in, 0), R = load_be(in, 1); + + for(u32bit j = 0; j != 32; ++j) + { + L += (((R << 4) ^ (R >> 5)) + R) ^ EK[2*j]; + R += (((L << 4) ^ (L >> 5)) + L) ^ EK[2*j+1]; + } - store_be(out, L, R); + store_be(out, L, R); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* * XTEA Decryption */ -void XTEA::dec(const byte in[], byte out[]) const +void XTEA::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit L = load_be(in, 0), R = load_be(in, 1); - - for(u32bit j = 0; j != 32; ++j) + for(u32bit i = 0; i != blocks; ++i) { - R -= (((L << 4) ^ (L >> 5)) + L) ^ EK[63 - 2*j]; - L -= (((R << 4) ^ (R >> 5)) + R) ^ EK[62 - 2*j]; - } + u32bit L = load_be(in, 0), R = load_be(in, 1); + + for(u32bit j = 0; j != 32; ++j) + { + R -= (((L << 4) ^ (L >> 5)) + L) ^ EK[63 - 2*j]; + L -= (((R << 4) ^ (R >> 5)) + R) ^ EK[62 - 2*j]; + } - store_be(out, L, R); + store_be(out, L, R); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } } /* diff --git a/src/block/xtea/xtea.h b/src/block/xtea/xtea.h index d9c6066cb..b50f487b4 100644 --- a/src/block/xtea/xtea.h +++ b/src/block/xtea/xtea.h @@ -23,8 +23,8 @@ class BOTAN_DLL XTEA : public BlockCipher BlockCipher* clone() const { return new XTEA; } XTEA() : BlockCipher(8, 16) {} private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer EK; }; diff --git a/src/modes/ctr/ctr.cpp b/src/modes/ctr/ctr.cpp index 9eb42ec5a..5c27ca63c 100644 --- a/src/modes/ctr/ctr.cpp +++ b/src/modes/ctr/ctr.cpp @@ -1,6 +1,6 @@ /* * CTR Mode -* (C) 1999-2007 Jack Lloyd +* (C) 1999-2009 Jack Lloyd * * Distributed under the terms of the Botan license */ @@ -14,9 +14,13 @@ namespace Botan { /* * CTR-BE Constructor */ -CTR_BE::CTR_BE(BlockCipher* ciph) : - BlockCipherMode(ciph, "CTR-BE", ciph->BLOCK_SIZE, 1) +CTR_BE::CTR_BE(BlockCipher* ciph) : cipher(ciph) { + base_ptr = cipher; + position = 0; + + counter.create(ciph->BLOCK_SIZE * CTR_BLOCKS_PARALLEL); + enc_buffer.create(ciph->BLOCK_SIZE * CTR_BLOCKS_PARALLEL); } /* @@ -24,39 +28,86 @@ CTR_BE::CTR_BE(BlockCipher* ciph) : */ CTR_BE::CTR_BE(BlockCipher* ciph, const SymmetricKey& key, const InitializationVector& iv) : - BlockCipherMode(ciph, "CTR-BE", ciph->BLOCK_SIZE, 1) + cipher(ciph) { + base_ptr = cipher; + position = 0; + + counter.create(ciph->BLOCK_SIZE * CTR_BLOCKS_PARALLEL); + enc_buffer.create(ciph->BLOCK_SIZE * CTR_BLOCKS_PARALLEL); + set_key(key); set_iv(iv); } +/* +* CTR_BE Destructor +*/ +CTR_BE::~CTR_BE() + { + delete cipher; + } + +/* +* Return the name of this type +*/ +std::string CTR_BE::name() const + { + return ("CTR-BE/" + cipher->name()); + } + +/* +* Set CTR-BE IV +*/ +void CTR_BE::set_iv(const InitializationVector& iv) + { + if(iv.length() != cipher->BLOCK_SIZE) + throw Invalid_IV_Length(name(), iv.length()); + + enc_buffer.clear(); + position = 0; + + for(u32bit i = 0; i != CTR_BLOCKS_PARALLEL; ++i) + { + counter.copy(i*cipher->BLOCK_SIZE, iv.begin(), iv.length()); + + // FIXME: this is stupid + for(u32bit j = 0; j != i; ++j) + for(s32bit k = cipher->BLOCK_SIZE - 1; k >= 0; --k) + if(++counter[i*cipher->BLOCK_SIZE+k]) + break; + } + + cipher->encrypt_n(counter, enc_buffer, CTR_BLOCKS_PARALLEL); + } + /* * CTR-BE Encryption/Decryption */ void CTR_BE::write(const byte input[], u32bit length) { - u32bit copied = std::min(BLOCK_SIZE - position, length); - xor_buf(buffer + position, input, copied); - send(buffer + position, copied); + u32bit copied = std::min(enc_buffer.size() - position, length); + xor_buf(enc_buffer + position, input, copied); + send(enc_buffer + position, copied); input += copied; length -= copied; position += copied; - if(position == BLOCK_SIZE) + if(position == enc_buffer.size()) increment_counter(); - while(length >= BLOCK_SIZE) + while(length >= enc_buffer.size()) { - xor_buf(buffer, input, BLOCK_SIZE); - send(buffer, BLOCK_SIZE); + xor_buf(enc_buffer, input, enc_buffer.size()); + send(enc_buffer, enc_buffer.size()); - input += BLOCK_SIZE; - length -= BLOCK_SIZE; + input += enc_buffer.size(); + length -= enc_buffer.size(); increment_counter(); } - xor_buf(buffer + position, input, length); - send(buffer + position, length); + xor_buf(enc_buffer + position, input, length); + send(enc_buffer + position, length); position += length; } @@ -65,10 +116,25 @@ void CTR_BE::write(const byte input[], u32bit length) */ void CTR_BE::increment_counter() { - for(s32bit j = BLOCK_SIZE - 1; j >= 0; --j) - if(++state[j]) - break; - cipher->encrypt(state, buffer); + for(u32bit i = 0; i != CTR_BLOCKS_PARALLEL; ++i) + { + // FIXME: Can do it in a single loop + /* + for(u32bit j = 1; j != cipher->BLOCK_SIZE; ++j) + { + byte carry = 0; + byte z = counter[(i+1)*cipher->BLOCK_SIZE-1] + CTR_BLOCKS_PARALLEL; + + if( + */ + for(u32bit j = 0; j != CTR_BLOCKS_PARALLEL; ++j) + for(s32bit k = cipher->BLOCK_SIZE - 1; k >= 0; --k) + if(++counter[i*cipher->BLOCK_SIZE+k]) + break; + } + + cipher->encrypt_n(counter, enc_buffer, CTR_BLOCKS_PARALLEL); + position = 0; } diff --git a/src/modes/ctr/ctr.h b/src/modes/ctr/ctr.h index aa0db5761..3d509f02c 100644 --- a/src/modes/ctr/ctr.h +++ b/src/modes/ctr/ctr.h @@ -8,22 +8,33 @@ #ifndef BOTAN_COUNTER_MODE_H__ #define BOTAN_COUNTER_MODE_H__ -#include -#include +#include +#include namespace Botan { /* * CTR-BE Mode */ -class BOTAN_DLL CTR_BE : public BlockCipherMode +class BOTAN_DLL CTR_BE : public Keyed_Filter { public: + std::string name() const; + void set_iv(const InitializationVector&); + CTR_BE(BlockCipher*); CTR_BE(BlockCipher*, const SymmetricKey&, const InitializationVector&); + + ~CTR_BE(); private: + static const u32bit CTR_BLOCKS_PARALLEL = 8; + void write(const byte[], u32bit); void increment_counter(); + + BlockCipher* cipher; + SecureVector counter, enc_buffer; + u32bit position; }; } -- cgit v1.2.3 From 13d50de7b7675d798437c0d465acedd23e08b092 Mon Sep 17 00:00:00 2001 From: lloyd Date: Tue, 11 Aug 2009 17:12:00 +0000 Subject: Modify Keyed_Filter so it is a pure interface Modify ECB to use parallel encryption/decryption where possible Add toggles in build.h specifying how many blocks to process in parallel. Defaults to 8 blocks for all modes, which is sufficient that any likely parallelism can be extracted (via SIMD or concurrent execution) but not so much as to seem likely to cause cache problems (8*128 bits = 128 bytes, or two x86 cache lines) --- checks/validate.dat | 89 ++++++++++++++++++++ src/build-data/buildh.in | 6 ++ src/codec/openpgp/openpgp.cpp | 1 + src/filters/algo_filt.cpp | 10 +-- src/filters/basefilt.cpp | 21 ----- src/filters/basefilt.h | 33 -------- src/filters/filters.h | 42 +++++++++- src/filters/info.txt | 1 + src/filters/key_filt.h | 45 ++++++++++ src/modes/ctr/ctr.cpp | 30 ++++--- src/modes/ctr/ctr.h | 10 ++- src/modes/eax/eax.h | 2 +- src/modes/ecb/ecb.cpp | 191 ++++++++++++++++++++++++++++++++++-------- src/modes/ecb/ecb.h | 68 ++++++++------- src/modes/modebase.cpp | 2 +- src/modes/modebase.h | 9 +- src/modes/xts/xts.h | 8 +- 17 files changed, 418 insertions(+), 150 deletions(-) create mode 100644 src/filters/key_filt.h (limited to 'src') diff --git a/checks/validate.dat b/checks/validate.dat index b8566b62a..c4e0d0121 100644 --- a/checks/validate.dat +++ b/checks/validate.dat @@ -19472,7 +19472,20 @@ D261D6041824D259290EABD3E9132DB8:7E3B14847526572FF2AA5D7BD626B560:\ 01000000000000000000000000000000:07E5E5AD7097B849BADC2D5D803B7F6A:\ 0000000000000000000000000000000000000000000000000000000000000000 +1032547698BADCFEEFCDAB8967452301:D5BAA00A4BB9D8A7C981C8DC90D89D92:\ +FFEEDDCCBBAA99887766554433221100 +145F0B8B663176B95DCAB7E9DCD5CC24:1032547698BADCFEEFCDAB8967452301:\ +FFEEDDCCBBAA99887766554433221100 + +1032547698BADCFEEFCDAB8967452301:DA860842B720802BF404A4C71034879A:\ +8899AABBCCDDEEFFFFEEDDCCBBAA99887766554433221100 + +B2696BD0D98C17953E4239225D27202C:1032547698BADCFEEFCDAB8967452301:\ +8899AABBCCDDEEFFFFEEDDCCBBAA99887766554433221100 + +1032547698BADCFEEFCDAB8967452301:93DF9A3CAFE387BD999EEBE393A17FCA:\ +00112233445566778899AABBCCDDEEFFFFEEDDCCBBAA99887766554433221100 # Corrected test vectors, based on NIST's clarification of May 9, 2002 [Skipjack] @@ -22938,6 +22951,51 @@ B4ECC305C3DBD8E5:FBBEC8F5DBF4CEFD:1B5E23EBD915C1FEE59F57DD91AF7347 # The block cipher tests above are distinct from these ECB mode tests # for testing reasons. They could otherwise easily be CIPHER/ECB/NoPadding + +[AES/ECB/NoPadding] +D8F532538289EF7D06B506A4FD5BE9C9FD7A929E0FD917686D9520ED236A276D\ +69E63C821F9DE0BF23CF1D19C7374FD1C3139DE2E1BA4693C3E9D29D774C2FF4\ +69E63C821F9DE0BF23CF1D19C7374FD1C3139DE2E1BA4693C3E9D29D774C2FF4\ +D8F532538289EF7D06B506A4FD5BE9C9FD7A929E0FD917686D9520ED236A276D\ +D8F532538289EF7D06B506A4FD5BE9C9C3139DE2E1BA4693C3E9D29D774C2FF4:\ +FD7A929E0FD917686D9520ED236A276D69E63C821F9DE0BF23CF1D19C7374FD1\ +C3139DE2E1BA4693C3E9D29D774C2FF46BA2DCF84C0E7E4D75CB53AD11BA76D6\ +C3139DE2E1BA4693C3E9D29D774C2FF46BA2DCF84C0E7E4D75CB53AD11BA76D6\ +FD7A929E0FD917686D9520ED236A276D69E63C821F9DE0BF23CF1D19C7374FD1\ +FD7A929E0FD917686D9520ED236A276D6BA2DCF84C0E7E4D75CB53AD11BA76D6:\ +00010203050607080A0B0C0D0F101112 + +[Serpent/ECB/NoPadding] +D29D576FCEA3A3A7ED9099F29273D78E2D62A890CEA3A3A7ED9099F29273D78E\ +D29D576F315C5C58ED9099F29273D78E2D62A890315C5C58ED9099F29273D78E\ +D29D576FCEA3A3A7126F660D9273D78E2D62A890CEA3A3A7126F660D9273D78E\ +D29D576F315C5C58126F660D9273D78E2D62A890315C5C58126F660D9273D78E\ +D29D576FCEA3A3A7ED9099F26D8C28712D62A890CEA3A3A7ED9099F26D8C2871\ +D29D576F315C5C58ED9099F26D8C28712D62A890315C5C58ED9099F26D8C2871\ +D29D576FCEA3A3A7126F660D6D8C28712D62A890CEA3A3A7126F660D6D8C2871\ +D29D576F315C5C58126F660D6D8C28712D62A890315C5C58126F660D6D8C2871\ +AA26D561F567520E8AE47528C24C18D731A2193D9A97FED6922B17AAA6372B74\ +BE5DEBD559E303C9C92B174A5107BBFEB626D8F65EDCCDF3AEE475C8A1837722\ +41DDE7C1F1631F5FDED4F42746471BD651D238BA86176EFE39E4695AAEB73B52\ +EA5926CADAD8018962E469BA920CB8BF1EA9062E4D9CEDD5FAD4F4C7990367A4\ +B966E5C5D2277288C61B96A559CC84AFB6A6583C5AACFCD6212B0BD8AEF3C6A9\ +A11DDBD175639341052B0B384678D8D9352299B71DD880E29D1B96452DB86540:\ +B2288B968AE8B08648D1CE9606FD992D717EB02EB81A2E939D54ACA91087112D\ +0D809C5EE82F477EBA7B956DBB23463B0F0190D616F5294112FFB7884E8B37F9\ +41BA1B505386B7428B88338188F7E718A3348230BF5CFA552F88D22463D9703A\ +115351622E016BCA26918D17E13225F67EE4E3F2C46FE52ECBDA044C585717DC\ +563A8403FF5309D62370B1DCF5A11EDD2F7D73602B70CD2553E44C1D3F170126\ +155BBD9BE3A965B345E834718F651CEF6CC65E8C5C566E894817350F497816F1\ +EEFA51FC91FEBB6E9F8CB141CC0EB6AF3C6F8380CBD3C996167F2F0E90E71B75\ +6C87EB62A4975356B28DCBF6A64A0BD107206D48FE6DBE19D50314B90AC87B83\ +35706F9B26007071AD8105CFAA1C1E2FF7FEAE5CEC4D11477F24E6B200906870\ +3C0E29E2950F2AC2DACD63DEEB5C7EFA9FDB9F3B740563D5518287DC981FC9CB\ +46D4B5A5A86FEC08FE70D18297DCF51072DDBE038DA040EBB12C509F5940A212\ +DDEB59F02132BE4581FC23EABAA960D6341D9352E36DFD6E4EAF0F6F439BC8CE\ +73A9AB3164FF30350F2DC08E939A104D6DF0C2C28F8E2D44468A61278BB6B429\ +4DAE45AE0CAA032FC97CD4D8C57FB83BBA8AFCAE22070BC882D3A42B38A09E65:\ +00000000000000000000000000000000 + [DES/ECB/NoPadding] 059B5E0851CF143A:86A560F10EC6D85B:0113B970FD34F2CE 4E6F772069732074:3FA40E8A984D4815:0123456789ABCDEF @@ -24901,6 +24959,37 @@ E5C7CDDE872BF27C43E934008C389C0F683788499A7C05F662C16A27E4FCF277:\ 4E6F77206973207468652074696D6520666F7220616C6C20:\ 0123456789ABCDEF:1234567890ABCDEF +[Serpent/ECB/NoPadding] +B2288B968AE8B08648D1CE9606FD992D717EB02EB81A2E939D54ACA91087112D\ +0D809C5EE82F477EBA7B956DBB23463B0F0190D616F5294112FFB7884E8B37F9\ +41BA1B505386B7428B88338188F7E718A3348230BF5CFA552F88D22463D9703A\ +115351622E016BCA26918D17E13225F67EE4E3F2C46FE52ECBDA044C585717DC\ +563A8403FF5309D62370B1DCF5A11EDD2F7D73602B70CD2553E44C1D3F170126\ +155BBD9BE3A965B345E834718F651CEF6CC65E8C5C566E894817350F497816F1\ +EEFA51FC91FEBB6E9F8CB141CC0EB6AF3C6F8380CBD3C996167F2F0E90E71B75\ +6C87EB62A4975356B28DCBF6A64A0BD107206D48FE6DBE19D50314B90AC87B83\ +35706F9B26007071AD8105CFAA1C1E2FF7FEAE5CEC4D11477F24E6B200906870\ +3C0E29E2950F2AC2DACD63DEEB5C7EFA9FDB9F3B740563D5518287DC981FC9CB\ +46D4B5A5A86FEC08FE70D18297DCF51072DDBE038DA040EBB12C509F5940A212\ +DDEB59F02132BE4581FC23EABAA960D6341D9352E36DFD6E4EAF0F6F439BC8CE\ +73A9AB3164FF30350F2DC08E939A104D6DF0C2C28F8E2D44468A61278BB6B429\ +4DAE45AE0CAA032FC97CD4D8C57FB83BBA8AFCAE22070BC882D3A42B38A09E65:\ +D29D576FCEA3A3A7ED9099F29273D78E2D62A890CEA3A3A7ED9099F29273D78E\ +D29D576F315C5C58ED9099F29273D78E2D62A890315C5C58ED9099F29273D78E\ +D29D576FCEA3A3A7126F660D9273D78E2D62A890CEA3A3A7126F660D9273D78E\ +D29D576F315C5C58126F660D9273D78E2D62A890315C5C58126F660D9273D78E\ +D29D576FCEA3A3A7ED9099F26D8C28712D62A890CEA3A3A7ED9099F26D8C2871\ +D29D576F315C5C58ED9099F26D8C28712D62A890315C5C58ED9099F26D8C2871\ +D29D576FCEA3A3A7126F660D6D8C28712D62A890CEA3A3A7126F660D6D8C2871\ +D29D576F315C5C58126F660D6D8C28712D62A890315C5C58126F660D6D8C2871\ +AA26D561F567520E8AE47528C24C18D731A2193D9A97FED6922B17AAA6372B74\ +BE5DEBD559E303C9C92B174A5107BBFEB626D8F65EDCCDF3AEE475C8A1837722\ +41DDE7C1F1631F5FDED4F42746471BD651D238BA86176EFE39E4695AAEB73B52\ +EA5926CADAD8018962E469BA920CB8BF1EA9062E4D9CEDD5FAD4F4C7990367A4\ +B966E5C5D2277288C61B96A559CC84AFB6A6583C5AACFCD6212B0BD8AEF3C6A9\ +A11DDBD175639341052B0B384678D8D9352299B71DD880E29D1B96452DB86540:\ +00000000000000000000000000000000 + [RC5(8)/CBC/PKCS7] 7875DBF6738C64788F34C3C681C99695:FFFFFFFFFFFFFFFF:0102030405:0000000000000000 7875DBF6738C64787CB3F1DF34F948117FD1A023A5BBA217:\ diff --git a/src/build-data/buildh.in b/src/build-data/buildh.in index e4ab0f44b..bd900d412 100644 --- a/src/build-data/buildh.in +++ b/src/build-data/buildh.in @@ -22,6 +22,12 @@ #define BOTAN_KARAT_SQR_THRESHOLD 32 #define BOTAN_PRIVATE_KEY_OP_BLINDING_BITS 64 +/* Toggles for parallel block cipher mode processing */ +#define BOTAN_PARALLEL_BLOCKS_ECB 8 +#define BOTAN_PARALLEL_BLOCKS_CTR 8 +#define BOTAN_PARALLEL_BLOCKS_EAX 8 +#define BOTAN_PARALLEL_BLOCKS_XTS 8 + /* PK key consistency checking toggles */ #define BOTAN_PUBLIC_KEY_STRONG_CHECKS_ON_LOAD 1 #define BOTAN_PRIVATE_KEY_STRONG_CHECKS_ON_LOAD 1 diff --git a/src/codec/openpgp/openpgp.cpp b/src/codec/openpgp/openpgp.cpp index 7f9cf5f9c..bfba828af 100644 --- a/src/codec/openpgp/openpgp.cpp +++ b/src/codec/openpgp/openpgp.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include diff --git a/src/filters/algo_filt.cpp b/src/filters/algo_filt.cpp index 23f7a20cf..3268276a6 100644 --- a/src/filters/algo_filt.cpp +++ b/src/filters/algo_filt.cpp @@ -18,7 +18,7 @@ StreamCipher_Filter::StreamCipher_Filter(const std::string& sc_name) : buffer(DEFAULT_BUFFERSIZE) { Algorithm_Factory& af = global_state().algorithm_factory(); - base_ptr = cipher = af.make_stream_cipher(sc_name); + cipher = af.make_stream_cipher(sc_name); } /* @@ -27,7 +27,7 @@ StreamCipher_Filter::StreamCipher_Filter(const std::string& sc_name) : StreamCipher_Filter::StreamCipher_Filter(StreamCipher* stream_cipher) : buffer(DEFAULT_BUFFERSIZE) { - base_ptr = cipher = stream_cipher; + cipher = stream_cipher; } /* @@ -38,7 +38,7 @@ StreamCipher_Filter::StreamCipher_Filter(const std::string& sc_name, buffer(DEFAULT_BUFFERSIZE) { Algorithm_Factory& af = global_state().algorithm_factory(); - base_ptr = cipher = af.make_stream_cipher(sc_name); + cipher = af.make_stream_cipher(sc_name); cipher->set_key(key); } @@ -95,7 +95,7 @@ MAC_Filter::MAC_Filter(const std::string& mac_name, u32bit len) : OUTPUT_LENGTH(len) { Algorithm_Factory& af = global_state().algorithm_factory(); - base_ptr = mac = af.make_mac(mac_name); + mac = af.make_mac(mac_name); } /* @@ -105,7 +105,7 @@ MAC_Filter::MAC_Filter(const std::string& mac_name, const SymmetricKey& key, u32bit len) : OUTPUT_LENGTH(len) { Algorithm_Factory& af = global_state().algorithm_factory(); - base_ptr = mac = af.make_mac(mac_name); + mac = af.make_mac(mac_name); mac->set_key(key); } diff --git a/src/filters/basefilt.cpp b/src/filters/basefilt.cpp index 02dbd8a73..c91a5aa62 100644 --- a/src/filters/basefilt.cpp +++ b/src/filters/basefilt.cpp @@ -50,25 +50,4 @@ Fork::Fork(Filter* filters[], u32bit count) set_next(filters, count); } -/* -* Set the algorithm key -*/ -void Keyed_Filter::set_key(const SymmetricKey& key) - { - if(base_ptr) - base_ptr->set_key(key); - else - throw Invalid_State("Keyed_Filter::set_key: No base algorithm set"); - } - -/* -* Check if a keylength is valid -*/ -bool Keyed_Filter::valid_keylength(u32bit n) const - { - if(base_ptr) - return base_ptr->valid_keylength(n); - throw Invalid_State("Keyed_Filter::valid_keylength: No base algorithm set"); - } - } diff --git a/src/filters/basefilt.h b/src/filters/basefilt.h index 75625abb0..348ad6fd3 100644 --- a/src/filters/basefilt.h +++ b/src/filters/basefilt.h @@ -9,7 +9,6 @@ #define BOTAN_BASEFILT_H__ #include -#include namespace Botan { @@ -62,38 +61,6 @@ class BOTAN_DLL Fork : public Fanout_Filter Fork(Filter* filter_arr[], u32bit length); }; -/** -* This class represents keyed filters, i.e. filters that have to be -* fed with a key in order to function. -*/ -class BOTAN_DLL Keyed_Filter : public Filter - { - public: - - /** - * Set the key of this filter. - * @param key the key to set - */ - virtual void set_key(const SymmetricKey& key); - - /** - * Set the initialization vector of this filter. - * @param iv the initialization vector to set - */ - virtual void set_iv(const InitializationVector&) {} - - /** - * Check whether a key length is valid for this filter. - * @param length the key length to be checked for validity - * @return true if the key length is valid, false otherwise - */ - virtual bool valid_keylength(u32bit length) const; - - Keyed_Filter() { base_ptr = 0; } - protected: - SymmetricAlgorithm* base_ptr; - }; - } #endif diff --git a/src/filters/filters.h b/src/filters/filters.h index 725651f7d..26de6e63c 100644 --- a/src/filters/filters.h +++ b/src/filters/filters.h @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include @@ -35,6 +35,13 @@ class BOTAN_DLL StreamCipher_Filter : public Keyed_Filter { public: + /** + * Write input data + * @param input data + * @param input_len length of input in bytes + */ + void write(const byte input[], u32bit input_len); + /** * Seek in the stream. * @param position the position to seek ahead @@ -53,7 +60,20 @@ class BOTAN_DLL StreamCipher_Filter : public Keyed_Filter * @param iv the initialization vector to set */ void set_iv(const InitializationVector& iv); - void write(const byte[], u32bit); + + /** + * Set the key of this filter. + * @param key the key to set + */ + void set_key(const SymmetricKey& key) { cipher->set_key(key); } + + /** + * Check whether a key length is valid for this filter. + * @param length the key length to be checked for validity + * @return true if the key length is valid, false otherwise + */ + bool valid_keylength(u32bit length) const + { return cipher->valid_keylength(length); } /** * Construct a stream cipher filter. @@ -125,6 +145,20 @@ class BOTAN_DLL MAC_Filter : public Keyed_Filter void write(const byte input[], u32bit len) { mac->update(input, len); } void end_msg(); + /** + * Set the key of this filter. + * @param key the key to set + */ + void set_key(const SymmetricKey& key) { mac->set_key(key); } + + /** + * Check whether a key length is valid for this filter. + * @param length the key length to be checked for validity + * @return true if the key length is valid, false otherwise + */ + bool valid_keylength(u32bit length) const + { return mac->valid_keylength(length); } + /** * Construct a MAC filter. The MAC key will be left empty. * @param mac the MAC to use @@ -136,7 +170,7 @@ class BOTAN_DLL MAC_Filter : public Keyed_Filter MAC_Filter(MessageAuthenticationCode* mac_obj, u32bit out_len = 0) : OUTPUT_LENGTH(out_len) { - base_ptr = mac = mac_obj; + mac = mac_obj; } /** @@ -152,7 +186,7 @@ class BOTAN_DLL MAC_Filter : public Keyed_Filter const SymmetricKey& key, u32bit out_len = 0) : OUTPUT_LENGTH(out_len) { - base_ptr = mac = mac_obj; + mac = mac_obj; mac->set_key(key); } diff --git a/src/filters/info.txt b/src/filters/info.txt index 79a92a9c5..fb8108659 100644 --- a/src/filters/info.txt +++ b/src/filters/info.txt @@ -17,6 +17,7 @@ data_src.h filter.cpp filter.h filters.h +key_filt.h out_buf.cpp out_buf.h pbe.h diff --git a/src/filters/key_filt.h b/src/filters/key_filt.h new file mode 100644 index 000000000..36af91f88 --- /dev/null +++ b/src/filters/key_filt.h @@ -0,0 +1,45 @@ +/* +* Keyed_Filter +* (C) 1999-2007 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_KEYED_FILTER_H__ +#define BOTAN_KEYED_FILTER_H__ + +#include +#include + +namespace Botan { + +/** +* This class represents keyed filters, i.e. filters that have to be +* fed with a key in order to function. +*/ +class BOTAN_DLL Keyed_Filter : public Filter + { + public: + /** + * Set the key of this filter. + * @param key the key to set + */ + virtual void set_key(const SymmetricKey& key) = 0; + + /** + * Set the initialization vector of this filter. + * @param iv the initialization vector to set + */ + virtual void set_iv(const InitializationVector&) {} + + /** + * Check whether a key length is valid for this filter. + * @param length the key length to be checked for validity + * @return true if the key length is valid, false otherwise + */ + virtual bool valid_keylength(u32bit length) const = 0; + }; + +} + +#endif diff --git a/src/modes/ctr/ctr.cpp b/src/modes/ctr/ctr.cpp index 5c27ca63c..a3476c474 100644 --- a/src/modes/ctr/ctr.cpp +++ b/src/modes/ctr/ctr.cpp @@ -11,16 +11,21 @@ namespace Botan { +namespace { + +const u32bit PARALLEL_BLOCKS = BOTAN_PARALLEL_BLOCKS_CTR; + +} + /* * CTR-BE Constructor */ CTR_BE::CTR_BE(BlockCipher* ciph) : cipher(ciph) { - base_ptr = cipher; position = 0; - counter.create(ciph->BLOCK_SIZE * CTR_BLOCKS_PARALLEL); - enc_buffer.create(ciph->BLOCK_SIZE * CTR_BLOCKS_PARALLEL); + counter.create(ciph->BLOCK_SIZE * PARALLEL_BLOCKS); + enc_buffer.create(ciph->BLOCK_SIZE * PARALLEL_BLOCKS); } /* @@ -30,13 +35,12 @@ CTR_BE::CTR_BE(BlockCipher* ciph, const SymmetricKey& key, const InitializationVector& iv) : cipher(ciph) { - base_ptr = cipher; position = 0; - counter.create(ciph->BLOCK_SIZE * CTR_BLOCKS_PARALLEL); - enc_buffer.create(ciph->BLOCK_SIZE * CTR_BLOCKS_PARALLEL); + counter.create(ciph->BLOCK_SIZE * PARALLEL_BLOCKS); + enc_buffer.create(ciph->BLOCK_SIZE * PARALLEL_BLOCKS); - set_key(key); + cipher->set_key(key); set_iv(iv); } @@ -67,7 +71,7 @@ void CTR_BE::set_iv(const InitializationVector& iv) enc_buffer.clear(); position = 0; - for(u32bit i = 0; i != CTR_BLOCKS_PARALLEL; ++i) + for(u32bit i = 0; i != PARALLEL_BLOCKS; ++i) { counter.copy(i*cipher->BLOCK_SIZE, iv.begin(), iv.length()); @@ -78,7 +82,7 @@ void CTR_BE::set_iv(const InitializationVector& iv) break; } - cipher->encrypt_n(counter, enc_buffer, CTR_BLOCKS_PARALLEL); + cipher->encrypt_n(counter, enc_buffer, PARALLEL_BLOCKS); } /* @@ -116,24 +120,24 @@ void CTR_BE::write(const byte input[], u32bit length) */ void CTR_BE::increment_counter() { - for(u32bit i = 0; i != CTR_BLOCKS_PARALLEL; ++i) + for(u32bit i = 0; i != PARALLEL_BLOCKS; ++i) { // FIXME: Can do it in a single loop /* for(u32bit j = 1; j != cipher->BLOCK_SIZE; ++j) { byte carry = 0; - byte z = counter[(i+1)*cipher->BLOCK_SIZE-1] + CTR_BLOCKS_PARALLEL; + byte z = counter[(i+1)*cipher->BLOCK_SIZE-1] + PARALLEL_BLOCKS; if( */ - for(u32bit j = 0; j != CTR_BLOCKS_PARALLEL; ++j) + for(u32bit j = 0; j != PARALLEL_BLOCKS; ++j) for(s32bit k = cipher->BLOCK_SIZE - 1; k >= 0; --k) if(++counter[i*cipher->BLOCK_SIZE+k]) break; } - cipher->encrypt_n(counter, enc_buffer, CTR_BLOCKS_PARALLEL); + cipher->encrypt_n(counter, enc_buffer, PARALLEL_BLOCKS); position = 0; } diff --git a/src/modes/ctr/ctr.h b/src/modes/ctr/ctr.h index 3d509f02c..1948ffe48 100644 --- a/src/modes/ctr/ctr.h +++ b/src/modes/ctr/ctr.h @@ -8,7 +8,7 @@ #ifndef BOTAN_COUNTER_MODE_H__ #define BOTAN_COUNTER_MODE_H__ -#include +#include #include namespace Botan { @@ -20,15 +20,19 @@ class BOTAN_DLL CTR_BE : public Keyed_Filter { public: std::string name() const; + void set_iv(const InitializationVector&); + void set_key(const SymmetricKey& key) { cipher->set_key(key); } + + bool valid_keylength(u32bit key_len) const + { return cipher->valid_keylength(key_len); } + CTR_BE(BlockCipher*); CTR_BE(BlockCipher*, const SymmetricKey&, const InitializationVector&); ~CTR_BE(); private: - static const u32bit CTR_BLOCKS_PARALLEL = 8; - void write(const byte[], u32bit); void increment_counter(); diff --git a/src/modes/eax/eax.h b/src/modes/eax/eax.h index 1bb2e510d..f569f2ede 100644 --- a/src/modes/eax/eax.h +++ b/src/modes/eax/eax.h @@ -8,7 +8,7 @@ #ifndef BOTAN_EAX_H__ #define BOTAN_EAX_H__ -#include +#include #include #include diff --git a/src/modes/ecb/ecb.cpp b/src/modes/ecb/ecb.cpp index 8da0a4802..988a8b3f2 100644 --- a/src/modes/ecb/ecb.cpp +++ b/src/modes/ecb/ecb.cpp @@ -9,22 +9,60 @@ namespace Botan { +namespace { + +const u32bit PARALLEL_BLOCKS = BOTAN_PARALLEL_BLOCKS_ECB; + +} + /* -* Verify the IV is not set +* ECB_Encryption Constructor */ -bool ECB::valid_iv_size(u32bit iv_size) const +ECB_Encryption::ECB_Encryption(BlockCipher* ciph, + BlockCipherModePaddingMethod* pad) { - if(iv_size == 0) - return true; - return false; + cipher = ciph; + padder = pad; + + plaintext.create(cipher->BLOCK_SIZE); + ciphertext.create(cipher->BLOCK_SIZE * PARALLEL_BLOCKS); + + position = 0; + } + +/* +* ECB_Encryption Constructor +*/ +ECB_Encryption::ECB_Encryption(BlockCipher* ciph, + BlockCipherModePaddingMethod* pad, + const SymmetricKey& key) + { + cipher = ciph; + padder = pad; + + plaintext.create(cipher->BLOCK_SIZE); + ciphertext.create(cipher->BLOCK_SIZE * PARALLEL_BLOCKS); + + position = 0; + + cipher->set_key(key); + } + +/* +* ECB_Encryption Destructor +*/ +ECB_Encryption::~ECB_Encryption() + { + delete cipher; + delete padder; } /* * Return an ECB mode name */ -std::string ECB::name() const +std::string ECB_Encryption::name() const { - return (cipher->name() + "/" + mode_name + "/" + padder->name()); + return (cipher->name() + "/ECB/" + padder->name()); } /* @@ -32,23 +70,34 @@ std::string ECB::name() const */ void ECB_Encryption::write(const byte input[], u32bit length) { - buffer.copy(position, input, length); - if(position + length >= BLOCK_SIZE) + const u32bit BLOCK_SIZE = cipher->BLOCK_SIZE; + + if(position) { - cipher->encrypt(buffer); - send(buffer, BLOCK_SIZE); - input += (BLOCK_SIZE - position); - length -= (BLOCK_SIZE - position); - while(length >= BLOCK_SIZE) + plaintext.copy(position, input, length); + + if(position + length >= BLOCK_SIZE) { - cipher->encrypt(input, buffer); - send(buffer, BLOCK_SIZE); - input += BLOCK_SIZE; - length -= BLOCK_SIZE; + cipher->encrypt(plaintext, ciphertext); + send(ciphertext, BLOCK_SIZE); + input += (BLOCK_SIZE - position); + length -= (BLOCK_SIZE - position); + position = 0; } - buffer.copy(input, length); - position = 0; } + + while(length >= BLOCK_SIZE) + { + const u32bit to_proc = + std::min(length, ciphertext.size()) / BLOCK_SIZE; + + cipher->encrypt_n(input, ciphertext, to_proc); + send(ciphertext, to_proc * BLOCK_SIZE); + input += to_proc * BLOCK_SIZE; + length -= to_proc * BLOCK_SIZE; + } + + plaintext.copy(position, input, length); position += length; } @@ -57,6 +106,8 @@ void ECB_Encryption::write(const byte input[], u32bit length) */ void ECB_Encryption::end_msg() { + const u32bit BLOCK_SIZE = cipher->BLOCK_SIZE; + SecureVector padding(BLOCK_SIZE); padder->pad(padding, padding.size(), position); write(padding, padder->pad_bytes(BLOCK_SIZE, position)); @@ -64,28 +115,94 @@ void ECB_Encryption::end_msg() throw Encoding_Error(name() + ": Did not pad to full blocksize"); } +/* +* ECB_Decryption Constructor +*/ +ECB_Decryption::ECB_Decryption(BlockCipher* ciph, + BlockCipherModePaddingMethod* pad) + { + cipher = ciph; + padder = pad; + + ciphertext.create(cipher->BLOCK_SIZE); + plaintext.create(cipher->BLOCK_SIZE * PARALLEL_BLOCKS); + + position = 0; + } + +/* +* ECB_Decryption Constructor +*/ +ECB_Decryption::ECB_Decryption(BlockCipher* ciph, + BlockCipherModePaddingMethod* pad, + const SymmetricKey& key) + { + cipher = ciph; + padder = pad; + + ciphertext.create(cipher->BLOCK_SIZE); + plaintext.create(cipher->BLOCK_SIZE * PARALLEL_BLOCKS); + + position = 0; + + cipher->set_key(key); + } + +/* +* ECB_Decryption Destructor +*/ +ECB_Decryption::~ECB_Decryption() + { + delete cipher; + delete padder; + } + +/* +* Return an ECB mode name +*/ +std::string ECB_Decryption::name() const + { + return (cipher->name() + "/ECB/" + padder->name()); + } + /* * Decrypt in ECB mode */ void ECB_Decryption::write(const byte input[], u32bit length) { - buffer.copy(position, input, length); - if(position + length > BLOCK_SIZE) + const u32bit BLOCK_SIZE = cipher->BLOCK_SIZE; + + if(position) { - cipher->decrypt(buffer); - send(buffer, BLOCK_SIZE); - input += (BLOCK_SIZE - position); - length -= (BLOCK_SIZE - position); - while(length > BLOCK_SIZE) + ciphertext.copy(position, input, length); + + if(position + length > BLOCK_SIZE) { - cipher->decrypt(input, buffer); - send(buffer, BLOCK_SIZE); - input += BLOCK_SIZE; - length -= BLOCK_SIZE; + cipher->decrypt(ciphertext, plaintext); + send(plaintext, BLOCK_SIZE); + input += (BLOCK_SIZE - position); + length -= (BLOCK_SIZE - position); + position = 0; } - buffer.copy(input, length); - position = 0; } + + while(length > BLOCK_SIZE) + { + /* Always leave at least 1 byte left over, to ensure that (as long + as the input message actually is a multiple of the block size) + we will have the full final block left over in end_msg so as + to remove the padding + */ + const u32bit to_proc = + std::min(length - 1, plaintext.size()) / BLOCK_SIZE; + + cipher->decrypt_n(input, plaintext, to_proc); + send(plaintext, to_proc * BLOCK_SIZE); + input += to_proc * BLOCK_SIZE; + length -= to_proc * BLOCK_SIZE; + } + + ciphertext.copy(position, input, length); position += length; } @@ -94,11 +211,11 @@ void ECB_Decryption::write(const byte input[], u32bit length) */ void ECB_Decryption::end_msg() { - if(position != BLOCK_SIZE) + if(position != cipher->BLOCK_SIZE) throw Decoding_Error(name()); - cipher->decrypt(buffer); - send(buffer, padder->unpad(buffer, BLOCK_SIZE)); - state = buffer; + + cipher->decrypt(ciphertext); + send(ciphertext, padder->unpad(ciphertext, cipher->BLOCK_SIZE)); position = 0; } diff --git a/src/modes/ecb/ecb.h b/src/modes/ecb/ecb.h index 5230f9b14..ff9ea9635 100644 --- a/src/modes/ecb/ecb.h +++ b/src/modes/ecb/ecb.h @@ -1,6 +1,6 @@ /* * ECB Mode -* (C) 1999-2007 Jack Lloyd +* (C) 1999-2009 Jack Lloyd * * Distributed under the terms of the Botan license */ @@ -8,64 +8,74 @@ #ifndef BOTAN_ECB_H__ #define BOTAN_ECB_H__ -#include -#include +#include #include +#include -namespace Botan { - -/* -* ECB -*/ -class BOTAN_DLL ECB : public BlockCipherMode - { - protected: - ECB(BlockCipher* ciph, BlockCipherModePaddingMethod* pad) : - BlockCipherMode(ciph, "ECB", 0), padder(pad) {} - ~ECB() { delete padder; } +#include - std::string name() const; - BlockCipherModePaddingMethod* padder; - private: - bool valid_iv_size(u32bit) const; - }; +namespace Botan { /* * ECB Encryption */ -class BOTAN_DLL ECB_Encryption : public ECB +class BOTAN_DLL ECB_Encryption : public Keyed_Filter { public: + std::string name() const; + + void set_key(const SymmetricKey& key) { cipher->set_key(key); } + + bool valid_keylength(u32bit key_len) const + { return cipher->valid_keylength(key_len); } + ECB_Encryption(BlockCipher* ciph, - BlockCipherModePaddingMethod* pad) : - ECB(ciph, pad) {} + BlockCipherModePaddingMethod* pad); ECB_Encryption(BlockCipher* ciph, BlockCipherModePaddingMethod* pad, - const SymmetricKey& key) : - ECB(ciph, pad) { set_key(key); } + const SymmetricKey& key); + + ~ECB_Encryption(); private: void write(const byte[], u32bit); void end_msg(); + + BlockCipher* cipher; + BlockCipherModePaddingMethod* padder; + SecureVector plaintext, ciphertext; + u32bit position; }; /* * ECB Decryption */ -class BOTAN_DLL ECB_Decryption : public ECB +class BOTAN_DLL ECB_Decryption : public Keyed_Filter { public: + std::string name() const; + + void set_key(const SymmetricKey& key) { cipher->set_key(key); } + + bool valid_keylength(u32bit key_len) const + { return cipher->valid_keylength(key_len); } + ECB_Decryption(BlockCipher* ciph, - BlockCipherModePaddingMethod* pad) : - ECB(ciph, pad) {} + BlockCipherModePaddingMethod* pad); ECB_Decryption(BlockCipher* ciph, BlockCipherModePaddingMethod* pad, - const SymmetricKey& key) : - ECB(ciph, pad) { set_key(key); } + const SymmetricKey& key); + + ~ECB_Decryption(); private: void write(const byte[], u32bit); void end_msg(); + + BlockCipher* cipher; + BlockCipherModePaddingMethod* padder; + SecureVector plaintext, ciphertext; + u32bit position; }; } diff --git a/src/modes/modebase.cpp b/src/modes/modebase.cpp index 8293acc54..b048862a4 100644 --- a/src/modes/modebase.cpp +++ b/src/modes/modebase.cpp @@ -19,7 +19,7 @@ BlockCipherMode::BlockCipherMode(BlockCipher* cipher_ptr, BLOCK_SIZE(cipher_ptr->BLOCK_SIZE), BUFFER_SIZE(buf_mult * BLOCK_SIZE), IV_METHOD(iv_meth), mode_name(cipher_mode_name) { - base_ptr = cipher = cipher_ptr; + cipher = cipher_ptr; buffer.create(BUFFER_SIZE); state.create(iv_size); position = 0; diff --git a/src/modes/modebase.h b/src/modes/modebase.h index 173fde58c..4a15524b6 100644 --- a/src/modes/modebase.h +++ b/src/modes/modebase.h @@ -8,7 +8,7 @@ #ifndef BOTAN_MODEBASE_H__ #define BOTAN_MODEBASE_H__ -#include +#include #include namespace Botan { @@ -21,12 +21,17 @@ class BOTAN_DLL BlockCipherMode : public Keyed_Filter public: std::string name() const; + void set_iv(const InitializationVector&); + void set_key(const SymmetricKey& key) { cipher->set_key(key); } + + bool valid_keylength(u32bit key_len) const + { return cipher->valid_keylength(key_len); } + BlockCipherMode(BlockCipher*, const std::string&, u32bit, u32bit = 0, u32bit = 1); virtual ~BlockCipherMode() { delete cipher; } protected: - void set_iv(const InitializationVector&); const u32bit BLOCK_SIZE, BUFFER_SIZE, IV_METHOD; const std::string mode_name; BlockCipher* cipher; diff --git a/src/modes/xts/xts.h b/src/modes/xts/xts.h index 01558175b..9badd3666 100644 --- a/src/modes/xts/xts.h +++ b/src/modes/xts/xts.h @@ -8,7 +8,7 @@ #ifndef BOTAN_XTS_H__ #define BOTAN_XTS_H__ -#include +#include #include namespace Botan { @@ -22,6 +22,9 @@ class BOTAN_DLL XTS_Encryption : public Keyed_Filter void set_key(const SymmetricKey& key); void set_iv(const InitializationVector& iv); + bool valid_keylength(u32bit key_len) const + { return cipher->valid_keylength(key_len); } + std::string name() const; XTS_Encryption(BlockCipher* ciph); @@ -52,6 +55,9 @@ class BOTAN_DLL XTS_Decryption : public Keyed_Filter void set_key(const SymmetricKey& key); void set_iv(const InitializationVector& iv); + bool valid_keylength(u32bit key_len) const + { return cipher->valid_keylength(key_len); } + std::string name() const; XTS_Decryption(BlockCipher* ciph); -- cgit v1.2.3 From c8c3d7f6eecd753aa87a882b1458346682e606db Mon Sep 17 00:00:00 2001 From: lloyd Date: Tue, 11 Aug 2009 19:34:50 +0000 Subject: Make encrypt_n public for all BlockCipher implementations - unlike the enc/dec functions it replaces, these are public interfaces. Add the first bits of a SSE2 implementation of Serpent. Currently incomplete. --- src/block/aes/aes.h | 6 +- src/block/blowfish/blowfish.h | 6 +- src/block/cast/cast128.h | 6 +- src/block/cast/cast256.h | 6 +- src/block/des/des.h | 12 ++- src/block/des/desx.h | 6 +- src/block/gost_28147/gost_28147.h | 5 +- src/block/idea/idea.h | 6 +- src/block/kasumi/kasumi.h | 5 +- src/block/lion/lion.h | 5 +- src/block/lubyrack/lubyrack.h | 6 +- src/block/mars/mars.h | 6 +- src/block/misty1/misty1.h | 6 +- src/block/noekeon/noekeon.h | 6 +- src/block/rc2/rc2.h | 6 +- src/block/rc5/rc5.h | 6 +- src/block/rc6/rc6.h | 6 +- src/block/safer/safer_sk.h | 7 +- src/block/seed/seed.h | 6 +- src/block/serpent/serpent.h | 5 +- src/block/serpent_ia32/serp_ia32.h | 5 +- src/block/serpent_sse2/info.txt | 27 ++++++ src/block/serpent_sse2/serp_sse2.cpp | 170 +++++++++++++++++++++++++++++++++++ src/block/serpent_sse2/serp_sse2.h | 29 ++++++ src/block/skipjack/skipjack.h | 7 +- src/block/square/square.h | 6 +- src/block/tea/tea.h | 6 +- src/block/twofish/twofish.h | 6 +- src/block/xtea/xtea.h | 6 +- src/engine/sse2_eng/eng_sse2.cpp | 23 ++++- src/engine/sse2_eng/eng_sse2.h | 5 +- 31 files changed, 355 insertions(+), 58 deletions(-) create mode 100644 src/block/serpent_sse2/info.txt create mode 100644 src/block/serpent_sse2/serp_sse2.cpp create mode 100644 src/block/serpent_sse2/serp_sse2.h (limited to 'src') diff --git a/src/block/aes/aes.h b/src/block/aes/aes.h index 940e11a48..768bb09e7 100644 --- a/src/block/aes/aes.h +++ b/src/block/aes/aes.h @@ -18,14 +18,16 @@ namespace Botan { class BOTAN_DLL AES : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw(); std::string name() const { return "AES"; } BlockCipher* clone() const { return new AES; } + AES() : BlockCipher(16, 16, 32, 8) { ROUNDS = 14; } AES(u32bit); private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); static u32bit S(u32bit); diff --git a/src/block/blowfish/blowfish.h b/src/block/blowfish/blowfish.h index 3623c2087..345c1ce49 100644 --- a/src/block/blowfish/blowfish.h +++ b/src/block/blowfish/blowfish.h @@ -18,13 +18,15 @@ namespace Botan { class BOTAN_DLL Blowfish : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw(); std::string name() const { return "Blowfish"; } BlockCipher* clone() const { return new Blowfish; } + Blowfish() : BlockCipher(8, 1, 56) {} private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); void generate_sbox(u32bit[], u32bit, u32bit&, u32bit&) const; diff --git a/src/block/cast/cast128.h b/src/block/cast/cast128.h index 092ee824b..864a4e47e 100644 --- a/src/block/cast/cast128.h +++ b/src/block/cast/cast128.h @@ -18,13 +18,15 @@ namespace Botan { class BOTAN_DLL CAST_128 : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw() { MK.clear(); RK.clear(); } std::string name() const { return "CAST-128"; } BlockCipher* clone() const { return new CAST_128; } + CAST_128() : BlockCipher(8, 11, 16) {} private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); static void key_schedule(u32bit[16], u32bit[4]); diff --git a/src/block/cast/cast256.h b/src/block/cast/cast256.h index ea80df65d..1be7fa9cf 100644 --- a/src/block/cast/cast256.h +++ b/src/block/cast/cast256.h @@ -18,13 +18,15 @@ namespace Botan { class BOTAN_DLL CAST_256 : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw() { MK.clear(); RK.clear(); } std::string name() const { return "CAST-256"; } BlockCipher* clone() const { return new CAST_256; } + CAST_256() : BlockCipher(16, 4, 32, 4) {} private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); static const u32bit KEY_MASK[192]; diff --git a/src/block/des/des.h b/src/block/des/des.h index 39d1ac404..856aaf60c 100644 --- a/src/block/des/des.h +++ b/src/block/des/des.h @@ -18,13 +18,15 @@ namespace Botan { class BOTAN_DLL DES : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw() { round_key.clear(); } std::string name() const { return "DES"; } BlockCipher* clone() const { return new DES; } + DES() : BlockCipher(8, 8) {} private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer round_key; @@ -36,13 +38,15 @@ class BOTAN_DLL DES : public BlockCipher class BOTAN_DLL TripleDES : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw() { round_key.clear(); } std::string name() const { return "TripleDES"; } BlockCipher* clone() const { return new TripleDES; } + TripleDES() : BlockCipher(8, 16, 24, 8) {} private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer round_key; diff --git a/src/block/des/desx.h b/src/block/des/desx.h index d71335071..d22895296 100644 --- a/src/block/des/desx.h +++ b/src/block/des/desx.h @@ -18,13 +18,15 @@ namespace Botan { class BOTAN_DLL DESX : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw() { des.clear(); K1.clear(); K2.clear(); } std::string name() const { return "DESX"; } BlockCipher* clone() const { return new DESX; } + DESX() : BlockCipher(8, 24) {} private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer K1, K2; DES des; diff --git a/src/block/gost_28147/gost_28147.h b/src/block/gost_28147/gost_28147.h index 825e106a2..18c1d0a29 100644 --- a/src/block/gost_28147/gost_28147.h +++ b/src/block/gost_28147/gost_28147.h @@ -44,6 +44,9 @@ class GOST_28147_89_Params class BOTAN_DLL GOST_28147_89 : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw() { EK.clear(); } std::string name() const { return "GOST-28147-89"; } @@ -54,8 +57,6 @@ class BOTAN_DLL GOST_28147_89 : public BlockCipher GOST_28147_89(const SecureBuffer& other_SBOX) : BlockCipher(8, 32), SBOX(other_SBOX) {} - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer SBOX; diff --git a/src/block/idea/idea.h b/src/block/idea/idea.h index a3384b667..59484531b 100644 --- a/src/block/idea/idea.h +++ b/src/block/idea/idea.h @@ -18,13 +18,15 @@ namespace Botan { class BOTAN_DLL IDEA : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw() { EK.clear(); DK.clear(); } std::string name() const { return "IDEA"; } BlockCipher* clone() const { return new IDEA; } + IDEA() : BlockCipher(8, 16) {} private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer EK, DK; }; diff --git a/src/block/kasumi/kasumi.h b/src/block/kasumi/kasumi.h index 3b18e675b..0f5a5d182 100644 --- a/src/block/kasumi/kasumi.h +++ b/src/block/kasumi/kasumi.h @@ -18,14 +18,15 @@ namespace Botan { class BOTAN_DLL KASUMI : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw() { EK.clear(); } std::string name() const { return "KASUMI"; } BlockCipher* clone() const { return new KASUMI; } KASUMI() : BlockCipher(8, 16) {} private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer EK; diff --git a/src/block/lion/lion.h b/src/block/lion/lion.h index d9f933846..d421771d6 100644 --- a/src/block/lion/lion.h +++ b/src/block/lion/lion.h @@ -20,6 +20,9 @@ namespace Botan { class BOTAN_DLL Lion : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw(); std::string name() const; BlockCipher* clone() const; @@ -27,8 +30,6 @@ class BOTAN_DLL Lion : public BlockCipher Lion(HashFunction*, StreamCipher*, u32bit); ~Lion() { delete hash; delete cipher; } private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); const u32bit LEFT_SIZE, RIGHT_SIZE; diff --git a/src/block/lubyrack/lubyrack.h b/src/block/lubyrack/lubyrack.h index 1e83748a6..940b34603 100644 --- a/src/block/lubyrack/lubyrack.h +++ b/src/block/lubyrack/lubyrack.h @@ -19,6 +19,9 @@ namespace Botan { class BOTAN_DLL LubyRackoff : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw(); std::string name() const; BlockCipher* clone() const; @@ -26,9 +29,8 @@ class BOTAN_DLL LubyRackoff : public BlockCipher LubyRackoff(HashFunction* hash); ~LubyRackoff() { delete hash; } private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); + HashFunction* hash; SecureVector K1, K2; }; diff --git a/src/block/mars/mars.h b/src/block/mars/mars.h index b3d74b179..7d0bfe4fa 100644 --- a/src/block/mars/mars.h +++ b/src/block/mars/mars.h @@ -15,13 +15,15 @@ namespace Botan { class BOTAN_DLL MARS : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw() { EK.clear(); } std::string name() const { return "MARS"; } BlockCipher* clone() const { return new MARS; } + MARS() : BlockCipher(16, 16, 32, 4) {} private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); void encrypt_round(u32bit&, u32bit&, u32bit&, u32bit&, u32bit) const; diff --git a/src/block/misty1/misty1.h b/src/block/misty1/misty1.h index 14b7c8e45..8db6881de 100644 --- a/src/block/misty1/misty1.h +++ b/src/block/misty1/misty1.h @@ -18,13 +18,15 @@ namespace Botan { class BOTAN_DLL MISTY1 : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw() { EK.clear(); DK.clear(); } std::string name() const { return "MISTY1"; } BlockCipher* clone() const { return new MISTY1; } + MISTY1(u32bit = 8); private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer EK, DK; diff --git a/src/block/noekeon/noekeon.h b/src/block/noekeon/noekeon.h index b4f3980d7..37b24fb7d 100644 --- a/src/block/noekeon/noekeon.h +++ b/src/block/noekeon/noekeon.h @@ -18,13 +18,15 @@ namespace Botan { class BOTAN_DLL Noekeon : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw(); std::string name() const { return "Noekeon"; } BlockCipher* clone() const { return new Noekeon; } + Noekeon() : BlockCipher(16, 16) {} private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); static const byte RC[17]; diff --git a/src/block/rc2/rc2.h b/src/block/rc2/rc2.h index 0d94d69a2..db623b385 100644 --- a/src/block/rc2/rc2.h +++ b/src/block/rc2/rc2.h @@ -18,15 +18,17 @@ namespace Botan { class BOTAN_DLL RC2 : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + static byte EKB_code(u32bit); void clear() throw() { K.clear(); } std::string name() const { return "RC2"; } BlockCipher* clone() const { return new RC2; } + RC2() : BlockCipher(8, 1, 32) {} private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer K; diff --git a/src/block/rc5/rc5.h b/src/block/rc5/rc5.h index 1816994dc..ff9204710 100644 --- a/src/block/rc5/rc5.h +++ b/src/block/rc5/rc5.h @@ -18,13 +18,15 @@ namespace Botan { class BOTAN_DLL RC5 : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw() { S.clear(); } std::string name() const; BlockCipher* clone() const { return new RC5(ROUNDS); } + RC5(u32bit); private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureVector S; const u32bit ROUNDS; diff --git a/src/block/rc6/rc6.h b/src/block/rc6/rc6.h index f634ebcd9..5171006f5 100644 --- a/src/block/rc6/rc6.h +++ b/src/block/rc6/rc6.h @@ -18,13 +18,15 @@ namespace Botan { class BOTAN_DLL RC6 : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw() { S.clear(); } std::string name() const { return "RC6"; } BlockCipher* clone() const { return new RC6; } + RC6() : BlockCipher(16, 1, 32) {} private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer S; diff --git a/src/block/safer/safer_sk.h b/src/block/safer/safer_sk.h index ae3d4f9a8..4d17bba51 100644 --- a/src/block/safer/safer_sk.h +++ b/src/block/safer/safer_sk.h @@ -18,19 +18,22 @@ namespace Botan { class BOTAN_DLL SAFER_SK : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw() { EK.clear(); } std::string name() const; BlockCipher* clone() const; + SAFER_SK(u32bit); private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); static const byte EXP[256]; static const byte LOG[512]; static const byte BIAS[208]; static const byte KEY_INDEX[208]; + SecureVector EK; const u32bit ROUNDS; }; diff --git a/src/block/seed/seed.h b/src/block/seed/seed.h index 0c9c2c8af..5a5a512e7 100644 --- a/src/block/seed/seed.h +++ b/src/block/seed/seed.h @@ -18,13 +18,15 @@ namespace Botan { class BOTAN_DLL SEED : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw() { K.clear(); } std::string name() const { return "SEED"; } BlockCipher* clone() const { return new SEED; } + SEED() : BlockCipher(16, 16) {} private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); class G_FUNC diff --git a/src/block/serpent/serpent.h b/src/block/serpent/serpent.h index 9c9fb59f2..d919c3008 100644 --- a/src/block/serpent/serpent.h +++ b/src/block/serpent/serpent.h @@ -18,13 +18,14 @@ namespace Botan { class BOTAN_DLL Serpent : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw() { round_key.clear(); } std::string name() const { return "Serpent"; } BlockCipher* clone() const { return new Serpent; } Serpent() : BlockCipher(16, 16, 32, 8) {} protected: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer round_key; diff --git a/src/block/serpent_ia32/serp_ia32.h b/src/block/serpent_ia32/serp_ia32.h index 2b4a95d3d..dc6beaf13 100644 --- a/src/block/serpent_ia32/serp_ia32.h +++ b/src/block/serpent_ia32/serp_ia32.h @@ -18,10 +18,11 @@ namespace Botan { class BOTAN_DLL Serpent_IA32 : public Serpent { public: - BlockCipher* clone() const { return new Serpent_IA32; } - private: void encrypt_n(const byte in[], byte out[], u32bit blocks) const; void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + + BlockCipher* clone() const { return new Serpent_IA32; } + private: void key_schedule(const byte[], u32bit); }; diff --git a/src/block/serpent_sse2/info.txt b/src/block/serpent_sse2/info.txt new file mode 100644 index 000000000..ad8323f53 --- /dev/null +++ b/src/block/serpent_sse2/info.txt @@ -0,0 +1,27 @@ +realname "Serpent (SSE2)" + +define SERPENT_SSE2 + +load_on auto + + +serp_sse2.cpp +serp_sse2.h + + + +pentium-m +pentium4 +prescott +amd64 + + + +gcc +icc + + + +serpent +sse2_eng + diff --git a/src/block/serpent_sse2/serp_sse2.cpp b/src/block/serpent_sse2/serp_sse2.cpp new file mode 100644 index 000000000..759f3e1d6 --- /dev/null +++ b/src/block/serpent_sse2/serp_sse2.cpp @@ -0,0 +1,170 @@ +/* +* Serpent (SSE2) +* (C) 2009 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#include +#include +#include + +namespace Botan { + +namespace { + +#define SBoxE1(b0, b1, b2, b3, b4) \ + do { \ + b3 = _mm_xor_si128(b3, b0); \ + b4 = b1; \ + b1 = _mm_and_si128(b1, b3); \ + b4 = _mm_xor_si128(b4, b2); \ + b1 = _mm_xor_si128(b1, b0); \ + b0 = _mm_or_si128(b0, b3); \ + b0 = _mm_xor_si128(b0, b4); \ + b4 = _mm_xor_si128(b4, b3); \ + b3 = _mm_xor_si128(b3, b2); \ + b2 = _mm_or_si128(b2, b1); \ + b2 = _mm_xor_si128(b2, b4); \ + b4 = _mm_andnot_si128(b4, all_ones); \ + b4 = _mm_or_si128(b4, b1); \ + b1 = _mm_xor_si128(b1, b3); \ + b1 = _mm_xor_si128(b1, b4); \ + b3 = _mm_or_si128(b3, b0); \ + b1 = _mm_xor_si128(b1, b3); \ + b4 = _mm_xor_si128(b4, b3); \ + b3 = b0; b0 = b1; b1 = b4; \ + } while(0); + +#define rotate_left_m128(vec, rot) \ + _mm_or_si128(_mm_slli_epi32(vec, rot), _mm_srli_epi32(vec, 32-rot)) + +#define key_xor(round, b0, b1, b2, b3) \ + do { \ + __m128i key = _mm_loadu_si128(keys + round); \ + b0 = _mm_xor_si128(b0, _mm_shuffle_epi32(key, _MM_SHUFFLE(0,0,0,0))); \ + b1 = _mm_xor_si128(b1, _mm_shuffle_epi32(key, _MM_SHUFFLE(1,1,1,1))); \ + b2 = _mm_xor_si128(b2, _mm_shuffle_epi32(key, _MM_SHUFFLE(2,2,2,2))); \ + b3 = _mm_xor_si128(b3, _mm_shuffle_epi32(key, _MM_SHUFFLE(3,3,3,3))); \ + } while(0); + +#define transform(b0, b1, b2, b3) \ + do \ + { \ + b0 = rotate_left_m128(b0, 13); \ + b2 = rotate_left_m128(b2, 3); \ + b1 = _mm_xor_si128(b1, _mm_xor_si128(b0, b2)); \ + b3 = _mm_xor_si128(b3, _mm_xor_si128(b2, _mm_slli_epi32(b0, 3))); \ + b1 = rotate_left_m128(b1, 1); \ + b3 = rotate_left_m128(b3, 7); \ + b0 = _mm_xor_si128(b0, _mm_xor_si128(b1, b3)); \ + b2 = _mm_xor_si128(b2, _mm_xor_si128(b3, _mm_slli_epi32(b1, 7))); \ + b0 = rotate_left_m128(b0, 5); \ + b2 = rotate_left_m128(b2, 22); \ + } while(0); + +void print_simd(const char* name, __m128i vec) + { + union { __m128i v; int32_t ints[4]; } u = { vec }; + + printf("%s: ", name); + for(u32bit i = 0; i != 4; ++i) + printf("%08X ", u.ints[i]); + printf("\n"); + } + +void serpent_encrypt_4(const byte in[64], + byte out[64], + const u32bit keys_32[132]) + { + const __m128i* keys = (const __m128i*)(keys_32); + + /* + FIXME: figure out a fast way to do this with 4 loads with + _mm_loadu_si128 plus shuffle/interleave ops + */ + union { __m128i v; u32bit u32[4]; } convert; + + convert.u32[0] = load_le(in, 0); + convert.u32[1] = load_le(in, 4); + convert.u32[2] = load_le(in, 8); + convert.u32[3] = load_le(in, 12); + + __m128i b0 = convert.v; + + convert.u32[0] = load_le(in, 1); + convert.u32[1] = load_le(in, 5); + convert.u32[2] = load_le(in, 9); + convert.u32[3] = load_le(in, 13); + + __m128i b1 = convert.v; + + convert.u32[0] = load_le(in, 2); + convert.u32[1] = load_le(in, 6); + convert.u32[2] = load_le(in, 10); + convert.u32[3] = load_le(in, 14); + + __m128i b2 = convert.v; + + convert.u32[0] = load_le(in, 3); + convert.u32[1] = load_le(in, 7); + convert.u32[2] = load_le(in, 11); + convert.u32[3] = load_le(in, 15); + + + __m128i b3 = convert.v; + + __m128i b4; // temp + + const __m128i all_ones = _mm_set1_epi8(0xFF); + + key_xor(0, b0, b1, b2, b3); + SBoxE1(b0, b1, b2, b3, b4); + transform(b0, b1, b2, b3); + + key_xor(b0, b1, b2, b3, 1); + + print_simd("b0", b0); + print_simd("b1", b1); + print_simd("b2", b2); + print_simd("b3", b3); + } + +} + +/* +* Serpent Encryption +*/ +void Serpent_SSE2::encrypt_n(const byte in[], byte out[], u32bit blocks) const + { + while(blocks >= 4) + { + serpent_encrypt_4(in, out, this->round_key); + //Serpent::encrypt_n(in, out, 4); + in += 4 * BLOCK_SIZE; + out += 4 * BLOCK_SIZE; + blocks -= 4; + } + + for(u32bit i = 0; i != blocks; ++i) + { + Serpent::encrypt_n(in, out, 1); + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* Serpent Decryption +*/ +void Serpent_SSE2::decrypt_n(const byte in[], byte out[], u32bit blocks) const + { + for(u32bit i = 0; i != blocks; ++i) + { + Serpent::decrypt_n(in, out, 1); + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +} diff --git a/src/block/serpent_sse2/serp_sse2.h b/src/block/serpent_sse2/serp_sse2.h new file mode 100644 index 000000000..f1e5c2028 --- /dev/null +++ b/src/block/serpent_sse2/serp_sse2.h @@ -0,0 +1,29 @@ +/* +* Serpent (SSE2) +* (C) 2009 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_SERPENT_SSE2_H__ +#define BOTAN_SERPENT_SSE2_H__ + +#include + +namespace Botan { + +/* +* Serpent +*/ +class BOTAN_DLL Serpent_SSE2 : public Serpent + { + public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + + BlockCipher* clone() const { return new Serpent_SSE2; } + }; + +} + +#endif diff --git a/src/block/skipjack/skipjack.h b/src/block/skipjack/skipjack.h index ec071dfe7..f12032f36 100644 --- a/src/block/skipjack/skipjack.h +++ b/src/block/skipjack/skipjack.h @@ -18,18 +18,21 @@ namespace Botan { class BOTAN_DLL Skipjack : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw(); std::string name() const { return "Skipjack"; } BlockCipher* clone() const { return new Skipjack; } + Skipjack() : BlockCipher(8, 10) {} private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); void step_A(u16bit&, u16bit&, u32bit) const; void step_B(u16bit&, u16bit&, u32bit) const; void step_Ai(u16bit&, u16bit&, u32bit) const; void step_Bi(u16bit&, u16bit&, u32bit) const; + SecureBuffer FTABLE[10]; }; diff --git a/src/block/square/square.h b/src/block/square/square.h index 0de4c20bd..5d9cfc78c 100644 --- a/src/block/square/square.h +++ b/src/block/square/square.h @@ -18,13 +18,15 @@ namespace Botan { class BOTAN_DLL Square : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw(); std::string name() const { return "Square"; } BlockCipher* clone() const { return new Square; } + Square() : BlockCipher(16, 16) {} private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); static void transform(u32bit[4]); diff --git a/src/block/tea/tea.h b/src/block/tea/tea.h index 141899e88..825a051aa 100644 --- a/src/block/tea/tea.h +++ b/src/block/tea/tea.h @@ -18,13 +18,15 @@ namespace Botan { class BOTAN_DLL TEA : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw() { K.clear(); } std::string name() const { return "TEA"; } BlockCipher* clone() const { return new TEA; } + TEA() : BlockCipher(8, 16) {} private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer K; }; diff --git a/src/block/twofish/twofish.h b/src/block/twofish/twofish.h index 640fb58ad..87b9aa626 100644 --- a/src/block/twofish/twofish.h +++ b/src/block/twofish/twofish.h @@ -18,13 +18,15 @@ namespace Botan { class BOTAN_DLL Twofish : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw(); std::string name() const { return "Twofish"; } BlockCipher* clone() const { return new Twofish; } + Twofish() : BlockCipher(16, 16, 32, 8) {} private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); static void rs_mul(byte[4], byte, u32bit); diff --git a/src/block/xtea/xtea.h b/src/block/xtea/xtea.h index b50f487b4..de265818d 100644 --- a/src/block/xtea/xtea.h +++ b/src/block/xtea/xtea.h @@ -18,13 +18,15 @@ namespace Botan { class BOTAN_DLL XTEA : public BlockCipher { public: + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; + void clear() throw() { EK.clear(); } std::string name() const { return "XTEA"; } BlockCipher* clone() const { return new XTEA; } + XTEA() : BlockCipher(8, 16) {} private: - void encrypt_n(const byte in[], byte out[], u32bit blocks) const; - void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); SecureBuffer EK; }; diff --git a/src/engine/sse2_eng/eng_sse2.cpp b/src/engine/sse2_eng/eng_sse2.cpp index c738b3d96..9f68a070e 100644 --- a/src/engine/sse2_eng/eng_sse2.cpp +++ b/src/engine/sse2_eng/eng_sse2.cpp @@ -1,6 +1,6 @@ /** * SSE2 Assembly Engine -* (C) 1999-2008 Jack Lloyd +* (C) 1999-2009 Jack Lloyd * * Distributed under the terms of the Botan license */ @@ -11,10 +11,27 @@ #include #endif +#if defined(BOTAN_HAS_SERPENT_SSE2) + #include +#endif + namespace Botan { -HashFunction* SSE2_Assembler_Engine::find_hash(const SCAN_Name& request, - Algorithm_Factory&) const +BlockCipher* +SSE2_Assembler_Engine::find_block_cipher(const SCAN_Name& request, + Algorithm_Factory&) const + { +#if defined(BOTAN_HAS_SERPENT_SSE2) + if(request.algo_name() == "Serpent") + return new Serpent_SSE2; +#endif + + return 0; + } + +HashFunction* +SSE2_Assembler_Engine::find_hash(const SCAN_Name& request, + Algorithm_Factory&) const { #if defined(BOTAN_HAS_SHA1_SSE2) if(request.algo_name() == "SHA-160") diff --git a/src/engine/sse2_eng/eng_sse2.h b/src/engine/sse2_eng/eng_sse2.h index 129697e8f..c6b0ce889 100644 --- a/src/engine/sse2_eng/eng_sse2.h +++ b/src/engine/sse2_eng/eng_sse2.h @@ -1,6 +1,6 @@ /** * SSE2 Assembly Engine -* (C) 1999-2008 Jack Lloyd +* (C) 1999-2009 Jack Lloyd * * Distributed under the terms of the Botan license */ @@ -17,6 +17,9 @@ class BOTAN_DLL SSE2_Assembler_Engine : public Engine public: std::string provider_name() const { return "sse2"; } private: + BlockCipher* find_block_cipher(const SCAN_Name&, + Algorithm_Factory&) const; + HashFunction* find_hash(const SCAN_Name& reqeust, Algorithm_Factory&) const; }; -- cgit v1.2.3 From 285d350ed62d2714592a27bf577832a92cb6902f Mon Sep 17 00:00:00 2001 From: lloyd Date: Wed, 12 Aug 2009 12:23:16 +0000 Subject: Add full 4-way SSE2 Serpent encryption. Load/store operations are via unions and can be made much faster using interleave operations I think. Currently ~2.5x faster in ECB or CTR mode on a Core2, which isn't too bad. --- src/block/serpent_sse2/info.txt | 1 + src/block/serpent_sse2/serp_sse2.cpp | 111 +++++++++------- src/block/serpent_sse2/serp_sse2_sbox.h | 225 ++++++++++++++++++++++++++++++++ 3 files changed, 290 insertions(+), 47 deletions(-) create mode 100644 src/block/serpent_sse2/serp_sse2_sbox.h (limited to 'src') diff --git a/src/block/serpent_sse2/info.txt b/src/block/serpent_sse2/info.txt index ad8323f53..09733e98f 100644 --- a/src/block/serpent_sse2/info.txt +++ b/src/block/serpent_sse2/info.txt @@ -7,6 +7,7 @@ load_on auto serp_sse2.cpp serp_sse2.h +serp_sse2_sbox.h diff --git a/src/block/serpent_sse2/serp_sse2.cpp b/src/block/serpent_sse2/serp_sse2.cpp index 759f3e1d6..3e78f0bac 100644 --- a/src/block/serpent_sse2/serp_sse2.cpp +++ b/src/block/serpent_sse2/serp_sse2.cpp @@ -6,6 +6,7 @@ */ #include +#include #include #include @@ -13,29 +14,6 @@ namespace Botan { namespace { -#define SBoxE1(b0, b1, b2, b3, b4) \ - do { \ - b3 = _mm_xor_si128(b3, b0); \ - b4 = b1; \ - b1 = _mm_and_si128(b1, b3); \ - b4 = _mm_xor_si128(b4, b2); \ - b1 = _mm_xor_si128(b1, b0); \ - b0 = _mm_or_si128(b0, b3); \ - b0 = _mm_xor_si128(b0, b4); \ - b4 = _mm_xor_si128(b4, b3); \ - b3 = _mm_xor_si128(b3, b2); \ - b2 = _mm_or_si128(b2, b1); \ - b2 = _mm_xor_si128(b2, b4); \ - b4 = _mm_andnot_si128(b4, all_ones); \ - b4 = _mm_or_si128(b4, b1); \ - b1 = _mm_xor_si128(b1, b3); \ - b1 = _mm_xor_si128(b1, b4); \ - b3 = _mm_or_si128(b3, b0); \ - b1 = _mm_xor_si128(b1, b3); \ - b4 = _mm_xor_si128(b4, b3); \ - b3 = b0; b0 = b1; b1 = b4; \ - } while(0); - #define rotate_left_m128(vec, rot) \ _mm_or_si128(_mm_slli_epi32(vec, rot), _mm_srli_epi32(vec, 32-rot)) @@ -89,45 +67,84 @@ void serpent_encrypt_4(const byte in[64], convert.u32[1] = load_le(in, 4); convert.u32[2] = load_le(in, 8); convert.u32[3] = load_le(in, 12); - - __m128i b0 = convert.v; + __m128i B0 = convert.v; convert.u32[0] = load_le(in, 1); convert.u32[1] = load_le(in, 5); convert.u32[2] = load_le(in, 9); convert.u32[3] = load_le(in, 13); - - __m128i b1 = convert.v; + __m128i B1 = convert.v; convert.u32[0] = load_le(in, 2); convert.u32[1] = load_le(in, 6); convert.u32[2] = load_le(in, 10); convert.u32[3] = load_le(in, 14); - - __m128i b2 = convert.v; + __m128i B2 = convert.v; convert.u32[0] = load_le(in, 3); convert.u32[1] = load_le(in, 7); convert.u32[2] = load_le(in, 11); convert.u32[3] = load_le(in, 15); - - - __m128i b3 = convert.v; - - __m128i b4; // temp - - const __m128i all_ones = _mm_set1_epi8(0xFF); - - key_xor(0, b0, b1, b2, b3); - SBoxE1(b0, b1, b2, b3, b4); - transform(b0, b1, b2, b3); - - key_xor(b0, b1, b2, b3, 1); - - print_simd("b0", b0); - print_simd("b1", b1); - print_simd("b2", b2); - print_simd("b3", b3); + __m128i B3 = convert.v; + + key_xor(0,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(1,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(2,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(3,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(4,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(5,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(6,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(7,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); transform(B0,B1,B2,B3); + + key_xor( 8,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 9,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(10,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(11,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(12,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(13,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(14,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(15,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(16,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(17,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(18,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(19,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(20,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(21,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(22,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(23,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(24,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(25,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(26,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(27,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(28,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(29,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(30,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(31,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3); + + // FIXME: figure out how to do this fast + union { __m128i v; u32bit u32[4]; } convert_B0; + union { __m128i v; u32bit u32[4]; } convert_B1; + union { __m128i v; u32bit u32[4]; } convert_B2; + union { __m128i v; u32bit u32[4]; } convert_B3; + convert_B0.v = B0; + convert_B1.v = B1; + convert_B2.v = B2; + convert_B3.v = B3; + store_le(out, + convert_B0.u32[0], convert_B1.u32[0], + convert_B2.u32[0], convert_B3.u32[0]); + + store_le(out + 16, + convert_B0.u32[1], convert_B1.u32[1], + convert_B2.u32[1], convert_B3.u32[1]); + + store_le(out + 32, + convert_B0.u32[2], convert_B1.u32[2], + convert_B2.u32[2], convert_B3.u32[2]); + + store_le(out + 48, + convert_B0.u32[3], convert_B1.u32[3], + convert_B2.u32[3], convert_B3.u32[3]); } } diff --git a/src/block/serpent_sse2/serp_sse2_sbox.h b/src/block/serpent_sse2/serp_sse2_sbox.h new file mode 100644 index 000000000..2c4d9d9cb --- /dev/null +++ b/src/block/serpent_sse2/serp_sse2_sbox.h @@ -0,0 +1,225 @@ +/* +* Serpent Sboxes in SSE2 form +* (C) 2009 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#ifndef SERPENT_SSE2_SBOXES_H__ +#define SERPENT_SSE2_SBOXES_H__ + +#define SBoxE1(b0, b1, b2, b3) \ + do { \ + b3 = _mm_xor_si128(b3, b0); \ + __m128i b4 = b1; \ + b1 = _mm_and_si128(b1, b3); \ + b4 = _mm_xor_si128(b4, b2); \ + b1 = _mm_xor_si128(b1, b0); \ + b0 = _mm_or_si128(b0, b3); \ + b0 = _mm_xor_si128(b0, b4); \ + b4 = _mm_xor_si128(b4, b3); \ + b3 = _mm_xor_si128(b3, b2); \ + b2 = _mm_or_si128(b2, b1); \ + b2 = _mm_xor_si128(b2, b4); \ + b4 = _mm_andnot_si128(b4, _mm_set1_epi8(0xFF)); \ + b4 = _mm_or_si128(b4, b1); \ + b1 = _mm_xor_si128(b1, b3); \ + b1 = _mm_xor_si128(b1, b4); \ + b3 = _mm_or_si128(b3, b0); \ + b1 = _mm_xor_si128(b1, b3); \ + b4 = _mm_xor_si128(b4, b3); \ + b3 = b0; \ + b0 = b1; \ + b1 = b4; \ + } while(0); + +#define SBoxE2(b0, b1, b2, b3) \ + do \ + { \ + b0 = _mm_andnot_si128(b0, _mm_set1_epi8(0xFF)); \ + b2 = _mm_andnot_si128(b2, _mm_set1_epi8(0xFF)); \ + __m128i b4 = b0; \ + b0 = _mm_and_si128(b0, b1); \ + b2 = _mm_xor_si128(b2, b0); \ + b0 = _mm_or_si128(b0, b3); \ + b3 = _mm_xor_si128(b3, b2); \ + b1 = _mm_xor_si128(b1, b0); \ + b0 = _mm_xor_si128(b0, b4); \ + b4 = _mm_or_si128(b4, b1); \ + b1 = _mm_xor_si128(b1, b3); \ + b2 = _mm_or_si128(b2, b0); \ + b2 = _mm_and_si128(b2, b4); \ + b0 = _mm_xor_si128(b0, b1); \ + b1 = _mm_and_si128(b1, b2); \ + b1 = _mm_xor_si128(b1, b0); \ + b0 = _mm_and_si128(b0, b2); \ + b4 = _mm_xor_si128(b4, b0); \ + b0 = b2; \ + b2 = b3; \ + b3 = b1; \ + b1 = b4; \ + } while(0); + +#define SBoxE3(b0, b1, b2, b3) \ + do \ + { \ + __m128i b4 = b0; \ + b0 = _mm_and_si128(b0, b2); \ + b0 = _mm_xor_si128(b0, b3); \ + b2 = _mm_xor_si128(b2, b1); \ + b2 = _mm_xor_si128(b2, b0); \ + b3 = _mm_or_si128(b3, b4); \ + b3 = _mm_xor_si128(b3, b1); \ + b4 = _mm_xor_si128(b4, b2); \ + b1 = b3; \ + b3 = _mm_or_si128(b3, b4); \ + b3 = _mm_xor_si128(b3, b0); \ + b0 = _mm_and_si128(b0, b1); \ + b4 = _mm_xor_si128(b4, b0); \ + b1 = _mm_xor_si128(b1, b3); \ + b1 = _mm_xor_si128(b1, b4); \ + b4 = _mm_andnot_si128(b4, _mm_set1_epi8(0xFF)); \ + b0 = b2; \ + b2 = b1; \ + b1 = b3; \ + b3 = b4; \ + } while(0); + +#define SBoxE4(b0, b1, b2, b3) \ + do \ + { \ + __m128i b4 = b0; \ + b0 = _mm_or_si128(b0, b3); \ + b3 = _mm_xor_si128(b3, b1); \ + b1 = _mm_and_si128(b1, b4); \ + b4 = _mm_xor_si128(b4, b2); \ + b2 = _mm_xor_si128(b2, b3); \ + b3 = _mm_and_si128(b3, b0); \ + b4 = _mm_or_si128(b4, b1); \ + b3 = _mm_xor_si128(b3, b4); \ + b0 = _mm_xor_si128(b0, b1); \ + b4 = _mm_and_si128(b4, b0); \ + b1 = _mm_xor_si128(b1, b3); \ + b4 = _mm_xor_si128(b4, b2); \ + b1 = _mm_or_si128(b1, b0); \ + b1 = _mm_xor_si128(b1, b2); \ + b0 = _mm_xor_si128(b0, b3); \ + b2 = b1; \ + b1 = _mm_or_si128(b1, b3); \ + b1 = _mm_xor_si128(b1, b0); \ + b0 = b1; \ + b1 = b2; \ + b2 = b3; \ + b3 = b4; \ + } while(0); + +#define SBoxE5(b0, b1, b2, b3) \ + do \ + { \ + b1 = _mm_xor_si128(b1, b3); \ + b3 = _mm_andnot_si128(b3, _mm_set1_epi8(0xFF)); \ + b2 = _mm_xor_si128(b2, b3); \ + b3 = _mm_xor_si128(b3, b0); \ + __m128i b4 = b1; \ + b1 = _mm_and_si128(b1, b3); \ + b1 = _mm_xor_si128(b1, b2); \ + b4 = _mm_xor_si128(b4, b3); \ + b0 = _mm_xor_si128(b0, b4); \ + b2 = _mm_and_si128(b2, b4); \ + b2 = _mm_xor_si128(b2, b0); \ + b0 = _mm_and_si128(b0, b1); \ + b3 = _mm_xor_si128(b3, b0); \ + b4 = _mm_or_si128(b4, b1); \ + b4 = _mm_xor_si128(b4, b0); \ + b0 = _mm_or_si128(b0, b3); \ + b0 = _mm_xor_si128(b0, b2); \ + b2 = _mm_and_si128(b2, b3); \ + b0 = _mm_andnot_si128(b0, _mm_set1_epi8(0xFF)); \ + b4 = _mm_xor_si128(b4, b2); \ + b2 = b0; \ + b0 = b1; \ + b1 = b4; \ + } while(0); + +#define SBoxE6(b0, b1, b2, b3) \ + do \ + { \ + b0 = _mm_xor_si128(b0, b1); \ + b1 = _mm_xor_si128(b1, b3); \ + b3 = _mm_andnot_si128(b3, _mm_set1_epi8(0xFF)); \ + __m128i b4 = b1; \ + b1 = _mm_and_si128(b1, b0); \ + b2 = _mm_xor_si128(b2, b3); \ + b1 = _mm_xor_si128(b1, b2); \ + b2 = _mm_or_si128(b2, b4); \ + b4 = _mm_xor_si128(b4, b3); \ + b3 = _mm_and_si128(b3, b1); \ + b3 = _mm_xor_si128(b3, b0); \ + b4 = _mm_xor_si128(b4, b1); \ + b4 = _mm_xor_si128(b4, b2); \ + b2 = _mm_xor_si128(b2, b0); \ + b0 = _mm_and_si128(b0, b3); \ + b2 = _mm_andnot_si128(b2, _mm_set1_epi8(0xFF)); \ + b0 = _mm_xor_si128(b0, b4); \ + b4 = _mm_or_si128(b4, b3); \ + b4 = _mm_xor_si128(b4, b2); \ + b2 = b0; \ + b0 = b1; \ + b1 = b3; \ + b3 = b4; \ + } while(0); + +#define SBoxE7(b0, b1, b2, b3) \ + do \ + { \ + b2 = _mm_andnot_si128(b2, _mm_set1_epi8(0xFF)); \ + __m128i b4 = b3; \ + b3 = _mm_and_si128(b3, b0); \ + b0 = _mm_xor_si128(b0, b4); \ + b3 = _mm_xor_si128(b3, b2); \ + b2 = _mm_or_si128(b2, b4); \ + b1 = _mm_xor_si128(b1, b3); \ + b2 = _mm_xor_si128(b2, b0); \ + b0 = _mm_or_si128(b0, b1); \ + b2 = _mm_xor_si128(b2, b1); \ + b4 = _mm_xor_si128(b4, b0); \ + b0 = _mm_or_si128(b0, b3); \ + b0 = _mm_xor_si128(b0, b2); \ + b4 = _mm_xor_si128(b4, b3); \ + b4 = _mm_xor_si128(b4, b0); \ + b3 = _mm_andnot_si128(b3, _mm_set1_epi8(0xFF)); \ + b2 = _mm_and_si128(b2, b4); \ + b3 = _mm_xor_si128(b3, b2); \ + b2 = b4; \ + } while(0); + +#define SBoxE8(b0, b1, b2, b3) \ + do \ + { \ + __m128i b4 = b1; \ + b1 = _mm_or_si128(b1, b2); \ + b1 = _mm_xor_si128(b1, b3); \ + b4 = _mm_xor_si128(b4, b2); \ + b2 = _mm_xor_si128(b2, b1); \ + b3 = _mm_or_si128(b3, b4); \ + b3 = _mm_and_si128(b3, b0); \ + b4 = _mm_xor_si128(b4, b2); \ + b3 = _mm_xor_si128(b3, b1); \ + b1 = _mm_or_si128(b1, b4); \ + b1 = _mm_xor_si128(b1, b0); \ + b0 = _mm_or_si128(b0, b4); \ + b0 = _mm_xor_si128(b0, b2); \ + b1 = _mm_xor_si128(b1, b4); \ + b2 = _mm_xor_si128(b2, b1); \ + b1 = _mm_and_si128(b1, b0); \ + b1 = _mm_xor_si128(b1, b4); \ + b2 = _mm_andnot_si128(b2, _mm_set1_epi8(0xFF)); \ + b2 = _mm_or_si128(b2, b0); \ + b4 = _mm_xor_si128(b4, b2); \ + b2 = b1; \ + b1 = b3; \ + b3 = b0; \ + b0 = b4; \ + } while(0); + +#endif -- cgit v1.2.3 From 89eb757b344d3605f3f8012079749f01ef23bb6b Mon Sep 17 00:00:00 2001 From: lloyd Date: Wed, 12 Aug 2009 13:49:19 +0000 Subject: Use SSE2 unpack instructions instead of unions for input/output conversion. About 10% faster than previous. Currently 112 MiB/s in ECB mode, versus about 40 MiB/s in scalar mode, on my 2.4 GHz Core2 --- src/block/serpent_sse2/serp_sse2.cpp | 162 ++++++------- src/block/serpent_sse2/serp_sse2_sbox.h | 401 ++++++++++++++++---------------- 2 files changed, 263 insertions(+), 300 deletions(-) (limited to 'src') diff --git a/src/block/serpent_sse2/serp_sse2.cpp b/src/block/serpent_sse2/serp_sse2.cpp index 3e78f0bac..ea937c95a 100644 --- a/src/block/serpent_sse2/serp_sse2.cpp +++ b/src/block/serpent_sse2/serp_sse2.cpp @@ -14,88 +14,76 @@ namespace Botan { namespace { +#define key_xor(round, B0, B1, B2, B3) \ + do { \ + __m128i key = _mm_loadu_si128(keys + round); \ + B0 = _mm_xor_si128(B0, _mm_shuffle_epi32(key, _MM_SHUFFLE(0,0,0,0))); \ + B1 = _mm_xor_si128(B1, _mm_shuffle_epi32(key, _MM_SHUFFLE(1,1,1,1))); \ + B2 = _mm_xor_si128(B2, _mm_shuffle_epi32(key, _MM_SHUFFLE(2,2,2,2))); \ + B3 = _mm_xor_si128(B3, _mm_shuffle_epi32(key, _MM_SHUFFLE(3,3,3,3))); \ + } while(0); + +/* +* Serpent's linear transformation +*/ #define rotate_left_m128(vec, rot) \ _mm_or_si128(_mm_slli_epi32(vec, rot), _mm_srli_epi32(vec, 32-rot)) -#define key_xor(round, b0, b1, b2, b3) \ - do { \ - __m128i key = _mm_loadu_si128(keys + round); \ - b0 = _mm_xor_si128(b0, _mm_shuffle_epi32(key, _MM_SHUFFLE(0,0,0,0))); \ - b1 = _mm_xor_si128(b1, _mm_shuffle_epi32(key, _MM_SHUFFLE(1,1,1,1))); \ - b2 = _mm_xor_si128(b2, _mm_shuffle_epi32(key, _MM_SHUFFLE(2,2,2,2))); \ - b3 = _mm_xor_si128(b3, _mm_shuffle_epi32(key, _MM_SHUFFLE(3,3,3,3))); \ - } while(0); - -#define transform(b0, b1, b2, b3) \ - do \ - { \ - b0 = rotate_left_m128(b0, 13); \ - b2 = rotate_left_m128(b2, 3); \ - b1 = _mm_xor_si128(b1, _mm_xor_si128(b0, b2)); \ - b3 = _mm_xor_si128(b3, _mm_xor_si128(b2, _mm_slli_epi32(b0, 3))); \ - b1 = rotate_left_m128(b1, 1); \ - b3 = rotate_left_m128(b3, 7); \ - b0 = _mm_xor_si128(b0, _mm_xor_si128(b1, b3)); \ - b2 = _mm_xor_si128(b2, _mm_xor_si128(b3, _mm_slli_epi32(b1, 7))); \ - b0 = rotate_left_m128(b0, 5); \ - b2 = rotate_left_m128(b2, 22); \ - } while(0); - -void print_simd(const char* name, __m128i vec) - { - union { __m128i v; int32_t ints[4]; } u = { vec }; +#define transform(B0, B1, B2, B3) \ + do { \ + B0 = rotate_left_m128(B0, 13); \ + B2 = rotate_left_m128(B2, 3); \ + B1 = _mm_xor_si128(B1, _mm_xor_si128(B0, B2)); \ + B3 = _mm_xor_si128(B3, _mm_xor_si128(B2, _mm_slli_epi32(B0, 3))); \ + B1 = rotate_left_m128(B1, 1); \ + B3 = rotate_left_m128(B3, 7); \ + B0 = _mm_xor_si128(B0, _mm_xor_si128(B1, B3)); \ + B2 = _mm_xor_si128(B2, _mm_xor_si128(B3, _mm_slli_epi32(B1, 7))); \ + B0 = rotate_left_m128(B0, 5); \ + B2 = rotate_left_m128(B2, 22); \ + } while(0); - printf("%s: ", name); - for(u32bit i = 0; i != 4; ++i) - printf("%08X ", u.ints[i]); - printf("\n"); - } +/* +* 4x4 SSE2 integer matrix transpose +*/ +#define transpose(B0, B1, B2, B3) \ + do { \ + __m128i T0 = _mm_unpacklo_epi32(B0, B1); \ + __m128i T1 = _mm_unpacklo_epi32(B2, B3); \ + __m128i T2 = _mm_unpackhi_epi32(B0, B1); \ + __m128i T3 = _mm_unpackhi_epi32(B2, B3); \ + B0 = _mm_unpacklo_epi64(T0, T1); \ + B1 = _mm_unpackhi_epi64(T0, T1); \ + B2 = _mm_unpacklo_epi64(T2, T3); \ + B3 = _mm_unpackhi_epi64(T2, T3); \ + } while(0); +/* +* SSE2 Serpent Encryption of 4 blocks in parallel +*/ void serpent_encrypt_4(const byte in[64], byte out[64], const u32bit keys_32[132]) { const __m128i* keys = (const __m128i*)(keys_32); - - /* - FIXME: figure out a fast way to do this with 4 loads with - _mm_loadu_si128 plus shuffle/interleave ops - */ - union { __m128i v; u32bit u32[4]; } convert; - - convert.u32[0] = load_le(in, 0); - convert.u32[1] = load_le(in, 4); - convert.u32[2] = load_le(in, 8); - convert.u32[3] = load_le(in, 12); - __m128i B0 = convert.v; - - convert.u32[0] = load_le(in, 1); - convert.u32[1] = load_le(in, 5); - convert.u32[2] = load_le(in, 9); - convert.u32[3] = load_le(in, 13); - __m128i B1 = convert.v; - - convert.u32[0] = load_le(in, 2); - convert.u32[1] = load_le(in, 6); - convert.u32[2] = load_le(in, 10); - convert.u32[3] = load_le(in, 14); - __m128i B2 = convert.v; - - convert.u32[0] = load_le(in, 3); - convert.u32[1] = load_le(in, 7); - convert.u32[2] = load_le(in, 11); - convert.u32[3] = load_le(in, 15); - __m128i B3 = convert.v; - - key_xor(0,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(1,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(2,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(3,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(4,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(5,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(6,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); - key_xor(7,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); transform(B0,B1,B2,B3); - + __m128i* out_mm = (__m128i*)(out); + __m128i* in_mm = (__m128i*)(in); + + __m128i B0 = _mm_loadu_si128(in_mm); + __m128i B1 = _mm_loadu_si128(in_mm + 1); + __m128i B2 = _mm_loadu_si128(in_mm + 2); + __m128i B3 = _mm_loadu_si128(in_mm + 3); + + transpose(B0, B1, B2, B3); + + key_xor( 0,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 1,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 2,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 3,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 4,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 5,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 6,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 7,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor( 8,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor( 9,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(10,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); @@ -121,30 +109,12 @@ void serpent_encrypt_4(const byte in[64], key_xor(30,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3); - // FIXME: figure out how to do this fast - union { __m128i v; u32bit u32[4]; } convert_B0; - union { __m128i v; u32bit u32[4]; } convert_B1; - union { __m128i v; u32bit u32[4]; } convert_B2; - union { __m128i v; u32bit u32[4]; } convert_B3; - convert_B0.v = B0; - convert_B1.v = B1; - convert_B2.v = B2; - convert_B3.v = B3; - store_le(out, - convert_B0.u32[0], convert_B1.u32[0], - convert_B2.u32[0], convert_B3.u32[0]); - - store_le(out + 16, - convert_B0.u32[1], convert_B1.u32[1], - convert_B2.u32[1], convert_B3.u32[1]); - - store_le(out + 32, - convert_B0.u32[2], convert_B1.u32[2], - convert_B2.u32[2], convert_B3.u32[2]); - - store_le(out + 48, - convert_B0.u32[3], convert_B1.u32[3], - convert_B2.u32[3], convert_B3.u32[3]); + transpose(B0, B1, B2, B3); + + _mm_storeu_si128(out_mm , B0); + _mm_storeu_si128(out_mm + 1, B1); + _mm_storeu_si128(out_mm + 2, B2); + _mm_storeu_si128(out_mm + 3, B3); } } diff --git a/src/block/serpent_sse2/serp_sse2_sbox.h b/src/block/serpent_sse2/serp_sse2_sbox.h index 2c4d9d9cb..bc8678a89 100644 --- a/src/block/serpent_sse2/serp_sse2_sbox.h +++ b/src/block/serpent_sse2/serp_sse2_sbox.h @@ -8,218 +8,211 @@ #ifndef SERPENT_SSE2_SBOXES_H__ #define SERPENT_SSE2_SBOXES_H__ -#define SBoxE1(b0, b1, b2, b3) \ +#define SBoxE1(B0, B1, B2, B3) \ do { \ - b3 = _mm_xor_si128(b3, b0); \ - __m128i b4 = b1; \ - b1 = _mm_and_si128(b1, b3); \ - b4 = _mm_xor_si128(b4, b2); \ - b1 = _mm_xor_si128(b1, b0); \ - b0 = _mm_or_si128(b0, b3); \ - b0 = _mm_xor_si128(b0, b4); \ - b4 = _mm_xor_si128(b4, b3); \ - b3 = _mm_xor_si128(b3, b2); \ - b2 = _mm_or_si128(b2, b1); \ - b2 = _mm_xor_si128(b2, b4); \ - b4 = _mm_andnot_si128(b4, _mm_set1_epi8(0xFF)); \ - b4 = _mm_or_si128(b4, b1); \ - b1 = _mm_xor_si128(b1, b3); \ - b1 = _mm_xor_si128(b1, b4); \ - b3 = _mm_or_si128(b3, b0); \ - b1 = _mm_xor_si128(b1, b3); \ - b4 = _mm_xor_si128(b4, b3); \ - b3 = b0; \ - b0 = b1; \ - b1 = b4; \ + B3 = _mm_xor_si128(B3, B0); \ + __m128i B4 = B1; \ + B1 = _mm_and_si128(B1, B3); \ + B4 = _mm_xor_si128(B4, B2); \ + B1 = _mm_xor_si128(B1, B0); \ + B0 = _mm_or_si128(B0, B3); \ + B0 = _mm_xor_si128(B0, B4); \ + B4 = _mm_xor_si128(B4, B3); \ + B3 = _mm_xor_si128(B3, B2); \ + B2 = _mm_or_si128(B2, B1); \ + B2 = _mm_xor_si128(B2, B4); \ + B4 = _mm_andnot_si128(B4, _mm_set1_epi8(0xFF)); \ + B4 = _mm_or_si128(B4, B1); \ + B1 = _mm_xor_si128(B1, B3); \ + B1 = _mm_xor_si128(B1, B4); \ + B3 = _mm_or_si128(B3, B0); \ + B1 = _mm_xor_si128(B1, B3); \ + B4 = _mm_xor_si128(B4, B3); \ + B3 = B0; \ + B0 = B1; \ + B1 = B4; \ } while(0); -#define SBoxE2(b0, b1, b2, b3) \ - do \ - { \ - b0 = _mm_andnot_si128(b0, _mm_set1_epi8(0xFF)); \ - b2 = _mm_andnot_si128(b2, _mm_set1_epi8(0xFF)); \ - __m128i b4 = b0; \ - b0 = _mm_and_si128(b0, b1); \ - b2 = _mm_xor_si128(b2, b0); \ - b0 = _mm_or_si128(b0, b3); \ - b3 = _mm_xor_si128(b3, b2); \ - b1 = _mm_xor_si128(b1, b0); \ - b0 = _mm_xor_si128(b0, b4); \ - b4 = _mm_or_si128(b4, b1); \ - b1 = _mm_xor_si128(b1, b3); \ - b2 = _mm_or_si128(b2, b0); \ - b2 = _mm_and_si128(b2, b4); \ - b0 = _mm_xor_si128(b0, b1); \ - b1 = _mm_and_si128(b1, b2); \ - b1 = _mm_xor_si128(b1, b0); \ - b0 = _mm_and_si128(b0, b2); \ - b4 = _mm_xor_si128(b4, b0); \ - b0 = b2; \ - b2 = b3; \ - b3 = b1; \ - b1 = b4; \ - } while(0); +#define SBoxE2(B0, B1, B2, B3) \ + do { \ + B0 = _mm_andnot_si128(B0, _mm_set1_epi8(0xFF)); \ + B2 = _mm_andnot_si128(B2, _mm_set1_epi8(0xFF)); \ + __m128i B4 = B0; \ + B0 = _mm_and_si128(B0, B1); \ + B2 = _mm_xor_si128(B2, B0); \ + B0 = _mm_or_si128(B0, B3); \ + B3 = _mm_xor_si128(B3, B2); \ + B1 = _mm_xor_si128(B1, B0); \ + B0 = _mm_xor_si128(B0, B4); \ + B4 = _mm_or_si128(B4, B1); \ + B1 = _mm_xor_si128(B1, B3); \ + B2 = _mm_or_si128(B2, B0); \ + B2 = _mm_and_si128(B2, B4); \ + B0 = _mm_xor_si128(B0, B1); \ + B1 = _mm_and_si128(B1, B2); \ + B1 = _mm_xor_si128(B1, B0); \ + B0 = _mm_and_si128(B0, B2); \ + B4 = _mm_xor_si128(B4, B0); \ + B0 = B2; \ + B2 = B3; \ + B3 = B1; \ + B1 = B4; \ + } while(0); -#define SBoxE3(b0, b1, b2, b3) \ - do \ - { \ - __m128i b4 = b0; \ - b0 = _mm_and_si128(b0, b2); \ - b0 = _mm_xor_si128(b0, b3); \ - b2 = _mm_xor_si128(b2, b1); \ - b2 = _mm_xor_si128(b2, b0); \ - b3 = _mm_or_si128(b3, b4); \ - b3 = _mm_xor_si128(b3, b1); \ - b4 = _mm_xor_si128(b4, b2); \ - b1 = b3; \ - b3 = _mm_or_si128(b3, b4); \ - b3 = _mm_xor_si128(b3, b0); \ - b0 = _mm_and_si128(b0, b1); \ - b4 = _mm_xor_si128(b4, b0); \ - b1 = _mm_xor_si128(b1, b3); \ - b1 = _mm_xor_si128(b1, b4); \ - b4 = _mm_andnot_si128(b4, _mm_set1_epi8(0xFF)); \ - b0 = b2; \ - b2 = b1; \ - b1 = b3; \ - b3 = b4; \ - } while(0); +#define SBoxE3(B0, B1, B2, B3) \ + do { \ + __m128i B4 = B0; \ + B0 = _mm_and_si128(B0, B2); \ + B0 = _mm_xor_si128(B0, B3); \ + B2 = _mm_xor_si128(B2, B1); \ + B2 = _mm_xor_si128(B2, B0); \ + B3 = _mm_or_si128(B3, B4); \ + B3 = _mm_xor_si128(B3, B1); \ + B4 = _mm_xor_si128(B4, B2); \ + B1 = B3; \ + B3 = _mm_or_si128(B3, B4); \ + B3 = _mm_xor_si128(B3, B0); \ + B0 = _mm_and_si128(B0, B1); \ + B4 = _mm_xor_si128(B4, B0); \ + B1 = _mm_xor_si128(B1, B3); \ + B1 = _mm_xor_si128(B1, B4); \ + B4 = _mm_andnot_si128(B4, _mm_set1_epi8(0xFF)); \ + B0 = B2; \ + B2 = B1; \ + B1 = B3; \ + B3 = B4; \ + } while(0); -#define SBoxE4(b0, b1, b2, b3) \ - do \ - { \ - __m128i b4 = b0; \ - b0 = _mm_or_si128(b0, b3); \ - b3 = _mm_xor_si128(b3, b1); \ - b1 = _mm_and_si128(b1, b4); \ - b4 = _mm_xor_si128(b4, b2); \ - b2 = _mm_xor_si128(b2, b3); \ - b3 = _mm_and_si128(b3, b0); \ - b4 = _mm_or_si128(b4, b1); \ - b3 = _mm_xor_si128(b3, b4); \ - b0 = _mm_xor_si128(b0, b1); \ - b4 = _mm_and_si128(b4, b0); \ - b1 = _mm_xor_si128(b1, b3); \ - b4 = _mm_xor_si128(b4, b2); \ - b1 = _mm_or_si128(b1, b0); \ - b1 = _mm_xor_si128(b1, b2); \ - b0 = _mm_xor_si128(b0, b3); \ - b2 = b1; \ - b1 = _mm_or_si128(b1, b3); \ - b1 = _mm_xor_si128(b1, b0); \ - b0 = b1; \ - b1 = b2; \ - b2 = b3; \ - b3 = b4; \ - } while(0); +#define SBoxE4(B0, B1, B2, B3) \ + do { \ + __m128i B4 = B0; \ + B0 = _mm_or_si128(B0, B3); \ + B3 = _mm_xor_si128(B3, B1); \ + B1 = _mm_and_si128(B1, B4); \ + B4 = _mm_xor_si128(B4, B2); \ + B2 = _mm_xor_si128(B2, B3); \ + B3 = _mm_and_si128(B3, B0); \ + B4 = _mm_or_si128(B4, B1); \ + B3 = _mm_xor_si128(B3, B4); \ + B0 = _mm_xor_si128(B0, B1); \ + B4 = _mm_and_si128(B4, B0); \ + B1 = _mm_xor_si128(B1, B3); \ + B4 = _mm_xor_si128(B4, B2); \ + B1 = _mm_or_si128(B1, B0); \ + B1 = _mm_xor_si128(B1, B2); \ + B0 = _mm_xor_si128(B0, B3); \ + B2 = B1; \ + B1 = _mm_or_si128(B1, B3); \ + B1 = _mm_xor_si128(B1, B0); \ + B0 = B1; \ + B1 = B2; \ + B2 = B3; \ + B3 = B4; \ + } while(0); -#define SBoxE5(b0, b1, b2, b3) \ - do \ - { \ - b1 = _mm_xor_si128(b1, b3); \ - b3 = _mm_andnot_si128(b3, _mm_set1_epi8(0xFF)); \ - b2 = _mm_xor_si128(b2, b3); \ - b3 = _mm_xor_si128(b3, b0); \ - __m128i b4 = b1; \ - b1 = _mm_and_si128(b1, b3); \ - b1 = _mm_xor_si128(b1, b2); \ - b4 = _mm_xor_si128(b4, b3); \ - b0 = _mm_xor_si128(b0, b4); \ - b2 = _mm_and_si128(b2, b4); \ - b2 = _mm_xor_si128(b2, b0); \ - b0 = _mm_and_si128(b0, b1); \ - b3 = _mm_xor_si128(b3, b0); \ - b4 = _mm_or_si128(b4, b1); \ - b4 = _mm_xor_si128(b4, b0); \ - b0 = _mm_or_si128(b0, b3); \ - b0 = _mm_xor_si128(b0, b2); \ - b2 = _mm_and_si128(b2, b3); \ - b0 = _mm_andnot_si128(b0, _mm_set1_epi8(0xFF)); \ - b4 = _mm_xor_si128(b4, b2); \ - b2 = b0; \ - b0 = b1; \ - b1 = b4; \ - } while(0); +#define SBoxE5(B0, B1, B2, B3) \ + do { \ + B1 = _mm_xor_si128(B1, B3); \ + B3 = _mm_andnot_si128(B3, _mm_set1_epi8(0xFF)); \ + B2 = _mm_xor_si128(B2, B3); \ + B3 = _mm_xor_si128(B3, B0); \ + __m128i B4 = B1; \ + B1 = _mm_and_si128(B1, B3); \ + B1 = _mm_xor_si128(B1, B2); \ + B4 = _mm_xor_si128(B4, B3); \ + B0 = _mm_xor_si128(B0, B4); \ + B2 = _mm_and_si128(B2, B4); \ + B2 = _mm_xor_si128(B2, B0); \ + B0 = _mm_and_si128(B0, B1); \ + B3 = _mm_xor_si128(B3, B0); \ + B4 = _mm_or_si128(B4, B1); \ + B4 = _mm_xor_si128(B4, B0); \ + B0 = _mm_or_si128(B0, B3); \ + B0 = _mm_xor_si128(B0, B2); \ + B2 = _mm_and_si128(B2, B3); \ + B0 = _mm_andnot_si128(B0, _mm_set1_epi8(0xFF)); \ + B4 = _mm_xor_si128(B4, B2); \ + B2 = B0; \ + B0 = B1; \ + B1 = B4; \ + } while(0); -#define SBoxE6(b0, b1, b2, b3) \ - do \ - { \ - b0 = _mm_xor_si128(b0, b1); \ - b1 = _mm_xor_si128(b1, b3); \ - b3 = _mm_andnot_si128(b3, _mm_set1_epi8(0xFF)); \ - __m128i b4 = b1; \ - b1 = _mm_and_si128(b1, b0); \ - b2 = _mm_xor_si128(b2, b3); \ - b1 = _mm_xor_si128(b1, b2); \ - b2 = _mm_or_si128(b2, b4); \ - b4 = _mm_xor_si128(b4, b3); \ - b3 = _mm_and_si128(b3, b1); \ - b3 = _mm_xor_si128(b3, b0); \ - b4 = _mm_xor_si128(b4, b1); \ - b4 = _mm_xor_si128(b4, b2); \ - b2 = _mm_xor_si128(b2, b0); \ - b0 = _mm_and_si128(b0, b3); \ - b2 = _mm_andnot_si128(b2, _mm_set1_epi8(0xFF)); \ - b0 = _mm_xor_si128(b0, b4); \ - b4 = _mm_or_si128(b4, b3); \ - b4 = _mm_xor_si128(b4, b2); \ - b2 = b0; \ - b0 = b1; \ - b1 = b3; \ - b3 = b4; \ - } while(0); +#define SBoxE6(B0, B1, B2, B3) \ + do { \ + B0 = _mm_xor_si128(B0, B1); \ + B1 = _mm_xor_si128(B1, B3); \ + B3 = _mm_andnot_si128(B3, _mm_set1_epi8(0xFF)); \ + __m128i B4 = B1; \ + B1 = _mm_and_si128(B1, B0); \ + B2 = _mm_xor_si128(B2, B3); \ + B1 = _mm_xor_si128(B1, B2); \ + B2 = _mm_or_si128(B2, B4); \ + B4 = _mm_xor_si128(B4, B3); \ + B3 = _mm_and_si128(B3, B1); \ + B3 = _mm_xor_si128(B3, B0); \ + B4 = _mm_xor_si128(B4, B1); \ + B4 = _mm_xor_si128(B4, B2); \ + B2 = _mm_xor_si128(B2, B0); \ + B0 = _mm_and_si128(B0, B3); \ + B2 = _mm_andnot_si128(B2, _mm_set1_epi8(0xFF)); \ + B0 = _mm_xor_si128(B0, B4); \ + B4 = _mm_or_si128(B4, B3); \ + B4 = _mm_xor_si128(B4, B2); \ + B2 = B0; \ + B0 = B1; \ + B1 = B3; \ + B3 = B4; \ + } while(0); -#define SBoxE7(b0, b1, b2, b3) \ - do \ - { \ - b2 = _mm_andnot_si128(b2, _mm_set1_epi8(0xFF)); \ - __m128i b4 = b3; \ - b3 = _mm_and_si128(b3, b0); \ - b0 = _mm_xor_si128(b0, b4); \ - b3 = _mm_xor_si128(b3, b2); \ - b2 = _mm_or_si128(b2, b4); \ - b1 = _mm_xor_si128(b1, b3); \ - b2 = _mm_xor_si128(b2, b0); \ - b0 = _mm_or_si128(b0, b1); \ - b2 = _mm_xor_si128(b2, b1); \ - b4 = _mm_xor_si128(b4, b0); \ - b0 = _mm_or_si128(b0, b3); \ - b0 = _mm_xor_si128(b0, b2); \ - b4 = _mm_xor_si128(b4, b3); \ - b4 = _mm_xor_si128(b4, b0); \ - b3 = _mm_andnot_si128(b3, _mm_set1_epi8(0xFF)); \ - b2 = _mm_and_si128(b2, b4); \ - b3 = _mm_xor_si128(b3, b2); \ - b2 = b4; \ - } while(0); +#define SBoxE7(B0, B1, B2, B3) \ + do { \ + B2 = _mm_andnot_si128(B2, _mm_set1_epi8(0xFF)); \ + __m128i B4 = B3; \ + B3 = _mm_and_si128(B3, B0); \ + B0 = _mm_xor_si128(B0, B4); \ + B3 = _mm_xor_si128(B3, B2); \ + B2 = _mm_or_si128(B2, B4); \ + B1 = _mm_xor_si128(B1, B3); \ + B2 = _mm_xor_si128(B2, B0); \ + B0 = _mm_or_si128(B0, B1); \ + B2 = _mm_xor_si128(B2, B1); \ + B4 = _mm_xor_si128(B4, B0); \ + B0 = _mm_or_si128(B0, B3); \ + B0 = _mm_xor_si128(B0, B2); \ + B4 = _mm_xor_si128(B4, B3); \ + B4 = _mm_xor_si128(B4, B0); \ + B3 = _mm_andnot_si128(B3, _mm_set1_epi8(0xFF)); \ + B2 = _mm_and_si128(B2, B4); \ + B3 = _mm_xor_si128(B3, B2); \ + B2 = B4; \ + } while(0); -#define SBoxE8(b0, b1, b2, b3) \ - do \ - { \ - __m128i b4 = b1; \ - b1 = _mm_or_si128(b1, b2); \ - b1 = _mm_xor_si128(b1, b3); \ - b4 = _mm_xor_si128(b4, b2); \ - b2 = _mm_xor_si128(b2, b1); \ - b3 = _mm_or_si128(b3, b4); \ - b3 = _mm_and_si128(b3, b0); \ - b4 = _mm_xor_si128(b4, b2); \ - b3 = _mm_xor_si128(b3, b1); \ - b1 = _mm_or_si128(b1, b4); \ - b1 = _mm_xor_si128(b1, b0); \ - b0 = _mm_or_si128(b0, b4); \ - b0 = _mm_xor_si128(b0, b2); \ - b1 = _mm_xor_si128(b1, b4); \ - b2 = _mm_xor_si128(b2, b1); \ - b1 = _mm_and_si128(b1, b0); \ - b1 = _mm_xor_si128(b1, b4); \ - b2 = _mm_andnot_si128(b2, _mm_set1_epi8(0xFF)); \ - b2 = _mm_or_si128(b2, b0); \ - b4 = _mm_xor_si128(b4, b2); \ - b2 = b1; \ - b1 = b3; \ - b3 = b0; \ - b0 = b4; \ - } while(0); +#define SBoxE8(B0, B1, B2, B3) \ + do { \ + __m128i B4 = B1; \ + B1 = _mm_or_si128(B1, B2); \ + B1 = _mm_xor_si128(B1, B3); \ + B4 = _mm_xor_si128(B4, B2); \ + B2 = _mm_xor_si128(B2, B1); \ + B3 = _mm_or_si128(B3, B4); \ + B3 = _mm_and_si128(B3, B0); \ + B4 = _mm_xor_si128(B4, B2); \ + B3 = _mm_xor_si128(B3, B1); \ + B1 = _mm_or_si128(B1, B4); \ + B1 = _mm_xor_si128(B1, B0); \ + B0 = _mm_or_si128(B0, B4); \ + B0 = _mm_xor_si128(B0, B2); \ + B1 = _mm_xor_si128(B1, B4); \ + B2 = _mm_xor_si128(B2, B1); \ + B1 = _mm_and_si128(B1, B0); \ + B1 = _mm_xor_si128(B1, B4); \ + B2 = _mm_andnot_si128(B2, _mm_set1_epi8(0xFF)); \ + B2 = _mm_or_si128(B2, B0); \ + B4 = _mm_xor_si128(B4, B2); \ + B2 = B1; \ + B1 = B3; \ + B3 = B0; \ + B0 = B4; \ + } while(0); #endif -- cgit v1.2.3 From 8d2ceae6d43ab1e21604e112f437b1494def5ef8 Mon Sep 17 00:00:00 2001 From: lloyd Date: Wed, 12 Aug 2009 14:42:34 +0000 Subject: Small code cleanups in SSE2 Serpent --- src/block/serpent_sse2/serp_sse2.cpp | 6 +- src/block/serpent_sse2/serp_sse2_sbox.h | 381 ++++++++++++++++---------------- 2 files changed, 195 insertions(+), 192 deletions(-) (limited to 'src') diff --git a/src/block/serpent_sse2/serp_sse2.cpp b/src/block/serpent_sse2/serp_sse2.cpp index ea937c95a..5ce7d8f47 100644 --- a/src/block/serpent_sse2/serp_sse2.cpp +++ b/src/block/serpent_sse2/serp_sse2.cpp @@ -65,6 +65,8 @@ void serpent_encrypt_4(const byte in[64], byte out[64], const u32bit keys_32[132]) { + const __m128i all_ones = _mm_set1_epi8(0xFF); + const __m128i* keys = (const __m128i*)(keys_32); __m128i* out_mm = (__m128i*)(out); __m128i* in_mm = (__m128i*)(in); @@ -84,6 +86,7 @@ void serpent_encrypt_4(const byte in[64], key_xor( 5,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor( 6,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor( 7,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 8,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor( 9,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(10,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); @@ -92,6 +95,7 @@ void serpent_encrypt_4(const byte in[64], key_xor(13,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(14,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(15,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(16,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(17,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(18,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); @@ -100,6 +104,7 @@ void serpent_encrypt_4(const byte in[64], key_xor(21,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(22,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(23,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(24,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(25,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); key_xor(26,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); @@ -127,7 +132,6 @@ void Serpent_SSE2::encrypt_n(const byte in[], byte out[], u32bit blocks) const while(blocks >= 4) { serpent_encrypt_4(in, out, this->round_key); - //Serpent::encrypt_n(in, out, 4); in += 4 * BLOCK_SIZE; out += 4 * BLOCK_SIZE; blocks -= 4; diff --git a/src/block/serpent_sse2/serp_sse2_sbox.h b/src/block/serpent_sse2/serp_sse2_sbox.h index bc8678a89..1660643ad 100644 --- a/src/block/serpent_sse2/serp_sse2_sbox.h +++ b/src/block/serpent_sse2/serp_sse2_sbox.h @@ -8,211 +8,210 @@ #ifndef SERPENT_SSE2_SBOXES_H__ #define SERPENT_SSE2_SBOXES_H__ -#define SBoxE1(B0, B1, B2, B3) \ - do { \ - B3 = _mm_xor_si128(B3, B0); \ - __m128i B4 = B1; \ - B1 = _mm_and_si128(B1, B3); \ - B4 = _mm_xor_si128(B4, B2); \ - B1 = _mm_xor_si128(B1, B0); \ - B0 = _mm_or_si128(B0, B3); \ - B0 = _mm_xor_si128(B0, B4); \ - B4 = _mm_xor_si128(B4, B3); \ - B3 = _mm_xor_si128(B3, B2); \ - B2 = _mm_or_si128(B2, B1); \ - B2 = _mm_xor_si128(B2, B4); \ - B4 = _mm_andnot_si128(B4, _mm_set1_epi8(0xFF)); \ - B4 = _mm_or_si128(B4, B1); \ - B1 = _mm_xor_si128(B1, B3); \ - B1 = _mm_xor_si128(B1, B4); \ - B3 = _mm_or_si128(B3, B0); \ - B1 = _mm_xor_si128(B1, B3); \ - B4 = _mm_xor_si128(B4, B3); \ - B3 = B0; \ - B0 = B1; \ - B1 = B4; \ +#define SBoxE1(B0, B1, B2, B3) \ + do { \ + B3 = _mm_xor_si128(B3, B0); \ + __m128i B4 = B1; \ + B1 = _mm_and_si128(B1, B3); \ + B4 = _mm_xor_si128(B4, B2); \ + B1 = _mm_xor_si128(B1, B0); \ + B0 = _mm_or_si128(B0, B3); \ + B0 = _mm_xor_si128(B0, B4); \ + B4 = _mm_xor_si128(B4, B3); \ + B3 = _mm_xor_si128(B3, B2); \ + B2 = _mm_or_si128(B2, B1); \ + B2 = _mm_xor_si128(B2, B4); \ + B4 = _mm_xor_si128(B4, all_ones); \ + B4 = _mm_or_si128(B4, B1); \ + B1 = _mm_xor_si128(B1, B3); \ + B1 = _mm_xor_si128(B1, B4); \ + B3 = _mm_or_si128(B3, B0); \ + B1 = _mm_xor_si128(B1, B3); \ + B4 = _mm_xor_si128(B4, B3); \ + B3 = B0; \ + B0 = B1; \ + B1 = B4; \ } while(0); -#define SBoxE2(B0, B1, B2, B3) \ - do { \ - B0 = _mm_andnot_si128(B0, _mm_set1_epi8(0xFF)); \ - B2 = _mm_andnot_si128(B2, _mm_set1_epi8(0xFF)); \ - __m128i B4 = B0; \ - B0 = _mm_and_si128(B0, B1); \ - B2 = _mm_xor_si128(B2, B0); \ - B0 = _mm_or_si128(B0, B3); \ - B3 = _mm_xor_si128(B3, B2); \ - B1 = _mm_xor_si128(B1, B0); \ - B0 = _mm_xor_si128(B0, B4); \ - B4 = _mm_or_si128(B4, B1); \ - B1 = _mm_xor_si128(B1, B3); \ - B2 = _mm_or_si128(B2, B0); \ - B2 = _mm_and_si128(B2, B4); \ - B0 = _mm_xor_si128(B0, B1); \ - B1 = _mm_and_si128(B1, B2); \ - B1 = _mm_xor_si128(B1, B0); \ - B0 = _mm_and_si128(B0, B2); \ - B4 = _mm_xor_si128(B4, B0); \ - B0 = B2; \ - B2 = B3; \ - B3 = B1; \ - B1 = B4; \ +#define SBoxE2(B0, B1, B2, B3) \ + do { \ + B0 = _mm_xor_si128(B0, all_ones); \ + B2 = _mm_xor_si128(B2, all_ones); \ + __m128i B4 = B0; \ + B0 = _mm_and_si128(B0, B1); \ + B2 = _mm_xor_si128(B2, B0); \ + B0 = _mm_or_si128(B0, B3); \ + B3 = _mm_xor_si128(B3, B2); \ + B1 = _mm_xor_si128(B1, B0); \ + B0 = _mm_xor_si128(B0, B4); \ + B4 = _mm_or_si128(B4, B1); \ + B1 = _mm_xor_si128(B1, B3); \ + B2 = _mm_or_si128(B2, B0); \ + B2 = _mm_and_si128(B2, B4); \ + B0 = _mm_xor_si128(B0, B1); \ + B1 = _mm_and_si128(B1, B2); \ + B1 = _mm_xor_si128(B1, B0); \ + B0 = _mm_and_si128(B0, B2); \ + B4 = _mm_xor_si128(B4, B0); \ + B0 = B2; \ + B2 = B3; \ + B3 = B1; \ + B1 = B4; \ } while(0); -#define SBoxE3(B0, B1, B2, B3) \ - do { \ - __m128i B4 = B0; \ - B0 = _mm_and_si128(B0, B2); \ - B0 = _mm_xor_si128(B0, B3); \ - B2 = _mm_xor_si128(B2, B1); \ - B2 = _mm_xor_si128(B2, B0); \ - B3 = _mm_or_si128(B3, B4); \ - B3 = _mm_xor_si128(B3, B1); \ - B4 = _mm_xor_si128(B4, B2); \ - B1 = B3; \ - B3 = _mm_or_si128(B3, B4); \ - B3 = _mm_xor_si128(B3, B0); \ - B0 = _mm_and_si128(B0, B1); \ - B4 = _mm_xor_si128(B4, B0); \ - B1 = _mm_xor_si128(B1, B3); \ - B1 = _mm_xor_si128(B1, B4); \ - B4 = _mm_andnot_si128(B4, _mm_set1_epi8(0xFF)); \ - B0 = B2; \ - B2 = B1; \ - B1 = B3; \ - B3 = B4; \ +#define SBoxE3(B0, B1, B2, B3) \ + do { \ + __m128i B4 = B0; \ + B0 = _mm_and_si128(B0, B2); \ + B0 = _mm_xor_si128(B0, B3); \ + B2 = _mm_xor_si128(B2, B1); \ + B2 = _mm_xor_si128(B2, B0); \ + B3 = _mm_or_si128(B3, B4); \ + B3 = _mm_xor_si128(B3, B1); \ + B4 = _mm_xor_si128(B4, B2); \ + B1 = B3; \ + B3 = _mm_or_si128(B3, B4); \ + B3 = _mm_xor_si128(B3, B0); \ + B0 = _mm_and_si128(B0, B1); \ + B4 = _mm_xor_si128(B4, B0); \ + B1 = _mm_xor_si128(B1, B3); \ + B1 = _mm_xor_si128(B1, B4); \ + B4 = _mm_xor_si128(B4, all_ones); \ + B0 = B2; \ + B2 = B1; \ + B1 = B3; \ + B3 = B4; \ } while(0); -#define SBoxE4(B0, B1, B2, B3) \ - do { \ - __m128i B4 = B0; \ - B0 = _mm_or_si128(B0, B3); \ - B3 = _mm_xor_si128(B3, B1); \ - B1 = _mm_and_si128(B1, B4); \ - B4 = _mm_xor_si128(B4, B2); \ - B2 = _mm_xor_si128(B2, B3); \ - B3 = _mm_and_si128(B3, B0); \ - B4 = _mm_or_si128(B4, B1); \ - B3 = _mm_xor_si128(B3, B4); \ - B0 = _mm_xor_si128(B0, B1); \ - B4 = _mm_and_si128(B4, B0); \ - B1 = _mm_xor_si128(B1, B3); \ - B4 = _mm_xor_si128(B4, B2); \ - B1 = _mm_or_si128(B1, B0); \ - B1 = _mm_xor_si128(B1, B2); \ - B0 = _mm_xor_si128(B0, B3); \ - B2 = B1; \ - B1 = _mm_or_si128(B1, B3); \ - B1 = _mm_xor_si128(B1, B0); \ - B0 = B1; \ - B1 = B2; \ - B2 = B3; \ - B3 = B4; \ +#define SBoxE4(B0, B1, B2, B3) \ + do { \ + __m128i B4 = B0; \ + B0 = _mm_or_si128(B0, B3); \ + B3 = _mm_xor_si128(B3, B1); \ + B1 = _mm_and_si128(B1, B4); \ + B4 = _mm_xor_si128(B4, B2); \ + B2 = _mm_xor_si128(B2, B3); \ + B3 = _mm_and_si128(B3, B0); \ + B4 = _mm_or_si128(B4, B1); \ + B3 = _mm_xor_si128(B3, B4); \ + B0 = _mm_xor_si128(B0, B1); \ + B4 = _mm_and_si128(B4, B0); \ + B1 = _mm_xor_si128(B1, B3); \ + B4 = _mm_xor_si128(B4, B2); \ + B1 = _mm_or_si128(B1, B0); \ + B1 = _mm_xor_si128(B1, B2); \ + B0 = _mm_xor_si128(B0, B3); \ + B2 = B1; \ + B1 = _mm_or_si128(B1, B3); \ + B0 = _mm_xor_si128(B0, B1); \ + B1 = B2; \ + B2 = B3; \ + B3 = B4; \ } while(0); -#define SBoxE5(B0, B1, B2, B3) \ - do { \ - B1 = _mm_xor_si128(B1, B3); \ - B3 = _mm_andnot_si128(B3, _mm_set1_epi8(0xFF)); \ - B2 = _mm_xor_si128(B2, B3); \ - B3 = _mm_xor_si128(B3, B0); \ - __m128i B4 = B1; \ - B1 = _mm_and_si128(B1, B3); \ - B1 = _mm_xor_si128(B1, B2); \ - B4 = _mm_xor_si128(B4, B3); \ - B0 = _mm_xor_si128(B0, B4); \ - B2 = _mm_and_si128(B2, B4); \ - B2 = _mm_xor_si128(B2, B0); \ - B0 = _mm_and_si128(B0, B1); \ - B3 = _mm_xor_si128(B3, B0); \ - B4 = _mm_or_si128(B4, B1); \ - B4 = _mm_xor_si128(B4, B0); \ - B0 = _mm_or_si128(B0, B3); \ - B0 = _mm_xor_si128(B0, B2); \ - B2 = _mm_and_si128(B2, B3); \ - B0 = _mm_andnot_si128(B0, _mm_set1_epi8(0xFF)); \ - B4 = _mm_xor_si128(B4, B2); \ - B2 = B0; \ - B0 = B1; \ - B1 = B4; \ +#define SBoxE5(B0, B1, B2, B3) \ + do { \ + B1 = _mm_xor_si128(B1, B3); \ + B3 = _mm_xor_si128(B3, all_ones); \ + B2 = _mm_xor_si128(B2, B3); \ + B3 = _mm_xor_si128(B3, B0); \ + __m128i B4 = B1; \ + B1 = _mm_and_si128(B1, B3); \ + B1 = _mm_xor_si128(B1, B2); \ + B4 = _mm_xor_si128(B4, B3); \ + B0 = _mm_xor_si128(B0, B4); \ + B2 = _mm_and_si128(B2, B4); \ + B2 = _mm_xor_si128(B2, B0); \ + B0 = _mm_and_si128(B0, B1); \ + B3 = _mm_xor_si128(B3, B0); \ + B4 = _mm_or_si128(B4, B1); \ + B4 = _mm_xor_si128(B4, B0); \ + B0 = _mm_or_si128(B0, B3); \ + B0 = _mm_xor_si128(B0, B2); \ + B2 = _mm_and_si128(B2, B3); \ + B0 = _mm_xor_si128(B0, all_ones); \ + B4 = _mm_xor_si128(B4, B2); \ + B2 = B0; \ + B0 = B1; \ + B1 = B4; \ } while(0); -#define SBoxE6(B0, B1, B2, B3) \ - do { \ - B0 = _mm_xor_si128(B0, B1); \ - B1 = _mm_xor_si128(B1, B3); \ - B3 = _mm_andnot_si128(B3, _mm_set1_epi8(0xFF)); \ - __m128i B4 = B1; \ - B1 = _mm_and_si128(B1, B0); \ - B2 = _mm_xor_si128(B2, B3); \ - B1 = _mm_xor_si128(B1, B2); \ - B2 = _mm_or_si128(B2, B4); \ - B4 = _mm_xor_si128(B4, B3); \ - B3 = _mm_and_si128(B3, B1); \ - B3 = _mm_xor_si128(B3, B0); \ - B4 = _mm_xor_si128(B4, B1); \ - B4 = _mm_xor_si128(B4, B2); \ - B2 = _mm_xor_si128(B2, B0); \ - B0 = _mm_and_si128(B0, B3); \ - B2 = _mm_andnot_si128(B2, _mm_set1_epi8(0xFF)); \ - B0 = _mm_xor_si128(B0, B4); \ - B4 = _mm_or_si128(B4, B3); \ - B4 = _mm_xor_si128(B4, B2); \ - B2 = B0; \ - B0 = B1; \ - B1 = B3; \ - B3 = B4; \ +#define SBoxE6(B0, B1, B2, B3) \ + do { \ + B0 = _mm_xor_si128(B0, B1); \ + B1 = _mm_xor_si128(B1, B3); \ + B3 = _mm_xor_si128(B3, all_ones); \ + __m128i B4 = B1; \ + B1 = _mm_and_si128(B1, B0); \ + B2 = _mm_xor_si128(B2, B3); \ + B1 = _mm_xor_si128(B1, B2); \ + B2 = _mm_or_si128(B2, B4); \ + B4 = _mm_xor_si128(B4, B3); \ + B3 = _mm_and_si128(B3, B1); \ + B3 = _mm_xor_si128(B3, B0); \ + B4 = _mm_xor_si128(B4, B1); \ + B4 = _mm_xor_si128(B4, B2); \ + B2 = _mm_xor_si128(B2, B0); \ + B0 = _mm_and_si128(B0, B3); \ + B2 = _mm_xor_si128(B2, all_ones); \ + B0 = _mm_xor_si128(B0, B4); \ + B4 = _mm_or_si128(B4, B3); \ + B4 = _mm_xor_si128(B4, B2); \ + B2 = B0; \ + B0 = B1; \ + B1 = B3; \ + B3 = B4; \ } while(0); -#define SBoxE7(B0, B1, B2, B3) \ - do { \ - B2 = _mm_andnot_si128(B2, _mm_set1_epi8(0xFF)); \ - __m128i B4 = B3; \ - B3 = _mm_and_si128(B3, B0); \ - B0 = _mm_xor_si128(B0, B4); \ - B3 = _mm_xor_si128(B3, B2); \ - B2 = _mm_or_si128(B2, B4); \ - B1 = _mm_xor_si128(B1, B3); \ - B2 = _mm_xor_si128(B2, B0); \ - B0 = _mm_or_si128(B0, B1); \ - B2 = _mm_xor_si128(B2, B1); \ - B4 = _mm_xor_si128(B4, B0); \ - B0 = _mm_or_si128(B0, B3); \ - B0 = _mm_xor_si128(B0, B2); \ - B4 = _mm_xor_si128(B4, B3); \ - B4 = _mm_xor_si128(B4, B0); \ - B3 = _mm_andnot_si128(B3, _mm_set1_epi8(0xFF)); \ - B2 = _mm_and_si128(B2, B4); \ - B3 = _mm_xor_si128(B3, B2); \ - B2 = B4; \ +#define SBoxE7(B0, B1, B2, B3) \ + do { \ + B2 = _mm_xor_si128(B2, all_ones); \ + __m128i B4 = B3; \ + B3 = _mm_and_si128(B3, B0); \ + B0 = _mm_xor_si128(B0, B4); \ + B3 = _mm_xor_si128(B3, B2); \ + B2 = _mm_or_si128(B2, B4); \ + B1 = _mm_xor_si128(B1, B3); \ + B2 = _mm_xor_si128(B2, B0); \ + B0 = _mm_or_si128(B0, B1); \ + B2 = _mm_xor_si128(B2, B1); \ + B4 = _mm_xor_si128(B4, B0); \ + B0 = _mm_or_si128(B0, B3); \ + B0 = _mm_xor_si128(B0, B2); \ + B4 = _mm_xor_si128(B4, B3); \ + B4 = _mm_xor_si128(B4, B0); \ + B3 = _mm_xor_si128(B3, all_ones); \ + B2 = _mm_and_si128(B2, B4); \ + B3 = _mm_xor_si128(B3, B2); \ + B2 = B4; \ } while(0); -#define SBoxE8(B0, B1, B2, B3) \ - do { \ - __m128i B4 = B1; \ - B1 = _mm_or_si128(B1, B2); \ - B1 = _mm_xor_si128(B1, B3); \ - B4 = _mm_xor_si128(B4, B2); \ - B2 = _mm_xor_si128(B2, B1); \ - B3 = _mm_or_si128(B3, B4); \ - B3 = _mm_and_si128(B3, B0); \ - B4 = _mm_xor_si128(B4, B2); \ - B3 = _mm_xor_si128(B3, B1); \ - B1 = _mm_or_si128(B1, B4); \ - B1 = _mm_xor_si128(B1, B0); \ - B0 = _mm_or_si128(B0, B4); \ - B0 = _mm_xor_si128(B0, B2); \ - B1 = _mm_xor_si128(B1, B4); \ - B2 = _mm_xor_si128(B2, B1); \ - B1 = _mm_and_si128(B1, B0); \ - B1 = _mm_xor_si128(B1, B4); \ - B2 = _mm_andnot_si128(B2, _mm_set1_epi8(0xFF)); \ - B2 = _mm_or_si128(B2, B0); \ - B4 = _mm_xor_si128(B4, B2); \ - B2 = B1; \ - B1 = B3; \ - B3 = B0; \ - B0 = B4; \ +#define SBoxE8(B0, B1, B2, B3) \ + do { \ + __m128i B4 = B1; \ + B1 = _mm_or_si128(B1, B2); \ + B1 = _mm_xor_si128(B1, B3); \ + B4 = _mm_xor_si128(B4, B2); \ + B2 = _mm_xor_si128(B2, B1); \ + B3 = _mm_or_si128(B3, B4); \ + B3 = _mm_and_si128(B3, B0); \ + B4 = _mm_xor_si128(B4, B2); \ + B3 = _mm_xor_si128(B3, B1); \ + B1 = _mm_or_si128(B1, B4); \ + B1 = _mm_xor_si128(B1, B0); \ + B0 = _mm_or_si128(B0, B4); \ + B0 = _mm_xor_si128(B0, B2); \ + B1 = _mm_xor_si128(B1, B4); \ + B2 = _mm_xor_si128(B2, B1); \ + B1 = _mm_and_si128(B1, B0); \ + B1 = _mm_xor_si128(B1, B4); \ + B2 = _mm_xor_si128(B2, all_ones); \ + B2 = _mm_or_si128(B2, B0); \ + B4 = _mm_xor_si128(B4, B2); \ + B2 = B1; \ + B1 = B3; \ + B3 = B0; \ + B0 = B4; \ } while(0); #endif -- cgit v1.2.3 From fdba61bb05f26f8ed979543b8ee9cfab96295800 Mon Sep 17 00:00:00 2001 From: lloyd Date: Wed, 12 Aug 2009 15:20:34 +0000 Subject: Add SSE2 Serpent decryption --- src/block/serpent_sse2/serp_sse2.cpp | 91 +++++++++++++- src/block/serpent_sse2/serp_sse2_sbox.h | 217 ++++++++++++++++++++++++++++++++ 2 files changed, 307 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/block/serpent_sse2/serp_sse2.cpp b/src/block/serpent_sse2/serp_sse2.cpp index 5ce7d8f47..a3319afec 100644 --- a/src/block/serpent_sse2/serp_sse2.cpp +++ b/src/block/serpent_sse2/serp_sse2.cpp @@ -24,11 +24,14 @@ namespace { } while(0); /* -* Serpent's linear transformation +* Serpent's linear transformations */ #define rotate_left_m128(vec, rot) \ _mm_or_si128(_mm_slli_epi32(vec, rot), _mm_srli_epi32(vec, 32-rot)) +#define rotate_right_m128(vec, rot) \ + _mm_or_si128(_mm_srli_epi32(vec, rot), _mm_slli_epi32(vec, 32-rot)) + #define transform(B0, B1, B2, B3) \ do { \ B0 = rotate_left_m128(B0, 13); \ @@ -43,6 +46,20 @@ namespace { B2 = rotate_left_m128(B2, 22); \ } while(0); +#define i_transform(B0, B1, B2, B3) \ + do { \ + B2 = rotate_right_m128(B2, 22); \ + B0 = rotate_right_m128(B0, 5); \ + B2 = _mm_xor_si128(B2, _mm_xor_si128(B3, _mm_slli_epi32(B1, 7))); \ + B0 = _mm_xor_si128(B0, _mm_xor_si128(B1, B3)); \ + B3 = rotate_right_m128(B3, 7); \ + B1 = rotate_right_m128(B1, 1); \ + B3 = _mm_xor_si128(B3, _mm_xor_si128(B2, _mm_slli_epi32(B0, 3))); \ + B1 = _mm_xor_si128(B1, _mm_xor_si128(B0, B2)); \ + B2 = rotate_right_m128(B2, 3); \ + B0 = rotate_right_m128(B0, 13); \ + } while(0); + /* * 4x4 SSE2 integer matrix transpose */ @@ -122,6 +139,70 @@ void serpent_encrypt_4(const byte in[64], _mm_storeu_si128(out_mm + 3, B3); } +/* +* SSE2 Serpent Decryption of 4 blocks in parallel +*/ +void serpent_decrypt_4(const byte in[64], + byte out[64], + const u32bit keys_32[132]) + { + const __m128i all_ones = _mm_set1_epi8(0xFF); + + const __m128i* keys = (const __m128i*)(keys_32); + __m128i* out_mm = (__m128i*)(out); + __m128i* in_mm = (__m128i*)(in); + + __m128i B0 = _mm_loadu_si128(in_mm); + __m128i B1 = _mm_loadu_si128(in_mm + 1); + __m128i B2 = _mm_loadu_si128(in_mm + 2); + __m128i B3 = _mm_loadu_si128(in_mm + 3); + + transpose(B0, B1, B2, B3); + + key_xor(32,B0,B1,B2,B3); SBoxD8(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(28,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(27,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(26,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(25,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(24,B0,B1,B2,B3); + + i_transform(B0,B1,B2,B3); SBoxD8(B0,B1,B2,B3); key_xor(23,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(22,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(21,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(20,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(19,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(18,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(17,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(16,B0,B1,B2,B3); + + i_transform(B0,B1,B2,B3); SBoxD8(B0,B1,B2,B3); key_xor(15,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(14,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(13,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(12,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(11,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(10,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 9,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 8,B0,B1,B2,B3); + + i_transform(B0,B1,B2,B3); SBoxD8(B0,B1,B2,B3); key_xor( 7,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor( 6,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor( 5,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor( 4,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor( 3,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3); + + transpose(B0, B1, B2, B3); + + _mm_storeu_si128(out_mm , B0); + _mm_storeu_si128(out_mm + 1, B1); + _mm_storeu_si128(out_mm + 2, B2); + _mm_storeu_si128(out_mm + 3, B3); + } + } /* @@ -150,6 +231,14 @@ void Serpent_SSE2::encrypt_n(const byte in[], byte out[], u32bit blocks) const */ void Serpent_SSE2::decrypt_n(const byte in[], byte out[], u32bit blocks) const { + while(blocks >= 4) + { + serpent_decrypt_4(in, out, this->round_key); + in += 4 * BLOCK_SIZE; + out += 4 * BLOCK_SIZE; + blocks -= 4; + } + for(u32bit i = 0; i != blocks; ++i) { Serpent::decrypt_n(in, out, 1); diff --git a/src/block/serpent_sse2/serp_sse2_sbox.h b/src/block/serpent_sse2/serp_sse2_sbox.h index 1660643ad..40c552e87 100644 --- a/src/block/serpent_sse2/serp_sse2_sbox.h +++ b/src/block/serpent_sse2/serp_sse2_sbox.h @@ -214,4 +214,221 @@ B0 = B4; \ } while(0); +#define SBoxD1(B0, B1, B2, B3) \ + do \ + { \ + B2 = _mm_xor_si128(B2, all_ones); \ + __m128i B4 = B1; \ + B1 = _mm_or_si128(B1, B0); \ + B4 = _mm_xor_si128(B4, all_ones); \ + B1 = _mm_xor_si128(B1, B2); \ + B2 = _mm_or_si128(B2, B4); \ + B1 = _mm_xor_si128(B1, B3); \ + B0 = _mm_xor_si128(B0, B4); \ + B2 = _mm_xor_si128(B2, B0); \ + B0 = _mm_and_si128(B0, B3); \ + B4 = _mm_xor_si128(B4, B0); \ + B0 = _mm_or_si128(B0, B1); \ + B0 = _mm_xor_si128(B0, B2); \ + B3 = _mm_xor_si128(B3, B4); \ + B2 = _mm_xor_si128(B2, B1); \ + B3 = _mm_xor_si128(B3, B0); \ + B3 = _mm_xor_si128(B3, B1); \ + B2 = _mm_and_si128(B2, B3); \ + B4 = _mm_xor_si128(B4, B2); \ + B2 = B1; \ + B1 = B4; \ + } while(0); + +#define SBoxD2(B0, B1, B2, B3) \ + do \ + { \ + __m128i B4 = B1; \ + B1 = _mm_xor_si128(B1, B3); \ + B3 = _mm_and_si128(B3, B1); \ + B4 = _mm_xor_si128(B4, B2); \ + B3 = _mm_xor_si128(B3, B0); \ + B0 = _mm_or_si128(B0, B1); \ + B2 = _mm_xor_si128(B2, B3); \ + B0 = _mm_xor_si128(B0, B4); \ + B0 = _mm_or_si128(B0, B2); \ + B1 = _mm_xor_si128(B1, B3); \ + B0 = _mm_xor_si128(B0, B1); \ + B1 = _mm_or_si128(B1, B3); \ + B1 = _mm_xor_si128(B1, B0); \ + B4 = _mm_xor_si128(B4, all_ones); \ + B4 = _mm_xor_si128(B4, B1); \ + B1 = _mm_or_si128(B1, B0); \ + B1 = _mm_xor_si128(B1, B0); \ + B1 = _mm_or_si128(B1, B4); \ + B3 = _mm_xor_si128(B3, B1); \ + B1 = B0; \ + B0 = B4; \ + B4 = B2; \ + B2 = B3; \ + B3 = B4; \ + } while(0); + +#define SBoxD3(B0, B1, B2, B3) \ + do \ + { \ + B2 = _mm_xor_si128(B2, B3); \ + B3 = _mm_xor_si128(B3, B0); \ + __m128i B4 = B3; \ + B3 = _mm_and_si128(B3, B2); \ + B3 = _mm_xor_si128(B3, B1); \ + B1 = _mm_or_si128(B1, B2); \ + B1 = _mm_xor_si128(B1, B4); \ + B4 = _mm_and_si128(B4, B3); \ + B2 = _mm_xor_si128(B2, B3); \ + B4 = _mm_and_si128(B4, B0); \ + B4 = _mm_xor_si128(B4, B2); \ + B2 = _mm_and_si128(B2, B1); \ + B2 = _mm_or_si128(B2, B0); \ + B3 = _mm_xor_si128(B3, all_ones); \ + B2 = _mm_xor_si128(B2, B3); \ + B0 = _mm_xor_si128(B0, B3); \ + B0 = _mm_and_si128(B0, B1); \ + B3 = _mm_xor_si128(B3, B4); \ + B3 = _mm_xor_si128(B3, B0); \ + B0 = B1; \ + B1 = B4; \ + } while(0); + +#define SBoxD4(B0, B1, B2, B3) \ + do \ + { \ + __m128i B4 = B2; \ + B2 = _mm_xor_si128(B2, B1); \ + B0 = _mm_xor_si128(B0, B2); \ + B4 = _mm_and_si128(B4, B2); \ + B4 = _mm_xor_si128(B4, B0); \ + B0 = _mm_and_si128(B0, B1); \ + B1 = _mm_xor_si128(B1, B3); \ + B3 = _mm_or_si128(B3, B4); \ + B2 = _mm_xor_si128(B2, B3); \ + B0 = _mm_xor_si128(B0, B3); \ + B1 = _mm_xor_si128(B1, B4); \ + B3 = _mm_and_si128(B3, B2); \ + B3 = _mm_xor_si128(B3, B1); \ + B1 = _mm_xor_si128(B1, B0); \ + B1 = _mm_or_si128(B1, B2); \ + B0 = _mm_xor_si128(B0, B3); \ + B1 = _mm_xor_si128(B1, B4); \ + B0 = _mm_xor_si128(B0, B1); \ + B4 = B0; \ + B0 = B2; \ + B2 = B3; \ + B3 = B4; \ + } while(0); + +#define SBoxD5(B0, B1, B2, B3) \ + do \ + { \ + __m128i B4 = B2; \ + B2 = _mm_and_si128(B2, B3); \ + B2 = _mm_xor_si128(B2, B1); \ + B1 = _mm_or_si128(B1, B3); \ + B1 = _mm_and_si128(B1, B0); \ + B4 = _mm_xor_si128(B4, B2); \ + B4 = _mm_xor_si128(B4, B1); \ + B1 = _mm_and_si128(B1, B2); \ + B0 = _mm_xor_si128(B0, all_ones); \ + B3 = _mm_xor_si128(B3, B4); \ + B1 = _mm_xor_si128(B1, B3); \ + B3 = _mm_and_si128(B3, B0); \ + B3 = _mm_xor_si128(B3, B2); \ + B0 = _mm_xor_si128(B0, B1); \ + B2 = _mm_and_si128(B2, B0); \ + B3 = _mm_xor_si128(B3, B0); \ + B2 = _mm_xor_si128(B2, B4); \ + B2 = _mm_or_si128(B2, B3); \ + B3 = _mm_xor_si128(B3, B0); \ + B2 = _mm_xor_si128(B2, B1); \ + B1 = B3; \ + B3 = B4; \ + } while(0); + +#define SBoxD6(B0, B1, B2, B3) \ + do \ + { \ + B1 = _mm_xor_si128(B1, all_ones); \ + __m128i B4 = B3; \ + B2 = _mm_xor_si128(B2, B1); \ + B3 = _mm_or_si128(B3, B0); \ + B3 = _mm_xor_si128(B3, B2); \ + B2 = _mm_or_si128(B2, B1); \ + B2 = _mm_and_si128(B2, B0); \ + B4 = _mm_xor_si128(B4, B3); \ + B2 = _mm_xor_si128(B2, B4); \ + B4 = _mm_or_si128(B4, B0); \ + B4 = _mm_xor_si128(B4, B1); \ + B1 = _mm_and_si128(B1, B2); \ + B1 = _mm_xor_si128(B1, B3); \ + B4 = _mm_xor_si128(B4, B2); \ + B3 = _mm_and_si128(B3, B4); \ + B4 = _mm_xor_si128(B4, B1); \ + B3 = _mm_xor_si128(B3, B4); \ + B4 = _mm_xor_si128(B4, all_ones); \ + B3 = _mm_xor_si128(B3, B0); \ + B0 = B1; \ + B1 = B4; \ + B4 = B3; \ + B3 = B2; \ + B2 = B4; \ + } while(0); + +#define SBoxD7(B0, B1, B2, B3) \ + do \ + { \ + B0 = _mm_xor_si128(B0, B2); \ + __m128i B4 = B2; \ + B2 = _mm_and_si128(B2, B0); \ + B4 = _mm_xor_si128(B4, B3); \ + B2 = _mm_xor_si128(B2, all_ones); \ + B3 = _mm_xor_si128(B3, B1); \ + B2 = _mm_xor_si128(B2, B3); \ + B4 = _mm_or_si128(B4, B0); \ + B0 = _mm_xor_si128(B0, B2); \ + B3 = _mm_xor_si128(B3, B4); \ + B4 = _mm_xor_si128(B4, B1); \ + B1 = _mm_and_si128(B1, B3); \ + B1 = _mm_xor_si128(B1, B0); \ + B0 = _mm_xor_si128(B0, B3); \ + B0 = _mm_or_si128(B0, B2); \ + B3 = _mm_xor_si128(B3, B1); \ + B4 = _mm_xor_si128(B4, B0); \ + B0 = B1; \ + B1 = B2; \ + B2 = B4; \ + } while(0); + +#define SBoxD8(B0, B1, B2, B3) \ + do \ + { \ + __m128i B4 = B2; \ + B2 = _mm_xor_si128(B2, B0); \ + B0 = _mm_and_si128(B0, B3); \ + B4 = _mm_or_si128(B4, B3); \ + B2 = _mm_xor_si128(B2, all_ones); \ + B3 = _mm_xor_si128(B3, B1); \ + B1 = _mm_or_si128(B1, B0); \ + B0 = _mm_xor_si128(B0, B2); \ + B2 = _mm_and_si128(B2, B4); \ + B3 = _mm_and_si128(B3, B4); \ + B1 = _mm_xor_si128(B1, B2); \ + B2 = _mm_xor_si128(B2, B0); \ + B0 = _mm_or_si128(B0, B2); \ + B4 = _mm_xor_si128(B4, B1); \ + B0 = _mm_xor_si128(B0, B3); \ + B3 = _mm_xor_si128(B3, B4); \ + B4 = _mm_or_si128(B4, B0); \ + B3 = _mm_xor_si128(B3, B2); \ + B4 = _mm_xor_si128(B4, B2); \ + B2 = B1; \ + B1 = B0; \ + B0 = B3; \ + B3 = B4; \ + } while(0); + #endif -- cgit v1.2.3 From 1c9b4fda79951b1d0cd3c49ad8db8549cf9d9444 Mon Sep 17 00:00:00 2001 From: lloyd Date: Wed, 12 Aug 2009 18:19:39 +0000 Subject: For handling the last few blocks in Serpent_SSE2, invoke encrypt_n with however many blocks remain, rather than looping calling encrypt_n with a block size of 1 each time. --- src/block/serpent_sse2/serp_sse2.cpp | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/block/serpent_sse2/serp_sse2.cpp b/src/block/serpent_sse2/serp_sse2.cpp index a3319afec..c51bb69ab 100644 --- a/src/block/serpent_sse2/serp_sse2.cpp +++ b/src/block/serpent_sse2/serp_sse2.cpp @@ -218,12 +218,7 @@ void Serpent_SSE2::encrypt_n(const byte in[], byte out[], u32bit blocks) const blocks -= 4; } - for(u32bit i = 0; i != blocks; ++i) - { - Serpent::encrypt_n(in, out, 1); - in += BLOCK_SIZE; - out += BLOCK_SIZE; - } + Serpent::encrypt_n(in, out, blocks); } /* @@ -239,12 +234,7 @@ void Serpent_SSE2::decrypt_n(const byte in[], byte out[], u32bit blocks) const blocks -= 4; } - for(u32bit i = 0; i != blocks; ++i) - { - Serpent::decrypt_n(in, out, 1); - in += BLOCK_SIZE; - out += BLOCK_SIZE; - } + Serpent::decrypt_n(in, out, blocks); } } -- cgit v1.2.3 From 690e9fc77872f253004c37d4c6a0707f1a3f1856 Mon Sep 17 00:00:00 2001 From: lloyd Date: Wed, 12 Aug 2009 18:38:15 +0000 Subject: In benchmark.cpp, use encrypt_n when benchmarking block ciphers --- src/benchmark/benchmark.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/benchmark/benchmark.cpp b/src/benchmark/benchmark.cpp index 01e3b94f3..3bbc1f883 100644 --- a/src/benchmark/benchmark.cpp +++ b/src/benchmark/benchmark.cpp @@ -57,8 +57,7 @@ bench_block_cipher(BlockCipher* block_cipher, while(nanoseconds_used < nanoseconds_max) { - for(u32bit i = 0; i != in_blocks; ++i) - block_cipher->encrypt(buf + block_cipher->BLOCK_SIZE * i); + block_cipher->encrypt_n(buf, buf, in_blocks); ++reps; nanoseconds_used = timer.clock() - start; -- cgit v1.2.3 From 921ef0147666aca06693f04155e590e16dde9726 Mon Sep 17 00:00:00 2001 From: lloyd Date: Wed, 12 Aug 2009 20:44:09 +0000 Subject: Use a much faster counter increment system, noticable speedups (~15%) for both Serpent and AES-128 in CTR mode. --- src/modes/ctr/ctr.cpp | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/modes/ctr/ctr.cpp b/src/modes/ctr/ctr.cpp index a3476c474..d458d7848 100644 --- a/src/modes/ctr/ctr.cpp +++ b/src/modes/ctr/ctr.cpp @@ -65,21 +65,24 @@ std::string CTR_BE::name() const */ void CTR_BE::set_iv(const InitializationVector& iv) { - if(iv.length() != cipher->BLOCK_SIZE) + const u32bit BLOCK_SIZE = cipher->BLOCK_SIZE; + + if(iv.length() != BLOCK_SIZE) throw Invalid_IV_Length(name(), iv.length()); enc_buffer.clear(); position = 0; - for(u32bit i = 0; i != PARALLEL_BLOCKS; ++i) + counter.copy(0, iv.begin(), iv.length()); + + for(u32bit i = 1; i != PARALLEL_BLOCKS; ++i) { - counter.copy(i*cipher->BLOCK_SIZE, iv.begin(), iv.length()); + counter.copy(i*BLOCK_SIZE, + counter.begin() + (i-1)*BLOCK_SIZE, BLOCK_SIZE); - // FIXME: this is stupid - for(u32bit j = 0; j != i; ++j) - for(s32bit k = cipher->BLOCK_SIZE - 1; k >= 0; --k) - if(++counter[i*cipher->BLOCK_SIZE+k]) - break; + for(s32bit j = BLOCK_SIZE - 1; j >= 0; --j) + if(++counter[i*BLOCK_SIZE+j]) + break; } cipher->encrypt_n(counter, enc_buffer, PARALLEL_BLOCKS); @@ -122,19 +125,17 @@ void CTR_BE::increment_counter() { for(u32bit i = 0; i != PARALLEL_BLOCKS; ++i) { - // FIXME: Can do it in a single loop - /* - for(u32bit j = 1; j != cipher->BLOCK_SIZE; ++j) - { - byte carry = 0; - byte z = counter[(i+1)*cipher->BLOCK_SIZE-1] + PARALLEL_BLOCKS; - - if( - */ - for(u32bit j = 0; j != PARALLEL_BLOCKS; ++j) - for(s32bit k = cipher->BLOCK_SIZE - 1; k >= 0; --k) - if(++counter[i*cipher->BLOCK_SIZE+k]) + byte* this_ctr = counter + i*cipher->BLOCK_SIZE; + + byte last_byte = this_ctr[cipher->BLOCK_SIZE-1]; + last_byte += PARALLEL_BLOCKS; + + if(this_ctr[cipher->BLOCK_SIZE-1] > last_byte) + for(s32bit j = cipher->BLOCK_SIZE - 2; j >= 0; --j) + if(++this_ctr[j]) break; + + this_ctr[cipher->BLOCK_SIZE-1] = last_byte; } cipher->encrypt_n(counter, enc_buffer, PARALLEL_BLOCKS); -- cgit v1.2.3 From 5a1329ff7d80349598e50a1ec63abb5334ddc2ac Mon Sep 17 00:00:00 2001 From: lloyd Date: Thu, 13 Aug 2009 18:32:38 +0000 Subject: Include basefilt.h in filters.h --- src/filters/filters.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/filters/filters.h b/src/filters/filters.h index 26de6e63c..964be0bd8 100644 --- a/src/filters/filters.h +++ b/src/filters/filters.h @@ -14,8 +14,10 @@ #include #include +#include #include #include + #include #if defined(BOTAN_HAS_BASE64_CODEC) -- cgit v1.2.3 From 6897292463197ed6880d91e76c4ee13f624905c3 Mon Sep 17 00:00:00 2001 From: lloyd Date: Thu, 27 Aug 2009 18:14:55 +0000 Subject: Instead of each SSE2 implementation specifying which compilers + CPUs it works on, have sse2_eng rely on a specific compiler/arch; each sse2 impl depends on the engine anyway, so they will only be loaded if OK. --- src/block/serpent_sse2/info.txt | 12 ------------ src/engine/sse2_eng/info.txt | 9 ++++++++- src/hash/sha1_sse2/info.txt | 12 ------------ 3 files changed, 8 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/block/serpent_sse2/info.txt b/src/block/serpent_sse2/info.txt index 09733e98f..b00ab6e88 100644 --- a/src/block/serpent_sse2/info.txt +++ b/src/block/serpent_sse2/info.txt @@ -10,18 +10,6 @@ serp_sse2.h serp_sse2_sbox.h - -pentium-m -pentium4 -prescott -amd64 - - - -gcc -icc - - serpent sse2_eng diff --git a/src/engine/sse2_eng/info.txt b/src/engine/sse2_eng/info.txt index 6242c7fee..7595b8eb5 100644 --- a/src/engine/sse2_eng/info.txt +++ b/src/engine/sse2_eng/info.txt @@ -10,6 +10,13 @@ eng_sse2.h -ia32 +pentium-m +pentium4 +prescott amd64 + + +gcc +icc + diff --git a/src/hash/sha1_sse2/info.txt b/src/hash/sha1_sse2/info.txt index b8d693b70..995c2513e 100644 --- a/src/hash/sha1_sse2/info.txt +++ b/src/hash/sha1_sse2/info.txt @@ -10,18 +10,6 @@ sha1_sse2.cpp sha1_sse2.h - -pentium-m -pentium4 -prescott -amd64 - - - -gcc -icc - - sha1 sse2_eng -- cgit v1.2.3 From 7c35179056ef9f223eb3ad4c20e16ed8f8540bbb Mon Sep 17 00:00:00 2001 From: lloyd Date: Thu, 27 Aug 2009 18:15:12 +0000 Subject: Remove unneeded include in xtea.cpp --- src/block/xtea/xtea.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/block/xtea/xtea.cpp b/src/block/xtea/xtea.cpp index 0dba5f2be..77543e1e8 100644 --- a/src/block/xtea/xtea.cpp +++ b/src/block/xtea/xtea.cpp @@ -7,7 +7,6 @@ #include #include -#include namespace Botan { -- cgit v1.2.3 From de705485bccc89a695488ebe69b744433388bf21 Mon Sep 17 00:00:00 2001 From: lloyd Date: Mon, 31 Aug 2009 18:19:09 +0000 Subject: Hoist creation of buffer in Lion encrypt loop --- src/block/lion/lion.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/block/lion/lion.cpp b/src/block/lion/lion.cpp index bfff64b67..83c1e3aa3 100644 --- a/src/block/lion/lion.cpp +++ b/src/block/lion/lion.cpp @@ -16,10 +16,10 @@ namespace Botan { */ void Lion::encrypt_n(const byte in[], byte out[], u32bit blocks) const { + SecureVector buffer(LEFT_SIZE); + for(u32bit i = 0; i != blocks; ++i) { - SecureVector buffer(LEFT_SIZE); - xor_buf(buffer, in, key1, LEFT_SIZE); cipher->set_key(buffer, LEFT_SIZE); cipher->encrypt(in + LEFT_SIZE, out + LEFT_SIZE, RIGHT_SIZE); @@ -42,10 +42,10 @@ void Lion::encrypt_n(const byte in[], byte out[], u32bit blocks) const */ void Lion::decrypt_n(const byte in[], byte out[], u32bit blocks) const { + SecureVector buffer(LEFT_SIZE); + for(u32bit i = 0; i != blocks; ++i) { - SecureVector buffer(LEFT_SIZE); - xor_buf(buffer, in, key2, LEFT_SIZE); cipher->set_key(buffer, LEFT_SIZE); cipher->encrypt(in + LEFT_SIZE, out + LEFT_SIZE, RIGHT_SIZE); -- cgit v1.2.3 From 74d860bb2b35ee5184f77a1e3e2bb1b5b92cacd6 Mon Sep 17 00:00:00 2001 From: lloyd Date: Mon, 31 Aug 2009 18:37:43 +0000 Subject: Combine the 4 sbox calculations in the key schedule so that all four are computed in parallel. Not a huge win but slightly faster (which affects things like Lion when using Turing), most likely due to more available ILP --- src/stream/turing/turing.cpp | 48 +++++++++++++++++++++++--------------------- src/stream/turing/turing.h | 2 -- 2 files changed, 25 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/stream/turing/turing.cpp b/src/stream/turing/turing.cpp index b988568c3..1e2203480 100644 --- a/src/stream/turing/turing.cpp +++ b/src/stream/turing/turing.cpp @@ -217,25 +217,6 @@ u32bit Turing::fixedS(u32bit W) return W; } -/* -* Generate the expanded Turing Sbox tables -*/ -void Turing::gen_sbox(MemoryRegion& S, u32bit which, - const MemoryRegion& K) - { - for(u32bit j = 0; j != 256; ++j) - { - u32bit W = 0, C = j; - - for(u32bit k = 0; k < K.size(); ++k) - { - C = SBOX[get_byte(which, K[k]) ^ C]; - W ^= rotate_left(Q_BOX[C], k + 8*which); - } - S[j] = (W & rotate_right(0x00FFFFFF, 8*which)) | (C << (24 - 8*which)); - } - } - /* * Turing Key Schedule */ @@ -250,10 +231,31 @@ void Turing::key_schedule(const byte key[], u32bit length) PHT(K); - gen_sbox(S0, 0, K); - gen_sbox(S1, 1, K); - gen_sbox(S2, 2, K); - gen_sbox(S3, 3, K); + for(u32bit i = 0; i != 256; ++i) + { + u32bit W0 = 0, C0 = i; + u32bit W1 = 0, C1 = i; + u32bit W2 = 0, C2 = i; + u32bit W3 = 0, C3 = i; + + for(u32bit j = 0; j < K.size(); ++j) + { + C0 = SBOX[get_byte(0, K[j]) ^ C0]; + C1 = SBOX[get_byte(1, K[j]) ^ C1]; + C2 = SBOX[get_byte(2, K[j]) ^ C2]; + C3 = SBOX[get_byte(3, K[j]) ^ C3]; + + W0 ^= rotate_left(Q_BOX[C0], j); + W1 ^= rotate_left(Q_BOX[C1], j + 8); + W2 ^= rotate_left(Q_BOX[C2], j + 16); + W3 ^= rotate_left(Q_BOX[C3], j + 24); + } + + S0[i] = (W0 & 0x00FFFFFF) | (C0 << 24); + S1[i] = (W1 & 0xFF00FFFF) | (C1 << 16); + S2[i] = (W2 & 0xFFFF00FF) | (C2 << 8); + S3[i] = (W3 & 0xFFFFFF00) | C3; + } resync(0, 0); } diff --git a/src/stream/turing/turing.h b/src/stream/turing/turing.h index d48c1d8a8..455d3c612 100644 --- a/src/stream/turing/turing.h +++ b/src/stream/turing/turing.h @@ -29,8 +29,6 @@ class BOTAN_DLL Turing : public StreamCipher void generate(); static u32bit fixedS(u32bit); - static void gen_sbox(MemoryRegion&, u32bit, - const MemoryRegion&); static const u32bit Q_BOX[256]; static const byte SBOX[256]; -- cgit v1.2.3 From 8381ceca38f0eee44e893921cbd74fdb3014f7ba Mon Sep 17 00:00:00 2001 From: lloyd Date: Mon, 31 Aug 2009 18:40:51 +0000 Subject: Add Rivest's package transform --- doc/log.txt | 2 + src/aont/info.txt | 17 +++++++ src/aont/package.cpp | 128 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/aont/package.h | 45 ++++++++++++++++++ 4 files changed, 192 insertions(+) create mode 100644 src/aont/info.txt create mode 100644 src/aont/package.cpp create mode 100644 src/aont/package.h (limited to 'src') diff --git a/doc/log.txt b/doc/log.txt index 992829b39..a84e72d62 100644 --- a/doc/log.txt +++ b/doc/log.txt @@ -2,6 +2,8 @@ * 1.9.0-pre, 2009-??-?? - Add support for parallel invocation of block ciphers where possible - Add SSE2 implementation of Serpent + - Add Rivest's package transform (an all or nothing transform) + - Minor speedups to the Turing key schedule * 1.8.6, 2009-08-13 - Add Cryptobox, a set of simple password-based encryption routines diff --git a/src/aont/info.txt b/src/aont/info.txt new file mode 100644 index 000000000..a0387f358 --- /dev/null +++ b/src/aont/info.txt @@ -0,0 +1,17 @@ +realname "All or Nothing Transforms" + +define PACKAGE_TRANSFORM + +load_on auto + + +package.cpp +package.h + + + +block +ctr +rng +filters + diff --git a/src/aont/package.cpp b/src/aont/package.cpp new file mode 100644 index 000000000..6c6b56865 --- /dev/null +++ b/src/aont/package.cpp @@ -0,0 +1,128 @@ +/* +* Rivest's Package Tranform +* +* (C) 2009 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#include +#include +#include +#include +#include + +namespace Botan { + +namespace AllOrNothingTransform { + +void package(RandomNumberGenerator& rng, + BlockCipher* cipher, + const byte input[], u32bit input_len, + byte output[]) + { + if(!cipher->valid_keylength(cipher->BLOCK_SIZE)) + throw Invalid_Argument("AONT::package: Invalid cipher"); + + // The all-zero string which is used both as the CTR IV and as K0 + const std::string all_zeros(cipher->BLOCK_SIZE*2, '0'); + + SymmetricKey package_key(rng, cipher->BLOCK_SIZE); + + // takes ownership of cipher object + Keyed_Filter* ctr_mode = new CTR_BE(cipher, + package_key, + InitializationVector(all_zeros)); + + Pipe pipe(ctr_mode); + + pipe.process_msg(input, input_len); + pipe.read(output, pipe.remaining()); + + // Set K0 (the all zero key) + cipher->set_key(SymmetricKey(all_zeros)); + + SecureVector buf(cipher->BLOCK_SIZE); + + const u32bit blocks = + (input_len + cipher->BLOCK_SIZE - 1) / cipher->BLOCK_SIZE; + + byte* final_block = output + input_len; + clear_mem(final_block, cipher->BLOCK_SIZE); + + // XOR the hash blocks into the final block + for(u32bit i = 0; i != blocks; ++i) + { + u32bit left = std::min(cipher->BLOCK_SIZE, + input_len - cipher->BLOCK_SIZE * i); + + buf.clear(); + copy_mem(&buf[0], output + cipher->BLOCK_SIZE * i, left); + + for(u32bit j = 0; j != 4; ++j) + buf[cipher->BLOCK_SIZE - 1 - j] ^= get_byte(3-j, i); + + cipher->encrypt(buf); + + xor_buf(final_block, buf, cipher->BLOCK_SIZE); + } + + // XOR the random package key into the final block + xor_buf(final_block, package_key.begin(), cipher->BLOCK_SIZE); + } + +void unpackage(BlockCipher* cipher, + const byte input[], u32bit input_len, + byte output[]) + { + if(!cipher->valid_keylength(cipher->BLOCK_SIZE)) + throw Invalid_Argument("AONT::unpackage: Invalid cipher"); + + if(input_len < cipher->BLOCK_SIZE) + throw Invalid_Argument("AONT::unpackage: Input too short"); + + // The all-zero string which is used both as the CTR IV and as K0 + const std::string all_zeros(cipher->BLOCK_SIZE*2, '0'); + + cipher->set_key(SymmetricKey(all_zeros)); + + SecureVector package_key(cipher->BLOCK_SIZE); + SecureVector buf(cipher->BLOCK_SIZE); + + // Copy the package key (masked with the block hashes) + copy_mem(&package_key[0], + input + (input_len - cipher->BLOCK_SIZE), + cipher->BLOCK_SIZE); + + const u32bit blocks = ((input_len - 1) / cipher->BLOCK_SIZE); + + // XOR the blocks into the package key bits + for(u32bit i = 0; i != blocks; ++i) + { + u32bit left = std::min(cipher->BLOCK_SIZE, + input_len - cipher->BLOCK_SIZE * (i+1)); + + buf.clear(); + copy_mem(&buf[0], input + cipher->BLOCK_SIZE * i, left); + + for(u32bit j = 0; j != 4; ++j) + buf[cipher->BLOCK_SIZE - 1 - j] ^= get_byte(3-j, i); + + cipher->encrypt(buf); + + xor_buf(&package_key[0], buf, cipher->BLOCK_SIZE); + } + + // takes ownership of cipher object + Pipe pipe(new CTR_BE(cipher, + SymmetricKey(package_key), + InitializationVector(all_zeros))); + + pipe.process_msg(input, input_len - cipher->BLOCK_SIZE); + + pipe.read(output, pipe.remaining()); + } + +} + +} diff --git a/src/aont/package.h b/src/aont/package.h new file mode 100644 index 000000000..35d2a23fc --- /dev/null +++ b/src/aont/package.h @@ -0,0 +1,45 @@ +/* +* Rivest's Package Tranform +* +* (C) 2009 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#include +#include + +namespace Botan { + +namespace AllOrNothingTransform { + +/** +* Rivest's Package Tranform +* @arg rng the random number generator to use +* @arg cipher the block cipher to use +* @arg input the input data buffer +* @arg input_len the length of the input data in bytes +* @arg output the output data buffer (must be at least +* input_len + cipher->BLOCK_SIZE bytes long) +*/ +void package(RandomNumberGenerator& rng, + BlockCipher* cipher, + const byte input[], u32bit input_len, + byte output[]); + +/** +* Rivest's Package Tranform (Inversion) +* @arg rng the random number generator to use +* @arg cipher the block cipher to use +* @arg input the input data buffer +* @arg input_len the length of the input data in bytes +* @arg output the output data buffer (must be at least +* input_len - cipher->BLOCK_SIZE bytes long) +*/ +void unpackage(BlockCipher* cipher, + const byte input[], u32bit input_len, + byte output[]); + +} + +} -- cgit v1.2.3 From f7595eeae651ff36709793989fd1688303915f5a Mon Sep 17 00:00:00 2001 From: lloyd Date: Fri, 4 Sep 2009 16:11:40 +0000 Subject: According to the Linux sources, S/390 and PowerPC can both do unaligned memory accesses. Since this can be a pretty big win, enable it for them. The m68k apparently also can, except in its (modern) Coldfire version, but it's always big endian so mark that as such. --- src/build-data/arch/m68k | 5 +++++ src/build-data/arch/ppc | 1 + src/build-data/arch/s390 | 3 +++ src/build-data/arch/s390x | 3 +++ 4 files changed, 12 insertions(+) (limited to 'src') diff --git a/src/build-data/arch/m68k b/src/build-data/arch/m68k index 27f246abc..759a3dac1 100644 --- a/src/build-data/arch/m68k +++ b/src/build-data/arch/m68k @@ -2,6 +2,11 @@ realname "Motorola 680x0" default_submodel 68020 +endian big + +# Except for Coldfire +#unaligned ok + 680x0 68k diff --git a/src/build-data/arch/ppc b/src/build-data/arch/ppc index 16112f389..e2dfa6ea2 100644 --- a/src/build-data/arch/ppc +++ b/src/build-data/arch/ppc @@ -1,6 +1,7 @@ realname "PowerPC" endian big +unaligned ok default_submodel ppc604 diff --git a/src/build-data/arch/s390 b/src/build-data/arch/s390 index 392f51397..312b262c4 100644 --- a/src/build-data/arch/s390 +++ b/src/build-data/arch/s390 @@ -2,6 +2,9 @@ realname "S/390 31-bit" default_submodel s390 +endian big +unaligned ok + s390 diff --git a/src/build-data/arch/s390x b/src/build-data/arch/s390x index 49fb0bda7..9fe6bd615 100644 --- a/src/build-data/arch/s390x +++ b/src/build-data/arch/s390x @@ -2,6 +2,9 @@ realname "S/390 64-bit" default_submodel s390x +endian big +unaligned ok + s390x -- cgit v1.2.3 From 0a7e8ead1a85f7003b729742d795730f2b782116 Mon Sep 17 00:00:00 2001 From: lloyd Date: Tue, 8 Sep 2009 15:22:31 +0000 Subject: Add *s before comment lines in file headers --- src/algo_factory/algo_factory.cpp | 4 ++-- src/libstate/scan_name.cpp | 4 ++-- src/libstate/scan_name.h | 4 ++-- src/mac/mac.cpp | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/algo_factory/algo_factory.cpp b/src/algo_factory/algo_factory.cpp index 269c58c3b..3523b19d4 100644 --- a/src/algo_factory/algo_factory.cpp +++ b/src/algo_factory/algo_factory.cpp @@ -1,6 +1,6 @@ /* -Algorithm Factory -(C) 2008 Jack Lloyd +* Algorithm Factory +* (C) 2008 Jack Lloyd * * Distributed under the terms of the Botan license */ diff --git a/src/libstate/scan_name.cpp b/src/libstate/scan_name.cpp index ef771871d..88992d66e 100644 --- a/src/libstate/scan_name.cpp +++ b/src/libstate/scan_name.cpp @@ -1,6 +1,6 @@ /** -SCAN Name Abstraction -(C) 2008 Jack Lloyd +* SCAN Name Abstraction +* (C) 2008 Jack Lloyd * * Distributed under the terms of the Botan license */ diff --git a/src/libstate/scan_name.h b/src/libstate/scan_name.h index 9e7af40d6..b3f2004e2 100644 --- a/src/libstate/scan_name.h +++ b/src/libstate/scan_name.h @@ -1,6 +1,6 @@ /** -SCAN Name Abstraction -(C) 2008 Jack Lloyd +* SCAN Name Abstraction +* (C) 2008 Jack Lloyd * * Distributed under the terms of the Botan license */ diff --git a/src/mac/mac.cpp b/src/mac/mac.cpp index 96df25503..04b259647 100644 --- a/src/mac/mac.cpp +++ b/src/mac/mac.cpp @@ -1,6 +1,6 @@ /** -Message Authentication Code base class -(C) 1999-2008 Jack Lloyd +* Message Authentication Code base class +* (C) 1999-2008 Jack Lloyd * * Distributed under the terms of the Botan license */ -- cgit v1.2.3 From 166e979eebe6e3f136df4deba626b584e1d735ae Mon Sep 17 00:00:00 2001 From: lloyd Date: Tue, 8 Sep 2009 15:54:09 +0000 Subject: Throw Internal_Error instead of Algorithm_Not_Found if no usable RNG is enabled in the build. --- src/rng/auto_rng/auto_rng.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/rng/auto_rng/auto_rng.cpp b/src/rng/auto_rng/auto_rng.cpp index 171c83cca..07b2ddec2 100644 --- a/src/rng/auto_rng/auto_rng.cpp +++ b/src/rng/auto_rng/auto_rng.cpp @@ -140,7 +140,7 @@ AutoSeeded_RNG::AutoSeeded_RNG(u32bit poll_bits) #endif if(!rng) - throw Algorithm_Not_Found("No usable RNG found enabled in build"); + throw Internal_Error("No usable RNG found enabled in build"); /* If X9.31 is available, use it to wrap the other RNG as a failsafe */ #if defined(BOTAN_HAS_X931_RNG) -- cgit v1.2.3 From 1a0f377896cb88b9933b857579113283324d9464 Mon Sep 17 00:00:00 2001 From: lloyd Date: Wed, 9 Sep 2009 15:15:26 +0000 Subject: The first argument of Library_State::set_option was accidentally being passed as a 'const std::string' instead of a const reference. Can't fix in 1.8 since it is ABI breaking. --- src/libstate/libstate.cpp | 2 +- src/libstate/libstate.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/libstate/libstate.cpp b/src/libstate/libstate.cpp index 3275c6493..20e736868 100644 --- a/src/libstate/libstate.cpp +++ b/src/libstate/libstate.cpp @@ -212,7 +212,7 @@ std::string Library_State::deref_alias(const std::string& key) const /* * Set/Add an option */ -void Library_State::set_option(const std::string key, +void Library_State::set_option(const std::string& key, const std::string& value) { set("conf", key, value); diff --git a/src/libstate/libstate.h b/src/libstate/libstate.h index 2493863a9..a0421953e 100644 --- a/src/libstate/libstate.h +++ b/src/libstate/libstate.h @@ -77,7 +77,7 @@ class BOTAN_DLL Library_State * @param key the key of the option to set * @param value the value to set */ - void set_option(const std::string key, const std::string& value); + void set_option(const std::string& key, const std::string& value); /** * Add a parameter value to the "alias" section. -- cgit v1.2.3 From d61103aada1e69598792addb026aa9f10137e586 Mon Sep 17 00:00:00 2001 From: lloyd Date: Thu, 10 Sep 2009 00:16:18 +0000 Subject: Add parallel block toggles for CBC and CFB (decrypt direction only) --- src/build-data/buildh.in | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/build-data/buildh.in b/src/build-data/buildh.in index bd900d412..e0d8368ed 100644 --- a/src/build-data/buildh.in +++ b/src/build-data/buildh.in @@ -22,8 +22,14 @@ #define BOTAN_KARAT_SQR_THRESHOLD 32 #define BOTAN_PRIVATE_KEY_OP_BLINDING_BITS 64 -/* Toggles for parallel block cipher mode processing */ +/* +* Toggles for parallel block cipher mode processing +* +* CBC and CFB can only use parallel processing in decryption mode +*/ #define BOTAN_PARALLEL_BLOCKS_ECB 8 +#define BOTAN_PARALLEL_BLOCKS_CBC 8 +#define BOTAN_PARALLEL_BLOCKS_CFB 8 #define BOTAN_PARALLEL_BLOCKS_CTR 8 #define BOTAN_PARALLEL_BLOCKS_EAX 8 #define BOTAN_PARALLEL_BLOCKS_XTS 8 -- cgit v1.2.3 From a10e163e3a557cbc2d5291ab45ad068158cff525 Mon Sep 17 00:00:00 2001 From: lloyd Date: Sun, 13 Sep 2009 17:59:36 +0000 Subject: Make some changes to the SSE2 implementation of SHA-1 for compatability with Visual C++. --- src/hash/sha1_sse2/sha1_sse2_imp.cpp | 108 +++++++++++++++-------------------- 1 file changed, 46 insertions(+), 62 deletions(-) (limited to 'src') diff --git a/src/hash/sha1_sse2/sha1_sse2_imp.cpp b/src/hash/sha1_sse2/sha1_sse2_imp.cpp index 90a8dccd5..57b287bdb 100644 --- a/src/hash/sha1_sse2/sha1_sse2_imp.cpp +++ b/src/hash/sha1_sse2/sha1_sse2_imp.cpp @@ -20,8 +20,10 @@ * on a Linux/Core2 system. */ + #include -#include +#include +#include namespace Botan { @@ -30,21 +32,13 @@ namespace { typedef union { u32bit u32[4]; __m128i u128; - } v4si __attribute__((aligned(16))); + } v4si; static const v4si K00_19 = { { 0x5a827999, 0x5a827999, 0x5a827999, 0x5a827999 } }; static const v4si K20_39 = { { 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1 } }; static const v4si K40_59 = { { 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc } }; static const v4si K60_79 = { { 0xca62c1d6, 0xca62c1d6, 0xca62c1d6, 0xca62c1d6 } }; -#define UNALIGNED 1 -#if UNALIGNED -#define load(p) _mm_loadu_si128(p) -#else -#define load(p) (*p) -#endif - - /* the first 16 bytes only need byte swapping @@ -57,19 +51,15 @@ and is overwritten with the swapped bytes __m128i r1, r2; \ \ r1 = (W); \ - if (1) { \ r1 = _mm_shufflehi_epi16(r1, _MM_SHUFFLE(2, 3, 0, 1)); \ r1 = _mm_shufflelo_epi16(r1, _MM_SHUFFLE(2, 3, 0, 1)); \ r2 = _mm_slli_epi16(r1, 8); \ r1 = _mm_srli_epi16(r1, 8); \ r1 = _mm_or_si128(r1, r2); \ (W) = r1; \ - } \ (prep).u128 = _mm_add_epi32(K00_19.u128, r1); \ } while(0) - - /* for each multiple of 4, t, we want to calculate this: @@ -115,45 +105,38 @@ W0 = W[t]..W[t+3] * efficeon, pentium-m, and opteron but shifts are available in * only one unit. */ -#define prep(prep, XW0, XW1, XW2, XW3, K) do { \ - __m128i r0, r1, r2, r3; \ - \ - /* load W[t-4] 16-byte aligned, and shift */ \ - r3 = _mm_srli_si128((XW3), 4); \ - r0 = (XW0); \ - /* get high 64-bits of XW0 into low 64-bits */ \ - r1 = _mm_shuffle_epi32((XW0), _MM_SHUFFLE(1,0,3,2)); \ - /* load high 64-bits of r1 */ \ - r1 = _mm_unpacklo_epi64(r1, (XW1)); \ - r2 = (XW2); \ - \ - r0 = _mm_xor_si128(r1, r0); \ - r2 = _mm_xor_si128(r3, r2); \ - r0 = _mm_xor_si128(r2, r0); \ - /* unrotated W[t]..W[t+2] in r0 ... still need W[t+3] */ \ - \ - r2 = _mm_slli_si128(r0, 12); \ - r1 = _mm_cmplt_epi32(r0, _mm_setzero_si128()); \ - r0 = _mm_add_epi32(r0, r0); /* shift left by 1 */ \ - r0 = _mm_sub_epi32(r0, r1); /* r0 has W[t]..W[t+2] */ \ - \ - r3 = _mm_srli_epi32(r2, 30); \ - r2 = _mm_slli_epi32(r2, 2); \ - \ - r0 = _mm_xor_si128(r0, r3); \ - r0 = _mm_xor_si128(r0, r2); /* r0 now has W[t+3] */ \ - \ - (XW0) = r0; \ - (prep).u128 = _mm_add_epi32(r0, (K).u128); \ - } while(0) - - -static inline u32bit rol(u32bit src, u32bit amt) - { - /* gcc and icc appear to turn this into a rotate */ - return (src << amt) | (src >> (32 - amt)); - } - +#define prep(prep, XW0, XW1, XW2, XW3, K) \ + do { \ + __m128i r0, r1, r2, r3; \ + \ + /* load W[t-4] 16-byte aligned, and shift */ \ + r3 = _mm_srli_si128((XW3), 4); \ + r0 = (XW0); \ + /* get high 64-bits of XW0 into low 64-bits */ \ + r1 = _mm_shuffle_epi32((XW0), _MM_SHUFFLE(1,0,3,2)); \ + /* load high 64-bits of r1 */ \ + r1 = _mm_unpacklo_epi64(r1, (XW1)); \ + r2 = (XW2); \ + \ + r0 = _mm_xor_si128(r1, r0); \ + r2 = _mm_xor_si128(r3, r2); \ + r0 = _mm_xor_si128(r2, r0); \ + /* unrotated W[t]..W[t+2] in r0 ... still need W[t+3] */ \ + \ + r2 = _mm_slli_si128(r0, 12); \ + r1 = _mm_cmplt_epi32(r0, _mm_setzero_si128()); \ + r0 = _mm_add_epi32(r0, r0); /* shift left by 1 */ \ + r0 = _mm_sub_epi32(r0, r1); /* r0 has W[t]..W[t+2] */ \ + \ + r3 = _mm_srli_epi32(r2, 30); \ + r2 = _mm_slli_epi32(r2, 2); \ + \ + r0 = _mm_xor_si128(r0, r3); \ + r0 = _mm_xor_si128(r0, r2); /* r0 now has W[t+3] */ \ + \ + (XW0) = r0; \ + (prep).u128 = _mm_add_epi32(r0, (K).u128); \ + } while(0) static inline u32bit f00_19(u32bit x, u32bit y, u32bit z) { @@ -184,11 +167,12 @@ static inline u32bit f60_79(u32bit x, u32bit y, u32bit z) return f20_39(x, y, z); } -#define step(nn_mm, xa, xb, xc, xd, xe, xt, input) do { \ - (xt) = (input) + f##nn_mm((xb), (xc), (xd)); \ - (xb) = rol((xb), 30); \ - (xt) += ((xe) + rol((xa), 5)); \ - } while(0) +#define step(nn_mm, xa, xb, xc, xd, xe, xt, input) \ + do { \ + (xt) = (input) + f##nn_mm((xb), (xc), (xd)); \ + (xb) = rotate_left((xb), 30); \ + (xt) += ((xe) + rotate_left((xa), 5)); \ + } while(0) } @@ -210,14 +194,14 @@ extern "C" void botan_sha1_sse2_compress(u32bit H[5], * steps ahead of the integer code. 12 steps ahead seems * to produce the best performance. -dean */ - W0 = load(&input[0]); + W0 = _mm_loadu_si128(&input[0]); prep00_15(prep0, W0); /* prepare for 00 through 03 */ - W1 = load(&input[1]); + W1 = _mm_loadu_si128(&input[1]); prep00_15(prep1, W1); /* prepare for 04 through 07 */ - W2 = load(&input[2]); + W2 = _mm_loadu_si128(&input[2]); prep00_15(prep2, W2); /* prepare for 08 through 11 */ - W3 = load(&input[3]); + W3 = _mm_loadu_si128(&input[3]); step(00_19, a, b, c, d, e, t, prep0.u32[0]); /* 00 */ step(00_19, t, a, b, c, d, e, prep0.u32[1]); /* 01 */ step(00_19, e, t, a, b, c, d, prep0.u32[2]); /* 02 */ -- cgit v1.2.3 From 02fc388212efa21a3f489a28e7b3738956a1358c Mon Sep 17 00:00:00 2001 From: lloyd Date: Sun, 13 Sep 2009 18:00:05 +0000 Subject: Enable SSE2 engine under VC++ --- src/engine/sse2_eng/info.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/engine/sse2_eng/info.txt b/src/engine/sse2_eng/info.txt index 7595b8eb5..7508b9874 100644 --- a/src/engine/sse2_eng/info.txt +++ b/src/engine/sse2_eng/info.txt @@ -19,4 +19,5 @@ amd64 gcc icc +msvc -- cgit v1.2.3