diff options
author | lloyd <[email protected]> | 2009-08-11 02:31:17 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2009-08-11 02:31:17 +0000 |
commit | f51841ba5237952dda3e76df643d3ae13bed3df5 (patch) | |
tree | 7fd004a107bae55a5f87c4e8bc35b0012334b29b /src/block/square | |
parent | 34eb8de4ed014ab8913bdb34b096d60880b1c14a (diff) |
Change the BlockCipher interface to support multi-block encryption and
decryption. Currently only used for counter mode. Doesn't offer much
advantage as-is (though might help slightly, in terms of cache effects),
but allows for SIMD implementations to process multiple blocks in parallel
when possible. Particularly thinking here of Serpent; TEA/XTEA also seem
promising in this sense, as is Threefish once that is implemented as a
standalone block cipher.
Diffstat (limited to 'src/block/square')
-rw-r--r-- | src/block/square/square.cpp | 192 | ||||
-rw-r--r-- | src/block/square/square.h | 4 |
2 files changed, 108 insertions, 88 deletions
diff --git a/src/block/square/square.cpp b/src/block/square/square.cpp index cb226542d..fdd47d3b2 100644 --- a/src/block/square/square.cpp +++ b/src/block/square/square.cpp @@ -14,103 +14,123 @@ namespace Botan { /* * Square Encryption */ -void Square::enc(const byte in[], byte out[]) const +void Square::encrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit T0, T1, T2, T3, B0, B1, B2, B3; - B0 = TE0[in[ 0] ^ ME[ 0]] ^ TE1[in[ 4] ^ ME[ 4]] ^ - TE2[in[ 8] ^ ME[ 8]] ^ TE3[in[12] ^ ME[12]] ^ EK[0]; - B1 = TE0[in[ 1] ^ ME[ 1]] ^ TE1[in[ 5] ^ ME[ 5]] ^ - TE2[in[ 9] ^ ME[ 9]] ^ TE3[in[13] ^ ME[13]] ^ EK[1]; - B2 = TE0[in[ 2] ^ ME[ 2]] ^ TE1[in[ 6] ^ ME[ 6]] ^ - TE2[in[10] ^ ME[10]] ^ TE3[in[14] ^ ME[14]] ^ EK[2]; - B3 = TE0[in[ 3] ^ ME[ 3]] ^ TE1[in[ 7] ^ ME[ 7]] ^ - TE2[in[11] ^ ME[11]] ^ TE3[in[15] ^ ME[15]] ^ EK[3]; - for(u32bit j = 1; j != 7; j += 2) + for(u32bit i = 0; i != blocks; ++i) { - T0 = TE0[get_byte(0, B0)] ^ TE1[get_byte(0, B1)] ^ - TE2[get_byte(0, B2)] ^ TE3[get_byte(0, B3)] ^ EK[4*j+0]; - T1 = TE0[get_byte(1, B0)] ^ TE1[get_byte(1, B1)] ^ - TE2[get_byte(1, B2)] ^ TE3[get_byte(1, B3)] ^ EK[4*j+1]; - T2 = TE0[get_byte(2, B0)] ^ TE1[get_byte(2, B1)] ^ - TE2[get_byte(2, B2)] ^ TE3[get_byte(2, B3)] ^ EK[4*j+2]; - T3 = TE0[get_byte(3, B0)] ^ TE1[get_byte(3, B1)] ^ - TE2[get_byte(3, B2)] ^ TE3[get_byte(3, B3)] ^ EK[4*j+3]; - B0 = TE0[get_byte(0, T0)] ^ TE1[get_byte(0, T1)] ^ - TE2[get_byte(0, T2)] ^ TE3[get_byte(0, T3)] ^ EK[4*j+4]; - B1 = TE0[get_byte(1, T0)] ^ TE1[get_byte(1, T1)] ^ - TE2[get_byte(1, T2)] ^ TE3[get_byte(1, T3)] ^ EK[4*j+5]; - B2 = TE0[get_byte(2, T0)] ^ TE1[get_byte(2, T1)] ^ - TE2[get_byte(2, T2)] ^ TE3[get_byte(2, T3)] ^ EK[4*j+6]; - B3 = TE0[get_byte(3, T0)] ^ TE1[get_byte(3, T1)] ^ - TE2[get_byte(3, T2)] ^ TE3[get_byte(3, T3)] ^ EK[4*j+7]; + u32bit T0, T1, T2, T3, B0, B1, B2, B3; + + B0 = TE0[in[ 0] ^ ME[ 0]] ^ TE1[in[ 4] ^ ME[ 4]] ^ + TE2[in[ 8] ^ ME[ 8]] ^ TE3[in[12] ^ ME[12]] ^ EK[0]; + B1 = TE0[in[ 1] ^ ME[ 1]] ^ TE1[in[ 5] ^ ME[ 5]] ^ + TE2[in[ 9] ^ ME[ 9]] ^ TE3[in[13] ^ ME[13]] ^ EK[1]; + B2 = TE0[in[ 2] ^ ME[ 2]] ^ TE1[in[ 6] ^ ME[ 6]] ^ + TE2[in[10] ^ ME[10]] ^ TE3[in[14] ^ ME[14]] ^ EK[2]; + B3 = TE0[in[ 3] ^ ME[ 3]] ^ TE1[in[ 7] ^ ME[ 7]] ^ + TE2[in[11] ^ ME[11]] ^ TE3[in[15] ^ ME[15]] ^ EK[3]; + + for(u32bit j = 1; j != 7; j += 2) + { + T0 = TE0[get_byte(0, B0)] ^ TE1[get_byte(0, B1)] ^ + TE2[get_byte(0, B2)] ^ TE3[get_byte(0, B3)] ^ EK[4*j+0]; + T1 = TE0[get_byte(1, B0)] ^ TE1[get_byte(1, B1)] ^ + TE2[get_byte(1, B2)] ^ TE3[get_byte(1, B3)] ^ EK[4*j+1]; + T2 = TE0[get_byte(2, B0)] ^ TE1[get_byte(2, B1)] ^ + TE2[get_byte(2, B2)] ^ TE3[get_byte(2, B3)] ^ EK[4*j+2]; + T3 = TE0[get_byte(3, B0)] ^ TE1[get_byte(3, B1)] ^ + TE2[get_byte(3, B2)] ^ TE3[get_byte(3, B3)] ^ EK[4*j+3]; + + B0 = TE0[get_byte(0, T0)] ^ TE1[get_byte(0, T1)] ^ + TE2[get_byte(0, T2)] ^ TE3[get_byte(0, T3)] ^ EK[4*j+4]; + B1 = TE0[get_byte(1, T0)] ^ TE1[get_byte(1, T1)] ^ + TE2[get_byte(1, T2)] ^ TE3[get_byte(1, T3)] ^ EK[4*j+5]; + B2 = TE0[get_byte(2, T0)] ^ TE1[get_byte(2, T1)] ^ + TE2[get_byte(2, T2)] ^ TE3[get_byte(2, T3)] ^ EK[4*j+6]; + B3 = TE0[get_byte(3, T0)] ^ TE1[get_byte(3, T1)] ^ + TE2[get_byte(3, T2)] ^ TE3[get_byte(3, T3)] ^ EK[4*j+7]; + } + + out[ 0] = SE[get_byte(0, B0)] ^ ME[16]; + out[ 1] = SE[get_byte(0, B1)] ^ ME[17]; + out[ 2] = SE[get_byte(0, B2)] ^ ME[18]; + out[ 3] = SE[get_byte(0, B3)] ^ ME[19]; + out[ 4] = SE[get_byte(1, B0)] ^ ME[20]; + out[ 5] = SE[get_byte(1, B1)] ^ ME[21]; + out[ 6] = SE[get_byte(1, B2)] ^ ME[22]; + out[ 7] = SE[get_byte(1, B3)] ^ ME[23]; + out[ 8] = SE[get_byte(2, B0)] ^ ME[24]; + out[ 9] = SE[get_byte(2, B1)] ^ ME[25]; + out[10] = SE[get_byte(2, B2)] ^ ME[26]; + out[11] = SE[get_byte(2, B3)] ^ ME[27]; + out[12] = SE[get_byte(3, B0)] ^ ME[28]; + out[13] = SE[get_byte(3, B1)] ^ ME[29]; + out[14] = SE[get_byte(3, B2)] ^ ME[30]; + out[15] = SE[get_byte(3, B3)] ^ ME[31]; + + in += BLOCK_SIZE; + out += BLOCK_SIZE; } - out[ 0] = SE[get_byte(0, B0)] ^ ME[16]; - out[ 1] = SE[get_byte(0, B1)] ^ ME[17]; - out[ 2] = SE[get_byte(0, B2)] ^ ME[18]; - out[ 3] = SE[get_byte(0, B3)] ^ ME[19]; - out[ 4] = SE[get_byte(1, B0)] ^ ME[20]; - out[ 5] = SE[get_byte(1, B1)] ^ ME[21]; - out[ 6] = SE[get_byte(1, B2)] ^ ME[22]; - out[ 7] = SE[get_byte(1, B3)] ^ ME[23]; - out[ 8] = SE[get_byte(2, B0)] ^ ME[24]; - out[ 9] = SE[get_byte(2, B1)] ^ ME[25]; - out[10] = SE[get_byte(2, B2)] ^ ME[26]; - out[11] = SE[get_byte(2, B3)] ^ ME[27]; - out[12] = SE[get_byte(3, B0)] ^ ME[28]; - out[13] = SE[get_byte(3, B1)] ^ ME[29]; - out[14] = SE[get_byte(3, B2)] ^ ME[30]; - out[15] = SE[get_byte(3, B3)] ^ ME[31]; } /* * Square Decryption */ -void Square::dec(const byte in[], byte out[]) const +void Square::decrypt_n(const byte in[], byte out[], u32bit blocks) const { - u32bit T0, T1, T2, T3, B0, B1, B2, B3; - B0 = TD0[in[ 0] ^ MD[ 0]] ^ TD1[in[ 4] ^ MD[ 4]] ^ - TD2[in[ 8] ^ MD[ 8]] ^ TD3[in[12] ^ MD[12]] ^ DK[0]; - B1 = TD0[in[ 1] ^ MD[ 1]] ^ TD1[in[ 5] ^ MD[ 5]] ^ - TD2[in[ 9] ^ MD[ 9]] ^ TD3[in[13] ^ MD[13]] ^ DK[1]; - B2 = TD0[in[ 2] ^ MD[ 2]] ^ TD1[in[ 6] ^ MD[ 6]] ^ - TD2[in[10] ^ MD[10]] ^ TD3[in[14] ^ MD[14]] ^ DK[2]; - B3 = TD0[in[ 3] ^ MD[ 3]] ^ TD1[in[ 7] ^ MD[ 7]] ^ - TD2[in[11] ^ MD[11]] ^ TD3[in[15] ^ MD[15]] ^ DK[3]; - for(u32bit j = 1; j != 7; j += 2) + for(u32bit i = 0; i != blocks; ++i) { - T0 = TD0[get_byte(0, B0)] ^ TD1[get_byte(0, B1)] ^ - TD2[get_byte(0, B2)] ^ TD3[get_byte(0, B3)] ^ DK[4*j+0]; - T1 = TD0[get_byte(1, B0)] ^ TD1[get_byte(1, B1)] ^ - TD2[get_byte(1, B2)] ^ TD3[get_byte(1, B3)] ^ DK[4*j+1]; - T2 = TD0[get_byte(2, B0)] ^ TD1[get_byte(2, B1)] ^ - TD2[get_byte(2, B2)] ^ TD3[get_byte(2, B3)] ^ DK[4*j+2]; - T3 = TD0[get_byte(3, B0)] ^ TD1[get_byte(3, B1)] ^ - TD2[get_byte(3, B2)] ^ TD3[get_byte(3, B3)] ^ DK[4*j+3]; - B0 = TD0[get_byte(0, T0)] ^ TD1[get_byte(0, T1)] ^ - TD2[get_byte(0, T2)] ^ TD3[get_byte(0, T3)] ^ DK[4*j+4]; - B1 = TD0[get_byte(1, T0)] ^ TD1[get_byte(1, T1)] ^ - TD2[get_byte(1, T2)] ^ TD3[get_byte(1, T3)] ^ DK[4*j+5]; - B2 = TD0[get_byte(2, T0)] ^ TD1[get_byte(2, T1)] ^ - TD2[get_byte(2, T2)] ^ TD3[get_byte(2, T3)] ^ DK[4*j+6]; - B3 = TD0[get_byte(3, T0)] ^ TD1[get_byte(3, T1)] ^ - TD2[get_byte(3, T2)] ^ TD3[get_byte(3, T3)] ^ DK[4*j+7]; + u32bit T0, T1, T2, T3, B0, B1, B2, B3; + + B0 = TD0[in[ 0] ^ MD[ 0]] ^ TD1[in[ 4] ^ MD[ 4]] ^ + TD2[in[ 8] ^ MD[ 8]] ^ TD3[in[12] ^ MD[12]] ^ DK[0]; + B1 = TD0[in[ 1] ^ MD[ 1]] ^ TD1[in[ 5] ^ MD[ 5]] ^ + TD2[in[ 9] ^ MD[ 9]] ^ TD3[in[13] ^ MD[13]] ^ DK[1]; + B2 = TD0[in[ 2] ^ MD[ 2]] ^ TD1[in[ 6] ^ MD[ 6]] ^ + TD2[in[10] ^ MD[10]] ^ TD3[in[14] ^ MD[14]] ^ DK[2]; + B3 = TD0[in[ 3] ^ MD[ 3]] ^ TD1[in[ 7] ^ MD[ 7]] ^ + TD2[in[11] ^ MD[11]] ^ TD3[in[15] ^ MD[15]] ^ DK[3]; + + for(u32bit j = 1; j != 7; j += 2) + { + T0 = TD0[get_byte(0, B0)] ^ TD1[get_byte(0, B1)] ^ + TD2[get_byte(0, B2)] ^ TD3[get_byte(0, B3)] ^ DK[4*j+0]; + T1 = TD0[get_byte(1, B0)] ^ TD1[get_byte(1, B1)] ^ + TD2[get_byte(1, B2)] ^ TD3[get_byte(1, B3)] ^ DK[4*j+1]; + T2 = TD0[get_byte(2, B0)] ^ TD1[get_byte(2, B1)] ^ + TD2[get_byte(2, B2)] ^ TD3[get_byte(2, B3)] ^ DK[4*j+2]; + T3 = TD0[get_byte(3, B0)] ^ TD1[get_byte(3, B1)] ^ + TD2[get_byte(3, B2)] ^ TD3[get_byte(3, B3)] ^ DK[4*j+3]; + + B0 = TD0[get_byte(0, T0)] ^ TD1[get_byte(0, T1)] ^ + TD2[get_byte(0, T2)] ^ TD3[get_byte(0, T3)] ^ DK[4*j+4]; + B1 = TD0[get_byte(1, T0)] ^ TD1[get_byte(1, T1)] ^ + TD2[get_byte(1, T2)] ^ TD3[get_byte(1, T3)] ^ DK[4*j+5]; + B2 = TD0[get_byte(2, T0)] ^ TD1[get_byte(2, T1)] ^ + TD2[get_byte(2, T2)] ^ TD3[get_byte(2, T3)] ^ DK[4*j+6]; + B3 = TD0[get_byte(3, T0)] ^ TD1[get_byte(3, T1)] ^ + TD2[get_byte(3, T2)] ^ TD3[get_byte(3, T3)] ^ DK[4*j+7]; + } + + out[ 0] = SD[get_byte(0, B0)] ^ MD[16]; + out[ 1] = SD[get_byte(0, B1)] ^ MD[17]; + out[ 2] = SD[get_byte(0, B2)] ^ MD[18]; + out[ 3] = SD[get_byte(0, B3)] ^ MD[19]; + out[ 4] = SD[get_byte(1, B0)] ^ MD[20]; + out[ 5] = SD[get_byte(1, B1)] ^ MD[21]; + out[ 6] = SD[get_byte(1, B2)] ^ MD[22]; + out[ 7] = SD[get_byte(1, B3)] ^ MD[23]; + out[ 8] = SD[get_byte(2, B0)] ^ MD[24]; + out[ 9] = SD[get_byte(2, B1)] ^ MD[25]; + out[10] = SD[get_byte(2, B2)] ^ MD[26]; + out[11] = SD[get_byte(2, B3)] ^ MD[27]; + out[12] = SD[get_byte(3, B0)] ^ MD[28]; + out[13] = SD[get_byte(3, B1)] ^ MD[29]; + out[14] = SD[get_byte(3, B2)] ^ MD[30]; + out[15] = SD[get_byte(3, B3)] ^ MD[31]; + + in += BLOCK_SIZE; + out += BLOCK_SIZE; } - out[ 0] = SD[get_byte(0, B0)] ^ MD[16]; - out[ 1] = SD[get_byte(0, B1)] ^ MD[17]; - out[ 2] = SD[get_byte(0, B2)] ^ MD[18]; - out[ 3] = SD[get_byte(0, B3)] ^ MD[19]; - out[ 4] = SD[get_byte(1, B0)] ^ MD[20]; - out[ 5] = SD[get_byte(1, B1)] ^ MD[21]; - out[ 6] = SD[get_byte(1, B2)] ^ MD[22]; - out[ 7] = SD[get_byte(1, B3)] ^ MD[23]; - out[ 8] = SD[get_byte(2, B0)] ^ MD[24]; - out[ 9] = SD[get_byte(2, B1)] ^ MD[25]; - out[10] = SD[get_byte(2, B2)] ^ MD[26]; - out[11] = SD[get_byte(2, B3)] ^ MD[27]; - out[12] = SD[get_byte(3, B0)] ^ MD[28]; - out[13] = SD[get_byte(3, B1)] ^ MD[29]; - out[14] = SD[get_byte(3, B2)] ^ MD[30]; - out[15] = SD[get_byte(3, B3)] ^ MD[31]; } /* diff --git a/src/block/square/square.h b/src/block/square/square.h index 94a1fc370..0de4c20bd 100644 --- a/src/block/square/square.h +++ b/src/block/square/square.h @@ -23,8 +23,8 @@ class BOTAN_DLL Square : public BlockCipher BlockCipher* clone() const { return new Square; } Square() : BlockCipher(16, 16) {} private: - void enc(const byte[], byte[]) const; - void dec(const byte[], byte[]) const; + void encrypt_n(const byte in[], byte out[], u32bit blocks) const; + void decrypt_n(const byte in[], byte out[], u32bit blocks) const; void key_schedule(const byte[], u32bit); static void transform(u32bit[4]); |