diff options
author | lloyd <[email protected]> | 2008-11-23 02:03:16 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2008-11-23 02:03:16 +0000 |
commit | f4ad8ecfa14b59dbf06a9595f8dacccb3b7f04e3 (patch) | |
tree | 76077c951bb44373ad7c2ca632f2e24cf758e88f /src/hash/whirlpool/whrlpool.cpp | |
parent | 8ec4a1fe7e8af6f8223908cdf8fe7de39170fc2a (diff) |
I had not anticipated this being really worthwhile, but it turns out
to have been so! Change MDx_HashFunction::hash to a new compress_n
which hashes an arbitrary number of blocks. I had a thought this might
reduce a bit of loop overhead but the results were far better than I
anticipated. Speedup across the board of about 2%, and very
noticeable (+10%) increases for MD4 and Tiger (probably because both
of those have so few instructions in each iteration of the
compression function).
Before:
SHA-1:
amd64: 211.9 MiB/s
core: 210.0 MiB/s
sse2: 295.2 MiB/s
MD4: 476.2 MiB/s
MD5: 355.2 MiB/s
SHA-256: 99.8 MiB/s
SHA-512: 151.4 MiB/s
RIPEMD-128: 326.9 MiB/s
RIPEMD-160: 225.1 MiB/s
Tiger: 214.8 MiB/s
Whirlpool: 38.4 MiB/s
After:
SHA-1:
amd64: 215.6 MiB/s
core: 213.8 MiB/s
sse2: 299.9 MiB/s
MD4: 528.4 MiB/s
MD5: 368.8 MiB/s
SHA-256: 103.9 MiB/s
SHA-512: 156.8 MiB/s
RIPEMD-128: 334.8 MiB/s
RIPEMD-160: 229.7 MiB/s
Tiger: 240.7 MiB/s
Whirlpool: 38.6 MiB/s
Diffstat (limited to 'src/hash/whirlpool/whrlpool.cpp')
-rw-r--r-- | src/hash/whirlpool/whrlpool.cpp | 182 |
1 files changed, 93 insertions, 89 deletions
diff --git a/src/hash/whirlpool/whrlpool.cpp b/src/hash/whirlpool/whrlpool.cpp index 9319a8b2a..b604f9400 100644 --- a/src/hash/whirlpool/whrlpool.cpp +++ b/src/hash/whirlpool/whrlpool.cpp @@ -11,7 +11,7 @@ namespace Botan { /************************************************* * Whirlpool Compression Function * *************************************************/ -void Whirlpool::hash(const byte in[]) +void Whirlpool::compress_n(const byte in[], u32bit blocks) { static const u64bit RC[10] = { 0x1823C6E887B8014F, 0x36A6D2F5796F9152, @@ -21,101 +21,105 @@ void Whirlpool::hash(const byte in[]) 0xFBEE7C66DD17479E, 0xCA2DBF07AD5A8333 }; - for(u32bit j = 0; j != 8; ++j) - M[j] = load_be<u64bit>(in, j); + for(u32bit i = 0; i != blocks; ++i) + { + for(u32bit j = 0; j != 8; ++j) + M[j] = load_be<u64bit>(in, j); + in += HASH_BLOCK_SIZE; - u64bit K0, K1, K2, K3, K4, K5, K6, K7; - K0 = digest[0]; K1 = digest[1]; K2 = digest[2]; K3 = digest[3]; - K4 = digest[4]; K5 = digest[5]; K6 = digest[6]; K7 = digest[7]; + u64bit K0, K1, K2, K3, K4, K5, K6, K7; + K0 = digest[0]; K1 = digest[1]; K2 = digest[2]; K3 = digest[3]; + K4 = digest[4]; K5 = digest[5]; K6 = digest[6]; K7 = digest[7]; - u64bit B0, B1, B2, B3, B4, B5, B6, B7; - B0 = K0 ^ M[0]; B1 = K1 ^ M[1]; B2 = K2 ^ M[2]; B3 = K3 ^ M[3]; - B4 = K4 ^ M[4]; B5 = K5 ^ M[5]; B6 = K6 ^ M[6]; B7 = K7 ^ M[7]; + u64bit B0, B1, B2, B3, B4, B5, B6, B7; + B0 = K0 ^ M[0]; B1 = K1 ^ M[1]; B2 = K2 ^ M[2]; B3 = K3 ^ M[3]; + B4 = K4 ^ M[4]; B5 = K5 ^ M[5]; B6 = K6 ^ M[6]; B7 = K7 ^ M[7]; - for(u32bit j = 0; j != 10; ++j) - { - u64bit T0, T1, T2, T3, T4, T5, T6, T7; - T0 = C0[get_byte(0, K0)] ^ C1[get_byte(1, K7)] ^ - C2[get_byte(2, K6)] ^ C3[get_byte(3, K5)] ^ - C4[get_byte(4, K4)] ^ C5[get_byte(5, K3)] ^ - C6[get_byte(6, K2)] ^ C7[get_byte(7, K1)] ^ RC[j]; - T1 = C0[get_byte(0, K1)] ^ C1[get_byte(1, K0)] ^ - C2[get_byte(2, K7)] ^ C3[get_byte(3, K6)] ^ - C4[get_byte(4, K5)] ^ C5[get_byte(5, K4)] ^ - C6[get_byte(6, K3)] ^ C7[get_byte(7, K2)]; - T2 = 
C0[get_byte(0, K2)] ^ C1[get_byte(1, K1)] ^ - C2[get_byte(2, K0)] ^ C3[get_byte(3, K7)] ^ - C4[get_byte(4, K6)] ^ C5[get_byte(5, K5)] ^ - C6[get_byte(6, K4)] ^ C7[get_byte(7, K3)]; - T3 = C0[get_byte(0, K3)] ^ C1[get_byte(1, K2)] ^ - C2[get_byte(2, K1)] ^ C3[get_byte(3, K0)] ^ - C4[get_byte(4, K7)] ^ C5[get_byte(5, K6)] ^ - C6[get_byte(6, K5)] ^ C7[get_byte(7, K4)]; - T4 = C0[get_byte(0, K4)] ^ C1[get_byte(1, K3)] ^ - C2[get_byte(2, K2)] ^ C3[get_byte(3, K1)] ^ - C4[get_byte(4, K0)] ^ C5[get_byte(5, K7)] ^ - C6[get_byte(6, K6)] ^ C7[get_byte(7, K5)]; - T5 = C0[get_byte(0, K5)] ^ C1[get_byte(1, K4)] ^ - C2[get_byte(2, K3)] ^ C3[get_byte(3, K2)] ^ - C4[get_byte(4, K1)] ^ C5[get_byte(5, K0)] ^ - C6[get_byte(6, K7)] ^ C7[get_byte(7, K6)]; - T6 = C0[get_byte(0, K6)] ^ C1[get_byte(1, K5)] ^ - C2[get_byte(2, K4)] ^ C3[get_byte(3, K3)] ^ - C4[get_byte(4, K2)] ^ C5[get_byte(5, K1)] ^ - C6[get_byte(6, K0)] ^ C7[get_byte(7, K7)]; - T7 = C0[get_byte(0, K7)] ^ C1[get_byte(1, K6)] ^ - C2[get_byte(2, K5)] ^ C3[get_byte(3, K4)] ^ - C4[get_byte(4, K3)] ^ C5[get_byte(5, K2)] ^ - C6[get_byte(6, K1)] ^ C7[get_byte(7, K0)]; + for(u32bit j = 0; j != 10; ++j) + { + u64bit T0, T1, T2, T3, T4, T5, T6, T7; + T0 = C0[get_byte(0, K0)] ^ C1[get_byte(1, K7)] ^ + C2[get_byte(2, K6)] ^ C3[get_byte(3, K5)] ^ + C4[get_byte(4, K4)] ^ C5[get_byte(5, K3)] ^ + C6[get_byte(6, K2)] ^ C7[get_byte(7, K1)] ^ RC[j]; + T1 = C0[get_byte(0, K1)] ^ C1[get_byte(1, K0)] ^ + C2[get_byte(2, K7)] ^ C3[get_byte(3, K6)] ^ + C4[get_byte(4, K5)] ^ C5[get_byte(5, K4)] ^ + C6[get_byte(6, K3)] ^ C7[get_byte(7, K2)]; + T2 = C0[get_byte(0, K2)] ^ C1[get_byte(1, K1)] ^ + C2[get_byte(2, K0)] ^ C3[get_byte(3, K7)] ^ + C4[get_byte(4, K6)] ^ C5[get_byte(5, K5)] ^ + C6[get_byte(6, K4)] ^ C7[get_byte(7, K3)]; + T3 = C0[get_byte(0, K3)] ^ C1[get_byte(1, K2)] ^ + C2[get_byte(2, K1)] ^ C3[get_byte(3, K0)] ^ + C4[get_byte(4, K7)] ^ C5[get_byte(5, K6)] ^ + C6[get_byte(6, K5)] ^ C7[get_byte(7, K4)]; + T4 = C0[get_byte(0, K4)] ^ 
C1[get_byte(1, K3)] ^ + C2[get_byte(2, K2)] ^ C3[get_byte(3, K1)] ^ + C4[get_byte(4, K0)] ^ C5[get_byte(5, K7)] ^ + C6[get_byte(6, K6)] ^ C7[get_byte(7, K5)]; + T5 = C0[get_byte(0, K5)] ^ C1[get_byte(1, K4)] ^ + C2[get_byte(2, K3)] ^ C3[get_byte(3, K2)] ^ + C4[get_byte(4, K1)] ^ C5[get_byte(5, K0)] ^ + C6[get_byte(6, K7)] ^ C7[get_byte(7, K6)]; + T6 = C0[get_byte(0, K6)] ^ C1[get_byte(1, K5)] ^ + C2[get_byte(2, K4)] ^ C3[get_byte(3, K3)] ^ + C4[get_byte(4, K2)] ^ C5[get_byte(5, K1)] ^ + C6[get_byte(6, K0)] ^ C7[get_byte(7, K7)]; + T7 = C0[get_byte(0, K7)] ^ C1[get_byte(1, K6)] ^ + C2[get_byte(2, K5)] ^ C3[get_byte(3, K4)] ^ + C4[get_byte(4, K3)] ^ C5[get_byte(5, K2)] ^ + C6[get_byte(6, K1)] ^ C7[get_byte(7, K0)]; - K0 = T0; K1 = T1; K2 = T2; K3 = T3; - K4 = T4; K5 = T5; K6 = T6; K7 = T7; + K0 = T0; K1 = T1; K2 = T2; K3 = T3; + K4 = T4; K5 = T5; K6 = T6; K7 = T7; - T0 = C0[get_byte(0, B0)] ^ C1[get_byte(1, B7)] ^ - C2[get_byte(2, B6)] ^ C3[get_byte(3, B5)] ^ - C4[get_byte(4, B4)] ^ C5[get_byte(5, B3)] ^ - C6[get_byte(6, B2)] ^ C7[get_byte(7, B1)] ^ K0; - T1 = C0[get_byte(0, B1)] ^ C1[get_byte(1, B0)] ^ - C2[get_byte(2, B7)] ^ C3[get_byte(3, B6)] ^ - C4[get_byte(4, B5)] ^ C5[get_byte(5, B4)] ^ - C6[get_byte(6, B3)] ^ C7[get_byte(7, B2)] ^ K1; - T2 = C0[get_byte(0, B2)] ^ C1[get_byte(1, B1)] ^ - C2[get_byte(2, B0)] ^ C3[get_byte(3, B7)] ^ - C4[get_byte(4, B6)] ^ C5[get_byte(5, B5)] ^ - C6[get_byte(6, B4)] ^ C7[get_byte(7, B3)] ^ K2; - T3 = C0[get_byte(0, B3)] ^ C1[get_byte(1, B2)] ^ - C2[get_byte(2, B1)] ^ C3[get_byte(3, B0)] ^ - C4[get_byte(4, B7)] ^ C5[get_byte(5, B6)] ^ - C6[get_byte(6, B5)] ^ C7[get_byte(7, B4)] ^ K3; - T4 = C0[get_byte(0, B4)] ^ C1[get_byte(1, B3)] ^ - C2[get_byte(2, B2)] ^ C3[get_byte(3, B1)] ^ - C4[get_byte(4, B0)] ^ C5[get_byte(5, B7)] ^ - C6[get_byte(6, B6)] ^ C7[get_byte(7, B5)] ^ K4; - T5 = C0[get_byte(0, B5)] ^ C1[get_byte(1, B4)] ^ - C2[get_byte(2, B3)] ^ C3[get_byte(3, B2)] ^ - C4[get_byte(4, B1)] ^ C5[get_byte(5, B0)] ^ - C6[get_byte(6, 
B7)] ^ C7[get_byte(7, B6)] ^ K5; - T6 = C0[get_byte(0, B6)] ^ C1[get_byte(1, B5)] ^ - C2[get_byte(2, B4)] ^ C3[get_byte(3, B3)] ^ - C4[get_byte(4, B2)] ^ C5[get_byte(5, B1)] ^ - C6[get_byte(6, B0)] ^ C7[get_byte(7, B7)] ^ K6; - T7 = C0[get_byte(0, B7)] ^ C1[get_byte(1, B6)] ^ - C2[get_byte(2, B5)] ^ C3[get_byte(3, B4)] ^ - C4[get_byte(4, B3)] ^ C5[get_byte(5, B2)] ^ - C6[get_byte(6, B1)] ^ C7[get_byte(7, B0)] ^ K7; + T0 = C0[get_byte(0, B0)] ^ C1[get_byte(1, B7)] ^ + C2[get_byte(2, B6)] ^ C3[get_byte(3, B5)] ^ + C4[get_byte(4, B4)] ^ C5[get_byte(5, B3)] ^ + C6[get_byte(6, B2)] ^ C7[get_byte(7, B1)] ^ K0; + T1 = C0[get_byte(0, B1)] ^ C1[get_byte(1, B0)] ^ + C2[get_byte(2, B7)] ^ C3[get_byte(3, B6)] ^ + C4[get_byte(4, B5)] ^ C5[get_byte(5, B4)] ^ + C6[get_byte(6, B3)] ^ C7[get_byte(7, B2)] ^ K1; + T2 = C0[get_byte(0, B2)] ^ C1[get_byte(1, B1)] ^ + C2[get_byte(2, B0)] ^ C3[get_byte(3, B7)] ^ + C4[get_byte(4, B6)] ^ C5[get_byte(5, B5)] ^ + C6[get_byte(6, B4)] ^ C7[get_byte(7, B3)] ^ K2; + T3 = C0[get_byte(0, B3)] ^ C1[get_byte(1, B2)] ^ + C2[get_byte(2, B1)] ^ C3[get_byte(3, B0)] ^ + C4[get_byte(4, B7)] ^ C5[get_byte(5, B6)] ^ + C6[get_byte(6, B5)] ^ C7[get_byte(7, B4)] ^ K3; + T4 = C0[get_byte(0, B4)] ^ C1[get_byte(1, B3)] ^ + C2[get_byte(2, B2)] ^ C3[get_byte(3, B1)] ^ + C4[get_byte(4, B0)] ^ C5[get_byte(5, B7)] ^ + C6[get_byte(6, B6)] ^ C7[get_byte(7, B5)] ^ K4; + T5 = C0[get_byte(0, B5)] ^ C1[get_byte(1, B4)] ^ + C2[get_byte(2, B3)] ^ C3[get_byte(3, B2)] ^ + C4[get_byte(4, B1)] ^ C5[get_byte(5, B0)] ^ + C6[get_byte(6, B7)] ^ C7[get_byte(7, B6)] ^ K5; + T6 = C0[get_byte(0, B6)] ^ C1[get_byte(1, B5)] ^ + C2[get_byte(2, B4)] ^ C3[get_byte(3, B3)] ^ + C4[get_byte(4, B2)] ^ C5[get_byte(5, B1)] ^ + C6[get_byte(6, B0)] ^ C7[get_byte(7, B7)] ^ K6; + T7 = C0[get_byte(0, B7)] ^ C1[get_byte(1, B6)] ^ + C2[get_byte(2, B5)] ^ C3[get_byte(3, B4)] ^ + C4[get_byte(4, B3)] ^ C5[get_byte(5, B2)] ^ + C6[get_byte(6, B1)] ^ C7[get_byte(7, B0)] ^ K7; - B0 = T0; B1 = T1; B2 = T2; B3 = 
T3; - B4 = T4; B5 = T5; B6 = T6; B7 = T7; - } + B0 = T0; B1 = T1; B2 = T2; B3 = T3; + B4 = T4; B5 = T5; B6 = T6; B7 = T7; + } - digest[0] ^= B0 ^ M[0]; - digest[1] ^= B1 ^ M[1]; - digest[2] ^= B2 ^ M[2]; - digest[3] ^= B3 ^ M[3]; - digest[4] ^= B4 ^ M[4]; - digest[5] ^= B5 ^ M[5]; - digest[6] ^= B6 ^ M[6]; - digest[7] ^= B7 ^ M[7]; + digest[0] ^= B0 ^ M[0]; + digest[1] ^= B1 ^ M[1]; + digest[2] ^= B2 ^ M[2]; + digest[3] ^= B3 ^ M[3]; + digest[4] ^= B4 ^ M[4]; + digest[5] ^= B5 ^ M[5]; + digest[6] ^= B6 ^ M[6]; + digest[7] ^= B7 ^ M[7]; + } } /************************************************* |