diff options
author | lloyd <[email protected]> | 2008-11-23 02:03:16 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2008-11-23 02:03:16 +0000 |
commit | f4ad8ecfa14b59dbf06a9595f8dacccb3b7f04e3 (patch) | |
tree | 76077c951bb44373ad7c2ca632f2e24cf758e88f /src/hash/md4/md4.cpp | |
parent | 8ec4a1fe7e8af6f8223908cdf8fe7de39170fc2a (diff) |
I had not anticipated this being really worthwhile, but it turns out
to have been so! Change MDx_HashFunction::hash to a new compress_n
which hashes an arbitrary number of blocks. I had a thought this might
reduce a bit of loop overhead but the results were far better than I
anticipated. Speedup across the board of about 2%, and very
noticable (+10%) increases for MD4 and Tiger (probably b/c both
of those have so few instructions in each iteration of the
compression function).
Before:
SHA-1:
amd64: 211.9 MiB/s
core: 210.0 MiB/s
sse2: 295.2 MiB/s
MD4: 476.2 MiB/s
MD5: 355.2 MiB/s
SHA-256: 99.8 MiB/s
SHA-512: 151.4 MiB/s
RIPEMD-128: 326.9 MiB/s
RIPEMD-160: 225.1 MiB/s
Tiger: 214.8 MiB/s
Whirlpool: 38.4 MiB/s
After:
SHA-1:
amd64: 215.6 MiB/s
core: 213.8 MiB/s
sse2: 299.9 MiB/s
MD4: 528.4 MiB/s
MD5: 368.8 MiB/s
SHA-256: 103.9 MiB/s
SHA-512: 156.8 MiB/s
RIPEMD-128: 334.8 MiB/s
RIPEMD-160: 229.7 MiB/s
Tiger: 240.7 MiB/s
Whirlpool: 38.6 MiB/s
Diffstat (limited to 'src/hash/md4/md4.cpp')
-rw-r--r-- | src/hash/md4/md4.cpp | 60 |
1 files changed, 32 insertions, 28 deletions
diff --git a/src/hash/md4/md4.cpp b/src/hash/md4/md4.cpp index e2a18f424..c6adeb744 100644 --- a/src/hash/md4/md4.cpp +++ b/src/hash/md4/md4.cpp @@ -43,35 +43,39 @@ inline void HH(u32bit& A, u32bit B, u32bit C, u32bit D, u32bit M, byte S) /************************************************* * MD4 Compression Function * *************************************************/ -void MD4::hash(const byte input[]) +void MD4::compress_n(const byte input[], u32bit blocks) { - for(u32bit j = 0; j != 16; ++j) - M[j] = load_le<u32bit>(input, j); - - u32bit A = digest[0], B = digest[1], C = digest[2], D = digest[3]; - - FF(A,B,C,D,M[ 0], 3); FF(D,A,B,C,M[ 1], 7); FF(C,D,A,B,M[ 2],11); - FF(B,C,D,A,M[ 3],19); FF(A,B,C,D,M[ 4], 3); FF(D,A,B,C,M[ 5], 7); - FF(C,D,A,B,M[ 6],11); FF(B,C,D,A,M[ 7],19); FF(A,B,C,D,M[ 8], 3); - FF(D,A,B,C,M[ 9], 7); FF(C,D,A,B,M[10],11); FF(B,C,D,A,M[11],19); - FF(A,B,C,D,M[12], 3); FF(D,A,B,C,M[13], 7); FF(C,D,A,B,M[14],11); - FF(B,C,D,A,M[15],19); - - GG(A,B,C,D,M[ 0], 3); GG(D,A,B,C,M[ 4], 5); GG(C,D,A,B,M[ 8], 9); - GG(B,C,D,A,M[12],13); GG(A,B,C,D,M[ 1], 3); GG(D,A,B,C,M[ 5], 5); - GG(C,D,A,B,M[ 9], 9); GG(B,C,D,A,M[13],13); GG(A,B,C,D,M[ 2], 3); - GG(D,A,B,C,M[ 6], 5); GG(C,D,A,B,M[10], 9); GG(B,C,D,A,M[14],13); - GG(A,B,C,D,M[ 3], 3); GG(D,A,B,C,M[ 7], 5); GG(C,D,A,B,M[11], 9); - GG(B,C,D,A,M[15],13); - - HH(A,B,C,D,M[ 0], 3); HH(D,A,B,C,M[ 8], 9); HH(C,D,A,B,M[ 4],11); - HH(B,C,D,A,M[12],15); HH(A,B,C,D,M[ 2], 3); HH(D,A,B,C,M[10], 9); - HH(C,D,A,B,M[ 6],11); HH(B,C,D,A,M[14],15); HH(A,B,C,D,M[ 1], 3); - HH(D,A,B,C,M[ 9], 9); HH(C,D,A,B,M[ 5],11); HH(B,C,D,A,M[13],15); - HH(A,B,C,D,M[ 3], 3); HH(D,A,B,C,M[11], 9); HH(C,D,A,B,M[ 7],11); - HH(B,C,D,A,M[15],15); - - digest[0] += A; digest[1] += B; digest[2] += C; digest[3] += D; + for(u32bit i = 0; i != blocks; ++i) + { + for(u32bit j = 0; j != 16; ++j) + M[j] = load_le<u32bit>(input, j); + input += HASH_BLOCK_SIZE; + + u32bit A = digest[0], B = digest[1], C = digest[2], D = digest[3]; + + FF(A,B,C,D,M[ 0], 3); FF(D,A,B,C,M[ 1], 7); FF(C,D,A,B,M[ 2],11); + FF(B,C,D,A,M[ 3],19); FF(A,B,C,D,M[ 4], 3); FF(D,A,B,C,M[ 5], 7); + FF(C,D,A,B,M[ 6],11); FF(B,C,D,A,M[ 7],19); FF(A,B,C,D,M[ 8], 3); + FF(D,A,B,C,M[ 9], 7); FF(C,D,A,B,M[10],11); FF(B,C,D,A,M[11],19); + FF(A,B,C,D,M[12], 3); FF(D,A,B,C,M[13], 7); FF(C,D,A,B,M[14],11); + FF(B,C,D,A,M[15],19); + + GG(A,B,C,D,M[ 0], 3); GG(D,A,B,C,M[ 4], 5); GG(C,D,A,B,M[ 8], 9); + GG(B,C,D,A,M[12],13); GG(A,B,C,D,M[ 1], 3); GG(D,A,B,C,M[ 5], 5); + GG(C,D,A,B,M[ 9], 9); GG(B,C,D,A,M[13],13); GG(A,B,C,D,M[ 2], 3); + GG(D,A,B,C,M[ 6], 5); GG(C,D,A,B,M[10], 9); GG(B,C,D,A,M[14],13); + GG(A,B,C,D,M[ 3], 3); GG(D,A,B,C,M[ 7], 5); GG(C,D,A,B,M[11], 9); + GG(B,C,D,A,M[15],13); + + HH(A,B,C,D,M[ 0], 3); HH(D,A,B,C,M[ 8], 9); HH(C,D,A,B,M[ 4],11); + HH(B,C,D,A,M[12],15); HH(A,B,C,D,M[ 2], 3); HH(D,A,B,C,M[10], 9); + HH(C,D,A,B,M[ 6],11); HH(B,C,D,A,M[14],15); HH(A,B,C,D,M[ 1], 3); + HH(D,A,B,C,M[ 9], 9); HH(C,D,A,B,M[ 5],11); HH(B,C,D,A,M[13],15); + HH(A,B,C,D,M[ 3], 3); HH(D,A,B,C,M[11], 9); HH(C,D,A,B,M[ 7],11); + HH(B,C,D,A,M[15],15); + + digest[0] += A; digest[1] += B; digest[2] += C; digest[3] += D; + } } /************************************************* |