aboutsummaryrefslogtreecommitdiffstats
path: root/src/hash/sha1/sha160.cpp
diff options
context:
space:
mode:
authorlloyd <[email protected]>2008-11-23 02:03:16 +0000
committerlloyd <[email protected]>2008-11-23 02:03:16 +0000
commitf4ad8ecfa14b59dbf06a9595f8dacccb3b7f04e3 (patch)
tree76077c951bb44373ad7c2ca632f2e24cf758e88f /src/hash/sha1/sha160.cpp
parent8ec4a1fe7e8af6f8223908cdf8fe7de39170fc2a (diff)
I had not anticipated this being really worthwhile, but it turns out
to have been so! Change MDx_HashFunction::hash to a new compress_n which hashes an arbitrary number of blocks. I had a thought this might reduce a bit of loop overhead but the results were far better than I anticipated. Speedup across the board of about 2%, and very noticable (+10%) increases for MD4 and Tiger (probably b/c both of those have so few instructions in each iteration of the compression function). Before: SHA-1: amd64: 211.9 MiB/s core: 210.0 MiB/s sse2: 295.2 MiB/s MD4: 476.2 MiB/s MD5: 355.2 MiB/s SHA-256: 99.8 MiB/s SHA-512: 151.4 MiB/s RIPEMD-128: 326.9 MiB/s RIPEMD-160: 225.1 MiB/s Tiger: 214.8 MiB/s Whirlpool: 38.4 MiB/s After: SHA-1: amd64: 215.6 MiB/s core: 213.8 MiB/s sse2: 299.9 MiB/s MD4: 528.4 MiB/s MD5: 368.8 MiB/s SHA-256: 103.9 MiB/s SHA-512: 156.8 MiB/s RIPEMD-128: 334.8 MiB/s RIPEMD-160: 229.7 MiB/s Tiger: 240.7 MiB/s Whirlpool: 38.6 MiB/s
Diffstat (limited to 'src/hash/sha1/sha160.cpp')
-rw-r--r--src/hash/sha1/sha160.cpp108
1 files changed, 56 insertions, 52 deletions
diff --git a/src/hash/sha1/sha160.cpp b/src/hash/sha1/sha160.cpp
index 142cddcd3..302151921 100644
--- a/src/hash/sha1/sha160.cpp
+++ b/src/hash/sha1/sha160.cpp
@@ -52,61 +52,65 @@ inline void F4(u32bit A, u32bit& B, u32bit C, u32bit D, u32bit& E, u32bit msg)
/*************************************************
* SHA-160 Compression Function *
*************************************************/
-void SHA_160::hash(const byte input[])
+void SHA_160::compress_n(const byte input[], u32bit blocks)
{
- for(u32bit j = 0; j != 16; j += 4)
+ for(u32bit i = 0; i != blocks; ++i)
{
- W[j ] = load_be<u32bit>(input, j);
- W[j+1] = load_be<u32bit>(input, j+1);
- W[j+2] = load_be<u32bit>(input, j+2);
- W[j+3] = load_be<u32bit>(input, j+3);
+ for(u32bit j = 0; j != 16; j += 4)
+ {
+ W[j ] = load_be<u32bit>(input, j);
+ W[j+1] = load_be<u32bit>(input, j+1);
+ W[j+2] = load_be<u32bit>(input, j+2);
+ W[j+3] = load_be<u32bit>(input, j+3);
+ }
+ input += HASH_BLOCK_SIZE;
+
+ for(u32bit j = 16; j != 80; j += 4)
+ {
+ W[j ] = rotate_left((W[j-3] ^ W[j-8] ^ W[j-14] ^ W[j-16]), 1);
+ W[j+1] = rotate_left((W[j-2] ^ W[j-7] ^ W[j-13] ^ W[j-15]), 1);
+ W[j+2] = rotate_left((W[j-1] ^ W[j-6] ^ W[j-12] ^ W[j-14]), 1);
+ W[j+3] = rotate_left((W[j ] ^ W[j-5] ^ W[j-11] ^ W[j-13]), 1);
+ }
+
+ u32bit A = digest[0], B = digest[1], C = digest[2],
+ D = digest[3], E = digest[4];
+
+ F1(A,B,C,D,E,W[ 0]); F1(E,A,B,C,D,W[ 1]); F1(D,E,A,B,C,W[ 2]);
+ F1(C,D,E,A,B,W[ 3]); F1(B,C,D,E,A,W[ 4]); F1(A,B,C,D,E,W[ 5]);
+ F1(E,A,B,C,D,W[ 6]); F1(D,E,A,B,C,W[ 7]); F1(C,D,E,A,B,W[ 8]);
+ F1(B,C,D,E,A,W[ 9]); F1(A,B,C,D,E,W[10]); F1(E,A,B,C,D,W[11]);
+ F1(D,E,A,B,C,W[12]); F1(C,D,E,A,B,W[13]); F1(B,C,D,E,A,W[14]);
+ F1(A,B,C,D,E,W[15]); F1(E,A,B,C,D,W[16]); F1(D,E,A,B,C,W[17]);
+ F1(C,D,E,A,B,W[18]); F1(B,C,D,E,A,W[19]);
+
+ F2(A,B,C,D,E,W[20]); F2(E,A,B,C,D,W[21]); F2(D,E,A,B,C,W[22]);
+ F2(C,D,E,A,B,W[23]); F2(B,C,D,E,A,W[24]); F2(A,B,C,D,E,W[25]);
+ F2(E,A,B,C,D,W[26]); F2(D,E,A,B,C,W[27]); F2(C,D,E,A,B,W[28]);
+ F2(B,C,D,E,A,W[29]); F2(A,B,C,D,E,W[30]); F2(E,A,B,C,D,W[31]);
+ F2(D,E,A,B,C,W[32]); F2(C,D,E,A,B,W[33]); F2(B,C,D,E,A,W[34]);
+ F2(A,B,C,D,E,W[35]); F2(E,A,B,C,D,W[36]); F2(D,E,A,B,C,W[37]);
+ F2(C,D,E,A,B,W[38]); F2(B,C,D,E,A,W[39]);
+
+ F3(A,B,C,D,E,W[40]); F3(E,A,B,C,D,W[41]); F3(D,E,A,B,C,W[42]);
+ F3(C,D,E,A,B,W[43]); F3(B,C,D,E,A,W[44]); F3(A,B,C,D,E,W[45]);
+ F3(E,A,B,C,D,W[46]); F3(D,E,A,B,C,W[47]); F3(C,D,E,A,B,W[48]);
+ F3(B,C,D,E,A,W[49]); F3(A,B,C,D,E,W[50]); F3(E,A,B,C,D,W[51]);
+ F3(D,E,A,B,C,W[52]); F3(C,D,E,A,B,W[53]); F3(B,C,D,E,A,W[54]);
+ F3(A,B,C,D,E,W[55]); F3(E,A,B,C,D,W[56]); F3(D,E,A,B,C,W[57]);
+ F3(C,D,E,A,B,W[58]); F3(B,C,D,E,A,W[59]);
+
+ F4(A,B,C,D,E,W[60]); F4(E,A,B,C,D,W[61]); F4(D,E,A,B,C,W[62]);
+ F4(C,D,E,A,B,W[63]); F4(B,C,D,E,A,W[64]); F4(A,B,C,D,E,W[65]);
+ F4(E,A,B,C,D,W[66]); F4(D,E,A,B,C,W[67]); F4(C,D,E,A,B,W[68]);
+ F4(B,C,D,E,A,W[69]); F4(A,B,C,D,E,W[70]); F4(E,A,B,C,D,W[71]);
+ F4(D,E,A,B,C,W[72]); F4(C,D,E,A,B,W[73]); F4(B,C,D,E,A,W[74]);
+ F4(A,B,C,D,E,W[75]); F4(E,A,B,C,D,W[76]); F4(D,E,A,B,C,W[77]);
+ F4(C,D,E,A,B,W[78]); F4(B,C,D,E,A,W[79]);
+
+ digest[0] += A; digest[1] += B; digest[2] += C;
+ digest[3] += D; digest[4] += E;
}
-
- for(u32bit j = 16; j != 80; j += 4)
- {
- W[j ] = rotate_left((W[j-3] ^ W[j-8] ^ W[j-14] ^ W[j-16]), 1);
- W[j+1] = rotate_left((W[j-2] ^ W[j-7] ^ W[j-13] ^ W[j-15]), 1);
- W[j+2] = rotate_left((W[j-1] ^ W[j-6] ^ W[j-12] ^ W[j-14]), 1);
- W[j+3] = rotate_left((W[j ] ^ W[j-5] ^ W[j-11] ^ W[j-13]), 1);
- }
-
- u32bit A = digest[0], B = digest[1], C = digest[2],
- D = digest[3], E = digest[4];
-
- F1(A,B,C,D,E,W[ 0]); F1(E,A,B,C,D,W[ 1]); F1(D,E,A,B,C,W[ 2]);
- F1(C,D,E,A,B,W[ 3]); F1(B,C,D,E,A,W[ 4]); F1(A,B,C,D,E,W[ 5]);
- F1(E,A,B,C,D,W[ 6]); F1(D,E,A,B,C,W[ 7]); F1(C,D,E,A,B,W[ 8]);
- F1(B,C,D,E,A,W[ 9]); F1(A,B,C,D,E,W[10]); F1(E,A,B,C,D,W[11]);
- F1(D,E,A,B,C,W[12]); F1(C,D,E,A,B,W[13]); F1(B,C,D,E,A,W[14]);
- F1(A,B,C,D,E,W[15]); F1(E,A,B,C,D,W[16]); F1(D,E,A,B,C,W[17]);
- F1(C,D,E,A,B,W[18]); F1(B,C,D,E,A,W[19]);
-
- F2(A,B,C,D,E,W[20]); F2(E,A,B,C,D,W[21]); F2(D,E,A,B,C,W[22]);
- F2(C,D,E,A,B,W[23]); F2(B,C,D,E,A,W[24]); F2(A,B,C,D,E,W[25]);
- F2(E,A,B,C,D,W[26]); F2(D,E,A,B,C,W[27]); F2(C,D,E,A,B,W[28]);
- F2(B,C,D,E,A,W[29]); F2(A,B,C,D,E,W[30]); F2(E,A,B,C,D,W[31]);
- F2(D,E,A,B,C,W[32]); F2(C,D,E,A,B,W[33]); F2(B,C,D,E,A,W[34]);
- F2(A,B,C,D,E,W[35]); F2(E,A,B,C,D,W[36]); F2(D,E,A,B,C,W[37]);
- F2(C,D,E,A,B,W[38]); F2(B,C,D,E,A,W[39]);
-
- F3(A,B,C,D,E,W[40]); F3(E,A,B,C,D,W[41]); F3(D,E,A,B,C,W[42]);
- F3(C,D,E,A,B,W[43]); F3(B,C,D,E,A,W[44]); F3(A,B,C,D,E,W[45]);
- F3(E,A,B,C,D,W[46]); F3(D,E,A,B,C,W[47]); F3(C,D,E,A,B,W[48]);
- F3(B,C,D,E,A,W[49]); F3(A,B,C,D,E,W[50]); F3(E,A,B,C,D,W[51]);
- F3(D,E,A,B,C,W[52]); F3(C,D,E,A,B,W[53]); F3(B,C,D,E,A,W[54]);
- F3(A,B,C,D,E,W[55]); F3(E,A,B,C,D,W[56]); F3(D,E,A,B,C,W[57]);
- F3(C,D,E,A,B,W[58]); F3(B,C,D,E,A,W[59]);
-
- F4(A,B,C,D,E,W[60]); F4(E,A,B,C,D,W[61]); F4(D,E,A,B,C,W[62]);
- F4(C,D,E,A,B,W[63]); F4(B,C,D,E,A,W[64]); F4(A,B,C,D,E,W[65]);
- F4(E,A,B,C,D,W[66]); F4(D,E,A,B,C,W[67]); F4(C,D,E,A,B,W[68]);
- F4(B,C,D,E,A,W[69]); F4(A,B,C,D,E,W[70]); F4(E,A,B,C,D,W[71]);
- F4(D,E,A,B,C,W[72]); F4(C,D,E,A,B,W[73]); F4(B,C,D,E,A,W[74]);
- F4(A,B,C,D,E,W[75]); F4(E,A,B,C,D,W[76]); F4(D,E,A,B,C,W[77]);
- F4(C,D,E,A,B,W[78]); F4(B,C,D,E,A,W[79]);
-
- digest[0] += A; digest[1] += B; digest[2] += C;
- digest[3] += D; digest[4] += E;
}
/*************************************************