aboutsummaryrefslogtreecommitdiffstats
path: root/src/hash/fork256
diff options
context:
space:
mode:
authorlloyd <[email protected]>2008-11-23 02:03:16 +0000
committerlloyd <[email protected]>2008-11-23 02:03:16 +0000
commitf4ad8ecfa14b59dbf06a9595f8dacccb3b7f04e3 (patch)
tree76077c951bb44373ad7c2ca632f2e24cf758e88f /src/hash/fork256
parent8ec4a1fe7e8af6f8223908cdf8fe7de39170fc2a (diff)
I had not anticipated this being really worthwhile, but it turns out
to have been so! Change MDx_HashFunction::hash to a new compress_n, which hashes an arbitrary number of blocks. I had thought this might reduce a bit of loop overhead, but the results were far better than I anticipated: a speedup across the board of about 2%, and very noticeable (+10%) increases for MD4 and Tiger (probably because both of those have so few instructions in each iteration of the compression function). Before: SHA-1: amd64: 211.9 MiB/s core: 210.0 MiB/s sse2: 295.2 MiB/s MD4: 476.2 MiB/s MD5: 355.2 MiB/s SHA-256: 99.8 MiB/s SHA-512: 151.4 MiB/s RIPEMD-128: 326.9 MiB/s RIPEMD-160: 225.1 MiB/s Tiger: 214.8 MiB/s Whirlpool: 38.4 MiB/s After: SHA-1: amd64: 215.6 MiB/s core: 213.8 MiB/s sse2: 299.9 MiB/s MD4: 528.4 MiB/s MD5: 368.8 MiB/s SHA-256: 103.9 MiB/s SHA-512: 156.8 MiB/s RIPEMD-128: 334.8 MiB/s RIPEMD-160: 229.7 MiB/s Tiger: 240.7 MiB/s Whirlpool: 38.6 MiB/s
Diffstat (limited to 'src/hash/fork256')
-rw-r--r--src/hash/fork256/fork256.cpp136
-rw-r--r--src/hash/fork256/fork256.h2
2 files changed, 71 insertions, 67 deletions
diff --git a/src/hash/fork256/fork256.cpp b/src/hash/fork256/fork256.cpp
index 5ac779cab..dc023004d 100644
--- a/src/hash/fork256/fork256.cpp
+++ b/src/hash/fork256/fork256.cpp
@@ -40,7 +40,7 @@ inline void step(u32bit& A, u32bit& B, u32bit& C, u32bit& D,
/*************************************************
* FORK-256 Compression Function *
*************************************************/
-void FORK_256::hash(const byte input[])
+void FORK_256::compress_n(const byte input[], u32bit blocks)
{
const u32bit DELTA[16] = {
0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x3956C25B, 0x59F111F1,
@@ -48,71 +48,75 @@ void FORK_256::hash(const byte input[])
0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174
};
- u32bit A1, B1, C1, D1, E1, F1, G1, H1;
- u32bit A2, B2, C2, D2, E2, F2, G2, H2;
- u32bit A3, B3, C3, D3, E3, F3, G3, H3;
- u32bit A4, B4, C4, D4, E4, F4, G4, H4;
-
- A1 = A2 = A3 = A4 = digest[0];
- B1 = B2 = B3 = B4 = digest[1];
- C1 = C2 = C3 = C4 = digest[2];
- D1 = D2 = D3 = D4 = digest[3];
- E1 = E2 = E3 = E4 = digest[4];
- F1 = F2 = F3 = F4 = digest[5];
- G1 = G2 = G3 = G4 = digest[6];
- H1 = H2 = H3 = H4 = digest[7];
-
- for(u32bit j = 0; j != 16; ++j)
- M[j] = load_be<u32bit>(input, j);
-
- step(A1, B1, C1, D1, E1, F1, G1, H1, M[ 0], M[ 1], DELTA[ 0], DELTA[ 1]);
- step(A2, B2, C2, D2, E2, F2, G2, H2, M[14], M[15], DELTA[15], DELTA[14]);
- step(A3, B3, C3, D3, E3, F3, G3, H3, M[ 7], M[ 6], DELTA[ 1], DELTA[ 0]);
- step(A4, B4, C4, D4, E4, F4, G4, H4, M[ 5], M[12], DELTA[14], DELTA[15]);
-
- step(H1, A1, B1, C1, D1, E1, F1, G1, M[ 2], M[ 3], DELTA[ 2], DELTA[ 3]);
- step(H2, A2, B2, C2, D2, E2, F2, G2, M[11], M[ 9], DELTA[13], DELTA[12]);
- step(H3, A3, B3, C3, D3, E3, F3, G3, M[10], M[14], DELTA[ 3], DELTA[ 2]);
- step(H4, A4, B4, C4, D4, E4, F4, G4, M[ 1], M[ 8], DELTA[12], DELTA[13]);
-
- step(G1, H1, A1, B1, C1, D1, E1, F1, M[ 4], M[ 5], DELTA[ 4], DELTA[ 5]);
- step(G2, H2, A2, B2, C2, D2, E2, F2, M[ 8], M[10], DELTA[11], DELTA[10]);
- step(G3, H3, A3, B3, C3, D3, E3, F3, M[13], M[ 2], DELTA[ 5], DELTA[ 4]);
- step(G4, H4, A4, B4, C4, D4, E4, F4, M[15], M[ 0], DELTA[10], DELTA[11]);
-
- step(F1, G1, H1, A1, B1, C1, D1, E1, M[ 6], M[ 7], DELTA[ 6], DELTA[ 7]);
- step(F2, G2, H2, A2, B2, C2, D2, E2, M[ 3], M[ 4], DELTA[ 9], DELTA[ 8]);
- step(F3, G3, H3, A3, B3, C3, D3, E3, M[ 9], M[12], DELTA[ 7], DELTA[ 6]);
- step(F4, G4, H4, A4, B4, C4, D4, E4, M[13], M[11], DELTA[ 8], DELTA[ 9]);
-
- step(E1, F1, G1, H1, A1, B1, C1, D1, M[ 8], M[ 9], DELTA[ 8], DELTA[ 9]);
- step(E2, F2, G2, H2, A2, B2, C2, D2, M[ 2], M[13], DELTA[ 7], DELTA[ 6]);
- step(E3, F3, G3, H3, A3, B3, C3, D3, M[11], M[ 4], DELTA[ 9], DELTA[ 8]);
- step(E4, F4, G4, H4, A4, B4, C4, D4, M[ 3], M[10], DELTA[ 6], DELTA[ 7]);
-
- step(D1, E1, F1, G1, H1, A1, B1, C1, M[10], M[11], DELTA[10], DELTA[11]);
- step(D2, E2, F2, G2, H2, A2, B2, C2, M[ 0], M[ 5], DELTA[ 5], DELTA[ 4]);
- step(D3, E3, F3, G3, H3, A3, B3, C3, M[15], M[ 8], DELTA[11], DELTA[10]);
- step(D4, E4, F4, G4, H4, A4, B4, C4, M[ 9], M[ 2], DELTA[ 4], DELTA[ 5]);
-
- step(C1, D1, E1, F1, G1, H1, A1, B1, M[12], M[13], DELTA[12], DELTA[13]);
- step(C2, D2, E2, F2, G2, H2, A2, B2, M[ 6], M[ 7], DELTA[ 3], DELTA[ 2]);
- step(C3, D3, E3, F3, G3, H3, A3, B3, M[ 5], M[ 0], DELTA[13], DELTA[12]);
- step(C4, D4, E4, F4, G4, H4, A4, B4, M[ 7], M[14], DELTA[ 2], DELTA[ 3]);
-
- step(B1, C1, D1, E1, F1, G1, H1, A1, M[14], M[15], DELTA[14], DELTA[15]);
- step(B2, C2, D2, E2, F2, G2, H2, A2, M[12], M[ 1], DELTA[ 1], DELTA[ 0]);
- step(B3, C3, D3, E3, F3, G3, H3, A3, M[ 1], M[ 3], DELTA[15], DELTA[14]);
- step(B4, C4, D4, E4, F4, G4, H4, A4, M[ 4], M[ 6], DELTA[ 0], DELTA[ 1]);
-
- digest[0] += (A1 + A2) ^ (A3 + A4);
- digest[1] += (B1 + B2) ^ (B3 + B4);
- digest[2] += (C1 + C2) ^ (C3 + C4);
- digest[3] += (D1 + D2) ^ (D3 + D4);
- digest[4] += (E1 + E2) ^ (E3 + E4);
- digest[5] += (F1 + F2) ^ (F3 + F4);
- digest[6] += (G1 + G2) ^ (G3 + G4);
- digest[7] += (H1 + H2) ^ (H3 + H4);
+ for(u32bit i = 0; i != blocks; ++i)
+ {
+ u32bit A1, B1, C1, D1, E1, F1, G1, H1;
+ u32bit A2, B2, C2, D2, E2, F2, G2, H2;
+ u32bit A3, B3, C3, D3, E3, F3, G3, H3;
+ u32bit A4, B4, C4, D4, E4, F4, G4, H4;
+
+ A1 = A2 = A3 = A4 = digest[0];
+ B1 = B2 = B3 = B4 = digest[1];
+ C1 = C2 = C3 = C4 = digest[2];
+ D1 = D2 = D3 = D4 = digest[3];
+ E1 = E2 = E3 = E4 = digest[4];
+ F1 = F2 = F3 = F4 = digest[5];
+ G1 = G2 = G3 = G4 = digest[6];
+ H1 = H2 = H3 = H4 = digest[7];
+
+ for(u32bit j = 0; j != 16; ++j)
+ M[j] = load_be<u32bit>(input, j);
+ input += HASH_BLOCK_SIZE;
+
+ step(A1, B1, C1, D1, E1, F1, G1, H1, M[ 0], M[ 1], DELTA[ 0], DELTA[ 1]);
+ step(A2, B2, C2, D2, E2, F2, G2, H2, M[14], M[15], DELTA[15], DELTA[14]);
+ step(A3, B3, C3, D3, E3, F3, G3, H3, M[ 7], M[ 6], DELTA[ 1], DELTA[ 0]);
+ step(A4, B4, C4, D4, E4, F4, G4, H4, M[ 5], M[12], DELTA[14], DELTA[15]);
+
+ step(H1, A1, B1, C1, D1, E1, F1, G1, M[ 2], M[ 3], DELTA[ 2], DELTA[ 3]);
+ step(H2, A2, B2, C2, D2, E2, F2, G2, M[11], M[ 9], DELTA[13], DELTA[12]);
+ step(H3, A3, B3, C3, D3, E3, F3, G3, M[10], M[14], DELTA[ 3], DELTA[ 2]);
+ step(H4, A4, B4, C4, D4, E4, F4, G4, M[ 1], M[ 8], DELTA[12], DELTA[13]);
+
+ step(G1, H1, A1, B1, C1, D1, E1, F1, M[ 4], M[ 5], DELTA[ 4], DELTA[ 5]);
+ step(G2, H2, A2, B2, C2, D2, E2, F2, M[ 8], M[10], DELTA[11], DELTA[10]);
+ step(G3, H3, A3, B3, C3, D3, E3, F3, M[13], M[ 2], DELTA[ 5], DELTA[ 4]);
+ step(G4, H4, A4, B4, C4, D4, E4, F4, M[15], M[ 0], DELTA[10], DELTA[11]);
+
+ step(F1, G1, H1, A1, B1, C1, D1, E1, M[ 6], M[ 7], DELTA[ 6], DELTA[ 7]);
+ step(F2, G2, H2, A2, B2, C2, D2, E2, M[ 3], M[ 4], DELTA[ 9], DELTA[ 8]);
+ step(F3, G3, H3, A3, B3, C3, D3, E3, M[ 9], M[12], DELTA[ 7], DELTA[ 6]);
+ step(F4, G4, H4, A4, B4, C4, D4, E4, M[13], M[11], DELTA[ 8], DELTA[ 9]);
+
+ step(E1, F1, G1, H1, A1, B1, C1, D1, M[ 8], M[ 9], DELTA[ 8], DELTA[ 9]);
+ step(E2, F2, G2, H2, A2, B2, C2, D2, M[ 2], M[13], DELTA[ 7], DELTA[ 6]);
+ step(E3, F3, G3, H3, A3, B3, C3, D3, M[11], M[ 4], DELTA[ 9], DELTA[ 8]);
+ step(E4, F4, G4, H4, A4, B4, C4, D4, M[ 3], M[10], DELTA[ 6], DELTA[ 7]);
+
+ step(D1, E1, F1, G1, H1, A1, B1, C1, M[10], M[11], DELTA[10], DELTA[11]);
+ step(D2, E2, F2, G2, H2, A2, B2, C2, M[ 0], M[ 5], DELTA[ 5], DELTA[ 4]);
+ step(D3, E3, F3, G3, H3, A3, B3, C3, M[15], M[ 8], DELTA[11], DELTA[10]);
+ step(D4, E4, F4, G4, H4, A4, B4, C4, M[ 9], M[ 2], DELTA[ 4], DELTA[ 5]);
+
+ step(C1, D1, E1, F1, G1, H1, A1, B1, M[12], M[13], DELTA[12], DELTA[13]);
+ step(C2, D2, E2, F2, G2, H2, A2, B2, M[ 6], M[ 7], DELTA[ 3], DELTA[ 2]);
+ step(C3, D3, E3, F3, G3, H3, A3, B3, M[ 5], M[ 0], DELTA[13], DELTA[12]);
+ step(C4, D4, E4, F4, G4, H4, A4, B4, M[ 7], M[14], DELTA[ 2], DELTA[ 3]);
+
+ step(B1, C1, D1, E1, F1, G1, H1, A1, M[14], M[15], DELTA[14], DELTA[15]);
+ step(B2, C2, D2, E2, F2, G2, H2, A2, M[12], M[ 1], DELTA[ 1], DELTA[ 0]);
+ step(B3, C3, D3, E3, F3, G3, H3, A3, M[ 1], M[ 3], DELTA[15], DELTA[14]);
+ step(B4, C4, D4, E4, F4, G4, H4, A4, M[ 4], M[ 6], DELTA[ 0], DELTA[ 1]);
+
+ digest[0] += (A1 + A2) ^ (A3 + A4);
+ digest[1] += (B1 + B2) ^ (B3 + B4);
+ digest[2] += (C1 + C2) ^ (C3 + C4);
+ digest[3] += (D1 + D2) ^ (D3 + D4);
+ digest[4] += (E1 + E2) ^ (E3 + E4);
+ digest[5] += (F1 + F2) ^ (F3 + F4);
+ digest[6] += (G1 + G2) ^ (G3 + G4);
+ digest[7] += (H1 + H2) ^ (H3 + H4);
+ }
}
/*************************************************
diff --git a/src/hash/fork256/fork256.h b/src/hash/fork256/fork256.h
index 8ad9c1f8d..d27e8693f 100644
--- a/src/hash/fork256/fork256.h
+++ b/src/hash/fork256/fork256.h
@@ -21,7 +21,7 @@ class BOTAN_DLL FORK_256 : public MDx_HashFunction
HashFunction* clone() const { return new FORK_256; }
FORK_256() : MDx_HashFunction(32, 64, true, true) { clear(); }
private:
- void hash(const byte[]);
+ void compress_n(const byte[], u32bit blocks);
void copy_out(byte[]);
SecureBuffer<u32bit, 8> digest;