diff options
author | lloyd <[email protected]> | 2008-11-23 02:03:16 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2008-11-23 02:03:16 +0000 |
commit | f4ad8ecfa14b59dbf06a9595f8dacccb3b7f04e3 (patch) | |
tree | 76077c951bb44373ad7c2ca632f2e24cf758e88f /src/hash/sha2/sha2_64.cpp | |
parent | 8ec4a1fe7e8af6f8223908cdf8fe7de39170fc2a (diff) |
I had not anticipated this being really worthwhile, but it turns out
to have been so! Change MDx_HashFunction::hash to a new compress_n
which hashes an arbitrary number of blocks. I had thought this might
reduce a bit of loop overhead, but the results were far better than I
anticipated. Speedup across the board of about 2%, and very
noticeable (+10%) increases for MD4 and Tiger (probably because both
of those have so few instructions in each iteration of the
compression function).
Before:
SHA-1:
amd64: 211.9 MiB/s
core: 210.0 MiB/s
sse2: 295.2 MiB/s
MD4: 476.2 MiB/s
MD5: 355.2 MiB/s
SHA-256: 99.8 MiB/s
SHA-512: 151.4 MiB/s
RIPEMD-128: 326.9 MiB/s
RIPEMD-160: 225.1 MiB/s
Tiger: 214.8 MiB/s
Whirlpool: 38.4 MiB/s
After:
SHA-1:
amd64: 215.6 MiB/s
core: 213.8 MiB/s
sse2: 299.9 MiB/s
MD4: 528.4 MiB/s
MD5: 368.8 MiB/s
SHA-256: 103.9 MiB/s
SHA-512: 156.8 MiB/s
RIPEMD-128: 334.8 MiB/s
RIPEMD-160: 229.7 MiB/s
Tiger: 240.7 MiB/s
Whirlpool: 38.6 MiB/s
Diffstat (limited to 'src/hash/sha2/sha2_64.cpp')
-rw-r--r-- | src/hash/sha2/sha2_64.cpp | 196 |
1 files changed, 100 insertions, 96 deletions
diff --git a/src/hash/sha2/sha2_64.cpp b/src/hash/sha2/sha2_64.cpp index 809111d5e..73333af79 100644 --- a/src/hash/sha2/sha2_64.cpp +++ b/src/hash/sha2/sha2_64.cpp @@ -45,103 +45,107 @@ inline u64bit sigma(u64bit X, u32bit rot1, u32bit rot2, u32bit shift) /************************************************* * SHA-{384,512} Compression Function * *************************************************/ -void SHA_384_512_BASE::hash(const byte input[]) +void SHA_384_512_BASE::compress_n(const byte input[], u32bit blocks) { - for(u32bit j = 0; j != 16; ++j) - W[j] = load_be<u64bit>(input, j); - - for(u32bit j = 16; j != 80; ++j) - W[j] = sigma(W[j- 2], 19, 61, 6) + W[j- 7] + - sigma(W[j-15], 1, 8, 7) + W[j-16]; - - u64bit A = digest[0], B = digest[1], C = digest[2], - D = digest[3], E = digest[4], F = digest[5], - G = digest[6], H = digest[7]; - - F1(A,B,C,D,E,F,G,H,W[ 0],0x428A2F98D728AE22); - F1(H,A,B,C,D,E,F,G,W[ 1],0x7137449123EF65CD); - F1(G,H,A,B,C,D,E,F,W[ 2],0xB5C0FBCFEC4D3B2F); - F1(F,G,H,A,B,C,D,E,W[ 3],0xE9B5DBA58189DBBC); - F1(E,F,G,H,A,B,C,D,W[ 4],0x3956C25BF348B538); - F1(D,E,F,G,H,A,B,C,W[ 5],0x59F111F1B605D019); - F1(C,D,E,F,G,H,A,B,W[ 6],0x923F82A4AF194F9B); - F1(B,C,D,E,F,G,H,A,W[ 7],0xAB1C5ED5DA6D8118); - F1(A,B,C,D,E,F,G,H,W[ 8],0xD807AA98A3030242); - F1(H,A,B,C,D,E,F,G,W[ 9],0x12835B0145706FBE); - F1(G,H,A,B,C,D,E,F,W[10],0x243185BE4EE4B28C); - F1(F,G,H,A,B,C,D,E,W[11],0x550C7DC3D5FFB4E2); - F1(E,F,G,H,A,B,C,D,W[12],0x72BE5D74F27B896F); - F1(D,E,F,G,H,A,B,C,W[13],0x80DEB1FE3B1696B1); - F1(C,D,E,F,G,H,A,B,W[14],0x9BDC06A725C71235); - F1(B,C,D,E,F,G,H,A,W[15],0xC19BF174CF692694); - F1(A,B,C,D,E,F,G,H,W[16],0xE49B69C19EF14AD2); - F1(H,A,B,C,D,E,F,G,W[17],0xEFBE4786384F25E3); - F1(G,H,A,B,C,D,E,F,W[18],0x0FC19DC68B8CD5B5); - F1(F,G,H,A,B,C,D,E,W[19],0x240CA1CC77AC9C65); - F1(E,F,G,H,A,B,C,D,W[20],0x2DE92C6F592B0275); - F1(D,E,F,G,H,A,B,C,W[21],0x4A7484AA6EA6E483); - F1(C,D,E,F,G,H,A,B,W[22],0x5CB0A9DCBD41FBD4); - F1(B,C,D,E,F,G,H,A,W[23],0x76F988DA831153B5); 
- F1(A,B,C,D,E,F,G,H,W[24],0x983E5152EE66DFAB); - F1(H,A,B,C,D,E,F,G,W[25],0xA831C66D2DB43210); - F1(G,H,A,B,C,D,E,F,W[26],0xB00327C898FB213F); - F1(F,G,H,A,B,C,D,E,W[27],0xBF597FC7BEEF0EE4); - F1(E,F,G,H,A,B,C,D,W[28],0xC6E00BF33DA88FC2); - F1(D,E,F,G,H,A,B,C,W[29],0xD5A79147930AA725); - F1(C,D,E,F,G,H,A,B,W[30],0x06CA6351E003826F); - F1(B,C,D,E,F,G,H,A,W[31],0x142929670A0E6E70); - F1(A,B,C,D,E,F,G,H,W[32],0x27B70A8546D22FFC); - F1(H,A,B,C,D,E,F,G,W[33],0x2E1B21385C26C926); - F1(G,H,A,B,C,D,E,F,W[34],0x4D2C6DFC5AC42AED); - F1(F,G,H,A,B,C,D,E,W[35],0x53380D139D95B3DF); - F1(E,F,G,H,A,B,C,D,W[36],0x650A73548BAF63DE); - F1(D,E,F,G,H,A,B,C,W[37],0x766A0ABB3C77B2A8); - F1(C,D,E,F,G,H,A,B,W[38],0x81C2C92E47EDAEE6); - F1(B,C,D,E,F,G,H,A,W[39],0x92722C851482353B); - F1(A,B,C,D,E,F,G,H,W[40],0xA2BFE8A14CF10364); - F1(H,A,B,C,D,E,F,G,W[41],0xA81A664BBC423001); - F1(G,H,A,B,C,D,E,F,W[42],0xC24B8B70D0F89791); - F1(F,G,H,A,B,C,D,E,W[43],0xC76C51A30654BE30); - F1(E,F,G,H,A,B,C,D,W[44],0xD192E819D6EF5218); - F1(D,E,F,G,H,A,B,C,W[45],0xD69906245565A910); - F1(C,D,E,F,G,H,A,B,W[46],0xF40E35855771202A); - F1(B,C,D,E,F,G,H,A,W[47],0x106AA07032BBD1B8); - F1(A,B,C,D,E,F,G,H,W[48],0x19A4C116B8D2D0C8); - F1(H,A,B,C,D,E,F,G,W[49],0x1E376C085141AB53); - F1(G,H,A,B,C,D,E,F,W[50],0x2748774CDF8EEB99); - F1(F,G,H,A,B,C,D,E,W[51],0x34B0BCB5E19B48A8); - F1(E,F,G,H,A,B,C,D,W[52],0x391C0CB3C5C95A63); - F1(D,E,F,G,H,A,B,C,W[53],0x4ED8AA4AE3418ACB); - F1(C,D,E,F,G,H,A,B,W[54],0x5B9CCA4F7763E373); - F1(B,C,D,E,F,G,H,A,W[55],0x682E6FF3D6B2B8A3); - F1(A,B,C,D,E,F,G,H,W[56],0x748F82EE5DEFB2FC); - F1(H,A,B,C,D,E,F,G,W[57],0x78A5636F43172F60); - F1(G,H,A,B,C,D,E,F,W[58],0x84C87814A1F0AB72); - F1(F,G,H,A,B,C,D,E,W[59],0x8CC702081A6439EC); - F1(E,F,G,H,A,B,C,D,W[60],0x90BEFFFA23631E28); - F1(D,E,F,G,H,A,B,C,W[61],0xA4506CEBDE82BDE9); - F1(C,D,E,F,G,H,A,B,W[62],0xBEF9A3F7B2C67915); - F1(B,C,D,E,F,G,H,A,W[63],0xC67178F2E372532B); - F1(A,B,C,D,E,F,G,H,W[64],0xCA273ECEEA26619C); - 
F1(H,A,B,C,D,E,F,G,W[65],0xD186B8C721C0C207); - F1(G,H,A,B,C,D,E,F,W[66],0xEADA7DD6CDE0EB1E); - F1(F,G,H,A,B,C,D,E,W[67],0xF57D4F7FEE6ED178); - F1(E,F,G,H,A,B,C,D,W[68],0x06F067AA72176FBA); - F1(D,E,F,G,H,A,B,C,W[69],0x0A637DC5A2C898A6); - F1(C,D,E,F,G,H,A,B,W[70],0x113F9804BEF90DAE); - F1(B,C,D,E,F,G,H,A,W[71],0x1B710B35131C471B); - F1(A,B,C,D,E,F,G,H,W[72],0x28DB77F523047D84); - F1(H,A,B,C,D,E,F,G,W[73],0x32CAAB7B40C72493); - F1(G,H,A,B,C,D,E,F,W[74],0x3C9EBE0A15C9BEBC); - F1(F,G,H,A,B,C,D,E,W[75],0x431D67C49C100D4C); - F1(E,F,G,H,A,B,C,D,W[76],0x4CC5D4BECB3E42B6); - F1(D,E,F,G,H,A,B,C,W[77],0x597F299CFC657E2A); - F1(C,D,E,F,G,H,A,B,W[78],0x5FCB6FAB3AD6FAEC); - F1(B,C,D,E,F,G,H,A,W[79],0x6C44198C4A475817); - - digest[0] += A; digest[1] += B; digest[2] += C; - digest[3] += D; digest[4] += E; digest[5] += F; - digest[6] += G; digest[7] += H; + for(u32bit i = 0; i != blocks; ++i) + { + for(u32bit j = 0; j != 16; ++j) + W[j] = load_be<u64bit>(input, j); + input += HASH_BLOCK_SIZE; + + for(u32bit j = 16; j != 80; ++j) + W[j] = sigma(W[j- 2], 19, 61, 6) + W[j- 7] + + sigma(W[j-15], 1, 8, 7) + W[j-16]; + + u64bit A = digest[0], B = digest[1], C = digest[2], + D = digest[3], E = digest[4], F = digest[5], + G = digest[6], H = digest[7]; + + F1(A,B,C,D,E,F,G,H,W[ 0],0x428A2F98D728AE22); + F1(H,A,B,C,D,E,F,G,W[ 1],0x7137449123EF65CD); + F1(G,H,A,B,C,D,E,F,W[ 2],0xB5C0FBCFEC4D3B2F); + F1(F,G,H,A,B,C,D,E,W[ 3],0xE9B5DBA58189DBBC); + F1(E,F,G,H,A,B,C,D,W[ 4],0x3956C25BF348B538); + F1(D,E,F,G,H,A,B,C,W[ 5],0x59F111F1B605D019); + F1(C,D,E,F,G,H,A,B,W[ 6],0x923F82A4AF194F9B); + F1(B,C,D,E,F,G,H,A,W[ 7],0xAB1C5ED5DA6D8118); + F1(A,B,C,D,E,F,G,H,W[ 8],0xD807AA98A3030242); + F1(H,A,B,C,D,E,F,G,W[ 9],0x12835B0145706FBE); + F1(G,H,A,B,C,D,E,F,W[10],0x243185BE4EE4B28C); + F1(F,G,H,A,B,C,D,E,W[11],0x550C7DC3D5FFB4E2); + F1(E,F,G,H,A,B,C,D,W[12],0x72BE5D74F27B896F); + F1(D,E,F,G,H,A,B,C,W[13],0x80DEB1FE3B1696B1); + F1(C,D,E,F,G,H,A,B,W[14],0x9BDC06A725C71235); + 
F1(B,C,D,E,F,G,H,A,W[15],0xC19BF174CF692694); + F1(A,B,C,D,E,F,G,H,W[16],0xE49B69C19EF14AD2); + F1(H,A,B,C,D,E,F,G,W[17],0xEFBE4786384F25E3); + F1(G,H,A,B,C,D,E,F,W[18],0x0FC19DC68B8CD5B5); + F1(F,G,H,A,B,C,D,E,W[19],0x240CA1CC77AC9C65); + F1(E,F,G,H,A,B,C,D,W[20],0x2DE92C6F592B0275); + F1(D,E,F,G,H,A,B,C,W[21],0x4A7484AA6EA6E483); + F1(C,D,E,F,G,H,A,B,W[22],0x5CB0A9DCBD41FBD4); + F1(B,C,D,E,F,G,H,A,W[23],0x76F988DA831153B5); + F1(A,B,C,D,E,F,G,H,W[24],0x983E5152EE66DFAB); + F1(H,A,B,C,D,E,F,G,W[25],0xA831C66D2DB43210); + F1(G,H,A,B,C,D,E,F,W[26],0xB00327C898FB213F); + F1(F,G,H,A,B,C,D,E,W[27],0xBF597FC7BEEF0EE4); + F1(E,F,G,H,A,B,C,D,W[28],0xC6E00BF33DA88FC2); + F1(D,E,F,G,H,A,B,C,W[29],0xD5A79147930AA725); + F1(C,D,E,F,G,H,A,B,W[30],0x06CA6351E003826F); + F1(B,C,D,E,F,G,H,A,W[31],0x142929670A0E6E70); + F1(A,B,C,D,E,F,G,H,W[32],0x27B70A8546D22FFC); + F1(H,A,B,C,D,E,F,G,W[33],0x2E1B21385C26C926); + F1(G,H,A,B,C,D,E,F,W[34],0x4D2C6DFC5AC42AED); + F1(F,G,H,A,B,C,D,E,W[35],0x53380D139D95B3DF); + F1(E,F,G,H,A,B,C,D,W[36],0x650A73548BAF63DE); + F1(D,E,F,G,H,A,B,C,W[37],0x766A0ABB3C77B2A8); + F1(C,D,E,F,G,H,A,B,W[38],0x81C2C92E47EDAEE6); + F1(B,C,D,E,F,G,H,A,W[39],0x92722C851482353B); + F1(A,B,C,D,E,F,G,H,W[40],0xA2BFE8A14CF10364); + F1(H,A,B,C,D,E,F,G,W[41],0xA81A664BBC423001); + F1(G,H,A,B,C,D,E,F,W[42],0xC24B8B70D0F89791); + F1(F,G,H,A,B,C,D,E,W[43],0xC76C51A30654BE30); + F1(E,F,G,H,A,B,C,D,W[44],0xD192E819D6EF5218); + F1(D,E,F,G,H,A,B,C,W[45],0xD69906245565A910); + F1(C,D,E,F,G,H,A,B,W[46],0xF40E35855771202A); + F1(B,C,D,E,F,G,H,A,W[47],0x106AA07032BBD1B8); + F1(A,B,C,D,E,F,G,H,W[48],0x19A4C116B8D2D0C8); + F1(H,A,B,C,D,E,F,G,W[49],0x1E376C085141AB53); + F1(G,H,A,B,C,D,E,F,W[50],0x2748774CDF8EEB99); + F1(F,G,H,A,B,C,D,E,W[51],0x34B0BCB5E19B48A8); + F1(E,F,G,H,A,B,C,D,W[52],0x391C0CB3C5C95A63); + F1(D,E,F,G,H,A,B,C,W[53],0x4ED8AA4AE3418ACB); + F1(C,D,E,F,G,H,A,B,W[54],0x5B9CCA4F7763E373); + F1(B,C,D,E,F,G,H,A,W[55],0x682E6FF3D6B2B8A3); + 
F1(A,B,C,D,E,F,G,H,W[56],0x748F82EE5DEFB2FC); + F1(H,A,B,C,D,E,F,G,W[57],0x78A5636F43172F60); + F1(G,H,A,B,C,D,E,F,W[58],0x84C87814A1F0AB72); + F1(F,G,H,A,B,C,D,E,W[59],0x8CC702081A6439EC); + F1(E,F,G,H,A,B,C,D,W[60],0x90BEFFFA23631E28); + F1(D,E,F,G,H,A,B,C,W[61],0xA4506CEBDE82BDE9); + F1(C,D,E,F,G,H,A,B,W[62],0xBEF9A3F7B2C67915); + F1(B,C,D,E,F,G,H,A,W[63],0xC67178F2E372532B); + F1(A,B,C,D,E,F,G,H,W[64],0xCA273ECEEA26619C); + F1(H,A,B,C,D,E,F,G,W[65],0xD186B8C721C0C207); + F1(G,H,A,B,C,D,E,F,W[66],0xEADA7DD6CDE0EB1E); + F1(F,G,H,A,B,C,D,E,W[67],0xF57D4F7FEE6ED178); + F1(E,F,G,H,A,B,C,D,W[68],0x06F067AA72176FBA); + F1(D,E,F,G,H,A,B,C,W[69],0x0A637DC5A2C898A6); + F1(C,D,E,F,G,H,A,B,W[70],0x113F9804BEF90DAE); + F1(B,C,D,E,F,G,H,A,W[71],0x1B710B35131C471B); + F1(A,B,C,D,E,F,G,H,W[72],0x28DB77F523047D84); + F1(H,A,B,C,D,E,F,G,W[73],0x32CAAB7B40C72493); + F1(G,H,A,B,C,D,E,F,W[74],0x3C9EBE0A15C9BEBC); + F1(F,G,H,A,B,C,D,E,W[75],0x431D67C49C100D4C); + F1(E,F,G,H,A,B,C,D,W[76],0x4CC5D4BECB3E42B6); + F1(D,E,F,G,H,A,B,C,W[77],0x597F299CFC657E2A); + F1(C,D,E,F,G,H,A,B,W[78],0x5FCB6FAB3AD6FAEC); + F1(B,C,D,E,F,G,H,A,W[79],0x6C44198C4A475817); + + digest[0] += A; digest[1] += B; digest[2] += C; + digest[3] += D; digest[4] += E; digest[5] += F; + digest[6] += G; digest[7] += H; + } } /************************************************* |