diff options
author | lloyd <[email protected]> | 2009-10-29 07:52:39 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2009-10-29 07:52:39 +0000 |
commit | 073f9ebdb1463cac650ddf77ae1c034d79bbb166 (patch) | |
tree | 0bf5de8aa5809432cc8ab737bf09a21dd61527c2 /src/hash/sha2 | |
parent | f14c48a80cdbd2d134bf706c631018bf683e44d9 (diff) |
Unroll the message-schedule expansion loop in both SHA-2 implementations by a
factor of 8. On a Core2, SHA-256 gets ~7% faster and SHA-512 gets ~10% faster.
Diffstat (limited to 'src/hash/sha2')
-rw-r--r-- | src/hash/sha2/sha2_32.cpp | 28 | ||||
-rw-r--r-- | src/hash/sha2/sha2_64.cpp | 14 |
2 files changed, 29 insertions, 13 deletions
```diff
diff --git a/src/hash/sha2/sha2_32.cpp b/src/hash/sha2/sha2_32.cpp
index 2a63eef25..89efab123 100644
--- a/src/hash/sha2/sha2_32.cpp
+++ b/src/hash/sha2/sha2_32.cpp
@@ -60,9 +60,17 @@ void SHA_224_256_BASE::compress_n(const byte input[], u32bit blocks)
          W[j] = load_be<u32bit>(input, j);
       input += HASH_BLOCK_SIZE;

-      for(u32bit j = 16; j != 64; ++j)
-         W[j] = sigma(W[j- 2], 17, 19, 10) + W[j- 7] +
-                sigma(W[j-15], 7, 18, 3) + W[j-16];
+      for(u32bit j = 16; j != 64; j += 8)
+         {
+         W[j ] = sigma(W[j-2], 17, 19, 10) + W[j-7] + sigma(W[j-15], 7, 18, 3) + W[j-16];
+         W[j+1] = sigma(W[j-1], 17, 19, 10) + W[j-6] + sigma(W[j-14], 7, 18, 3) + W[j-15];
+         W[j+2] = sigma(W[j ], 17, 19, 10) + W[j-5] + sigma(W[j-13], 7, 18, 3) + W[j-14];
+         W[j+3] = sigma(W[j+1], 17, 19, 10) + W[j-4] + sigma(W[j-12], 7, 18, 3) + W[j-13];
+         W[j+4] = sigma(W[j+2], 17, 19, 10) + W[j-3] + sigma(W[j-11], 7, 18, 3) + W[j-12];
+         W[j+5] = sigma(W[j+3], 17, 19, 10) + W[j-2] + sigma(W[j-10], 7, 18, 3) + W[j-11];
+         W[j+6] = sigma(W[j+4], 17, 19, 10) + W[j-1] + sigma(W[j- 9], 7, 18, 3) + W[j-10];
+         W[j+7] = sigma(W[j+5], 17, 19, 10) + W[j ] + sigma(W[j- 8], 7, 18, 3) + W[j- 9];
+         }

       F1(A, B, C, D, E, F, G, H, W[ 0], 0x428A2F98);
       F1(H, A, B, C, D, E, F, G, W[ 1], 0x71374491);
@@ -164,14 +172,14 @@ void SHA_224_256_BASE::clear()
 void SHA_224::clear()
    {
    SHA_224_256_BASE::clear();
-   digest[0] = 0xc1059ed8;
-   digest[1] = 0x367cd507;
-   digest[2] = 0x3070dd17;
-   digest[3] = 0xf70e5939;
-   digest[4] = 0xffc00b31;
+   digest[0] = 0xC1059ED8;
+   digest[1] = 0x367CD507;
+   digest[2] = 0x3070DD17;
+   digest[3] = 0xF70E5939;
+   digest[4] = 0xFFC00B31;
    digest[5] = 0x68581511;
-   digest[6] = 0x64f98fa7;
-   digest[7] = 0xbefa4fa4;
+   digest[6] = 0x64F98FA7;
+   digest[7] = 0xBEFA4FA4;
    }

 /*
diff --git a/src/hash/sha2/sha2_64.cpp b/src/hash/sha2/sha2_64.cpp
index 3c771eb44..e260d8338 100644
--- a/src/hash/sha2/sha2_64.cpp
+++ b/src/hash/sha2/sha2_64.cpp
@@ -59,9 +59,17 @@ void SHA_384_512_BASE::compress_n(const byte input[], u32bit blocks)
          W[j] = load_be<u64bit>(input, j);
       input += HASH_BLOCK_SIZE;

-      for(u32bit j = 16; j != 80; ++j)
-         W[j] = sigma(W[j- 2], 19, 61, 6) + W[j- 7] +
-                sigma(W[j-15], 1, 8, 7) + W[j-16];
+      for(u32bit j = 16; j != 80; j += 8)
+         {
+         W[j ] = sigma(W[j-2], 19, 61, 6) + W[j-7] + sigma(W[j-15], 1, 8, 7) + W[j-16];
+         W[j+1] = sigma(W[j-1], 19, 61, 6) + W[j-6] + sigma(W[j-14], 1, 8, 7) + W[j-15];
+         W[j+2] = sigma(W[j ], 19, 61, 6) + W[j-5] + sigma(W[j-13], 1, 8, 7) + W[j-14];
+         W[j+3] = sigma(W[j+1], 19, 61, 6) + W[j-4] + sigma(W[j-12], 1, 8, 7) + W[j-13];
+         W[j+4] = sigma(W[j+2], 19, 61, 6) + W[j-3] + sigma(W[j-11], 1, 8, 7) + W[j-12];
+         W[j+5] = sigma(W[j+3], 19, 61, 6) + W[j-2] + sigma(W[j-10], 1, 8, 7) + W[j-11];
+         W[j+6] = sigma(W[j+4], 19, 61, 6) + W[j-1] + sigma(W[j- 9], 1, 8, 7) + W[j-10];
+         W[j+7] = sigma(W[j+5], 19, 61, 6) + W[j ] + sigma(W[j- 8], 1, 8, 7) + W[j- 9];
+         }

       F1(A, B, C, D, E, F, G, H, W[ 0], 0x428A2F98D728AE22);
       F1(H, A, B, C, D, E, F, G, W[ 1], 0x7137449123EF65CD);
```