about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: lloyd <[email protected]> 2009-10-29 07:52:39 +0000
committer: lloyd <[email protected]> 2009-10-29 07:52:39 +0000
commit: 073f9ebdb1463cac650ddf77ae1c034d79bbb166 (patch)
tree: 0bf5de8aa5809432cc8ab737bf09a21dd61527c2
parent: f14c48a80cdbd2d134bf706c631018bf683e44d9 (diff)
Unroll the expansion loop in both SHA-2 implementations by 8. On a Core2,
SHA-256 gets ~7% faster, SHA-512 ~10%.
-rw-r--r-- src/hash/sha2/sha2_32.cpp 28
-rw-r--r-- src/hash/sha2/sha2_64.cpp 14
2 files changed, 29 insertions, 13 deletions
diff --git a/src/hash/sha2/sha2_32.cpp b/src/hash/sha2/sha2_32.cpp
index 2a63eef25..89efab123 100644
--- a/src/hash/sha2/sha2_32.cpp
+++ b/src/hash/sha2/sha2_32.cpp
@@ -60,9 +60,17 @@ void SHA_224_256_BASE::compress_n(const byte input[], u32bit blocks)
W[j] = load_be<u32bit>(input, j);
input += HASH_BLOCK_SIZE;
- for(u32bit j = 16; j != 64; ++j)
- W[j] = sigma(W[j- 2], 17, 19, 10) + W[j- 7] +
- sigma(W[j-15], 7, 18, 3) + W[j-16];
+ for(u32bit j = 16; j != 64; j += 8)
+ {
+ W[j ] = sigma(W[j-2], 17, 19, 10) + W[j-7] + sigma(W[j-15], 7, 18, 3) + W[j-16];
+ W[j+1] = sigma(W[j-1], 17, 19, 10) + W[j-6] + sigma(W[j-14], 7, 18, 3) + W[j-15];
+ W[j+2] = sigma(W[j ], 17, 19, 10) + W[j-5] + sigma(W[j-13], 7, 18, 3) + W[j-14];
+ W[j+3] = sigma(W[j+1], 17, 19, 10) + W[j-4] + sigma(W[j-12], 7, 18, 3) + W[j-13];
+ W[j+4] = sigma(W[j+2], 17, 19, 10) + W[j-3] + sigma(W[j-11], 7, 18, 3) + W[j-12];
+ W[j+5] = sigma(W[j+3], 17, 19, 10) + W[j-2] + sigma(W[j-10], 7, 18, 3) + W[j-11];
+ W[j+6] = sigma(W[j+4], 17, 19, 10) + W[j-1] + sigma(W[j- 9], 7, 18, 3) + W[j-10];
+ W[j+7] = sigma(W[j+5], 17, 19, 10) + W[j ] + sigma(W[j- 8], 7, 18, 3) + W[j- 9];
+ }
F1(A, B, C, D, E, F, G, H, W[ 0], 0x428A2F98);
F1(H, A, B, C, D, E, F, G, W[ 1], 0x71374491);
@@ -164,14 +172,14 @@ void SHA_224_256_BASE::clear()
void SHA_224::clear()
{
SHA_224_256_BASE::clear();
- digest[0] = 0xc1059ed8;
- digest[1] = 0x367cd507;
- digest[2] = 0x3070dd17;
- digest[3] = 0xf70e5939;
- digest[4] = 0xffc00b31;
+ digest[0] = 0xC1059ED8;
+ digest[1] = 0x367CD507;
+ digest[2] = 0x3070DD17;
+ digest[3] = 0xF70E5939;
+ digest[4] = 0xFFC00B31;
digest[5] = 0x68581511;
- digest[6] = 0x64f98fa7;
- digest[7] = 0xbefa4fa4;
+ digest[6] = 0x64F98FA7;
+ digest[7] = 0xBEFA4FA4;
}
/*
diff --git a/src/hash/sha2/sha2_64.cpp b/src/hash/sha2/sha2_64.cpp
index 3c771eb44..e260d8338 100644
--- a/src/hash/sha2/sha2_64.cpp
+++ b/src/hash/sha2/sha2_64.cpp
@@ -59,9 +59,17 @@ void SHA_384_512_BASE::compress_n(const byte input[], u32bit blocks)
W[j] = load_be<u64bit>(input, j);
input += HASH_BLOCK_SIZE;
- for(u32bit j = 16; j != 80; ++j)
- W[j] = sigma(W[j- 2], 19, 61, 6) + W[j- 7] +
- sigma(W[j-15], 1, 8, 7) + W[j-16];
+ for(u32bit j = 16; j != 80; j += 8)
+ {
+ W[j ] = sigma(W[j-2], 19, 61, 6) + W[j-7] + sigma(W[j-15], 1, 8, 7) + W[j-16];
+ W[j+1] = sigma(W[j-1], 19, 61, 6) + W[j-6] + sigma(W[j-14], 1, 8, 7) + W[j-15];
+ W[j+2] = sigma(W[j ], 19, 61, 6) + W[j-5] + sigma(W[j-13], 1, 8, 7) + W[j-14];
+ W[j+3] = sigma(W[j+1], 19, 61, 6) + W[j-4] + sigma(W[j-12], 1, 8, 7) + W[j-13];
+ W[j+4] = sigma(W[j+2], 19, 61, 6) + W[j-3] + sigma(W[j-11], 1, 8, 7) + W[j-12];
+ W[j+5] = sigma(W[j+3], 19, 61, 6) + W[j-2] + sigma(W[j-10], 1, 8, 7) + W[j-11];
+ W[j+6] = sigma(W[j+4], 19, 61, 6) + W[j-1] + sigma(W[j- 9], 1, 8, 7) + W[j-10];
+ W[j+7] = sigma(W[j+5], 19, 61, 6) + W[j ] + sigma(W[j- 8], 1, 8, 7) + W[j- 9];
+ }
F1(A, B, C, D, E, F, G, H, W[ 0], 0x428A2F98D728AE22);
F1(H, A, B, C, D, E, F, G, W[ 1], 0x7137449123EF65CD);