diff options
author | lloyd <[email protected]> | 2008-03-09 19:10:57 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2008-03-09 19:10:57 +0000 |
commit | 78788d72f74ab1e6f0f3f37d4aa60a6df3d26bc8 (patch) | |
tree | e63ddbbc68d0c9ea3ec3f11b2b80c693ffc38bab /src | |
parent | f2bd1be860136bebd63f487b996f77f148c0aae3 (diff) |
Unroll the loops in SHA_160::hash that unpack the input and perform the
expansion. While I would prefer to have the compiler do this, using GCC 4.1.2
it is 4% faster on a Core2 Q6600 with the loops partially unrolled.
Diffstat (limited to 'src')
-rw-r--r-- | src/sha160.cpp | 19 |
1 files changed, 15 insertions, 4 deletions
diff --git a/src/sha160.cpp b/src/sha160.cpp index 359d0a790..5d9afb684 100644 --- a/src/sha160.cpp +++ b/src/sha160.cpp @@ -54,10 +54,21 @@ inline void F4(u32bit A, u32bit& B, u32bit C, u32bit D, u32bit& E, u32bit msg) *************************************************/ void SHA_160::hash(const byte input[]) { - for(u32bit j = 0; j != 16; ++j) - W[j] = load_be<u32bit>(input, j); - for(u32bit j = 16; j != 80; ++j) - W[j] = rotate_left((W[j-3] ^ W[j-8] ^ W[j-14] ^ W[j-16]), 1); + for(u32bit j = 0; j != 16; j += 4) + { + W[j ] = load_be<u32bit>(input, j); + W[j+1] = load_be<u32bit>(input, j+1); + W[j+2] = load_be<u32bit>(input, j+2); + W[j+3] = load_be<u32bit>(input, j+3); + } + + for(u32bit j = 16; j != 80; j += 4) + { + W[j ] = rotate_left((W[j-3] ^ W[j-8] ^ W[j-14] ^ W[j-16]), 1); + W[j+1] = rotate_left((W[j-2] ^ W[j-7] ^ W[j-13] ^ W[j-15]), 1); + W[j+2] = rotate_left((W[j-1] ^ W[j-6] ^ W[j-12] ^ W[j-14]), 1); + W[j+3] = rotate_left((W[j ] ^ W[j-5] ^ W[j-11] ^ W[j-13]), 1); + } u32bit A = digest[0], B = digest[1], C = digest[2], D = digest[3], E = digest[4]; |