aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorlloyd <[email protected]>2008-03-09 19:10:57 +0000
committerlloyd <[email protected]>2008-03-09 19:10:57 +0000
commit78788d72f74ab1e6f0f3f37d4aa60a6df3d26bc8 (patch)
treee63ddbbc68d0c9ea3ec3f11b2b80c693ffc38bab
parentf2bd1be860136bebd63f487b996f77f148c0aae3 (diff)
Unroll the loops in SHA_160::hash that unpack the input and perform the
expansion. While I would prefer to have the compiler do this, using GCC 4.1.2 it is 4% faster on a Core2 Q6600 with the loops partially unrolled.
-rw-r--r--src/sha160.cpp19
1 files changed, 15 insertions, 4 deletions
diff --git a/src/sha160.cpp b/src/sha160.cpp
index 359d0a790..5d9afb684 100644
--- a/src/sha160.cpp
+++ b/src/sha160.cpp
@@ -54,10 +54,21 @@ inline void F4(u32bit A, u32bit& B, u32bit C, u32bit D, u32bit& E, u32bit msg)
*************************************************/
void SHA_160::hash(const byte input[])
{
- for(u32bit j = 0; j != 16; ++j)
- W[j] = load_be<u32bit>(input, j);
- for(u32bit j = 16; j != 80; ++j)
- W[j] = rotate_left((W[j-3] ^ W[j-8] ^ W[j-14] ^ W[j-16]), 1);
+ for(u32bit j = 0; j != 16; j += 4)
+ {
+ W[j ] = load_be<u32bit>(input, j);
+ W[j+1] = load_be<u32bit>(input, j+1);
+ W[j+2] = load_be<u32bit>(input, j+2);
+ W[j+3] = load_be<u32bit>(input, j+3);
+ }
+
+ for(u32bit j = 16; j != 80; j += 4)
+ {
+ W[j ] = rotate_left((W[j-3] ^ W[j-8] ^ W[j-14] ^ W[j-16]), 1);
+ W[j+1] = rotate_left((W[j-2] ^ W[j-7] ^ W[j-13] ^ W[j-15]), 1);
+ W[j+2] = rotate_left((W[j-1] ^ W[j-6] ^ W[j-12] ^ W[j-14]), 1);
+ W[j+3] = rotate_left((W[j ] ^ W[j-5] ^ W[j-11] ^ W[j-13]), 1);
+ }
u32bit A = digest[0], B = digest[1], C = digest[2],
D = digest[3], E = digest[4];