aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author lloyd <[email protected]> 2009-10-29 17:34:52 +0000
committer lloyd <[email protected]> 2009-10-29 17:34:52 +0000
commit 7462977b8e5eb95a81a6253dc6e6224334ad6ae9 (patch)
tree d14b468f7a04b02635b11bda017cca56259275b5 /src
parent 5553c5cf54563280a4ffc94baab7b94a83cb0000 (diff)
Add new looping load_be / load_le overloads for loading large arrays at once, and
change some of the hash functions to use them as low-hanging fruit. They could probably use further optimization (they currently just unroll x4), but merely having this as syntax is good, since it allows optimizing many functions at once (e.g. using SSE2 to do 4-way byteswaps).
Diffstat (limited to 'src')
-rw-r--r-- src/hash/bmw/bmw_512.cpp 3
-rw-r--r-- src/hash/has160/has160.cpp 18
-rw-r--r-- src/hash/md4/md4.cpp 1
-rw-r--r-- src/hash/md5/md5.cpp 6
-rw-r--r-- src/hash/rmd128/rmd128.cpp 22
-rw-r--r-- src/hash/rmd160/rmd160.cpp 6
-rw-r--r-- src/hash/sha1/sha160.cpp 11
-rw-r--r-- src/hash/sha2/sha2_32.cpp 30
-rw-r--r-- src/hash/skein/skein_512.cpp 4
-rw-r--r-- src/hash/tiger/tiger.cpp 6
-rw-r--r-- src/utils/loadstor.h 46
11 files changed, 104 insertions, 49 deletions
diff --git a/src/hash/bmw/bmw_512.cpp b/src/hash/bmw/bmw_512.cpp
index ae80d5725..5ccb09579 100644
--- a/src/hash/bmw/bmw_512.cpp
+++ b/src/hash/bmw/bmw_512.cpp
@@ -143,8 +143,7 @@ void BMW_512::compress_n(const byte input[], u32bit blocks)
{
for(u32bit i = 0; i != blocks; ++i)
{
- for(u32bit j = 0; j != 16; ++j)
- M[j] = load_le<u64bit>(input, j);
+ load_le(M.begin(), input, M.size());
BMW_512_compress(H, M, Q);
diff --git a/src/hash/has160/has160.cpp b/src/hash/has160/has160.cpp
index 533efe595..ae45418ce 100644
--- a/src/hash/has160/has160.cpp
+++ b/src/hash/has160/has160.cpp
@@ -60,13 +60,12 @@ inline void F4(u32bit A, u32bit& B, u32bit C, u32bit D, u32bit& E,
*/
void HAS_160::compress_n(const byte input[], u32bit blocks)
{
+ u32bit A = digest[0], B = digest[1], C = digest[2],
+ D = digest[3], E = digest[4];
+
for(u32bit i = 0; i != blocks; ++i)
{
- for(u32bit j = 0; j != 16; ++j)
- X[j] = load_le<u32bit>(input, j);
-
- u32bit A = digest[0], B = digest[1], C = digest[2],
- D = digest[3], E = digest[4];
+ load_le(X.begin(), input, 16);
X[16] = X[ 0] ^ X[ 1] ^ X[ 2] ^ X[ 3];
X[17] = X[ 4] ^ X[ 5] ^ X[ 6] ^ X[ 7];
@@ -128,8 +127,13 @@ void HAS_160::compress_n(const byte input[], u32bit blocks)
F4(E,A,B,C,D,X[11], 9); F4(D,E,A,B,C,X[ 6],14);
F4(C,D,E,A,B,X[ 1], 5); F4(B,C,D,E,A,X[12],13);
- digest[0] += A; digest[1] += B; digest[2] += C;
- digest[3] += D; digest[4] += E;
+ A = (digest[0] += A);
+ B = (digest[1] += B);
+ C = (digest[2] += C);
+ D = (digest[3] += D);
+ E = (digest[4] += E);
+
+ input += HASH_BLOCK_SIZE;
}
}
diff --git a/src/hash/md4/md4.cpp b/src/hash/md4/md4.cpp
index b2870066d..c50c73a8d 100644
--- a/src/hash/md4/md4.cpp
+++ b/src/hash/md4/md4.cpp
@@ -51,6 +51,7 @@ void MD4::compress_n(const byte input[], u32bit blocks)
for(u32bit i = 0; i != blocks; ++i)
{
+ //load_le(M.begin(), input, M.size());
for(u32bit j = 0; j != 16; ++j)
M[j] = load_le<u32bit>(input, j);
input += HASH_BLOCK_SIZE;
diff --git a/src/hash/md5/md5.cpp b/src/hash/md5/md5.cpp
index 163413bec..8c1e5a8e1 100644
--- a/src/hash/md5/md5.cpp
+++ b/src/hash/md5/md5.cpp
@@ -64,9 +64,7 @@ void MD5::compress_n(const byte input[], u32bit blocks)
for(u32bit i = 0; i != blocks; ++i)
{
- for(u32bit j = 0; j != 16; ++j)
- M[j] = load_le<u32bit>(input, j);
- input += HASH_BLOCK_SIZE;
+ load_le(M.begin(), input, M.size());
FF(A,B,C,D,M[ 0], 7,0xD76AA478); FF(D,A,B,C,M[ 1],12,0xE8C7B756);
FF(C,D,A,B,M[ 2],17,0x242070DB); FF(B,C,D,A,M[ 3],22,0xC1BDCEEE);
@@ -108,6 +106,8 @@ void MD5::compress_n(const byte input[], u32bit blocks)
B = (digest[1] += B);
C = (digest[2] += C);
D = (digest[3] += D);
+
+ input += HASH_BLOCK_SIZE;
}
}
diff --git a/src/hash/rmd128/rmd128.cpp b/src/hash/rmd128/rmd128.cpp
index 899e50914..7e9da3bdd 100644
--- a/src/hash/rmd128/rmd128.cpp
+++ b/src/hash/rmd128/rmd128.cpp
@@ -60,18 +60,16 @@ inline void F4(u32bit& A, u32bit B, u32bit C, u32bit D,
*/
void RIPEMD_128::compress_n(const byte input[], u32bit blocks)
{
+ const u32bit MAGIC2 = 0x5A827999, MAGIC3 = 0x6ED9EBA1,
+ MAGIC4 = 0x8F1BBCDC, MAGIC5 = 0x50A28BE6,
+ MAGIC6 = 0x5C4DD124, MAGIC7 = 0x6D703EF3;
+
for(u32bit i = 0; i != blocks; ++i)
{
- for(u32bit j = 0; j != 16; ++j)
- M[j] = load_le<u32bit>(input, j);
- input += HASH_BLOCK_SIZE;
+ load_le(M.begin(), input, M.size());
u32bit A1 = digest[0], A2 = A1, B1 = digest[1], B2 = B1,
- C1 = digest[2], C2 = C1, D1 = digest[3], D2 = D1;
-
- const u32bit MAGIC2 = 0x5A827999, MAGIC3 = 0x6ED9EBA1,
- MAGIC4 = 0x8F1BBCDC, MAGIC5 = 0x50A28BE6,
- MAGIC6 = 0x5C4DD124, MAGIC7 = 0x6D703EF3;
+ C1 = digest[2], C2 = C1, D1 = digest[3], D2 = D1;
F1(A1,B1,C1,D1,M[ 0],11 ); F4(A2,B2,C2,D2,M[ 5], 8,MAGIC5);
F1(D1,A1,B1,C1,M[ 1],14 ); F4(D2,A2,B2,C2,M[14], 9,MAGIC5);
@@ -141,9 +139,13 @@ void RIPEMD_128::compress_n(const byte input[], u32bit blocks)
F4(C1,D1,A1,B1,M[ 6], 5,MAGIC4); F1(C2,D2,A2,B2,M[10],15 );
F4(B1,C1,D1,A1,M[ 2],12,MAGIC4); F1(B2,C2,D2,A2,M[14], 8 );
- D2 = digest[1] + C1 + D2; digest[1] = digest[2] + D1 + A2;
- digest[2] = digest[3] + A1 + B2; digest[3] = digest[0] + B1 + C2;
+ D2 = digest[1] + C1 + D2;
+ digest[1] = digest[2] + D1 + A2;
+ digest[2] = digest[3] + A1 + B2;
+ digest[3] = digest[0] + B1 + C2;
digest[0] = D2;
+
+ input += HASH_BLOCK_SIZE;
}
}
diff --git a/src/hash/rmd160/rmd160.cpp b/src/hash/rmd160/rmd160.cpp
index 2baf5ab08..5237f1e12 100644
--- a/src/hash/rmd160/rmd160.cpp
+++ b/src/hash/rmd160/rmd160.cpp
@@ -82,9 +82,7 @@ void RIPEMD_160::compress_n(const byte input[], u32bit blocks)
for(u32bit i = 0; i != blocks; ++i)
{
- for(u32bit j = 0; j != 16; ++j)
- M[j] = load_le<u32bit>(input, j);
- input += HASH_BLOCK_SIZE;
+ load_le(M.begin(), input, M.size());
u32bit A1 = digest[0], A2 = A1, B1 = digest[1], B2 = B1,
C1 = digest[2], C2 = C1, D1 = digest[3], D2 = D1,
@@ -181,6 +179,8 @@ void RIPEMD_160::compress_n(const byte input[], u32bit blocks)
digest[3] = digest[4] + A1 + B2;
digest[4] = digest[0] + B1 + C2;
digest[0] = C1;
+
+ input += HASH_BLOCK_SIZE;
}
}
diff --git a/src/hash/sha1/sha160.cpp b/src/hash/sha1/sha160.cpp
index 88f2161e2..ff44593f6 100644
--- a/src/hash/sha1/sha160.cpp
+++ b/src/hash/sha1/sha160.cpp
@@ -61,14 +61,7 @@ void SHA_160::compress_n(const byte input[], u32bit blocks)
for(u32bit i = 0; i != blocks; ++i)
{
- for(u32bit j = 0; j != 16; j += 4)
- {
- W[j ] = load_be<u32bit>(input, j);
- W[j+1] = load_be<u32bit>(input, j+1);
- W[j+2] = load_be<u32bit>(input, j+2);
- W[j+3] = load_be<u32bit>(input, j+3);
- }
- input += HASH_BLOCK_SIZE;
+ load_be(W.begin(), input, 16);
for(u32bit j = 16; j != 80; j += 8)
{
@@ -131,6 +124,8 @@ void SHA_160::compress_n(const byte input[], u32bit blocks)
C = (digest[2] += C);
D = (digest[3] += D);
E = (digest[4] += E);
+
+ input += HASH_BLOCK_SIZE;
}
}
diff --git a/src/hash/sha2/sha2_32.cpp b/src/hash/sha2/sha2_32.cpp
index 89efab123..7068dd63f 100644
--- a/src/hash/sha2/sha2_32.cpp
+++ b/src/hash/sha2/sha2_32.cpp
@@ -56,20 +56,26 @@ void SHA_224_256_BASE::compress_n(const byte input[], u32bit blocks)
for(u32bit i = 0; i != blocks; ++i)
{
- for(u32bit j = 0; j != 16; ++j)
- W[j] = load_be<u32bit>(input, j);
- input += HASH_BLOCK_SIZE;
+ load_be(W.begin(), input, 16);
for(u32bit j = 16; j != 64; j += 8)
{
- W[j ] = sigma(W[j-2], 17, 19, 10) + W[j-7] + sigma(W[j-15], 7, 18, 3) + W[j-16];
- W[j+1] = sigma(W[j-1], 17, 19, 10) + W[j-6] + sigma(W[j-14], 7, 18, 3) + W[j-15];
- W[j+2] = sigma(W[j ], 17, 19, 10) + W[j-5] + sigma(W[j-13], 7, 18, 3) + W[j-14];
- W[j+3] = sigma(W[j+1], 17, 19, 10) + W[j-4] + sigma(W[j-12], 7, 18, 3) + W[j-13];
- W[j+4] = sigma(W[j+2], 17, 19, 10) + W[j-3] + sigma(W[j-11], 7, 18, 3) + W[j-12];
- W[j+5] = sigma(W[j+3], 17, 19, 10) + W[j-2] + sigma(W[j-10], 7, 18, 3) + W[j-11];
- W[j+6] = sigma(W[j+4], 17, 19, 10) + W[j-1] + sigma(W[j- 9], 7, 18, 3) + W[j-10];
- W[j+7] = sigma(W[j+5], 17, 19, 10) + W[j ] + sigma(W[j- 8], 7, 18, 3) + W[j- 9];
+ W[j ] = sigma(W[j- 2], 17, 19, 10) + W[j-7] +
+ sigma(W[j-15], 7, 18, 3) + W[j-16];
+ W[j+1] = sigma(W[j- 1], 17, 19, 10) + W[j-6] +
+ sigma(W[j-14], 7, 18, 3) + W[j-15];
+ W[j+2] = sigma(W[j ], 17, 19, 10) + W[j-5] +
+ sigma(W[j-13], 7, 18, 3) + W[j-14];
+ W[j+3] = sigma(W[j+ 1], 17, 19, 10) + W[j-4] +
+ sigma(W[j-12], 7, 18, 3) + W[j-13];
+ W[j+4] = sigma(W[j+ 2], 17, 19, 10) + W[j-3] +
+ sigma(W[j-11], 7, 18, 3) + W[j-12];
+ W[j+5] = sigma(W[j+ 3], 17, 19, 10) + W[j-2] +
+ sigma(W[j-10], 7, 18, 3) + W[j-11];
+ W[j+6] = sigma(W[j+ 4], 17, 19, 10) + W[j-1] +
+ sigma(W[j- 9], 7, 18, 3) + W[j-10];
+ W[j+7] = sigma(W[j+ 5], 17, 19, 10) + W[j ] +
+ sigma(W[j- 8], 7, 18, 3) + W[j- 9];
}
F1(A, B, C, D, E, F, G, H, W[ 0], 0x428A2F98);
@@ -145,6 +151,8 @@ void SHA_224_256_BASE::compress_n(const byte input[], u32bit blocks)
F = (digest[5] += F);
G = (digest[6] += G);
H = (digest[7] += H);
+
+ input += HASH_BLOCK_SIZE;
}
}
diff --git a/src/hash/skein/skein_512.cpp b/src/hash/skein/skein_512.cpp
index b24efd5f7..e1ca08c15 100644
--- a/src/hash/skein/skein_512.cpp
+++ b/src/hash/skein/skein_512.cpp
@@ -34,8 +34,8 @@ void ubi_512(u64bit H[9], u64bit T[], const byte msg[], u64bit msg_len)
T[0] += to_proc;
u64bit M[8] = { 0 };
- for(u32bit j = 0; j != to_proc / 8; ++j)
- M[j] = load_le<u64bit>(msg, j);
+
+ load_le(M, msg, to_proc / 8);
if(to_proc % 8)
{
diff --git a/src/hash/tiger/tiger.cpp b/src/hash/tiger/tiger.cpp
index 975ea9b6b..4f4d4dc83 100644
--- a/src/hash/tiger/tiger.cpp
+++ b/src/hash/tiger/tiger.cpp
@@ -21,9 +21,7 @@ void Tiger::compress_n(const byte input[], u32bit blocks)
for(u32bit i = 0; i != blocks; ++i)
{
- for(u32bit j = 0; j != 8; ++j)
- X[j] = load_le<u64bit>(input, j);
- input += HASH_BLOCK_SIZE;
+ load_le(X.begin(), input, X.size());
pass(A, B, C, X, 5); mix(X);
pass(C, A, B, X, 7); mix(X);
@@ -39,6 +37,8 @@ void Tiger::compress_n(const byte input[], u32bit blocks)
A = (digest[0] ^= A);
B = digest[1] = B - digest[1];
C = (digest[2] += C);
+
+ input += HASH_BLOCK_SIZE;
}
}
diff --git a/src/utils/loadstor.h b/src/utils/loadstor.h
index 6f91c2fa5..8f430f36c 100644
--- a/src/utils/loadstor.h
+++ b/src/utils/loadstor.h
@@ -199,6 +199,29 @@ inline void load_le(const byte in[],
}
template<typename T> // Array overload: fills out[0..count) via the single-word load_le<T>(in, k)
+inline void load_le(T out[], // destination; elements 0 .. count-1 are written
+ const byte in[], // source bytes; advanced by sizeof(T) per word loaded
+ u32bit count) // number of T words to load
+ {
+ const u32bit blocks = count - (count % 4); // count rounded down to a multiple of 4
+ const u32bit left = count - blocks; // remaining 0..3 words after the unrolled part
+
+ for(u32bit i = 0; i != blocks; i += 4) // main loop, manually unrolled x4
+ {
+ out[0] = load_le<T>(in, 0); // indices 0..3 are relative to the current group,
+ out[1] = load_le<T>(in, 1); // because out and in are both advanced below
+ out[2] = load_le<T>(in, 2);
+ out[3] = load_le<T>(in, 3);
+
+ out += 4; // step past the four words just written
+ in += 4*sizeof(T); // step past the bytes of those four words
+ }
+
+ for(u32bit i = 0; i != left; ++i) // tail: out/in already point past the unrolled part
+ out[i] = load_le<T>(in, i); // NOTE(review): presumably reads the i-th T-sized word of in; confirm against the single-word load_le
+ }
+
+template<typename T>
inline void load_be(const byte in[], T& x0, T& x1)
{
x0 = load_be<T>(in, 0);
@@ -230,6 +253,29 @@ inline void load_be(const byte in[],
x7 = load_be<T>(in, 7);
}
+template<typename T> // Array overload: fills out[0..count) via the single-word load_be<T>(in, k)
+inline void load_be(T out[], // destination; elements 0 .. count-1 are written
+ const byte in[], // source bytes; advanced by sizeof(T) per word loaded
+ u32bit count) // number of T words to load
+ {
+ const u32bit blocks = count - (count % 4); // count rounded down to a multiple of 4
+ const u32bit left = count - blocks; // remaining 0..3 words after the unrolled part
+
+ for(u32bit i = 0; i != blocks; i += 4) // main loop, manually unrolled x4
+ {
+ out[0] = load_be<T>(in, 0); // indices 0..3 are relative to the current group,
+ out[1] = load_be<T>(in, 1); // because out and in are both advanced below
+ out[2] = load_be<T>(in, 2);
+ out[3] = load_be<T>(in, 3);
+
+ out += 4; // step past the four words just written
+ in += 4*sizeof(T); // step past the bytes of those four words
+ }
+
+ for(u32bit i = 0; i != left; ++i) // tail: out/in already point past the unrolled part
+ out[i] = load_be<T>(in, i); // NOTE(review): presumably reads the i-th T-sized word of in; confirm against the single-word load_be
+ }
+
/*
* Endian-Specific Word Storing Operations
*/