diff options
author | lloyd <[email protected]> | 2009-10-29 17:34:52 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2009-10-29 17:34:52 +0000 |
commit | 7462977b8e5eb95a81a6253dc6e6224334ad6ae9 (patch) | |
tree | d14b468f7a04b02635b11bda017cca56259275b5 /src/hash | |
parent | 5553c5cf54563280a4ffc94baab7b94a83cb0000 (diff) |
Add a new looping load_be / load_le for loading large arrays at once, and
change some of the hash functions to use it as low hanging fruit.
Probably could use further optimization (just unrolls x4 currently), but
merely having it as syntax is good as it allows optimizing many functions
at once (eg using SSE2 to do 4-way byteswaps).
Diffstat (limited to 'src/hash')
-rw-r--r-- | src/hash/bmw/bmw_512.cpp | 3 | ||||
-rw-r--r-- | src/hash/has160/has160.cpp | 18 | ||||
-rw-r--r-- | src/hash/md4/md4.cpp | 1 | ||||
-rw-r--r-- | src/hash/md5/md5.cpp | 6 | ||||
-rw-r--r-- | src/hash/rmd128/rmd128.cpp | 22 | ||||
-rw-r--r-- | src/hash/rmd160/rmd160.cpp | 6 | ||||
-rw-r--r-- | src/hash/sha1/sha160.cpp | 11 | ||||
-rw-r--r-- | src/hash/sha2/sha2_32.cpp | 30 | ||||
-rw-r--r-- | src/hash/skein/skein_512.cpp | 4 | ||||
-rw-r--r-- | src/hash/tiger/tiger.cpp | 6 |
10 files changed, 58 insertions, 49 deletions
diff --git a/src/hash/bmw/bmw_512.cpp b/src/hash/bmw/bmw_512.cpp index ae80d5725..5ccb09579 100644 --- a/src/hash/bmw/bmw_512.cpp +++ b/src/hash/bmw/bmw_512.cpp @@ -143,8 +143,7 @@ void BMW_512::compress_n(const byte input[], u32bit blocks) { for(u32bit i = 0; i != blocks; ++i) { - for(u32bit j = 0; j != 16; ++j) - M[j] = load_le<u64bit>(input, j); + load_le(M.begin(), input, M.size()); BMW_512_compress(H, M, Q); diff --git a/src/hash/has160/has160.cpp b/src/hash/has160/has160.cpp index 533efe595..ae45418ce 100644 --- a/src/hash/has160/has160.cpp +++ b/src/hash/has160/has160.cpp @@ -60,13 +60,12 @@ inline void F4(u32bit A, u32bit& B, u32bit C, u32bit D, u32bit& E, */ void HAS_160::compress_n(const byte input[], u32bit blocks) { + u32bit A = digest[0], B = digest[1], C = digest[2], + D = digest[3], E = digest[4]; + for(u32bit i = 0; i != blocks; ++i) { - for(u32bit j = 0; j != 16; ++j) - X[j] = load_le<u32bit>(input, j); - - u32bit A = digest[0], B = digest[1], C = digest[2], - D = digest[3], E = digest[4]; + load_le(X.begin(), input, 16); X[16] = X[ 0] ^ X[ 1] ^ X[ 2] ^ X[ 3]; X[17] = X[ 4] ^ X[ 5] ^ X[ 6] ^ X[ 7]; @@ -128,8 +127,13 @@ void HAS_160::compress_n(const byte input[], u32bit blocks) F4(E,A,B,C,D,X[11], 9); F4(D,E,A,B,C,X[ 6],14); F4(C,D,E,A,B,X[ 1], 5); F4(B,C,D,E,A,X[12],13); - digest[0] += A; digest[1] += B; digest[2] += C; - digest[3] += D; digest[4] += E; + A = (digest[0] += A); + B = (digest[1] += B); + C = (digest[2] += C); + D = (digest[3] += D); + E = (digest[4] += E); + + input += HASH_BLOCK_SIZE; } } diff --git a/src/hash/md4/md4.cpp b/src/hash/md4/md4.cpp index b2870066d..c50c73a8d 100644 --- a/src/hash/md4/md4.cpp +++ b/src/hash/md4/md4.cpp @@ -51,6 +51,7 @@ void MD4::compress_n(const byte input[], u32bit blocks) for(u32bit i = 0; i != blocks; ++i) { + //load_le(M.begin(), input, M.size()); for(u32bit j = 0; j != 16; ++j) M[j] = load_le<u32bit>(input, j); input += HASH_BLOCK_SIZE; diff --git a/src/hash/md5/md5.cpp b/src/hash/md5/md5.cpp index 163413bec..8c1e5a8e1 100644 --- a/src/hash/md5/md5.cpp +++ b/src/hash/md5/md5.cpp @@ -64,9 +64,7 @@ void MD5::compress_n(const byte input[], u32bit blocks) for(u32bit i = 0; i != blocks; ++i) { - for(u32bit j = 0; j != 16; ++j) - M[j] = load_le<u32bit>(input, j); - input += HASH_BLOCK_SIZE; + load_le(M.begin(), input, M.size()); FF(A,B,C,D,M[ 0], 7,0xD76AA478); FF(D,A,B,C,M[ 1],12,0xE8C7B756); FF(C,D,A,B,M[ 2],17,0x242070DB); FF(B,C,D,A,M[ 3],22,0xC1BDCEEE); @@ -108,6 +106,8 @@ void MD5::compress_n(const byte input[], u32bit blocks) B = (digest[1] += B); C = (digest[2] += C); D = (digest[3] += D); + + input += HASH_BLOCK_SIZE; } } diff --git a/src/hash/rmd128/rmd128.cpp b/src/hash/rmd128/rmd128.cpp index 899e50914..7e9da3bdd 100644 --- a/src/hash/rmd128/rmd128.cpp +++ b/src/hash/rmd128/rmd128.cpp @@ -60,18 +60,16 @@ inline void F4(u32bit& A, u32bit B, u32bit C, u32bit D, */ void RIPEMD_128::compress_n(const byte input[], u32bit blocks) { + const u32bit MAGIC2 = 0x5A827999, MAGIC3 = 0x6ED9EBA1, + MAGIC4 = 0x8F1BBCDC, MAGIC5 = 0x50A28BE6, + MAGIC6 = 0x5C4DD124, MAGIC7 = 0x6D703EF3; + for(u32bit i = 0; i != blocks; ++i) { - for(u32bit j = 0; j != 16; ++j) - M[j] = load_le<u32bit>(input, j); - input += HASH_BLOCK_SIZE; + load_le(M.begin(), input, M.size()); u32bit A1 = digest[0], A2 = A1, B1 = digest[1], B2 = B1, - C1 = digest[2], C2 = C1, D1 = digest[3], D2 = D1; - - const u32bit MAGIC2 = 0x5A827999, MAGIC3 = 0x6ED9EBA1, - MAGIC4 = 0x8F1BBCDC, MAGIC5 = 0x50A28BE6, - MAGIC6 = 0x5C4DD124, MAGIC7 = 0x6D703EF3; + C1 = digest[2], C2 = C1, D1 = digest[3], D2 = D1; F1(A1,B1,C1,D1,M[ 0],11 ); F4(A2,B2,C2,D2,M[ 5], 8,MAGIC5); F1(D1,A1,B1,C1,M[ 1],14 ); F4(D2,A2,B2,C2,M[14], 9,MAGIC5); @@ -141,9 +139,13 @@ void RIPEMD_128::compress_n(const byte input[], u32bit blocks) F4(C1,D1,A1,B1,M[ 6], 5,MAGIC4); F1(C2,D2,A2,B2,M[10],15 ); F4(B1,C1,D1,A1,M[ 2],12,MAGIC4); F1(B2,C2,D2,A2,M[14], 8 ); - D2 = digest[1] + C1 + D2; digest[1] = digest[2] + D1 + A2; - digest[2] = digest[3] + A1 + B2; digest[3] = digest[0] + B1 + C2; + D2 = digest[1] + C1 + D2; + digest[1] = digest[2] + D1 + A2; + digest[2] = digest[3] + A1 + B2; + digest[3] = digest[0] + B1 + C2; digest[0] = D2; + + input += HASH_BLOCK_SIZE; } } diff --git a/src/hash/rmd160/rmd160.cpp b/src/hash/rmd160/rmd160.cpp index 2baf5ab08..5237f1e12 100644 --- a/src/hash/rmd160/rmd160.cpp +++ b/src/hash/rmd160/rmd160.cpp @@ -82,9 +82,7 @@ void RIPEMD_160::compress_n(const byte input[], u32bit blocks) for(u32bit i = 0; i != blocks; ++i) { - for(u32bit j = 0; j != 16; ++j) - M[j] = load_le<u32bit>(input, j); - input += HASH_BLOCK_SIZE; + load_le(M.begin(), input, M.size()); u32bit A1 = digest[0], A2 = A1, B1 = digest[1], B2 = B1, C1 = digest[2], C2 = C1, D1 = digest[3], D2 = D1, @@ -181,6 +179,8 @@ void RIPEMD_160::compress_n(const byte input[], u32bit blocks) digest[3] = digest[4] + A1 + B2; digest[4] = digest[0] + B1 + C2; digest[0] = C1; + + input += HASH_BLOCK_SIZE; } } diff --git a/src/hash/sha1/sha160.cpp b/src/hash/sha1/sha160.cpp index 88f2161e2..ff44593f6 100644 --- a/src/hash/sha1/sha160.cpp +++ b/src/hash/sha1/sha160.cpp @@ -61,14 +61,7 @@ void SHA_160::compress_n(const byte input[], u32bit blocks) for(u32bit i = 0; i != blocks; ++i) { - for(u32bit j = 0; j != 16; j += 4) - { - W[j ] = load_be<u32bit>(input, j); - W[j+1] = load_be<u32bit>(input, j+1); - W[j+2] = load_be<u32bit>(input, j+2); - W[j+3] = load_be<u32bit>(input, j+3); - } - input += HASH_BLOCK_SIZE; + load_be(W.begin(), input, 16); for(u32bit j = 16; j != 80; j += 8) { @@ -131,6 +124,8 @@ void SHA_160::compress_n(const byte input[], u32bit blocks) C = (digest[2] += C); D = (digest[3] += D); E = (digest[4] += E); + + input += HASH_BLOCK_SIZE; } } diff --git a/src/hash/sha2/sha2_32.cpp b/src/hash/sha2/sha2_32.cpp index 89efab123..7068dd63f 100644 --- a/src/hash/sha2/sha2_32.cpp +++ b/src/hash/sha2/sha2_32.cpp @@ -56,20 +56,26 @@ void SHA_224_256_BASE::compress_n(const byte input[], u32bit blocks) for(u32bit i = 0; i != blocks; ++i) { - for(u32bit j = 0; j != 16; ++j) - W[j] = load_be<u32bit>(input, j); - input += HASH_BLOCK_SIZE; + load_be(W.begin(), input, 16); for(u32bit j = 16; j != 64; j += 8) { - W[j ] = sigma(W[j-2], 17, 19, 10) + W[j-7] + sigma(W[j-15], 7, 18, 3) + W[j-16]; - W[j+1] = sigma(W[j-1], 17, 19, 10) + W[j-6] + sigma(W[j-14], 7, 18, 3) + W[j-15]; - W[j+2] = sigma(W[j ], 17, 19, 10) + W[j-5] + sigma(W[j-13], 7, 18, 3) + W[j-14]; - W[j+3] = sigma(W[j+1], 17, 19, 10) + W[j-4] + sigma(W[j-12], 7, 18, 3) + W[j-13]; - W[j+4] = sigma(W[j+2], 17, 19, 10) + W[j-3] + sigma(W[j-11], 7, 18, 3) + W[j-12]; - W[j+5] = sigma(W[j+3], 17, 19, 10) + W[j-2] + sigma(W[j-10], 7, 18, 3) + W[j-11]; - W[j+6] = sigma(W[j+4], 17, 19, 10) + W[j-1] + sigma(W[j- 9], 7, 18, 3) + W[j-10]; - W[j+7] = sigma(W[j+5], 17, 19, 10) + W[j ] + sigma(W[j- 8], 7, 18, 3) + W[j- 9]; + W[j ] = sigma(W[j- 2], 17, 19, 10) + W[j-7] + + sigma(W[j-15], 7, 18, 3) + W[j-16]; + W[j+1] = sigma(W[j- 1], 17, 19, 10) + W[j-6] + + sigma(W[j-14], 7, 18, 3) + W[j-15]; + W[j+2] = sigma(W[j ], 17, 19, 10) + W[j-5] + + sigma(W[j-13], 7, 18, 3) + W[j-14]; + W[j+3] = sigma(W[j+ 1], 17, 19, 10) + W[j-4] + + sigma(W[j-12], 7, 18, 3) + W[j-13]; + W[j+4] = sigma(W[j+ 2], 17, 19, 10) + W[j-3] + + sigma(W[j-11], 7, 18, 3) + W[j-12]; + W[j+5] = sigma(W[j+ 3], 17, 19, 10) + W[j-2] + + sigma(W[j-10], 7, 18, 3) + W[j-11]; + W[j+6] = sigma(W[j+ 4], 17, 19, 10) + W[j-1] + + sigma(W[j- 9], 7, 18, 3) + W[j-10]; + W[j+7] = sigma(W[j+ 5], 17, 19, 10) + W[j ] + + sigma(W[j- 8], 7, 18, 3) + W[j- 9]; } F1(A, B, C, D, E, F, G, H, W[ 0], 0x428A2F98); @@ -145,6 +151,8 @@ void SHA_224_256_BASE::compress_n(const byte input[], u32bit blocks) F = (digest[5] += F); G = (digest[6] += G); H = (digest[7] += H); + + input += HASH_BLOCK_SIZE; } } diff --git a/src/hash/skein/skein_512.cpp b/src/hash/skein/skein_512.cpp index b24efd5f7..e1ca08c15 100644 --- a/src/hash/skein/skein_512.cpp +++ b/src/hash/skein/skein_512.cpp @@ -34,8 +34,8 @@ void ubi_512(u64bit H[9], u64bit T[], const byte msg[], u64bit msg_len) T[0] += to_proc; u64bit M[8] = { 0 }; - for(u32bit j = 0; j != to_proc / 8; ++j) - M[j] = load_le<u64bit>(msg, j); + + load_le(M, msg, to_proc / 8); if(to_proc % 8) { diff --git a/src/hash/tiger/tiger.cpp b/src/hash/tiger/tiger.cpp index 975ea9b6b..4f4d4dc83 100644 --- a/src/hash/tiger/tiger.cpp +++ b/src/hash/tiger/tiger.cpp @@ -21,9 +21,7 @@ void Tiger::compress_n(const byte input[], u32bit blocks) for(u32bit i = 0; i != blocks; ++i) { - for(u32bit j = 0; j != 8; ++j) - X[j] = load_le<u64bit>(input, j); - input += HASH_BLOCK_SIZE; + load_le(X.begin(), input, X.size()); pass(A, B, C, X, 5); mix(X); pass(C, A, B, X, 7); mix(X); @@ -39,6 +37,8 @@ void Tiger::compress_n(const byte input[], u32bit blocks) A = (digest[0] ^= A); B = digest[1] = B - digest[1]; C = (digest[2] += C); + + input += HASH_BLOCK_SIZE; } } |