diff options
-rw-r--r-- | modules/sha_x86/sha160.cpp | 59 | ||||
-rw-r--r-- | modules/sha_x86/sha1core.S | 56 |
2 files changed, 45 insertions, 70 deletions
diff --git a/modules/sha_x86/sha160.cpp b/modules/sha_x86/sha160.cpp
index d591eb6d8..99afb4a46 100644
--- a/modules/sha_x86/sha160.cpp
+++ b/modules/sha_x86/sha160.cpp
@@ -55,64 +55,7 @@ extern "C" void sha160_core(u32bit[5], const byte[64], u32bit[80]);
 *************************************************/
 void SHA_160::hash(const byte input[])
 {
-#if 1
- u32bit digestX[5];
- for(int j = 0; j != 5; j++)
- digestX[j] = digest[j];
-
- sha160_core(digestX, input, W);
-
- u32bit A = digestX[0], B = digestX[1], C = digestX[2],
- D = digestX[3], E = digestX[4];
-
-#else
- for(u32bit j = 0; j != 16; ++j)
- W[j] = make_u32bit(input[4*j], input[4*j+1], input[4*j+2], input[4*j+3]);
-
- for(u32bit j = 16; j != 80; ++j)
- W[j] = rotate_left((W[j-3] ^ W[j-8] ^ W[j-14] ^ W[j-16]), 1);
-
- u32bit A = digest[0], B = digest[1], C = digest[2],
- D = digest[3], E = digest[4];
-#endif
-
- /*
- F1(A,B,C,D,E,W[ 0]); F1(E,A,B,C,D,W[ 1]); F1(D,E,A,B,C,W[ 2]);
- F1(C,D,E,A,B,W[ 3]); F1(B,C,D,E,A,W[ 4]); F1(A,B,C,D,E,W[ 5]);
- F1(E,A,B,C,D,W[ 6]); F1(D,E,A,B,C,W[ 7]); F1(C,D,E,A,B,W[ 8]);
- F1(B,C,D,E,A,W[ 9]); F1(A,B,C,D,E,W[10]); F1(E,A,B,C,D,W[11]);
- F1(D,E,A,B,C,W[12]); F1(C,D,E,A,B,W[13]); F1(B,C,D,E,A,W[14]);
- F1(A,B,C,D,E,W[15]); F1(E,A,B,C,D,W[16]); F1(D,E,A,B,C,W[17]);
- F1(C,D,E,A,B,W[18]); F1(B,C,D,E,A,W[19]);
-
-
- F2(A,B,C,D,E,W[20]); F2(E,A,B,C,D,W[21]); F2(D,E,A,B,C,W[22]);
- F2(C,D,E,A,B,W[23]); F2(B,C,D,E,A,W[24]); F2(A,B,C,D,E,W[25]);
- F2(E,A,B,C,D,W[26]); F2(D,E,A,B,C,W[27]); F2(C,D,E,A,B,W[28]);
- F2(B,C,D,E,A,W[29]); F2(A,B,C,D,E,W[30]); F2(E,A,B,C,D,W[31]);
- F2(D,E,A,B,C,W[32]); F2(C,D,E,A,B,W[33]); F2(B,C,D,E,A,W[34]);
- F2(A,B,C,D,E,W[35]); F2(E,A,B,C,D,W[36]); F2(D,E,A,B,C,W[37]);
- F2(C,D,E,A,B,W[38]); F2(B,C,D,E,A,W[39]);
- */
-
- F3(A,B,C,D,E,W[40]); F3(E,A,B,C,D,W[41]); F3(D,E,A,B,C,W[42]);
- F3(C,D,E,A,B,W[43]); F3(B,C,D,E,A,W[44]); F3(A,B,C,D,E,W[45]);
- F3(E,A,B,C,D,W[46]); F3(D,E,A,B,C,W[47]); F3(C,D,E,A,B,W[48]);
- F3(B,C,D,E,A,W[49]); F3(A,B,C,D,E,W[50]); F3(E,A,B,C,D,W[51]);
- F3(D,E,A,B,C,W[52]); F3(C,D,E,A,B,W[53]); F3(B,C,D,E,A,W[54]);
- F3(A,B,C,D,E,W[55]); F3(E,A,B,C,D,W[56]); F3(D,E,A,B,C,W[57]);
- F3(C,D,E,A,B,W[58]); F3(B,C,D,E,A,W[59]);
-
- F4(A,B,C,D,E,W[60]); F4(E,A,B,C,D,W[61]); F4(D,E,A,B,C,W[62]);
- F4(C,D,E,A,B,W[63]); F4(B,C,D,E,A,W[64]); F4(A,B,C,D,E,W[65]);
- F4(E,A,B,C,D,W[66]); F4(D,E,A,B,C,W[67]); F4(C,D,E,A,B,W[68]);
- F4(B,C,D,E,A,W[69]); F4(A,B,C,D,E,W[70]); F4(E,A,B,C,D,W[71]);
- F4(D,E,A,B,C,W[72]); F4(C,D,E,A,B,W[73]); F4(B,C,D,E,A,W[74]);
- F4(A,B,C,D,E,W[75]); F4(E,A,B,C,D,W[76]); F4(D,E,A,B,C,W[77]);
- F4(C,D,E,A,B,W[78]); F4(B,C,D,E,A,W[79]);
-
- digest[0] += A; digest[1] += B; digest[2] += C;
- digest[3] += D; digest[4] += E;
+ sha160_core(digest, input, W);
 }
 
 /*************************************************
diff --git a/modules/sha_x86/sha1core.S b/modules/sha_x86/sha1core.S
index 17352688d..acba9fb8d 100644
--- a/modules/sha_x86/sha1core.S
+++ b/modules/sha_x86/sha1core.S
@@ -54,7 +54,9 @@ sha160_core:
 
 #define MAGIC1 $0x5A827999
 #define MAGIC2 $0x6ED9EBA1
-
+#define MAGIC3 $0x8F1BBCDC
+#define MAGIC4 $0xCA62C1D6
+
 #define FUNC1(B, C, D, TEMP) \
 movl C, TEMP ; \
 xorl D, TEMP ; \
@@ -66,16 +68,29 @@ sha160_core:
 xorl C, TEMP ; \
 xorl D, TEMP
 
+#define FUNC3(B, C, D, TEMP) \
+ movl B, TEMP ; \
+ orl C, TEMP ; \
+ andl D, TEMP ; \
+ movl B, (%edi) ; \
+ andl C, (%edi) ; \
+ orl (%edi), TEMP
+
+#define FUNC4(B, C, D, TEMP) \
+ movl B, TEMP ; \
+ xorl C, TEMP ; \
+ xorl D, TEMP
+
 #define F(A, B, C, D, E, TEMP, MAGIC, FUNC) \
+ addl (%edi), E ; \
 FUNC(B, C, D, TEMP) ; \
- addl 0(%edi), E ; \
 addl $4, %edi ; \
 addl TEMP, E ; \
 addl MAGIC, E ; \
 roll $5, A ; \
 addl A, E ; \
 rorl $5, A ; \
- roll $30, B
+ roll $30, B ;
 
 #define F1(A, B, C, D, E, TEMP) \
 F(A, B, C, D, E, TEMP, MAGIC1, FUNC1)
@@ -83,11 +98,17 @@ sha160_core:
 #define F2(A, B, C, D, E, TEMP) \
 F(A, B, C, D, E, TEMP, MAGIC2, FUNC2)
 
+#define F3(A, B, C, D, E, TEMP) \
+ F(A, B, C, D, E, TEMP, MAGIC3, FUNC3)
+
+#define F4(A, B, C, D, E, TEMP) \
+ F(A, B, C, D, E, TEMP, MAGIC4, FUNC4)
+
 #define F_BLOCK(F) \
- F(%eax, %ebx, %ecx, %edx, %esi, %ebp) ; \
- F(%esi, %eax, %ebx, %ecx, %edx, %ebp) ; \
- F(%edx, %esi, %eax, %ebx, %ecx, %ebp) ; \
- F(%ecx, %edx, %esi, %eax, %ebx, %ebp) ; \
+ F(%eax, %ebx, %ecx, %edx, %esi, %ebp) \
+ F(%esi, %eax, %ebx, %ecx, %edx, %ebp) \
+ F(%edx, %esi, %eax, %ebx, %ecx, %ebp) \
+ F(%ecx, %edx, %esi, %eax, %ebx, %ebp) \
 F(%ebx, %ecx, %edx, %esi, %eax, %ebp)
 /*
 F1(%eax, %ebx, %ecx, %edx, %esi, %ebp)
@@ -96,6 +117,7 @@ sha160_core:
 F1(%ecx, %edx, %esi, %eax, %ebx, %ebp)
 F1(%ebx, %ecx, %edx, %esi, %eax, %ebp)
 */
+
 F_BLOCK(F1)
 F_BLOCK(F1)
 F_BLOCK(F1)
@@ -106,12 +128,22 @@ sha160_core:
 F_BLOCK(F2)
 F_BLOCK(F2)
 
+ F_BLOCK(F3)
+ F_BLOCK(F3)
+ F_BLOCK(F3)
+ F_BLOCK(F3)
+
+ F_BLOCK(F4)
+ F_BLOCK(F4)
+ F_BLOCK(F4)
+ F_BLOCK(F4)
+
 movl 20(%esp), %ebp
- movl %eax, 0(%ebp)
- movl %ebx, 4(%ebp)
- movl %ecx, 8(%ebp)
- movl %edx, 12(%ebp)
- movl %esi, 16(%ebp)
+ addl %eax, 0(%ebp)
+ addl %ebx, 4(%ebp)
+ addl %ecx, 8(%ebp)
+ addl %edx, 12(%ebp)
+ addl %esi, 16(%ebp)
 
 popl %ebx
 popl %esi