diff options
author | lloyd <[email protected]> | 2006-08-13 16:00:54 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2006-08-13 16:00:54 +0000 |
commit | 2ebf2c637111f7b041bb2c47ccd0bb98f35e2e83 (patch) | |
tree | ba9c80398d4e789f6f5a1554a3c3909a072209a8 /modules | |
parent | 671061b187e594f3ce969ee8f10fa45f4ff70ff1 (diff) |
Cleanups, and move the initial memory access to the beginning of each
MD5 round macro (FF/GG/HH/II) in an attempt to hide the latency a bit.
Diffstat (limited to 'modules')
-rw-r--r-- | modules/alg_ia32/md5core.S | 127 | ||||
-rw-r--r-- | modules/alg_ia32/sha1core.S | 2 |
2 files changed, 77 insertions, 52 deletions
diff --git a/modules/alg_ia32/md5core.S b/modules/alg_ia32/md5core.S index a801a3681..7bc7ec99d 100644 --- a/modules/alg_ia32/md5core.S +++ b/modules/alg_ia32/md5core.S @@ -32,94 +32,119 @@ START_LOOP(.LOAD_INPUT) ASSIGN(ARRAY4_INDIRECT(EDI,ESI,-2), ECX) ASSIGN(ARRAY4_INDIRECT(EDI,ESI,-1), EDX) LOOP_UNTIL(ESI, IMM(16), .LOAD_INPUT) - + ASSIGN(EBP, ARG(1)) ASSIGN(EAX, ARRAY4(EBP, 0)) ASSIGN(EBX, ARRAY4(EBP, 1)) ASSIGN(ECX, ARRAY4(EBP, 2)) ASSIGN(EDX, ARRAY4(EBP, 3)) -#define T EBP - -#define FF(A, B, C, D, MSG, S, MAGIC) \ +#define FF(A, B, C, D, T, MSG, S, MAGIC) \ + ADD(A, ARRAY4(EDI, MSG)) ; \ ASSIGN(T, C) ; \ XOR(T, D) ; \ AND(T, B) ; \ XOR(T, D) ; \ - ADD(A, ARRAY4(EDI, MSG)) ; \ ADD3_IMM(A, T, MAGIC) ; \ ROTL_IMM(A, S) ; \ ADD(A, B) ; -#define GG(A, B, C, D, MSG, S, MAGIC) \ +#define GG(A, B, C, D, T, MSG, S, MAGIC) \ + ADD(A, ARRAY4(EDI, MSG)) ; \ ASSIGN(T, B) ; \ XOR(T, C) ; \ AND(T, D) ; \ XOR(T, C) ; \ - ADD(A, ARRAY4(EDI, MSG)) ; \ ADD3_IMM(A, T, MAGIC) ; \ ROTL_IMM(A, S) ; \ ADD(A, B) ; -#define HH(A, B, C, D, MSG, S, MAGIC) \ +#define HH(A, B, C, D, T, MSG, S, MAGIC) \ + ADD(A, ARRAY4(EDI, MSG)) ; \ ASSIGN(T, B) ; \ XOR(T, C) ; \ XOR(T, D) ; \ - ADD(A, ARRAY4(EDI, MSG)) ; \ ADD3_IMM(A, T, MAGIC) ; \ ROTL_IMM(A, S) ; \ ADD(A, B) ; -#define II(A, B, C, D, MSG, S, MAGIC) \ +#define II(A, B, C, D, T, MSG, S, MAGIC) \ + ADD(A, ARRAY4(EDI, MSG)) ; \ ASSIGN(T, D) ; \ NOT(T) ; \ OR(T, B) ; \ XOR(T, C) ; \ - ADD(A, ARRAY4(EDI, MSG)) ; \ ADD3_IMM(A, T, MAGIC) ; \ ROTL_IMM(A, S) ; \ ADD(A, B) ; -#define A EAX -#define B EBX -#define C ECX -#define D EDX - - FF(A,B,C,D, 0, 7,0xD76AA478); FF(D,A,B,C, 1,12,0xE8C7B756); - FF(C,D,A,B, 2,17,0x242070DB); FF(B,C,D,A, 3,22,0xC1BDCEEE); - FF(A,B,C,D, 4, 7,0xF57C0FAF); FF(D,A,B,C, 5,12,0x4787C62A); - FF(C,D,A,B, 6,17,0xA8304613); FF(B,C,D,A, 7,22,0xFD469501); - FF(A,B,C,D, 8, 7,0x698098D8); FF(D,A,B,C, 9,12,0x8B44F7AF); - FF(C,D,A,B,10,17,0xFFFF5BB1); FF(B,C,D,A,11,22,0x895CD7BE); - FF(A,B,C,D,12, 7,0x6B901122); 
FF(D,A,B,C,13,12,0xFD987193); - FF(C,D,A,B,14,17,0xA679438E); FF(B,C,D,A,15,22,0x49B40821); - - GG(A,B,C,D, 1, 5,0xF61E2562); GG(D,A,B,C, 6, 9,0xC040B340); - GG(C,D,A,B,11,14,0x265E5A51); GG(B,C,D,A, 0,20,0xE9B6C7AA); - GG(A,B,C,D, 5, 5,0xD62F105D); GG(D,A,B,C,10, 9,0x02441453); - GG(C,D,A,B,15,14,0xD8A1E681); GG(B,C,D,A, 4,20,0xE7D3FBC8); - GG(A,B,C,D, 9, 5,0x21E1CDE6); GG(D,A,B,C,14, 9,0xC33707D6); - GG(C,D,A,B, 3,14,0xF4D50D87); GG(B,C,D,A, 8,20,0x455A14ED); - GG(A,B,C,D,13, 5,0xA9E3E905); GG(D,A,B,C, 2, 9,0xFCEFA3F8); - GG(C,D,A,B, 7,14,0x676F02D9); GG(B,C,D,A,12,20,0x8D2A4C8A); - - HH(A,B,C,D, 5, 4,0xFFFA3942); HH(D,A,B,C, 8,11,0x8771F681); - HH(C,D,A,B,11,16,0x6D9D6122); HH(B,C,D,A,14,23,0xFDE5380C); - HH(A,B,C,D, 1, 4,0xA4BEEA44); HH(D,A,B,C, 4,11,0x4BDECFA9); - HH(C,D,A,B, 7,16,0xF6BB4B60); HH(B,C,D,A,10,23,0xBEBFBC70); - HH(A,B,C,D,13, 4,0x289B7EC6); HH(D,A,B,C, 0,11,0xEAA127FA); - HH(C,D,A,B, 3,16,0xD4EF3085); HH(B,C,D,A, 6,23,0x04881D05); - HH(A,B,C,D, 9, 4,0xD9D4D039); HH(D,A,B,C,12,11,0xE6DB99E5); - HH(C,D,A,B,15,16,0x1FA27CF8); HH(B,C,D,A, 2,23,0xC4AC5665); - - II(A,B,C,D, 0, 6,0xF4292244); II(D,A,B,C, 7,10,0x432AFF97); - II(C,D,A,B,14,15,0xAB9423A7); II(B,C,D,A, 5,21,0xFC93A039); - II(A,B,C,D,12, 6,0x655B59C3); II(D,A,B,C, 3,10,0x8F0CCC92); - II(C,D,A,B,10,15,0xFFEFF47D); II(B,C,D,A, 1,21,0x85845DD1); - II(A,B,C,D, 8, 6,0x6FA87E4F); II(D,A,B,C,15,10,0xFE2CE6E0); - II(C,D,A,B, 6,15,0xA3014314); II(B,C,D,A,13,21,0x4E0811A1); - II(A,B,C,D, 4, 6,0xF7537E82); II(D,A,B,C,11,10,0xBD3AF235); - II(C,D,A,B, 2,15,0x2AD7D2BB); II(B,C,D,A, 9,21,0xEB86D391); + FF(EAX,EBX,ECX,EDX,ESI, 0, 7,0xD76AA478); + FF(EDX,EAX,EBX,ECX,ESI, 1,12,0xE8C7B756); + FF(ECX,EDX,EAX,EBX,ESI, 2,17,0x242070DB); + FF(EBX,ECX,EDX,EAX,ESI, 3,22,0xC1BDCEEE); + FF(EAX,EBX,ECX,EDX,ESI, 4, 7,0xF57C0FAF); + FF(EDX,EAX,EBX,ECX,ESI, 5,12,0x4787C62A); + FF(ECX,EDX,EAX,EBX,ESI, 6,17,0xA8304613); + FF(EBX,ECX,EDX,EAX,ESI, 7,22,0xFD469501); + FF(EAX,EBX,ECX,EDX,ESI, 8, 7,0x698098D8); + 
FF(EDX,EAX,EBX,ECX,ESI, 9,12,0x8B44F7AF); + FF(ECX,EDX,EAX,EBX,ESI,10,17,0xFFFF5BB1); + FF(EBX,ECX,EDX,EAX,ESI,11,22,0x895CD7BE); + FF(EAX,EBX,ECX,EDX,ESI,12, 7,0x6B901122); + FF(EDX,EAX,EBX,ECX,ESI,13,12,0xFD987193); + FF(ECX,EDX,EAX,EBX,ESI,14,17,0xA679438E); + FF(EBX,ECX,EDX,EAX,ESI,15,22,0x49B40821); + + GG(EAX,EBX,ECX,EDX,ESI, 1, 5,0xF61E2562); + GG(EDX,EAX,EBX,ECX,ESI, 6, 9,0xC040B340); + GG(ECX,EDX,EAX,EBX,ESI,11,14,0x265E5A51); + GG(EBX,ECX,EDX,EAX,ESI, 0,20,0xE9B6C7AA); + GG(EAX,EBX,ECX,EDX,ESI, 5, 5,0xD62F105D); + GG(EDX,EAX,EBX,ECX,ESI,10, 9,0x02441453); + GG(ECX,EDX,EAX,EBX,ESI,15,14,0xD8A1E681); + GG(EBX,ECX,EDX,EAX,ESI, 4,20,0xE7D3FBC8); + GG(EAX,EBX,ECX,EDX,ESI, 9, 5,0x21E1CDE6); + GG(EDX,EAX,EBX,ECX,ESI,14, 9,0xC33707D6); + GG(ECX,EDX,EAX,EBX,ESI, 3,14,0xF4D50D87); + GG(EBX,ECX,EDX,EAX,ESI, 8,20,0x455A14ED); + GG(EAX,EBX,ECX,EDX,ESI,13, 5,0xA9E3E905); + GG(EDX,EAX,EBX,ECX,ESI, 2, 9,0xFCEFA3F8); + GG(ECX,EDX,EAX,EBX,ESI, 7,14,0x676F02D9); + GG(EBX,ECX,EDX,EAX,ESI,12,20,0x8D2A4C8A); + + HH(EAX,EBX,ECX,EDX,ESI, 5, 4,0xFFFA3942); + HH(EDX,EAX,EBX,ECX,ESI, 8,11,0x8771F681); + HH(ECX,EDX,EAX,EBX,ESI,11,16,0x6D9D6122); + HH(EBX,ECX,EDX,EAX,ESI,14,23,0xFDE5380C); + HH(EAX,EBX,ECX,EDX,ESI, 1, 4,0xA4BEEA44); + HH(EDX,EAX,EBX,ECX,ESI, 4,11,0x4BDECFA9); + HH(ECX,EDX,EAX,EBX,ESI, 7,16,0xF6BB4B60); + HH(EBX,ECX,EDX,EAX,ESI,10,23,0xBEBFBC70); + HH(EAX,EBX,ECX,EDX,ESI,13, 4,0x289B7EC6); + HH(EDX,EAX,EBX,ECX,ESI, 0,11,0xEAA127FA); + HH(ECX,EDX,EAX,EBX,ESI, 3,16,0xD4EF3085); + HH(EBX,ECX,EDX,EAX,ESI, 6,23,0x04881D05); + HH(EAX,EBX,ECX,EDX,ESI, 9, 4,0xD9D4D039); + HH(EDX,EAX,EBX,ECX,ESI,12,11,0xE6DB99E5); + HH(ECX,EDX,EAX,EBX,ESI,15,16,0x1FA27CF8); + HH(EBX,ECX,EDX,EAX,ESI, 2,23,0xC4AC5665); + + II(EAX,EBX,ECX,EDX,ESI, 0, 6,0xF4292244); + II(EDX,EAX,EBX,ECX,ESI, 7,10,0x432AFF97); + II(ECX,EDX,EAX,EBX,ESI,14,15,0xAB9423A7); + II(EBX,ECX,EDX,EAX,ESI, 5,21,0xFC93A039); + II(EAX,EBX,ECX,EDX,ESI,12, 6,0x655B59C3); + II(EDX,EAX,EBX,ECX,ESI, 3,10,0x8F0CCC92); + 
II(ECX,EDX,EAX,EBX,ESI,10,15,0xFFEFF47D); + II(EBX,ECX,EDX,EAX,ESI, 1,21,0x85845DD1); + II(EAX,EBX,ECX,EDX,ESI, 8, 6,0x6FA87E4F); + II(EDX,EAX,EBX,ECX,ESI,15,10,0xFE2CE6E0); + II(ECX,EDX,EAX,EBX,ESI, 6,15,0xA3014314); + II(EBX,ECX,EDX,EAX,ESI,13,21,0x4E0811A1); + II(EAX,EBX,ECX,EDX,ESI, 4, 6,0xF7537E82); + II(EDX,EAX,EBX,ECX,ESI,11,10,0xBD3AF235); + II(ECX,EDX,EAX,EBX,ESI, 2,15,0x2AD7D2BB); + II(EBX,ECX,EDX,EAX,ESI, 9,21,0xEB86D391); ASSIGN(EBP, ARG(1)) ADD(ARRAY4(EBP, 0), EAX) diff --git a/modules/alg_ia32/sha1core.S b/modules/alg_ia32/sha1core.S index 45f95947d..191686a7d 100644 --- a/modules/alg_ia32/sha1core.S +++ b/modules/alg_ia32/sha1core.S @@ -100,7 +100,7 @@ LOOP_UNTIL(ESI, IMM(80), .EXPANSION) ADD(E, A) ; \ ADD3_IMM(E, T, MAGIC1) ; \ ROTR_IMM(A, 5) ; - + #define F2_OR_F4(A, B, C, D, E, T, MSG, MAGIC) \ ROTL_IMM(A, 5) ; \ ADD(E, ARRAY4(EDI, MSG)) ; \ |