about | summary | refs | log | tree | commit | diff | stats
path: root/modules
diff options
context:
space:
mode:
author lloyd <[email protected]> 2006-08-13 16:00:54 +0000
committer lloyd <[email protected]> 2006-08-13 16:00:54 +0000
commit 2ebf2c637111f7b041bb2c47ccd0bb98f35e2e83 (patch)
tree ba9c80398d4e789f6f5a1554a3c3909a072209a8 /modules
parent 671061b187e594f3ce969ee8f10fa45f4ff70ff1 (diff)
Cleanups, and move the initial memory access to the beginning of each
MD5 round in an attempt to hide the latency a bit
Diffstat (limited to 'modules')
-rw-r--r-- modules/alg_ia32/md5core.S 127
-rw-r--r-- modules/alg_ia32/sha1core.S 2
2 files changed, 77 insertions, 52 deletions
diff --git a/modules/alg_ia32/md5core.S b/modules/alg_ia32/md5core.S
index a801a3681..7bc7ec99d 100644
--- a/modules/alg_ia32/md5core.S
+++ b/modules/alg_ia32/md5core.S
@@ -32,94 +32,119 @@ START_LOOP(.LOAD_INPUT)
ASSIGN(ARRAY4_INDIRECT(EDI,ESI,-2), ECX)
ASSIGN(ARRAY4_INDIRECT(EDI,ESI,-1), EDX)
LOOP_UNTIL(ESI, IMM(16), .LOAD_INPUT)
-
+
ASSIGN(EBP, ARG(1))
ASSIGN(EAX, ARRAY4(EBP, 0))
ASSIGN(EBX, ARRAY4(EBP, 1))
ASSIGN(ECX, ARRAY4(EBP, 2))
ASSIGN(EDX, ARRAY4(EBP, 3))
-#define T EBP
-
-#define FF(A, B, C, D, MSG, S, MAGIC) \
+#define FF(A, B, C, D, T, MSG, S, MAGIC) \
+ ADD(A, ARRAY4(EDI, MSG)) ; \
ASSIGN(T, C) ; \
XOR(T, D) ; \
AND(T, B) ; \
XOR(T, D) ; \
- ADD(A, ARRAY4(EDI, MSG)) ; \
ADD3_IMM(A, T, MAGIC) ; \
ROTL_IMM(A, S) ; \
ADD(A, B) ;
-#define GG(A, B, C, D, MSG, S, MAGIC) \
+#define GG(A, B, C, D, T, MSG, S, MAGIC) \
+ ADD(A, ARRAY4(EDI, MSG)) ; \
ASSIGN(T, B) ; \
XOR(T, C) ; \
AND(T, D) ; \
XOR(T, C) ; \
- ADD(A, ARRAY4(EDI, MSG)) ; \
ADD3_IMM(A, T, MAGIC) ; \
ROTL_IMM(A, S) ; \
ADD(A, B) ;
-#define HH(A, B, C, D, MSG, S, MAGIC) \
+#define HH(A, B, C, D, T, MSG, S, MAGIC) \
+ ADD(A, ARRAY4(EDI, MSG)) ; \
ASSIGN(T, B) ; \
XOR(T, C) ; \
XOR(T, D) ; \
- ADD(A, ARRAY4(EDI, MSG)) ; \
ADD3_IMM(A, T, MAGIC) ; \
ROTL_IMM(A, S) ; \
ADD(A, B) ;
-#define II(A, B, C, D, MSG, S, MAGIC) \
+#define II(A, B, C, D, T, MSG, S, MAGIC) \
+ ADD(A, ARRAY4(EDI, MSG)) ; \
ASSIGN(T, D) ; \
NOT(T) ; \
OR(T, B) ; \
XOR(T, C) ; \
- ADD(A, ARRAY4(EDI, MSG)) ; \
ADD3_IMM(A, T, MAGIC) ; \
ROTL_IMM(A, S) ; \
ADD(A, B) ;
-#define A EAX
-#define B EBX
-#define C ECX
-#define D EDX
-
- FF(A,B,C,D, 0, 7,0xD76AA478); FF(D,A,B,C, 1,12,0xE8C7B756);
- FF(C,D,A,B, 2,17,0x242070DB); FF(B,C,D,A, 3,22,0xC1BDCEEE);
- FF(A,B,C,D, 4, 7,0xF57C0FAF); FF(D,A,B,C, 5,12,0x4787C62A);
- FF(C,D,A,B, 6,17,0xA8304613); FF(B,C,D,A, 7,22,0xFD469501);
- FF(A,B,C,D, 8, 7,0x698098D8); FF(D,A,B,C, 9,12,0x8B44F7AF);
- FF(C,D,A,B,10,17,0xFFFF5BB1); FF(B,C,D,A,11,22,0x895CD7BE);
- FF(A,B,C,D,12, 7,0x6B901122); FF(D,A,B,C,13,12,0xFD987193);
- FF(C,D,A,B,14,17,0xA679438E); FF(B,C,D,A,15,22,0x49B40821);
-
- GG(A,B,C,D, 1, 5,0xF61E2562); GG(D,A,B,C, 6, 9,0xC040B340);
- GG(C,D,A,B,11,14,0x265E5A51); GG(B,C,D,A, 0,20,0xE9B6C7AA);
- GG(A,B,C,D, 5, 5,0xD62F105D); GG(D,A,B,C,10, 9,0x02441453);
- GG(C,D,A,B,15,14,0xD8A1E681); GG(B,C,D,A, 4,20,0xE7D3FBC8);
- GG(A,B,C,D, 9, 5,0x21E1CDE6); GG(D,A,B,C,14, 9,0xC33707D6);
- GG(C,D,A,B, 3,14,0xF4D50D87); GG(B,C,D,A, 8,20,0x455A14ED);
- GG(A,B,C,D,13, 5,0xA9E3E905); GG(D,A,B,C, 2, 9,0xFCEFA3F8);
- GG(C,D,A,B, 7,14,0x676F02D9); GG(B,C,D,A,12,20,0x8D2A4C8A);
-
- HH(A,B,C,D, 5, 4,0xFFFA3942); HH(D,A,B,C, 8,11,0x8771F681);
- HH(C,D,A,B,11,16,0x6D9D6122); HH(B,C,D,A,14,23,0xFDE5380C);
- HH(A,B,C,D, 1, 4,0xA4BEEA44); HH(D,A,B,C, 4,11,0x4BDECFA9);
- HH(C,D,A,B, 7,16,0xF6BB4B60); HH(B,C,D,A,10,23,0xBEBFBC70);
- HH(A,B,C,D,13, 4,0x289B7EC6); HH(D,A,B,C, 0,11,0xEAA127FA);
- HH(C,D,A,B, 3,16,0xD4EF3085); HH(B,C,D,A, 6,23,0x04881D05);
- HH(A,B,C,D, 9, 4,0xD9D4D039); HH(D,A,B,C,12,11,0xE6DB99E5);
- HH(C,D,A,B,15,16,0x1FA27CF8); HH(B,C,D,A, 2,23,0xC4AC5665);
-
- II(A,B,C,D, 0, 6,0xF4292244); II(D,A,B,C, 7,10,0x432AFF97);
- II(C,D,A,B,14,15,0xAB9423A7); II(B,C,D,A, 5,21,0xFC93A039);
- II(A,B,C,D,12, 6,0x655B59C3); II(D,A,B,C, 3,10,0x8F0CCC92);
- II(C,D,A,B,10,15,0xFFEFF47D); II(B,C,D,A, 1,21,0x85845DD1);
- II(A,B,C,D, 8, 6,0x6FA87E4F); II(D,A,B,C,15,10,0xFE2CE6E0);
- II(C,D,A,B, 6,15,0xA3014314); II(B,C,D,A,13,21,0x4E0811A1);
- II(A,B,C,D, 4, 6,0xF7537E82); II(D,A,B,C,11,10,0xBD3AF235);
- II(C,D,A,B, 2,15,0x2AD7D2BB); II(B,C,D,A, 9,21,0xEB86D391);
+ FF(EAX,EBX,ECX,EDX,ESI, 0, 7,0xD76AA478);
+ FF(EDX,EAX,EBX,ECX,ESI, 1,12,0xE8C7B756);
+ FF(ECX,EDX,EAX,EBX,ESI, 2,17,0x242070DB);
+ FF(EBX,ECX,EDX,EAX,ESI, 3,22,0xC1BDCEEE);
+ FF(EAX,EBX,ECX,EDX,ESI, 4, 7,0xF57C0FAF);
+ FF(EDX,EAX,EBX,ECX,ESI, 5,12,0x4787C62A);
+ FF(ECX,EDX,EAX,EBX,ESI, 6,17,0xA8304613);
+ FF(EBX,ECX,EDX,EAX,ESI, 7,22,0xFD469501);
+ FF(EAX,EBX,ECX,EDX,ESI, 8, 7,0x698098D8);
+ FF(EDX,EAX,EBX,ECX,ESI, 9,12,0x8B44F7AF);
+ FF(ECX,EDX,EAX,EBX,ESI,10,17,0xFFFF5BB1);
+ FF(EBX,ECX,EDX,EAX,ESI,11,22,0x895CD7BE);
+ FF(EAX,EBX,ECX,EDX,ESI,12, 7,0x6B901122);
+ FF(EDX,EAX,EBX,ECX,ESI,13,12,0xFD987193);
+ FF(ECX,EDX,EAX,EBX,ESI,14,17,0xA679438E);
+ FF(EBX,ECX,EDX,EAX,ESI,15,22,0x49B40821);
+
+ GG(EAX,EBX,ECX,EDX,ESI, 1, 5,0xF61E2562);
+ GG(EDX,EAX,EBX,ECX,ESI, 6, 9,0xC040B340);
+ GG(ECX,EDX,EAX,EBX,ESI,11,14,0x265E5A51);
+ GG(EBX,ECX,EDX,EAX,ESI, 0,20,0xE9B6C7AA);
+ GG(EAX,EBX,ECX,EDX,ESI, 5, 5,0xD62F105D);
+ GG(EDX,EAX,EBX,ECX,ESI,10, 9,0x02441453);
+ GG(ECX,EDX,EAX,EBX,ESI,15,14,0xD8A1E681);
+ GG(EBX,ECX,EDX,EAX,ESI, 4,20,0xE7D3FBC8);
+ GG(EAX,EBX,ECX,EDX,ESI, 9, 5,0x21E1CDE6);
+ GG(EDX,EAX,EBX,ECX,ESI,14, 9,0xC33707D6);
+ GG(ECX,EDX,EAX,EBX,ESI, 3,14,0xF4D50D87);
+ GG(EBX,ECX,EDX,EAX,ESI, 8,20,0x455A14ED);
+ GG(EAX,EBX,ECX,EDX,ESI,13, 5,0xA9E3E905);
+ GG(EDX,EAX,EBX,ECX,ESI, 2, 9,0xFCEFA3F8);
+ GG(ECX,EDX,EAX,EBX,ESI, 7,14,0x676F02D9);
+ GG(EBX,ECX,EDX,EAX,ESI,12,20,0x8D2A4C8A);
+
+ HH(EAX,EBX,ECX,EDX,ESI, 5, 4,0xFFFA3942);
+ HH(EDX,EAX,EBX,ECX,ESI, 8,11,0x8771F681);
+ HH(ECX,EDX,EAX,EBX,ESI,11,16,0x6D9D6122);
+ HH(EBX,ECX,EDX,EAX,ESI,14,23,0xFDE5380C);
+ HH(EAX,EBX,ECX,EDX,ESI, 1, 4,0xA4BEEA44);
+ HH(EDX,EAX,EBX,ECX,ESI, 4,11,0x4BDECFA9);
+ HH(ECX,EDX,EAX,EBX,ESI, 7,16,0xF6BB4B60);
+ HH(EBX,ECX,EDX,EAX,ESI,10,23,0xBEBFBC70);
+ HH(EAX,EBX,ECX,EDX,ESI,13, 4,0x289B7EC6);
+ HH(EDX,EAX,EBX,ECX,ESI, 0,11,0xEAA127FA);
+ HH(ECX,EDX,EAX,EBX,ESI, 3,16,0xD4EF3085);
+ HH(EBX,ECX,EDX,EAX,ESI, 6,23,0x04881D05);
+ HH(EAX,EBX,ECX,EDX,ESI, 9, 4,0xD9D4D039);
+ HH(EDX,EAX,EBX,ECX,ESI,12,11,0xE6DB99E5);
+ HH(ECX,EDX,EAX,EBX,ESI,15,16,0x1FA27CF8);
+ HH(EBX,ECX,EDX,EAX,ESI, 2,23,0xC4AC5665);
+
+ II(EAX,EBX,ECX,EDX,ESI, 0, 6,0xF4292244);
+ II(EDX,EAX,EBX,ECX,ESI, 7,10,0x432AFF97);
+ II(ECX,EDX,EAX,EBX,ESI,14,15,0xAB9423A7);
+ II(EBX,ECX,EDX,EAX,ESI, 5,21,0xFC93A039);
+ II(EAX,EBX,ECX,EDX,ESI,12, 6,0x655B59C3);
+ II(EDX,EAX,EBX,ECX,ESI, 3,10,0x8F0CCC92);
+ II(ECX,EDX,EAX,EBX,ESI,10,15,0xFFEFF47D);
+ II(EBX,ECX,EDX,EAX,ESI, 1,21,0x85845DD1);
+ II(EAX,EBX,ECX,EDX,ESI, 8, 6,0x6FA87E4F);
+ II(EDX,EAX,EBX,ECX,ESI,15,10,0xFE2CE6E0);
+ II(ECX,EDX,EAX,EBX,ESI, 6,15,0xA3014314);
+ II(EBX,ECX,EDX,EAX,ESI,13,21,0x4E0811A1);
+ II(EAX,EBX,ECX,EDX,ESI, 4, 6,0xF7537E82);
+ II(EDX,EAX,EBX,ECX,ESI,11,10,0xBD3AF235);
+ II(ECX,EDX,EAX,EBX,ESI, 2,15,0x2AD7D2BB);
+ II(EBX,ECX,EDX,EAX,ESI, 9,21,0xEB86D391);
ASSIGN(EBP, ARG(1))
ADD(ARRAY4(EBP, 0), EAX)
diff --git a/modules/alg_ia32/sha1core.S b/modules/alg_ia32/sha1core.S
index 45f95947d..191686a7d 100644
--- a/modules/alg_ia32/sha1core.S
+++ b/modules/alg_ia32/sha1core.S
@@ -100,7 +100,7 @@ LOOP_UNTIL(ESI, IMM(80), .EXPANSION)
ADD(E, A) ; \
ADD3_IMM(E, T, MAGIC1) ; \
ROTR_IMM(A, 5) ;
-
+
#define F2_OR_F4(A, B, C, D, E, T, MSG, MAGIC) \
ROTL_IMM(A, 5) ; \
ADD(E, ARRAY4(EDI, MSG)) ; \