aboutsummaryrefslogtreecommitdiffstats
path: root/modules
diff options
context:
space:
mode:
author lloyd <[email protected]> 2008-09-17 22:38:10 +0000
committer lloyd <[email protected]> 2008-09-17 22:38:10 +0000
commit e65adc861146a0d88064a3889ed2efa2bbd9133f (patch)
tree 23d737ed0171199a93372bc4e76a595385445a01 /modules
parent bd5bc359e18058ca5cbb4b8ae404b76652b63699 (diff)
Update to 8 word linmul2. Fix word8_madd3
Diffstat (limited to 'modules')
-rw-r--r-- modules/mp_ia32_msvc/mp_asmi.h 52
1 files changed, 41 insertions, 11 deletions
diff --git a/modules/mp_ia32_msvc/mp_asmi.h b/modules/mp_ia32_msvc/mp_asmi.h
index 9a156f62b..5eaa46eb4 100644
--- a/modules/mp_ia32_msvc/mp_asmi.h
+++ b/modules/mp_ia32_msvc/mp_asmi.h
@@ -220,7 +220,7 @@ __forceinline word word8_sub3(word z[8], const word x[8],
/*************************************************
* Four Word Block Linear Multiplication *
*************************************************/
-inline word word4_linmul2(word x[4], word y, word carry)
+inline word word8_linmul2(word x[8], word y, word carry)
{
__asm
{
@@ -250,7 +250,36 @@ inline word word4_linmul2(word x[4], word y, word carry)
mul [y] //edx(hi):eax(lo)=a*b
add eax,ecx //sum lo carry
adc edx,0 //sum hi carry
+ mov ecx,edx //store carry
mov [esi+12],eax //load a
+
+ mov eax,[esi+16] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,ecx //sum lo carry
+ adc edx,0 //sum hi carry
+ mov ecx,edx //store carry
+ mov [esi+16],eax //load a
+
+ mov eax,[esi+20] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,ecx //sum lo carry
+ adc edx,0 //sum hi carry
+ mov ecx,edx //store carry
+ mov [esi+20],eax //load a
+
+ mov eax,[esi+24] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,ecx //sum lo carry
+ adc edx,0 //sum hi carry
+ mov ecx,edx //store carry
+ mov [esi+24],eax //load a
+
+ mov eax,[esi+28] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,ecx //sum lo carry
+ adc edx,0 //sum hi carry
+ mov [esi+28],eax //load a
+
mov eax,edx //store carry
}
}
@@ -465,18 +494,19 @@ __forceinline word word8_linmul3(word z[4], const word x[4], word y, word carry)
}
/*************************************************
-* Eight Word Block Multiply-Add *
+* Eight Word Block Multiply/Add *
*************************************************/
-inline void word8_madd3(word z[], word x, const word y[], word* carry)
+inline word word8_madd3(word z[8], const word x[8], word y, word carry)
{
- word_madd(x, y[0], z[0], *carry, z + 0, carry);
- word_madd(x, y[1], z[1], *carry, z + 1, carry);
- word_madd(x, y[2], z[2], *carry, z + 2, carry);
- word_madd(x, y[3], z[3], *carry, z + 3, carry);
- word_madd(x, y[4], z[4], *carry, z + 4, carry);
- word_madd(x, y[5], z[5], *carry, z + 5, carry);
- word_madd(x, y[6], z[6], *carry, z + 6, carry);
- word_madd(x, y[7], z[7], *carry, z + 7, carry);
+ z[0] = word_madd3(x[0], y, z[0], &carry);
+ z[1] = word_madd3(x[1], y, z[1], &carry);
+ z[2] = word_madd3(x[2], y, z[2], &carry);
+ z[3] = word_madd3(x[3], y, z[3], &carry);
+ z[4] = word_madd3(x[4], y, z[4], &carry);
+ z[5] = word_madd3(x[5], y, z[5], &carry);
+ z[6] = word_madd3(x[6], y, z[6], &carry);
+ z[7] = word_madd3(x[7], y, z[7], &carry);
+ return carry;
}
/*************************************************