diff options
author | Jack Lloyd <[email protected]> | 2016-07-20 20:29:22 -0400 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2016-07-21 14:46:19 -0400 |
commit | 6907e196cbc8fd4a80ebf2443f2a918bb4a19706 (patch) | |
tree | 2fb7ed81df1a1f899ea0136e6e872961b8468e46 /src/lib/math | |
parent | b4da573fc1c241a6badc83f4ec166cbdd5c70c5c (diff) |
Merge asm into single mp_madd.h and mp_asmi.h files
Avoids some cut and paste, also removes the need for special logic in
configure.py for handling mp module specially.
Merge SIMD classes into a single type SIMD_4x32
Diffstat (limited to 'src/lib/math')
-rw-r--r-- | src/lib/math/mp/info.txt | 10 | ||||
-rw-r--r-- | src/lib/math/mp/mp_asmi.h | 820 | ||||
-rw-r--r-- | src/lib/math/mp/mp_core.cpp (renamed from src/lib/math/mp/mp_asm.cpp) | 186 | ||||
-rw-r--r-- | src/lib/math/mp/mp_generic/info.txt | 6 | ||||
-rw-r--r-- | src/lib/math/mp/mp_generic/mp_asmi.h | 203 | ||||
-rw-r--r-- | src/lib/math/mp/mp_madd.h (renamed from src/lib/math/mp/mp_generic/mp_madd.h) | 78 | ||||
-rw-r--r-- | src/lib/math/mp/mp_misc.cpp | 79 | ||||
-rw-r--r-- | src/lib/math/mp/mp_shift.cpp | 133 | ||||
-rw-r--r-- | src/lib/math/mp/mp_x86_32/info.txt | 18 | ||||
-rw-r--r-- | src/lib/math/mp/mp_x86_32/mp_asmi.h | 236 | ||||
-rw-r--r-- | src/lib/math/mp/mp_x86_32/mp_madd.h | 63 | ||||
-rw-r--r-- | src/lib/math/mp/mp_x86_32_msvc/info.txt | 16 | ||||
-rw-r--r-- | src/lib/math/mp/mp_x86_32_msvc/mp_asmi.h | 454 | ||||
-rw-r--r-- | src/lib/math/mp/mp_x86_64/info.txt | 18 | ||||
-rw-r--r-- | src/lib/math/mp/mp_x86_64/mp_asmi.h | 244 | ||||
-rw-r--r-- | src/lib/math/mp/mp_x86_64/mp_madd.h | 65 |
16 files changed, 1084 insertions, 1545 deletions
diff --git a/src/lib/math/mp/info.txt b/src/lib/math/mp/info.txt index 6aa0142f3..b5db12648 100644 --- a/src/lib/math/mp/info.txt +++ b/src/lib/math/mp/info.txt @@ -1,12 +1,10 @@ define BIGINT_MP 20151225 <source> -mp_asm.cpp +mp_core.cpp mp_comba.cpp mp_karat.cpp mp_monty.cpp -mp_misc.cpp -mp_shift.cpp </source> <header:public> @@ -15,8 +13,6 @@ mp_types.h <header:internal> mp_core.h +mp_madd.h +mp_asmi.h </header:internal> - -<requires> -mp_x86_64|mp_x86_32|mp_x86_32_msvc|mp_generic -</requires> diff --git a/src/lib/math/mp/mp_asmi.h b/src/lib/math/mp/mp_asmi.h new file mode 100644 index 000000000..afb4d1407 --- /dev/null +++ b/src/lib/math/mp/mp_asmi.h @@ -0,0 +1,820 @@ +/* +* Lowest Level MPI Algorithms +* (C) 1999-2010 Jack Lloyd +* 2006 Luca Piccarreta +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_MP_ASM_INTERNAL_H__ +#define BOTAN_MP_ASM_INTERNAL_H__ + +#include <botan/internal/mp_madd.h> + +namespace Botan { + +#if defined(BOTAN_MP_USE_X86_32_ASM) + +#define ADDSUB2_OP(OPERATION, INDEX) \ + ASM("movl 4*" #INDEX "(%[y]), %[carry]") \ + ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])") \ + +#define ADDSUB3_OP(OPERATION, INDEX) \ + ASM("movl 4*" #INDEX "(%[x]), %[carry]") \ + ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]") \ + ASM("movl %[carry], 4*" #INDEX "(%[z])") \ + +#define LINMUL_OP(WRITE_TO, INDEX) \ + ASM("movl 4*" #INDEX "(%[x]),%%eax") \ + ASM("mull %[y]") \ + ASM("addl %[carry],%%eax") \ + ASM("adcl $0,%%edx") \ + ASM("movl %%edx,%[carry]") \ + ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])") + +#define MULADD_OP(IGNORED, INDEX) \ + ASM("movl 4*" #INDEX "(%[x]),%%eax") \ + ASM("mull %[y]") \ + ASM("addl %[carry],%%eax") \ + ASM("adcl $0,%%edx") \ + ASM("addl 4*" #INDEX "(%[z]),%%eax") \ + ASM("adcl $0,%%edx") \ + ASM("movl %%edx,%[carry]") \ + ASM("movl %%eax, 4*" #INDEX " (%[z])") + +#define ADD_OR_SUBTRACT(CORE_CODE) \ + ASM("rorl %[carry]") \ + CORE_CODE \ + ASM("sbbl %[carry],%[carry]") \ + ASM("negl %[carry]") + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + +#define ADDSUB2_OP(OPERATION, INDEX) \ + ASM("movq 8*" #INDEX "(%[y]), %[carry]") \ + ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])") \ + +#define ADDSUB3_OP(OPERATION, INDEX) \ + ASM("movq 8*" #INDEX "(%[x]), %[carry]") \ + ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \ + ASM("movq %[carry], 8*" #INDEX "(%[z])") \ + +#define LINMUL_OP(WRITE_TO, INDEX) \ + ASM("movq 8*" #INDEX "(%[x]),%%rax") \ + ASM("mulq %[y]") \ + ASM("addq %[carry],%%rax") \ + ASM("adcq $0,%%rdx") \ + ASM("movq %%rdx,%[carry]") \ + ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])") + +#define MULADD_OP(IGNORED, INDEX) \ + ASM("movq 8*" #INDEX "(%[x]),%%rax") \ + ASM("mulq %[y]") \ + ASM("addq %[carry],%%rax") \ + ASM("adcq $0,%%rdx") \ + ASM("addq 8*" #INDEX "(%[z]),%%rax") \ + ASM("adcq $0,%%rdx") \ + ASM("movq %%rdx,%[carry]") \ + ASM("movq %%rax, 8*" #INDEX " (%[z])") + +#define ADD_OR_SUBTRACT(CORE_CODE) \ + ASM("rorq %[carry]") \ + CORE_CODE \ + ASM("sbbq %[carry],%[carry]") \ + ASM("negq %[carry]") + +#endif + +#if defined(ADD_OR_SUBTRACT) + +#define ASM(x) x "\n\t" + +#define DO_8_TIMES(MACRO, ARG) \ + MACRO(ARG, 0) \ + MACRO(ARG, 1) \ + MACRO(ARG, 2) \ + MACRO(ARG, 3) \ + MACRO(ARG, 4) \ + MACRO(ARG, 5) \ + MACRO(ARG, 6) \ + MACRO(ARG, 7) + +#endif + +/* +* Word Addition +*/ +inline word word_add(word x, word y, word* carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + +#else + word z = x + y; + word c1 = (z < x); + z += *carry; + *carry = c1 | (z < *carry); + return z; +#endif + } + +/* +* Eight Word Block Addition, Two Argument +*/ +inline word word8_add2(word x[8], const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM) + + __asm { + mov edx,[x] + mov esi,[y] + xor eax,eax + sub eax,[carry] //force CF=1 iff *carry==1 + mov eax,[esi] + adc [edx],eax + mov eax,[esi+4] + adc [edx+4],eax + mov eax,[esi+8] + adc [edx+8],eax + mov eax,[esi+12] + adc [edx+12],eax + mov eax,[esi+16] + adc [edx+16],eax + mov eax,[esi+20] + adc [edx+20],eax + mov eax,[esi+24] + adc [edx+24],eax + mov eax,[esi+28] + adc [edx+28],eax + sbb eax,eax + neg eax + } + +#else + x[0] = word_add(x[0], y[0], &carry); + x[1] = word_add(x[1], y[1], &carry); + x[2] = word_add(x[2], y[2], &carry); + x[3] = word_add(x[3], y[3], &carry); + x[4] = word_add(x[4], y[4], &carry); + x[5] = word_add(x[5], y[5], &carry); + x[6] = word_add(x[6], y[6], &carry); + x[7] = word_add(x[7], y[7], &carry); + return carry; +#endif + } + +/* +* Eight Word Block Addition, Three Argument +*/ +inline word word8_add3(word z[8], const word x[8], + const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM) + + __asm { + mov edi,[x] + mov esi,[y] + mov ebx,[z] + xor eax,eax + sub eax,[carry] //force CF=1 iff *carry==1 + mov eax,[edi] + adc eax,[esi] + mov [ebx],eax + + mov eax,[edi+4] + adc eax,[esi+4] + mov [ebx+4],eax + + mov eax,[edi+8] + adc eax,[esi+8] + mov [ebx+8],eax + + mov eax,[edi+12] + adc eax,[esi+12] + mov [ebx+12],eax + + mov eax,[edi+16] + adc eax,[esi+16] + mov [ebx+16],eax + + mov eax,[edi+20] + adc eax,[esi+20] + mov [ebx+20],eax + + mov eax,[edi+24] + adc eax,[esi+24] + mov [ebx+24],eax + + mov eax,[edi+28] + adc eax,[esi+28] + mov [ebx+28],eax + + sbb eax,eax + neg eax + } + +#else + z[0] = word_add(x[0], y[0], &carry); + z[1] = word_add(x[1], y[1], &carry); + z[2] = word_add(x[2], y[2], &carry); + z[3] = word_add(x[3], y[3], &carry); + z[4] = word_add(x[4], y[4], &carry); + z[5] = word_add(x[5], y[5], &carry); + z[6] = word_add(x[6], y[6], &carry); + z[7] = word_add(x[7], y[7], &carry); + return carry; +#endif + } + +/* +* Word Subtraction +*/ +inline word word_sub(word x, word y, word* carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + +#else + word t0 = x - y; + word c1 = (t0 > x); + word z = t0 - *carry; + *carry = c1 | (z > t0); + return z; +#endif + } + +/* +* Eight Word Block Subtraction, Two Argument +*/ +inline word word8_sub2(word x[8], const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM) + + __asm { + mov edi,[x] + mov esi,[y] + xor eax,eax + sub eax,[carry] //force CF=1 iff *carry==1 + mov eax,[edi] + sbb eax,[esi] + mov [edi],eax + mov eax,[edi+4] + sbb eax,[esi+4] + mov [edi+4],eax + mov eax,[edi+8] + sbb eax,[esi+8] + mov [edi+8],eax + mov eax,[edi+12] + sbb eax,[esi+12] + mov [edi+12],eax + mov eax,[edi+16] + sbb eax,[esi+16] + mov [edi+16],eax + mov eax,[edi+20] + sbb eax,[esi+20] + mov [edi+20],eax + mov eax,[edi+24] + sbb eax,[esi+24] + mov [edi+24],eax + mov eax,[edi+28] + sbb eax,[esi+28] + mov [edi+28],eax + sbb eax,eax + neg eax + } + +#else + x[0] = word_sub(x[0], y[0], &carry); + x[1] = word_sub(x[1], y[1], &carry); + x[2] = word_sub(x[2], y[2], &carry); + x[3] = word_sub(x[3], y[3], &carry); + x[4] = word_sub(x[4], y[4], &carry); + x[5] = word_sub(x[5], y[5], &carry); + x[6] = word_sub(x[6], y[6], &carry); + x[7] = word_sub(x[7], y[7], &carry); + return carry; +#endif + } + +/* +* Eight Word Block Subtraction, Two Argument +*/ +inline word word8_sub2_rev(word x[8], const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) + : [carry]"=r"(carry) + : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) + : [carry]"=r"(carry) + : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) + : "cc", "memory"); + return carry; + +#else + x[0] = word_sub(y[0], x[0], &carry); + x[1] = word_sub(y[1], x[1], &carry); + x[2] = word_sub(y[2], x[2], &carry); + x[3] = word_sub(y[3], x[3], &carry); + x[4] = word_sub(y[4], x[4], &carry); + x[5] = word_sub(y[5], x[5], &carry); + x[6] = word_sub(y[6], x[6], &carry); + x[7] = word_sub(y[7], x[7], &carry); + return carry; +#endif + } + +/* +* Eight Word Block Subtraction, Three Argument +*/ +inline word word8_sub3(word z[8], const word x[8], + const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM) + + __asm { + mov edi,[x] + mov esi,[y] + xor eax,eax + sub eax,[carry] //force CF=1 iff *carry==1 + mov ebx,[z] + mov eax,[edi] + sbb eax,[esi] + mov [ebx],eax + mov eax,[edi+4] + sbb eax,[esi+4] + mov [ebx+4],eax + mov eax,[edi+8] + sbb eax,[esi+8] + mov [ebx+8],eax + mov eax,[edi+12] + sbb eax,[esi+12] + mov [ebx+12],eax + mov eax,[edi+16] + sbb eax,[esi+16] + mov [ebx+16],eax + mov eax,[edi+20] + sbb eax,[esi+20] + mov [ebx+20],eax + mov eax,[edi+24] + sbb eax,[esi+24] + mov [ebx+24],eax + mov eax,[edi+28] + sbb eax,[esi+28] + mov [ebx+28],eax + sbb eax,eax + neg eax + } + +#else + z[0] = word_sub(x[0], y[0], &carry); + z[1] = word_sub(x[1], y[1], &carry); + z[2] = word_sub(x[2], y[2], &carry); + z[3] = word_sub(x[3], y[3], &carry); + z[4] = word_sub(x[4], y[4], &carry); + z[5] = word_sub(x[5], y[5], &carry); + z[6] = word_sub(x[6], y[6], &carry); + z[7] = word_sub(x[7], y[7], &carry); + return carry; +#endif + } + +/* +* Eight Word Block Linear Multiplication +*/ +inline word word8_linmul2(word x[8], word y, word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + DO_8_TIMES(LINMUL_OP, "x") + : [carry]"=r"(carry) + : [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%eax", "%edx"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + DO_8_TIMES(LINMUL_OP, "x") + : [carry]"=r"(carry) + : [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%rax", "%rdx"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM) + + __asm { + mov esi,[x] + mov eax,[esi] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,[carry] //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi],eax //load a + + mov eax,[esi+4] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi+4],eax //load a + + mov eax,[esi+8] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi+8],eax //load a + + mov eax,[esi+12] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi+12],eax //load a + + mov eax,[esi+16] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi+16],eax //load a + + mov eax,[esi+20] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi+20],eax //load a + + mov eax,[esi+24] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi+24],eax //load a + + mov eax,[esi+28] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov [esi+28],eax //load a + + mov eax,edx //store carry + } + +#else + x[0] = word_madd2(x[0], y, &carry); + x[1] = word_madd2(x[1], y, &carry); + x[2] = word_madd2(x[2], y, &carry); + x[3] = word_madd2(x[3], y, &carry); + x[4] = word_madd2(x[4], y, &carry); + x[5] = word_madd2(x[5], y, &carry); + x[6] = word_madd2(x[6], y, &carry); + x[7] = word_madd2(x[7], y, &carry); + return carry; +#endif + } + +/* +* Eight Word Block Linear Multiplication +*/ +inline word word8_linmul3(word z[8], const word x[8], word y, word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + DO_8_TIMES(LINMUL_OP, "z") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%eax", "%edx"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + asm( + DO_8_TIMES(LINMUL_OP, "z") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%rax", "%rdx"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM) + + __asm { + mov edi,[z] + mov esi,[x] + mov eax,[esi] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,[carry] //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi],eax //load a + + mov eax,[esi+4] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi+4],eax //load a + + mov eax,[esi+8] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi+8],eax //load a + + mov eax,[esi+12] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi+12],eax //load a + + mov eax,[esi+16] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi+16],eax //load a + + mov eax,[esi+20] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi+20],eax //load a + + mov eax,[esi+24] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi+24],eax //load a + + mov eax,[esi+28] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov [edi+28],eax //load a + mov eax,edx //store carry + } + +#else + z[0] = word_madd2(x[0], y, &carry); + z[1] = word_madd2(x[1], y, &carry); + z[2] = word_madd2(x[2], y, &carry); + z[3] = word_madd2(x[3], y, &carry); + z[4] = word_madd2(x[4], y, &carry); + z[5] = word_madd2(x[5], y, &carry); + z[6] = word_madd2(x[6], y, &carry); + z[7] = word_madd2(x[7], y, &carry); + return carry; +#endif + } + +/* +* Eight Word Block Multiply/Add +*/ +inline word word8_madd3(word z[8], const word x[8], word y, word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + DO_8_TIMES(MULADD_OP, "") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%eax", "%edx"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + DO_8_TIMES(MULADD_OP, "") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%rax", "%rdx"); + return carry; + +#else + z[0] = word_madd3(x[0], y, z[0], &carry); + z[1] = word_madd3(x[1], y, z[1], &carry); + z[2] = word_madd3(x[2], y, z[2], &carry); + z[3] = word_madd3(x[3], y, z[3], &carry); + z[4] = word_madd3(x[4], y, z[4], &carry); + z[5] = word_madd3(x[5], y, z[5], &carry); + z[6] = word_madd3(x[6], y, z[6], &carry); + z[7] = word_madd3(x[7], y, z[7], &carry); + return carry; +#endif + } + +/* +* Multiply-Add Accumulator +* (w2,w1,w0) += x * y +*/ +inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ASM("mull %[y]") + + ASM("addl %[x],%[w0]") + ASM("adcl %[y],%[w1]") + ASM("adcl $0,%[w2]") + + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ASM("mulq %[y]") + + ASM("addq %[x],%[w0]") + ASM("adcq %[y],%[w1]") + ASM("adcq $0,%[w2]") + + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#else + word carry = *w0; + *w0 = word_madd2(x, y, &carry); + *w1 += carry; + *w2 += (*w1 < carry) ? 1 : 0; +#endif + } + +/* +* Multiply-Add Accumulator +* (w2,w1,w0) += 2 * x * y +*/ +inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ASM("mull %[y]") + + ASM("addl %[x],%[w0]") + ASM("adcl %[y],%[w1]") + ASM("adcl $0,%[w2]") + + ASM("addl %[x],%[w0]") + ASM("adcl %[y],%[w1]") + ASM("adcl $0,%[w2]") + + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ASM("mulq %[y]") + + ASM("addq %[x],%[w0]") + ASM("adcq %[y],%[w1]") + ASM("adcq $0,%[w2]") + + ASM("addq %[x],%[w0]") + ASM("adcq %[y],%[w1]") + ASM("adcq $0,%[w2]") + + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#else + word carry = 0; + x = word_madd2(x, y, &carry); + y = carry; + + word top = (y >> (BOTAN_MP_WORD_BITS-1)); + y <<= 1; + y |= (x >> (BOTAN_MP_WORD_BITS-1)); + x <<= 1; + + carry = 0; + *w0 = word_add(*w0, x, &carry); + *w1 = word_add(*w1, y, &carry); + *w2 = word_add(*w2, top, &carry); +#endif + } + +#if defined(ASM) + #undef ASM + #undef DO_8_TIMES + #undef ADD_OR_SUBTRACT + #undef ADDSUB2_OP + #undef ADDSUB3_OP + #undef LINMUL_OP + #undef MULADD_OP +#endif + +} + +#endif diff --git a/src/lib/math/mp/mp_asm.cpp b/src/lib/math/mp/mp_core.cpp index cfbb027d7..2a0b08f67 100644 --- a/src/lib/math/mp/mp_asm.cpp +++ b/src/lib/math/mp/mp_core.cpp @@ -8,7 +8,6 @@ #include <botan/internal/mp_core.h> #include <botan/internal/mp_asmi.h> -#include <botan/internal/mp_core.h> #include <botan/internal/ct_utils.h> #include <botan/exceptn.h> #include <botan/mem_ops.h> @@ -253,4 +252,189 @@ void bigint_linmul3(word z[], const word x[], size_t x_size, word y) z[x_size] = carry; } +/* +* Single Operand Left Shift +*/ +void bigint_shl1(word x[], size_t x_size, size_t word_shift, size_t bit_shift) + { + if(word_shift) + { + copy_mem(x + word_shift, x, x_size); + clear_mem(x, word_shift); + } + + if(bit_shift) + { + word carry = 0; + for(size_t j = word_shift; j != x_size + word_shift + 1; ++j) + { + word temp = x[j]; + x[j] = (temp << bit_shift) | carry; + carry = (temp >> (MP_WORD_BITS - bit_shift)); + } + } + } + +/* +* Single Operand Right Shift +*/ +void bigint_shr1(word x[], size_t x_size, size_t word_shift, size_t bit_shift) + { + if(x_size < word_shift) + { + clear_mem(x, x_size); + return; + } + + if(word_shift) + { + copy_mem(x, x + word_shift, x_size - word_shift); + clear_mem(x + x_size - word_shift, word_shift); + } + + if(bit_shift) + { + word carry = 0; + + size_t top = x_size - word_shift; + + while(top >= 4) + { + word w = x[top-1]; + x[top-1] = (w >> bit_shift) | carry; + carry = (w << (MP_WORD_BITS - bit_shift)); + + w = x[top-2]; + x[top-2] = (w >> bit_shift) | carry; + carry = (w << (MP_WORD_BITS - bit_shift)); + + w = x[top-3]; + x[top-3] = (w >> bit_shift) | carry; + carry = (w << (MP_WORD_BITS - bit_shift)); + + w = x[top-4]; + x[top-4] = (w >> bit_shift) | carry; + carry = (w << (MP_WORD_BITS - bit_shift)); + + top -= 4; + } + + while(top) + { + word w = x[top-1]; + x[top-1] = (w >> bit_shift) | carry; + carry = (w << (MP_WORD_BITS - bit_shift)); + + top--; + } + } + } + +/* +* Two Operand Left Shift +*/ +void bigint_shl2(word y[], const word x[], size_t x_size, + size_t word_shift, size_t bit_shift) + { + for(size_t j = 0; j != x_size; ++j) + y[j + word_shift] = x[j]; + if(bit_shift) + { + word carry = 0; + for(size_t j = word_shift; j != x_size + word_shift + 1; ++j) + { + word w = y[j]; + y[j] = (w << bit_shift) | carry; + carry = (w >> (MP_WORD_BITS - bit_shift)); + } + } + } + +/* +* Two Operand Right Shift +*/ +void bigint_shr2(word y[], const word x[], size_t x_size, + size_t word_shift, size_t bit_shift) + { + if(x_size < word_shift) return; + + for(size_t j = 0; j != x_size - word_shift; ++j) + y[j] = x[j + word_shift]; + if(bit_shift) + { + word carry = 0; + for(size_t j = x_size - word_shift; j > 0; --j) + { + word w = y[j-1]; + y[j-1] = (w >> bit_shift) | carry; + carry = (w << (MP_WORD_BITS - bit_shift)); + } + } + } + +/* +* Compare two MP integers +*/ +s32bit bigint_cmp(const word x[], size_t x_size, + const word y[], size_t y_size) + { + if(x_size < y_size) { return (-bigint_cmp(y, y_size, x, x_size)); } + + while(x_size > y_size) + { + if(x[x_size-1]) + return 1; + x_size--; + } + + for(size_t i = x_size; i > 0; --i) + { + if(x[i-1] > y[i-1]) + return 1; + if(x[i-1] < y[i-1]) + return -1; + } + + return 0; + } + +/* +* Do a 2-word/1-word Division +*/ +word bigint_divop(word n1, word n0, word d) + { + if(d == 0) + throw Invalid_Argument("bigint_divop divide by zero"); + + word high = n1 % d, quotient = 0; + + for(size_t i = 0; i != MP_WORD_BITS; ++i) + { + word high_top_bit = (high & MP_WORD_TOP_BIT); + + high <<= 1; + high |= (n0 >> (MP_WORD_BITS-1-i)) & 1; + quotient <<= 1; + + if(high_top_bit || high >= d) + { + high -= d; + quotient |= 1; + } + } + + return quotient; + } + +/* +* Do a 2-word/1-word Modulo +*/ +word bigint_modop(word n1, word n0, word d) + { + word z = bigint_divop(n1, n0, d); + word dummy = 0; + z = word_madd2(z, d, &dummy); + return (n0-z); + } + } diff --git a/src/lib/math/mp/mp_generic/info.txt b/src/lib/math/mp/mp_generic/info.txt deleted file mode 100644 index c87dd00ca..000000000 --- a/src/lib/math/mp/mp_generic/info.txt +++ /dev/null @@ -1,6 +0,0 @@ -load_on dep - -<header:internal> -mp_madd.h -mp_asmi.h -</header:internal> diff --git a/src/lib/math/mp/mp_generic/mp_asmi.h b/src/lib/math/mp/mp_generic/mp_asmi.h deleted file mode 100644 index 708afdfa0..000000000 --- a/src/lib/math/mp/mp_generic/mp_asmi.h +++ /dev/null @@ -1,203 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#ifndef BOTAN_MP_ASM_INTERNAL_H__ -#define BOTAN_MP_ASM_INTERNAL_H__ - -#include <botan/internal/mp_madd.h> - -namespace Botan { - -/* -* Word Addition -*/ -inline word word_add(word x, word y, word* carry) - { - word z = x + y; - word c1 = (z < x); - z += *carry; - *carry = c1 | (z < *carry); - return z; - } - -/* -* Eight Word Block Addition, Two Argument -*/ -inline word word8_add2(word x[8], const word y[8], word carry) - { - x[0] = word_add(x[0], y[0], &carry); - x[1] = word_add(x[1], y[1], &carry); - x[2] = word_add(x[2], y[2], &carry); - x[3] = word_add(x[3], y[3], &carry); - x[4] = word_add(x[4], y[4], &carry); - x[5] = word_add(x[5], y[5], &carry); - x[6] = word_add(x[6], y[6], &carry); - x[7] = word_add(x[7], y[7], &carry); - return carry; - } - -/* -* Eight Word Block Addition, Three Argument -*/ -inline word word8_add3(word z[8], const word x[8], - const word y[8], word carry) - { - z[0] = word_add(x[0], y[0], &carry); - z[1] = word_add(x[1], y[1], &carry); - z[2] = word_add(x[2], y[2], &carry); - z[3] = word_add(x[3], y[3], &carry); - z[4] = word_add(x[4], y[4], &carry); - z[5] = word_add(x[5], y[5], &carry); - z[6] = word_add(x[6], y[6], &carry); - z[7] = word_add(x[7], y[7], &carry); - return carry; - } - -/* -* Word Subtraction -*/ -inline word word_sub(word x, word y, word* carry) - { - word t0 = x - y; - word c1 = (t0 > x); - word z = t0 - *carry; - *carry = c1 | (z > t0); - return z; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2(word x[8], const word y[8], word carry) - { - x[0] = word_sub(x[0], y[0], &carry); - x[1] = word_sub(x[1], y[1], &carry); - x[2] = word_sub(x[2], y[2], &carry); - x[3] = word_sub(x[3], y[3], &carry); - x[4] = word_sub(x[4], y[4], &carry); - x[5] = word_sub(x[5], y[5], &carry); - x[6] = word_sub(x[6], y[6], &carry); - x[7] = word_sub(x[7], y[7], &carry); - return carry; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2_rev(word x[8], const word y[8], word carry) - { - x[0] = word_sub(y[0], x[0], &carry); - x[1] = word_sub(y[1], x[1], &carry); - x[2] = word_sub(y[2], x[2], &carry); - x[3] = word_sub(y[3], x[3], &carry); - x[4] = word_sub(y[4], x[4], &carry); - x[5] = word_sub(y[5], x[5], &carry); - x[6] = word_sub(y[6], x[6], &carry); - x[7] = word_sub(y[7], x[7], &carry); - return carry; - } - -/* -* Eight Word Block Subtraction, Three Argument -*/ -inline word word8_sub3(word z[8], const word x[8], - const word y[8], word carry) - { - z[0] = word_sub(x[0], y[0], &carry); - z[1] = word_sub(x[1], y[1], &carry); - z[2] = word_sub(x[2], y[2], &carry); - z[3] = word_sub(x[3], y[3], &carry); - z[4] = word_sub(x[4], y[4], &carry); - z[5] = word_sub(x[5], y[5], &carry); - z[6] = word_sub(x[6], y[6], &carry); - z[7] = word_sub(x[7], y[7], &carry); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul2(word x[8], word y, word carry) - { - x[0] = word_madd2(x[0], y, &carry); - x[1] = word_madd2(x[1], y, &carry); - x[2] = word_madd2(x[2], y, &carry); - x[3] = word_madd2(x[3], y, &carry); - x[4] = word_madd2(x[4], y, &carry); - x[5] = word_madd2(x[5], y, &carry); - x[6] = word_madd2(x[6], y, &carry); - x[7] = word_madd2(x[7], y, &carry); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul3(word z[8], const word x[8], word y, word carry) - { - z[0] = word_madd2(x[0], y, &carry); - z[1] = word_madd2(x[1], y, &carry); - z[2] = word_madd2(x[2], y, &carry); - z[3] = word_madd2(x[3], y, &carry); - z[4] = word_madd2(x[4], y, &carry); - z[5] = word_madd2(x[5], y, &carry); - z[6] = word_madd2(x[6], y, &carry); - z[7] = word_madd2(x[7], y, &carry); - return carry; - } - -/* -* Eight Word Block Multiply/Add -*/ -inline word word8_madd3(word z[8], const word x[8], word y, word carry) - { - z[0] = word_madd3(x[0], y, z[0], &carry); - z[1] = word_madd3(x[1], y, z[1], &carry); - z[2] = word_madd3(x[2], y, z[2], &carry); - z[3] = word_madd3(x[3], y, z[3], &carry); - z[4] = word_madd3(x[4], y, z[4], &carry); - z[5] = word_madd3(x[5], y, z[5], &carry); - z[6] = word_madd3(x[6], y, z[6], &carry); - z[7] = word_madd3(x[7], y, z[7], &carry); - return carry; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd(word* w2, word* w1, word* w0, word a, word b) - { - word carry = *w0; - *w0 = word_madd2(a, b, &carry); - *w1 += carry; - *w2 += (*w1 < carry) ? 1 : 0; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b) - { - word carry = 0; - a = word_madd2(a, b, &carry); - b = carry; - - word top = (b >> (BOTAN_MP_WORD_BITS-1)); - b <<= 1; - b |= (a >> (BOTAN_MP_WORD_BITS-1)); - a <<= 1; - - carry = 0; - *w0 = word_add(*w0, a, &carry); - *w1 = word_add(*w1, b, &carry); - *w2 = word_add(*w2, top, &carry); - } - -} - -#endif diff --git a/src/lib/math/mp/mp_generic/mp_madd.h b/src/lib/math/mp/mp_madd.h index 95a1069a4..0567622d9 100644 --- a/src/lib/math/mp/mp_generic/mp_madd.h +++ b/src/lib/math/mp/mp_madd.h @@ -35,12 +35,52 @@ namespace Botan { #error BOTAN_MP_WORD_BITS must be 8, 16, 32, or 64 #endif +#if defined(BOTAN_TARGET_ARCH_IS_X86_32) && (BOTAN_MP_WORD_BITS == 32) + + #if defined(BOTAN_USE_GCC_INLINE_ASM) + #define BOTAN_MP_USE_X86_32_ASM + #define ASM(x) x "\n\t" + #elif defined(BOTAN_TARGET_COMPILER_IS_MSVC) + #define BOTAN_MP_USE_X86_32_MSVC_ASM + #endif + +#elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && (BOTAN_MP_WORD_BITS == 64) && (BOTAN_USE_GCC_INLINE_ASM) + #define BOTAN_MP_USE_X86_64_ASM + #define ASM(x) x "\n\t" +#endif + +#if defined(BOTAN_MP_USE_X86_32_ASM) || defined(BOTAN_MP_USE_X86_64_ASM) + #define ASM(x) x "\n\t" +#endif + /* * Word Multiply/Add */ inline word word_madd2(word a, word b, word* c) { -#if defined(BOTAN_HAS_MP_DWORD) +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ASM("mull %[b]") + ASM("addl %[c],%[a]") + ASM("adcl $0,%[carry]") + + : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) + : "0"(a), "1"(b), [c]"g"(*c) : "cc"); + + return a; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + asm( + ASM("mulq %[b]") + ASM("addq %[c],%[a]") + ASM("adcq $0,%[carry]") + + : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) + : "0"(a), "1"(b), [c]"g"(*c) : "cc"); + + return a; + +#elif defined(BOTAN_HAS_MP_DWORD) const dword s = static_cast<dword>(a) * b + *c; *c = static_cast<word>(s >> BOTAN_MP_WORD_BITS); return static_cast<word>(s); @@ -64,7 +104,37 @@ inline word word_madd2(word a, word b, word* c) */ inline word word_madd3(word a, word b, word c, word* d) { -#if defined(BOTAN_HAS_MP_DWORD) +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ASM("mull %[b]") + + ASM("addl %[c],%[a]") + ASM("adcl $0,%[carry]") + + ASM("addl %[d],%[a]") + ASM("adcl $0,%[carry]") + + : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) + : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); + + return a; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + asm( + ASM("mulq %[b]") + + ASM("addq %[c],%[a]") + ASM("adcq $0,%[carry]") + + ASM("addq %[d],%[a]") + ASM("adcq $0,%[carry]") + + : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) + : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); + + return a; + +#elif defined(BOTAN_HAS_MP_DWORD) const dword s = static_cast<dword>(a) * b + c + *d; *d = static_cast<word>(s >> BOTAN_MP_WORD_BITS); return static_cast<word>(s); @@ -86,6 +156,10 @@ inline word word_madd3(word a, word b, word c, word* d) #endif } +#if defined(ASM) + #undef ASM +#endif + } #endif diff --git a/src/lib/math/mp/mp_misc.cpp b/src/lib/math/mp/mp_misc.cpp deleted file mode 100644 index 768543a64..000000000 --- a/src/lib/math/mp/mp_misc.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/* -* MP Misc Functions -* (C) 1999-2008 Jack Lloyd -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#include <botan/internal/mp_core.h> -#include <botan/internal/mp_madd.h> -#include <botan/exceptn.h> - -namespace Botan { - -/* -* Compare two MP integers -*/ -s32bit bigint_cmp(const word x[], size_t x_size, - const word y[], size_t y_size) - { - if(x_size < y_size) { return (-bigint_cmp(y, y_size, x, x_size)); } - - while(x_size > y_size) - { - if(x[x_size-1]) - return 1; - x_size--; - } - - for(size_t i = x_size; i > 0; --i) - { - if(x[i-1] > y[i-1]) - return 1; - if(x[i-1] < y[i-1]) - return -1; - } - - return 0; - } - -/* -* Do a 2-word/1-word Division -*/ -word bigint_divop(word n1, word n0, word d) - { - if(d == 0) - throw Invalid_Argument("bigint_divop divide by zero"); - - word high = n1 % d, quotient = 0; - - for(size_t i = 0; i != MP_WORD_BITS; ++i) - { - word high_top_bit = (high & MP_WORD_TOP_BIT); - - high <<= 1; - high |= (n0 >> (MP_WORD_BITS-1-i)) & 1; - quotient <<= 1; - - if(high_top_bit || high >= d) - { - high -= d; - quotient |= 1; - } - } - - return quotient; - } - -/* -* Do a 2-word/1-word Modulo -*/ -word bigint_modop(word n1, word n0, word d) - { - word z = bigint_divop(n1, n0, d); - word dummy = 0; - z = word_madd2(z, d, &dummy); - return (n0-z); - } - -} diff --git a/src/lib/math/mp/mp_shift.cpp b/src/lib/math/mp/mp_shift.cpp deleted file mode 100644 index 1850888a0..000000000 --- a/src/lib/math/mp/mp_shift.cpp +++ /dev/null @@ -1,133 +0,0 @@ -/* -* MP Shift Algorithms -* (C) 1999-2007,2014 Jack Lloyd -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#include <botan/internal/mp_core.h> -#include <botan/mem_ops.h> - -namespace Botan { - -/* -* Single Operand Left Shift -*/ -void bigint_shl1(word x[], size_t x_size, size_t word_shift, size_t bit_shift) - { - if(word_shift) - { - copy_mem(x + word_shift, x, x_size); - clear_mem(x, word_shift); - } - - if(bit_shift) - { - word carry = 0; - for(size_t j = word_shift; j != x_size + word_shift + 1; ++j) - { - word temp = x[j]; - x[j] = (temp << bit_shift) | carry; - carry = (temp >> (MP_WORD_BITS - bit_shift)); - } - } - } - -/* -* Single Operand Right Shift -*/ -void bigint_shr1(word x[], size_t x_size, size_t word_shift, size_t bit_shift) - { - if(x_size < word_shift) - { - clear_mem(x, x_size); - return; - } - - if(word_shift) - { - copy_mem(x, x + word_shift, x_size - word_shift); - clear_mem(x + x_size - word_shift, word_shift); - } - - if(bit_shift) - { - word carry = 0; - - size_t top = x_size - word_shift; - - while(top >= 4) - { - word w = x[top-1]; - x[top-1] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - w = x[top-2]; - x[top-2] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - w = x[top-3]; - x[top-3] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - w = x[top-4]; - x[top-4] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - top -= 4; - } - - while(top) - { - word w = x[top-1]; - x[top-1] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - top--; - } - } - } - -/* -* Two Operand Left Shift -*/ -void bigint_shl2(word y[], const word x[], size_t x_size, - size_t word_shift, size_t bit_shift) - { - for(size_t j = 0; j != x_size; ++j) - y[j + word_shift] = x[j]; - if(bit_shift) - { - word carry = 0; - for(size_t j = word_shift; j != x_size + word_shift + 1; ++j) - { - word w = y[j]; - y[j] = (w << bit_shift) | carry; - carry = (w >> (MP_WORD_BITS - bit_shift)); - } - } - } - -/* -* Two Operand Right Shift -*/ -void bigint_shr2(word y[], const word x[], size_t x_size, - size_t word_shift, size_t bit_shift) - { - if(x_size < word_shift) return; - - for(size_t j = 0; j != x_size - word_shift; ++j) - y[j] = x[j + word_shift]; - if(bit_shift) - { - word carry = 0; - for(size_t j = x_size - word_shift; j > 0; --j) - { - word w = y[j-1]; - y[j-1] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - } - } - } - -} diff --git a/src/lib/math/mp/mp_x86_32/info.txt b/src/lib/math/mp/mp_x86_32/info.txt deleted file mode 100644 index f36abaf62..000000000 --- a/src/lib/math/mp/mp_x86_32/info.txt +++ /dev/null @@ -1,18 +0,0 @@ -load_on dep - -mp_bits 32 - -<header:internal> -mp_madd.h -mp_asmi.h -</header:internal> - -<arch> -x86_32 -</arch> - -<cc> -clang -gcc -icc -</cc> diff --git a/src/lib/math/mp/mp_x86_32/mp_asmi.h b/src/lib/math/mp/mp_x86_32/mp_asmi.h deleted file mode 100644 index 95af89fc0..000000000 --- a/src/lib/math/mp/mp_x86_32/mp_asmi.h +++ /dev/null @@ -1,236 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#ifndef BOTAN_MP_ASM_INTERNAL_H__ -#define BOTAN_MP_ASM_INTERNAL_H__ - -#include <botan/internal/mp_madd.h> - -namespace Botan { - -/* -* Helper Macros for x86 Assembly -*/ -#ifndef ASM - #define ASM(x) x "\n\t" -#endif - -#define ADDSUB2_OP(OPERATION, INDEX) \ - ASM("movl 4*" #INDEX "(%[y]), %[carry]") \ - ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])") \ - -#define ADDSUB3_OP(OPERATION, INDEX) \ - ASM("movl 4*" #INDEX "(%[x]), %[carry]") \ - ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]") \ - ASM("movl %[carry], 4*" #INDEX "(%[z])") \ - -#define LINMUL_OP(WRITE_TO, INDEX) \ - ASM("movl 4*" #INDEX "(%[x]),%%eax") \ - ASM("mull %[y]") \ - ASM("addl %[carry],%%eax") \ - ASM("adcl $0,%%edx") \ - ASM("movl %%edx,%[carry]") \ - ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])") - -#define MULADD_OP(IGNORED, INDEX) \ - ASM("movl 4*" #INDEX "(%[x]),%%eax") \ - ASM("mull %[y]") \ - ASM("addl %[carry],%%eax") \ - ASM("adcl $0,%%edx") \ - ASM("addl 4*" #INDEX "(%[z]),%%eax") \ - ASM("adcl $0,%%edx") \ - ASM("movl %%edx,%[carry]") \ - ASM("movl %%eax, 4*" #INDEX " (%[z])") - -#define DO_8_TIMES(MACRO, ARG) \ - MACRO(ARG, 0) \ - MACRO(ARG, 1) \ - MACRO(ARG, 2) \ - MACRO(ARG, 3) \ - MACRO(ARG, 4) \ - MACRO(ARG, 5) \ - MACRO(ARG, 6) \ - MACRO(ARG, 7) - -#define ADD_OR_SUBTRACT(CORE_CODE) \ - ASM("rorl %[carry]") \ - CORE_CODE \ - ASM("sbbl %[carry],%[carry]") \ - ASM("negl %[carry]") - -/* -* Word Addition -*/ -inline word word_add(word x, word y, word* carry) - { - asm( - ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]")) - : [x]"=r"(x), [carry]"=r"(*carry) - : "0"(x), [y]"rm"(y), "1"(*carry) - : "cc"); - return x; - } - -/* -* Eight Word Block Addition, Two Argument -*/ -inline word word8_add2(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Addition, Three Argument -*/ -inline word word8_add3(word z[8], const word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Word Subtraction -*/ -inline word word_sub(word x, word y, word* carry) - { - asm( - ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]")) - : [x]"=r"(x), [carry]"=r"(*carry) - : "0"(x), [y]"rm"(y), "1"(*carry) - : "cc"); - return x; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2_rev(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) - : [carry]"=r"(carry) - : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Subtraction, Three Argument -*/ -inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul2(word x[8], word y, word carry) - { - asm( - DO_8_TIMES(LINMUL_OP, "x") - : [carry]"=r"(carry) - : [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%eax", "%edx"); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul3(word z[8], const word x[8], word y, word carry) - { - asm( - DO_8_TIMES(LINMUL_OP, "z") - : [carry]"=r"(carry) - : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%eax", "%edx"); - return carry; - } - -/* -* Eight Word Block Multiply/Add -*/ -inline word word8_madd3(word z[8], const word x[8], word y, word carry) - { - asm( - DO_8_TIMES(MULADD_OP, "") - : [carry]"=r"(carry) - : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%eax", "%edx"); - return carry; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) - { - asm( - ASM("mull %[y]") - - ASM("addl %[x],%[w0]") - ASM("adcl %[y],%[w1]") - ASM("adcl $0,%[w2]") - - : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) - : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) - : "cc"); - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) - { - asm( - ASM("mull %[y]") - - ASM("addl %[x],%[w0]") - ASM("adcl %[y],%[w1]") - ASM("adcl $0,%[w2]") - - ASM("addl %[x],%[w0]") - ASM("adcl %[y],%[w1]") - ASM("adcl $0,%[w2]") - - : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) - : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) - : "cc"); - } - -} - -#endif diff --git a/src/lib/math/mp/mp_x86_32/mp_madd.h b/src/lib/math/mp/mp_x86_32/mp_madd.h deleted file mode 100644 index 9c0990398..000000000 --- a/src/lib/math/mp/mp_x86_32/mp_madd.h +++ /dev/null @@ -1,63 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2008 Jack Lloyd -* 2006 Luca Piccarreta -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#ifndef BOTAN_MP_WORD_MULADD_H__ -#define BOTAN_MP_WORD_MULADD_H__ - -#include <botan/mp_types.h> - -#if (BOTAN_MP_WORD_BITS != 32) - #error The mp_x86_32 module requires that BOTAN_MP_WORD_BITS == 32 -#endif - -namespace Botan { - -/* -* Helper Macros for x86 Assembly -*/ -#define ASM(x) x "\n\t" - -/* -* Word Multiply -*/ -inline word word_madd2(word a, word b, word* c) - { - asm( - ASM("mull %[b]") - ASM("addl %[c],%[a]") - ASM("adcl $0,%[carry]") - - : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) - : "0"(a), "1"(b), [c]"g"(*c) : "cc"); - - return a; - } - -/* -* Word Multiply/Add -*/ -inline word word_madd3(word a, word b, word c, word* d) - { - asm( - ASM("mull %[b]") - - ASM("addl %[c],%[a]") - ASM("adcl $0,%[carry]") - - ASM("addl %[d],%[a]") - ASM("adcl $0,%[carry]") - - : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) - : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); - - return a; - } - -} - -#endif diff --git a/src/lib/math/mp/mp_x86_32_msvc/info.txt b/src/lib/math/mp/mp_x86_32_msvc/info.txt deleted file mode 100644 index 3029d6a61..000000000 --- a/src/lib/math/mp/mp_x86_32_msvc/info.txt +++ /dev/null @@ -1,16 +0,0 @@ -mp_bits 32 - -load_on dep - -<header:internal> -mp_generic:mp_madd.h -mp_asmi.h -</header:internal> - -<arch> -x86_32 -</arch> - -<cc> -msvc -</cc> diff --git a/src/lib/math/mp/mp_x86_32_msvc/mp_asmi.h b/src/lib/math/mp/mp_x86_32_msvc/mp_asmi.h deleted file mode 100644 index 92bf7980d..000000000 --- a/src/lib/math/mp/mp_x86_32_msvc/mp_asmi.h +++ /dev/null @@ -1,454 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#ifndef BOTAN_MP_ASM_INTERNAL_H__ -#define BOTAN_MP_ASM_INTERNAL_H__ - -#include <botan/internal/mp_madd.h> - -namespace Botan { - -/* -* Word Addition -*/ -inline word word_add(word x, word y, word* carry) - { - word z = x + y; - word c1 = (z < x); - z += *carry; - *carry = c1 | (z < *carry); - return z; - } - -/* -* Eight Word Block Addition, Two Argument -*/ -inline word word8_add2(word x[8], const word y[8], word carry) - { - __asm { - mov edx,[x] - mov esi,[y] - xor eax,eax - sub eax,[carry] //force CF=1 iff *carry==1 - mov eax,[esi] - adc [edx],eax - mov eax,[esi+4] - adc [edx+4],eax - mov eax,[esi+8] - adc [edx+8],eax - mov eax,[esi+12] - adc [edx+12],eax - mov eax,[esi+16] - adc [edx+16],eax - mov eax,[esi+20] - adc [edx+20],eax - mov eax,[esi+24] - adc [edx+24],eax - mov eax,[esi+28] - adc [edx+28],eax - sbb eax,eax - neg eax - } - } - -/* -* Eight Word Block Addition, Three Argument -*/ -inline word word8_add3(word z[8], const word x[8], const word y[8], word carry) - { - __asm { - mov edi,[x] - mov esi,[y] - mov ebx,[z] - xor eax,eax - sub eax,[carry] //force CF=1 iff *carry==1 - mov eax,[edi] - adc eax,[esi] - mov [ebx],eax - - mov eax,[edi+4] - adc eax,[esi+4] - mov [ebx+4],eax - - mov eax,[edi+8] - adc eax,[esi+8] - mov [ebx+8],eax - - mov eax,[edi+12] - adc eax,[esi+12] - mov [ebx+12],eax - - mov eax,[edi+16] - adc eax,[esi+16] - mov [ebx+16],eax - - mov eax,[edi+20] - adc eax,[esi+20] - mov [ebx+20],eax - - mov eax,[edi+24] - adc eax,[esi+24] - mov [ebx+24],eax - - mov eax,[edi+28] - adc eax,[esi+28] - mov [ebx+28],eax - - sbb eax,eax - neg eax - } - } - -/* -* Word Subtraction -*/ -inline word word_sub(word x, word y, word* carry) - { - word t0 = x - y; - word c1 = (t0 > x); - word z = t0 - *carry; - *carry = c1 | (z > t0); - return z; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2(word x[8], const word y[8], word carry) - { - __asm { - mov edi,[x] - mov esi,[y] - xor eax,eax - sub eax,[carry] //force CF=1 iff *carry==1 - mov eax,[edi] - sbb eax,[esi] - mov [edi],eax - mov eax,[edi+4] - sbb eax,[esi+4] - mov [edi+4],eax - mov eax,[edi+8] - sbb eax,[esi+8] - mov [edi+8],eax - mov eax,[edi+12] - sbb eax,[esi+12] - mov [edi+12],eax - mov eax,[edi+16] - sbb eax,[esi+16] - mov [edi+16],eax - mov eax,[edi+20] - sbb eax,[esi+20] - mov [edi+20],eax - mov eax,[edi+24] - sbb eax,[esi+24] - mov [edi+24],eax - mov eax,[edi+28] - sbb eax,[esi+28] - mov [edi+28],eax - sbb eax,eax - neg eax - } - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2_rev(word x[8], const word y[8], word carry) - { - x[0] = word_sub(y[0], x[0], &carry); - x[1] = word_sub(y[1], x[1], &carry); - x[2] = word_sub(y[2], x[2], &carry); - x[3] = word_sub(y[3], x[3], &carry); - x[4] = word_sub(y[4], x[4], &carry); - x[5] = word_sub(y[5], x[5], &carry); - x[6] = word_sub(y[6], x[6], &carry); - x[7] = word_sub(y[7], x[7], &carry); - return carry; - } - - -/* -* Eight Word Block Subtraction, Three Argument -*/ -inline word word8_sub3(word z[8], const word x[8], - const word y[8], word carry) - { - __asm { - mov edi,[x] - mov esi,[y] - xor eax,eax - sub eax,[carry] //force CF=1 iff *carry==1 - mov ebx,[z] - mov eax,[edi] - sbb eax,[esi] - mov [ebx],eax - mov eax,[edi+4] - sbb eax,[esi+4] - mov [ebx+4],eax - mov eax,[edi+8] - sbb eax,[esi+8] - mov [ebx+8],eax - mov eax,[edi+12] - sbb eax,[esi+12] - mov [ebx+12],eax - mov eax,[edi+16] - sbb eax,[esi+16] - mov [ebx+16],eax - mov eax,[edi+20] - sbb eax,[esi+20] - mov [ebx+20],eax - mov eax,[edi+24] - sbb eax,[esi+24] - mov [ebx+24],eax - mov eax,[edi+28] - sbb eax,[esi+28] - mov [ebx+28],eax - sbb eax,eax - neg eax - } - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul2(word x[8], word y, word carry) - { - __asm { - mov esi,[x] - mov eax,[esi] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,[carry] //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi],eax //load a - - mov eax,[esi+4] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+4],eax //load a - - mov eax,[esi+8] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+8],eax //load a - - mov eax,[esi+12] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+12],eax //load a - - mov eax,[esi+16] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+16],eax //load a - - mov eax,[esi+20] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+20],eax //load a - - mov eax,[esi+24] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+24],eax //load a - - mov eax,[esi+28] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov [esi+28],eax //load a - - mov eax,edx //store carry - } - } - -inline word word8_linmul3(word z[4], const word x[4], word y, word carry) - { - __asm { -#if 0 - //it's slower!!! - mov edx,[z] - mov eax,[x] - movd mm7,[y] - - movd mm0,[eax] - movd mm1,[eax+4] - movd mm2,[eax+8] - pmuludq mm0,mm7 - pmuludq mm1,mm7 - pmuludq mm2,mm7 - - movd mm6,[carry] - paddq mm0,mm6 - movd [edx],mm0 - - psrlq mm0,32 - paddq mm1,mm0 - movd [edx+4],mm1 - - movd mm3,[eax+12] - psrlq mm1,32 - paddq mm2,mm1 - movd [edx+8],mm2 - - pmuludq mm3,mm7 - movd mm4,[eax+16] - psrlq mm2,32 - paddq mm3,mm2 - movd [edx+12],mm3 - - pmuludq mm4,mm7 - movd mm5,[eax+20] - psrlq mm3,32 - paddq mm4,mm3 - movd [edx+16],mm4 - - pmuludq mm5,mm7 - movd mm0,[eax+24] - psrlq mm4,32 - paddq mm5,mm4 - movd [edx+20],mm5 - - pmuludq mm0,mm7 - movd mm1,[eax+28] - psrlq mm5,32 - paddq mm0,mm5 - movd [edx+24],mm0 - - pmuludq mm1,mm7 - psrlq mm0,32 - paddq mm1,mm0 - movd [edx+28],mm1 - psrlq mm1,32 - - movd eax,mm1 - emms -#else - mov edi,[z] - mov esi,[x] - mov eax,[esi] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,[carry] //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi],eax //load a - - mov eax,[esi+4] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+4],eax //load a - - mov eax,[esi+8] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+8],eax //load a - - mov eax,[esi+12] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+12],eax //load a - - mov eax,[esi+16] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+16],eax //load a - - mov eax,[esi+20] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+20],eax //load a - - mov eax,[esi+24] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+24],eax //load a - - mov eax,[esi+28] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov [edi+28],eax //load a - mov eax,edx //store carry -#endif - } - } - -/* -* Eight Word Block Multiply/Add -*/ -inline word word8_madd3(word z[8], const word x[8], word y, word carry) - { - z[0] = word_madd3(x[0], y, z[0], &carry); - z[1] = word_madd3(x[1], y, z[1], &carry); - z[2] = word_madd3(x[2], y, z[2], &carry); - z[3] = word_madd3(x[3], y, z[3], &carry); - z[4] = word_madd3(x[4], y, z[4], &carry); - z[5] = word_madd3(x[5], y, z[5], &carry); - z[6] = word_madd3(x[6], y, z[6], &carry); - z[7] = word_madd3(x[7], y, z[7], &carry); - return carry; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd(word* w2, word* w1, word* w0, word a, word b) - { - word carry = *w0; - *w0 = word_madd2(a, b, &carry); - *w1 += carry; - *w2 += (*w1 < carry) ? 1 : 0; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b) - { - word carry = 0; - a = word_madd2(a, b, &carry); - b = carry; - - word top = (b >> (BOTAN_MP_WORD_BITS-1)); - b <<= 1; - b |= (a >> (BOTAN_MP_WORD_BITS-1)); - a <<= 1; - - carry = 0; - *w0 = word_add(*w0, a, &carry); - *w1 = word_add(*w1, b, &carry); - *w2 = word_add(*w2, top, &carry); - } - -} - -#endif diff --git a/src/lib/math/mp/mp_x86_64/info.txt b/src/lib/math/mp/mp_x86_64/info.txt deleted file mode 100644 index 75c42ddc1..000000000 --- a/src/lib/math/mp/mp_x86_64/info.txt +++ /dev/null @@ -1,18 +0,0 @@ -load_on dep - -mp_bits 64 - -<header:internal> -mp_madd.h -mp_asmi.h -</header:internal> - -<arch> -x86_64 -</arch> - -<cc> -clang -gcc -icc -</cc> diff --git a/src/lib/math/mp/mp_x86_64/mp_asmi.h b/src/lib/math/mp/mp_x86_64/mp_asmi.h deleted file mode 100644 index cd5884867..000000000 --- a/src/lib/math/mp/mp_x86_64/mp_asmi.h +++ /dev/null @@ -1,244 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#ifndef BOTAN_MP_ASM_INTERNAL_H__ -#define BOTAN_MP_ASM_INTERNAL_H__ - -#include <botan/internal/mp_madd.h> - -namespace Botan { - -/* -* Helper Macros for x86-64 Assembly -*/ -#ifndef ASM - #define ASM(x) x "\n\t" -#endif - -#define ADDSUB2_OP(OPERATION, INDEX) \ - ASM("movq 8*" #INDEX "(%[y]), %[carry]") \ - ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])") \ - -#define ADDSUB3_OP(OPERATION, INDEX) \ - ASM("movq 8*" #INDEX "(%[x]), %[carry]") \ - ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \ - ASM("movq %[carry], 8*" #INDEX "(%[z])") \ - -#define LINMUL_OP(WRITE_TO, INDEX) \ - ASM("movq 8*" #INDEX "(%[x]),%%rax") \ - ASM("mulq %[y]") \ - ASM("addq %[carry],%%rax") \ - ASM("adcq $0,%%rdx") \ - ASM("movq %%rdx,%[carry]") \ - ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])") - -#define MULADD_OP(IGNORED, INDEX) \ - ASM("movq 8*" #INDEX "(%[x]),%%rax") \ - ASM("mulq %[y]") \ - ASM("addq %[carry],%%rax") \ - ASM("adcq $0,%%rdx") \ - ASM("addq 8*" #INDEX "(%[z]),%%rax") \ - ASM("adcq $0,%%rdx") \ - ASM("movq %%rdx,%[carry]") \ - ASM("movq %%rax, 8*" #INDEX " (%[z])") - -#define DO_8_TIMES(MACRO, ARG) \ - MACRO(ARG, 0) \ - MACRO(ARG, 1) \ - MACRO(ARG, 2) \ - MACRO(ARG, 3) \ - MACRO(ARG, 4) \ - MACRO(ARG, 5) \ - MACRO(ARG, 6) \ - MACRO(ARG, 7) - -#define ADD_OR_SUBTRACT(CORE_CODE) \ - ASM("rorq %[carry]") \ - CORE_CODE \ - ASM("sbbq %[carry],%[carry]") \ - ASM("negq %[carry]") - -/* -* Word Addition -*/ -inline word word_add(word x, word y, word* carry) - { - asm( - ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]")) - : [x]"=r"(x), [carry]"=r"(*carry) - : "0"(x), [y]"rm"(y), "1"(*carry) - : "cc"); - return x; - } - -/* -* Eight Word Block Addition, Two Argument -*/ -inline word word8_add2(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Addition, Three Argument -*/ -inline word word8_add3(word z[8], const word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Word Subtraction -*/ -inline word word_sub(word x, word y, word* carry) - { - asm( - ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]")) - : [x]"=r"(x), [carry]"=r"(*carry) - : "0"(x), [y]"rm"(y), "1"(*carry) - : "cc"); - return x; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2_rev(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) - : [carry]"=r"(carry) - : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Subtraction, Three Argument -*/ -inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul2(word x[8], word y, word carry) - { - asm( - DO_8_TIMES(LINMUL_OP, "x") - : [carry]"=r"(carry) - : [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%rax", "%rdx"); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul3(word z[8], const word x[8], word y, word carry) - { - asm( - DO_8_TIMES(LINMUL_OP, "z") - : [carry]"=r"(carry) - : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%rax", "%rdx"); - return carry; - } - -/* -* Eight Word Block Multiply/Add -*/ -inline word word8_madd3(word z[8], const word x[8], word y, word carry) - { - asm( - DO_8_TIMES(MULADD_OP, "") - : [carry]"=r"(carry) - : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%rax", "%rdx"); - return carry; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) - { - asm( - ASM("mulq %[y]") - - ASM("addq %[x],%[w0]") - ASM("adcq %[y],%[w1]") - ASM("adcq $0,%[w2]") - - : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) - : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) - : "cc"); - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) - { - asm( - ASM("mulq %[y]") - - ASM("addq %[x],%[w0]") - ASM("adcq %[y],%[w1]") - ASM("adcq $0,%[w2]") - - ASM("addq %[x],%[w0]") - ASM("adcq %[y],%[w1]") - ASM("adcq $0,%[w2]") - - : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) - : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) - : "cc"); - } - -#undef ASM -#undef DO_8_TIMES -#undef ADD_OR_SUBTRACT -#undef ADDSUB2_OP -#undef ADDSUB3_OP -#undef LINMUL_OP -#undef MULADD_OP - -} - -#endif diff --git a/src/lib/math/mp/mp_x86_64/mp_madd.h b/src/lib/math/mp/mp_x86_64/mp_madd.h deleted file mode 100644 index 6f9185dc0..000000000 --- a/src/lib/math/mp/mp_x86_64/mp_madd.h +++ /dev/null @@ -1,65 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2008 Jack Lloyd -* 2006 Luca Piccarreta -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#ifndef BOTAN_MP_WORD_MULADD_H__ -#define BOTAN_MP_WORD_MULADD_H__ - -#include <botan/mp_types.h> - -#if (BOTAN_MP_WORD_BITS != 64) - #error The mp_x86_64 module requires that BOTAN_MP_WORD_BITS == 64 -#endif - -namespace Botan { - -/* -* Helper Macros for x86-64 Assembly -*/ -#define ASM(x) x "\n\t" - -/* -* Word Multiply -*/ -inline word word_madd2(word a, word b, word* c) - { - asm( - ASM("mulq %[b]") - ASM("addq %[c],%[a]") - ASM("adcq $0,%[carry]") - - : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) - : "0"(a), "1"(b), [c]"g"(*c) : "cc"); - - return a; - } - -/* -* Word Multiply/Add -*/ -inline word word_madd3(word a, word b, word c, word* d) - { - asm( - ASM("mulq %[b]") - - ASM("addq %[c],%[a]") - ASM("adcq $0,%[carry]") - - ASM("addq %[d],%[a]") - ASM("adcq $0,%[carry]") - - : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) - : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); - - return a; - } - -#undef ASM - -} - -#endif |