aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib/math
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2016-07-20 20:29:22 -0400
committerJack Lloyd <[email protected]>2016-07-21 14:46:19 -0400
commit6907e196cbc8fd4a80ebf2443f2a918bb4a19706 (patch)
tree2fb7ed81df1a1f899ea0136e6e872961b8468e46 /src/lib/math
parentb4da573fc1c241a6badc83f4ec166cbdd5c70c5c (diff)
Merge asm into single mp_madd.h and mp_asmi.h files
Avoids some cut and paste, also removes the need for special logic in configure.py for handling mp module specially. Merge SIMD classes into a single type SIMD_4x32
Diffstat (limited to 'src/lib/math')
-rw-r--r--src/lib/math/mp/info.txt10
-rw-r--r--src/lib/math/mp/mp_asmi.h820
-rw-r--r--src/lib/math/mp/mp_core.cpp (renamed from src/lib/math/mp/mp_asm.cpp)186
-rw-r--r--src/lib/math/mp/mp_generic/info.txt6
-rw-r--r--src/lib/math/mp/mp_generic/mp_asmi.h203
-rw-r--r--src/lib/math/mp/mp_madd.h (renamed from src/lib/math/mp/mp_generic/mp_madd.h)78
-rw-r--r--src/lib/math/mp/mp_misc.cpp79
-rw-r--r--src/lib/math/mp/mp_shift.cpp133
-rw-r--r--src/lib/math/mp/mp_x86_32/info.txt18
-rw-r--r--src/lib/math/mp/mp_x86_32/mp_asmi.h236
-rw-r--r--src/lib/math/mp/mp_x86_32/mp_madd.h63
-rw-r--r--src/lib/math/mp/mp_x86_32_msvc/info.txt16
-rw-r--r--src/lib/math/mp/mp_x86_32_msvc/mp_asmi.h454
-rw-r--r--src/lib/math/mp/mp_x86_64/info.txt18
-rw-r--r--src/lib/math/mp/mp_x86_64/mp_asmi.h244
-rw-r--r--src/lib/math/mp/mp_x86_64/mp_madd.h65
16 files changed, 1084 insertions, 1545 deletions
diff --git a/src/lib/math/mp/info.txt b/src/lib/math/mp/info.txt
index 6aa0142f3..b5db12648 100644
--- a/src/lib/math/mp/info.txt
+++ b/src/lib/math/mp/info.txt
@@ -1,12 +1,10 @@
define BIGINT_MP 20151225
<source>
-mp_asm.cpp
+mp_core.cpp
mp_comba.cpp
mp_karat.cpp
mp_monty.cpp
-mp_misc.cpp
-mp_shift.cpp
</source>
<header:public>
@@ -15,8 +13,6 @@ mp_types.h
<header:internal>
mp_core.h
+mp_madd.h
+mp_asmi.h
</header:internal>
-
-<requires>
-mp_x86_64|mp_x86_32|mp_x86_32_msvc|mp_generic
-</requires>
diff --git a/src/lib/math/mp/mp_asmi.h b/src/lib/math/mp/mp_asmi.h
new file mode 100644
index 000000000..afb4d1407
--- /dev/null
+++ b/src/lib/math/mp/mp_asmi.h
@@ -0,0 +1,820 @@
+/*
+* Lowest Level MPI Algorithms
+* (C) 1999-2010 Jack Lloyd
+* 2006 Luca Piccarreta
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#ifndef BOTAN_MP_ASM_INTERNAL_H__
+#define BOTAN_MP_ASM_INTERNAL_H__
+
+#include <botan/internal/mp_madd.h>
+
+namespace Botan {
+
+#if defined(BOTAN_MP_USE_X86_32_ASM)
+
+#define ADDSUB2_OP(OPERATION, INDEX) \
+ ASM("movl 4*" #INDEX "(%[y]), %[carry]") \
+ ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])") \
+
+#define ADDSUB3_OP(OPERATION, INDEX) \
+ ASM("movl 4*" #INDEX "(%[x]), %[carry]") \
+ ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]") \
+ ASM("movl %[carry], 4*" #INDEX "(%[z])") \
+
+#define LINMUL_OP(WRITE_TO, INDEX) \
+ ASM("movl 4*" #INDEX "(%[x]),%%eax") \
+ ASM("mull %[y]") \
+ ASM("addl %[carry],%%eax") \
+ ASM("adcl $0,%%edx") \
+ ASM("movl %%edx,%[carry]") \
+ ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])")
+
+#define MULADD_OP(IGNORED, INDEX) \
+ ASM("movl 4*" #INDEX "(%[x]),%%eax") \
+ ASM("mull %[y]") \
+ ASM("addl %[carry],%%eax") \
+ ASM("adcl $0,%%edx") \
+ ASM("addl 4*" #INDEX "(%[z]),%%eax") \
+ ASM("adcl $0,%%edx") \
+ ASM("movl %%edx,%[carry]") \
+ ASM("movl %%eax, 4*" #INDEX " (%[z])")
+
+#define ADD_OR_SUBTRACT(CORE_CODE) \
+ ASM("rorl %[carry]") \
+ CORE_CODE \
+ ASM("sbbl %[carry],%[carry]") \
+ ASM("negl %[carry]")
+
+#elif defined(BOTAN_MP_USE_X86_64_ASM)
+
+#define ADDSUB2_OP(OPERATION, INDEX) \
+ ASM("movq 8*" #INDEX "(%[y]), %[carry]") \
+ ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])") \
+
+#define ADDSUB3_OP(OPERATION, INDEX) \
+ ASM("movq 8*" #INDEX "(%[x]), %[carry]") \
+ ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \
+ ASM("movq %[carry], 8*" #INDEX "(%[z])") \
+
+#define LINMUL_OP(WRITE_TO, INDEX) \
+ ASM("movq 8*" #INDEX "(%[x]),%%rax") \
+ ASM("mulq %[y]") \
+ ASM("addq %[carry],%%rax") \
+ ASM("adcq $0,%%rdx") \
+ ASM("movq %%rdx,%[carry]") \
+ ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")
+
+#define MULADD_OP(IGNORED, INDEX) \
+ ASM("movq 8*" #INDEX "(%[x]),%%rax") \
+ ASM("mulq %[y]") \
+ ASM("addq %[carry],%%rax") \
+ ASM("adcq $0,%%rdx") \
+ ASM("addq 8*" #INDEX "(%[z]),%%rax") \
+ ASM("adcq $0,%%rdx") \
+ ASM("movq %%rdx,%[carry]") \
+ ASM("movq %%rax, 8*" #INDEX " (%[z])")
+
+#define ADD_OR_SUBTRACT(CORE_CODE) \
+ ASM("rorq %[carry]") \
+ CORE_CODE \
+ ASM("sbbq %[carry],%[carry]") \
+ ASM("negq %[carry]")
+
+#endif
+
+#if defined(ADD_OR_SUBTRACT)
+
+#define ASM(x) x "\n\t"
+
+#define DO_8_TIMES(MACRO, ARG) \
+ MACRO(ARG, 0) \
+ MACRO(ARG, 1) \
+ MACRO(ARG, 2) \
+ MACRO(ARG, 3) \
+ MACRO(ARG, 4) \
+ MACRO(ARG, 5) \
+ MACRO(ARG, 6) \
+ MACRO(ARG, 7)
+
+#endif
+
+/*
+* Word Addition
+*/
+inline word word_add(word x, word y, word* carry)
+ {
+#if defined(BOTAN_MP_USE_X86_32_ASM)
+ asm(
+ ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]"))
+ : [x]"=r"(x), [carry]"=r"(*carry)
+ : "0"(x), [y]"rm"(y), "1"(*carry)
+ : "cc");
+ return x;
+
+#elif defined(BOTAN_MP_USE_X86_64_ASM)
+
+ asm(
+ ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]"))
+ : [x]"=r"(x), [carry]"=r"(*carry)
+ : "0"(x), [y]"rm"(y), "1"(*carry)
+ : "cc");
+ return x;
+
+#else
+ word z = x + y;
+ word c1 = (z < x);
+ z += *carry;
+ *carry = c1 | (z < *carry);
+ return z;
+#endif
+ }
+
+/*
+* Eight Word Block Addition, Two Argument
+*/
+inline word word8_add2(word x[8], const word y[8], word carry)
+ {
+#if defined(BOTAN_MP_USE_X86_32_ASM)
+ asm(
+ ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl"))
+ : [carry]"=r"(carry)
+ : [x]"r"(x), [y]"r"(y), "0"(carry)
+ : "cc", "memory");
+ return carry;
+
+#elif defined(BOTAN_MP_USE_X86_64_ASM)
+
+ asm(
+ ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq"))
+ : [carry]"=r"(carry)
+ : [x]"r"(x), [y]"r"(y), "0"(carry)
+ : "cc", "memory");
+ return carry;
+
+#elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM)
+
+ __asm {
+ mov edx,[x]
+ mov esi,[y]
+ xor eax,eax
+ sub eax,[carry] //force CF=1 iff *carry==1
+ mov eax,[esi]
+ adc [edx],eax
+ mov eax,[esi+4]
+ adc [edx+4],eax
+ mov eax,[esi+8]
+ adc [edx+8],eax
+ mov eax,[esi+12]
+ adc [edx+12],eax
+ mov eax,[esi+16]
+ adc [edx+16],eax
+ mov eax,[esi+20]
+ adc [edx+20],eax
+ mov eax,[esi+24]
+ adc [edx+24],eax
+ mov eax,[esi+28]
+ adc [edx+28],eax
+ sbb eax,eax
+ neg eax
+ }
+
+#else
+ x[0] = word_add(x[0], y[0], &carry);
+ x[1] = word_add(x[1], y[1], &carry);
+ x[2] = word_add(x[2], y[2], &carry);
+ x[3] = word_add(x[3], y[3], &carry);
+ x[4] = word_add(x[4], y[4], &carry);
+ x[5] = word_add(x[5], y[5], &carry);
+ x[6] = word_add(x[6], y[6], &carry);
+ x[7] = word_add(x[7], y[7], &carry);
+ return carry;
+#endif
+ }
+
+/*
+* Eight Word Block Addition, Three Argument
+*/
+inline word word8_add3(word z[8], const word x[8],
+ const word y[8], word carry)
+ {
+#if defined(BOTAN_MP_USE_X86_32_ASM)
+ asm(
+ ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl"))
+ : [carry]"=r"(carry)
+ : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
+ : "cc", "memory");
+ return carry;
+
+#elif defined(BOTAN_MP_USE_X86_64_ASM)
+
+ asm(
+ ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq"))
+ : [carry]"=r"(carry)
+ : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
+ : "cc", "memory");
+ return carry;
+
+#elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM)
+
+ __asm {
+ mov edi,[x]
+ mov esi,[y]
+ mov ebx,[z]
+ xor eax,eax
+ sub eax,[carry] //force CF=1 iff *carry==1
+ mov eax,[edi]
+ adc eax,[esi]
+ mov [ebx],eax
+
+ mov eax,[edi+4]
+ adc eax,[esi+4]
+ mov [ebx+4],eax
+
+ mov eax,[edi+8]
+ adc eax,[esi+8]
+ mov [ebx+8],eax
+
+ mov eax,[edi+12]
+ adc eax,[esi+12]
+ mov [ebx+12],eax
+
+ mov eax,[edi+16]
+ adc eax,[esi+16]
+ mov [ebx+16],eax
+
+ mov eax,[edi+20]
+ adc eax,[esi+20]
+ mov [ebx+20],eax
+
+ mov eax,[edi+24]
+ adc eax,[esi+24]
+ mov [ebx+24],eax
+
+ mov eax,[edi+28]
+ adc eax,[esi+28]
+ mov [ebx+28],eax
+
+ sbb eax,eax
+ neg eax
+ }
+
+#else
+ z[0] = word_add(x[0], y[0], &carry);
+ z[1] = word_add(x[1], y[1], &carry);
+ z[2] = word_add(x[2], y[2], &carry);
+ z[3] = word_add(x[3], y[3], &carry);
+ z[4] = word_add(x[4], y[4], &carry);
+ z[5] = word_add(x[5], y[5], &carry);
+ z[6] = word_add(x[6], y[6], &carry);
+ z[7] = word_add(x[7], y[7], &carry);
+ return carry;
+#endif
+ }
+
+/*
+* Word Subtraction
+*/
+inline word word_sub(word x, word y, word* carry)
+ {
+#if defined(BOTAN_MP_USE_X86_32_ASM)
+ asm(
+ ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]"))
+ : [x]"=r"(x), [carry]"=r"(*carry)
+ : "0"(x), [y]"rm"(y), "1"(*carry)
+ : "cc");
+ return x;
+
+#elif defined(BOTAN_MP_USE_X86_64_ASM)
+
+ asm(
+ ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]"))
+ : [x]"=r"(x), [carry]"=r"(*carry)
+ : "0"(x), [y]"rm"(y), "1"(*carry)
+ : "cc");
+ return x;
+
+#else
+ word t0 = x - y;
+ word c1 = (t0 > x);
+ word z = t0 - *carry;
+ *carry = c1 | (z > t0);
+ return z;
+#endif
+ }
+
+/*
+* Eight Word Block Subtraction, Two Argument
+*/
+inline word word8_sub2(word x[8], const word y[8], word carry)
+ {
+#if defined(BOTAN_MP_USE_X86_32_ASM)
+ asm(
+ ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl"))
+ : [carry]"=r"(carry)
+ : [x]"r"(x), [y]"r"(y), "0"(carry)
+ : "cc", "memory");
+ return carry;
+
+#elif defined(BOTAN_MP_USE_X86_64_ASM)
+
+ asm(
+ ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq"))
+ : [carry]"=r"(carry)
+ : [x]"r"(x), [y]"r"(y), "0"(carry)
+ : "cc", "memory");
+ return carry;
+
+#elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM)
+
+ __asm {
+ mov edi,[x]
+ mov esi,[y]
+ xor eax,eax
+ sub eax,[carry] //force CF=1 iff *carry==1
+ mov eax,[edi]
+ sbb eax,[esi]
+ mov [edi],eax
+ mov eax,[edi+4]
+ sbb eax,[esi+4]
+ mov [edi+4],eax
+ mov eax,[edi+8]
+ sbb eax,[esi+8]
+ mov [edi+8],eax
+ mov eax,[edi+12]
+ sbb eax,[esi+12]
+ mov [edi+12],eax
+ mov eax,[edi+16]
+ sbb eax,[esi+16]
+ mov [edi+16],eax
+ mov eax,[edi+20]
+ sbb eax,[esi+20]
+ mov [edi+20],eax
+ mov eax,[edi+24]
+ sbb eax,[esi+24]
+ mov [edi+24],eax
+ mov eax,[edi+28]
+ sbb eax,[esi+28]
+ mov [edi+28],eax
+ sbb eax,eax
+ neg eax
+ }
+
+#else
+ x[0] = word_sub(x[0], y[0], &carry);
+ x[1] = word_sub(x[1], y[1], &carry);
+ x[2] = word_sub(x[2], y[2], &carry);
+ x[3] = word_sub(x[3], y[3], &carry);
+ x[4] = word_sub(x[4], y[4], &carry);
+ x[5] = word_sub(x[5], y[5], &carry);
+ x[6] = word_sub(x[6], y[6], &carry);
+ x[7] = word_sub(x[7], y[7], &carry);
+ return carry;
+#endif
+ }
+
+/*
+* Eight Word Block Subtraction, Two Argument
+*/
+inline word word8_sub2_rev(word x[8], const word y[8], word carry)
+ {
+#if defined(BOTAN_MP_USE_X86_32_ASM)
+ asm(
+ ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
+ : [carry]"=r"(carry)
+ : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
+ : "cc", "memory");
+ return carry;
+
+#elif defined(BOTAN_MP_USE_X86_64_ASM)
+
+ asm(
+ ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
+ : [carry]"=r"(carry)
+ : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
+ : "cc", "memory");
+ return carry;
+
+#else
+ x[0] = word_sub(y[0], x[0], &carry);
+ x[1] = word_sub(y[1], x[1], &carry);
+ x[2] = word_sub(y[2], x[2], &carry);
+ x[3] = word_sub(y[3], x[3], &carry);
+ x[4] = word_sub(y[4], x[4], &carry);
+ x[5] = word_sub(y[5], x[5], &carry);
+ x[6] = word_sub(y[6], x[6], &carry);
+ x[7] = word_sub(y[7], x[7], &carry);
+ return carry;
+#endif
+ }
+
+/*
+* Eight Word Block Subtraction, Three Argument
+*/
+inline word word8_sub3(word z[8], const word x[8],
+ const word y[8], word carry)
+ {
+#if defined(BOTAN_MP_USE_X86_32_ASM)
+ asm(
+ ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
+ : [carry]"=r"(carry)
+ : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
+ : "cc", "memory");
+ return carry;
+
+#elif defined(BOTAN_MP_USE_X86_64_ASM)
+
+ asm(
+ ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
+ : [carry]"=r"(carry)
+ : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
+ : "cc", "memory");
+ return carry;
+
+#elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM)
+
+ __asm {
+ mov edi,[x]
+ mov esi,[y]
+ xor eax,eax
+ sub eax,[carry] //force CF=1 iff *carry==1
+ mov ebx,[z]
+ mov eax,[edi]
+ sbb eax,[esi]
+ mov [ebx],eax
+ mov eax,[edi+4]
+ sbb eax,[esi+4]
+ mov [ebx+4],eax
+ mov eax,[edi+8]
+ sbb eax,[esi+8]
+ mov [ebx+8],eax
+ mov eax,[edi+12]
+ sbb eax,[esi+12]
+ mov [ebx+12],eax
+ mov eax,[edi+16]
+ sbb eax,[esi+16]
+ mov [ebx+16],eax
+ mov eax,[edi+20]
+ sbb eax,[esi+20]
+ mov [ebx+20],eax
+ mov eax,[edi+24]
+ sbb eax,[esi+24]
+ mov [ebx+24],eax
+ mov eax,[edi+28]
+ sbb eax,[esi+28]
+ mov [ebx+28],eax
+ sbb eax,eax
+ neg eax
+ }
+
+#else
+ z[0] = word_sub(x[0], y[0], &carry);
+ z[1] = word_sub(x[1], y[1], &carry);
+ z[2] = word_sub(x[2], y[2], &carry);
+ z[3] = word_sub(x[3], y[3], &carry);
+ z[4] = word_sub(x[4], y[4], &carry);
+ z[5] = word_sub(x[5], y[5], &carry);
+ z[6] = word_sub(x[6], y[6], &carry);
+ z[7] = word_sub(x[7], y[7], &carry);
+ return carry;
+#endif
+ }
+
+/*
+* Eight Word Block Linear Multiplication
+*/
+inline word word8_linmul2(word x[8], word y, word carry)
+ {
+#if defined(BOTAN_MP_USE_X86_32_ASM)
+ asm(
+ DO_8_TIMES(LINMUL_OP, "x")
+ : [carry]"=r"(carry)
+ : [x]"r"(x), [y]"rm"(y), "0"(carry)
+ : "cc", "%eax", "%edx");
+ return carry;
+
+#elif defined(BOTAN_MP_USE_X86_64_ASM)
+
+ asm(
+ DO_8_TIMES(LINMUL_OP, "x")
+ : [carry]"=r"(carry)
+ : [x]"r"(x), [y]"rm"(y), "0"(carry)
+ : "cc", "%rax", "%rdx");
+ return carry;
+
+#elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM)
+
+ __asm {
+ mov esi,[x]
+ mov eax,[esi] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,[carry] //sum lo carry
+ adc edx,0 //sum hi carry
+ mov ecx,edx //store carry
+ mov [esi],eax //load a
+
+ mov eax,[esi+4] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,ecx //sum lo carry
+ adc edx,0 //sum hi carry
+ mov ecx,edx //store carry
+ mov [esi+4],eax //load a
+
+ mov eax,[esi+8] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,ecx //sum lo carry
+ adc edx,0 //sum hi carry
+ mov ecx,edx //store carry
+ mov [esi+8],eax //load a
+
+ mov eax,[esi+12] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,ecx //sum lo carry
+ adc edx,0 //sum hi carry
+ mov ecx,edx //store carry
+ mov [esi+12],eax //load a
+
+ mov eax,[esi+16] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,ecx //sum lo carry
+ adc edx,0 //sum hi carry
+ mov ecx,edx //store carry
+ mov [esi+16],eax //load a
+
+ mov eax,[esi+20] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,ecx //sum lo carry
+ adc edx,0 //sum hi carry
+ mov ecx,edx //store carry
+ mov [esi+20],eax //load a
+
+ mov eax,[esi+24] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,ecx //sum lo carry
+ adc edx,0 //sum hi carry
+ mov ecx,edx //store carry
+ mov [esi+24],eax //load a
+
+ mov eax,[esi+28] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,ecx //sum lo carry
+ adc edx,0 //sum hi carry
+ mov [esi+28],eax //load a
+
+ mov eax,edx //store carry
+ }
+
+#else
+ x[0] = word_madd2(x[0], y, &carry);
+ x[1] = word_madd2(x[1], y, &carry);
+ x[2] = word_madd2(x[2], y, &carry);
+ x[3] = word_madd2(x[3], y, &carry);
+ x[4] = word_madd2(x[4], y, &carry);
+ x[5] = word_madd2(x[5], y, &carry);
+ x[6] = word_madd2(x[6], y, &carry);
+ x[7] = word_madd2(x[7], y, &carry);
+ return carry;
+#endif
+ }
+
+/*
+* Eight Word Block Linear Multiplication
+*/
+inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
+ {
+#if defined(BOTAN_MP_USE_X86_32_ASM)
+ asm(
+ DO_8_TIMES(LINMUL_OP, "z")
+ : [carry]"=r"(carry)
+ : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
+ : "cc", "%eax", "%edx");
+ return carry;
+
+#elif defined(BOTAN_MP_USE_X86_64_ASM)
+ asm(
+ DO_8_TIMES(LINMUL_OP, "z")
+ : [carry]"=r"(carry)
+ : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
+ : "cc", "%rax", "%rdx");
+ return carry;
+
+#elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM)
+
+ __asm {
+ mov edi,[z]
+ mov esi,[x]
+ mov eax,[esi] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,[carry] //sum lo carry
+ adc edx,0 //sum hi carry
+ mov ecx,edx //store carry
+ mov [edi],eax //load a
+
+ mov eax,[esi+4] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,ecx //sum lo carry
+ adc edx,0 //sum hi carry
+ mov ecx,edx //store carry
+ mov [edi+4],eax //load a
+
+ mov eax,[esi+8] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,ecx //sum lo carry
+ adc edx,0 //sum hi carry
+ mov ecx,edx //store carry
+ mov [edi+8],eax //load a
+
+ mov eax,[esi+12] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,ecx //sum lo carry
+ adc edx,0 //sum hi carry
+ mov ecx,edx //store carry
+ mov [edi+12],eax //load a
+
+ mov eax,[esi+16] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,ecx //sum lo carry
+ adc edx,0 //sum hi carry
+ mov ecx,edx //store carry
+ mov [edi+16],eax //load a
+
+ mov eax,[esi+20] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,ecx //sum lo carry
+ adc edx,0 //sum hi carry
+ mov ecx,edx //store carry
+ mov [edi+20],eax //load a
+
+ mov eax,[esi+24] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,ecx //sum lo carry
+ adc edx,0 //sum hi carry
+ mov ecx,edx //store carry
+ mov [edi+24],eax //load a
+
+ mov eax,[esi+28] //load a
+ mul [y] //edx(hi):eax(lo)=a*b
+ add eax,ecx //sum lo carry
+ adc edx,0 //sum hi carry
+ mov [edi+28],eax //load a
+ mov eax,edx //store carry
+ }
+
+#else
+ z[0] = word_madd2(x[0], y, &carry);
+ z[1] = word_madd2(x[1], y, &carry);
+ z[2] = word_madd2(x[2], y, &carry);
+ z[3] = word_madd2(x[3], y, &carry);
+ z[4] = word_madd2(x[4], y, &carry);
+ z[5] = word_madd2(x[5], y, &carry);
+ z[6] = word_madd2(x[6], y, &carry);
+ z[7] = word_madd2(x[7], y, &carry);
+ return carry;
+#endif
+ }
+
+/*
+* Eight Word Block Multiply/Add
+*/
+inline word word8_madd3(word z[8], const word x[8], word y, word carry)
+ {
+#if defined(BOTAN_MP_USE_X86_32_ASM)
+ asm(
+ DO_8_TIMES(MULADD_OP, "")
+ : [carry]"=r"(carry)
+ : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
+ : "cc", "%eax", "%edx");
+ return carry;
+
+#elif defined(BOTAN_MP_USE_X86_64_ASM)
+
+ asm(
+ DO_8_TIMES(MULADD_OP, "")
+ : [carry]"=r"(carry)
+ : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
+ : "cc", "%rax", "%rdx");
+ return carry;
+
+#else
+ z[0] = word_madd3(x[0], y, z[0], &carry);
+ z[1] = word_madd3(x[1], y, z[1], &carry);
+ z[2] = word_madd3(x[2], y, z[2], &carry);
+ z[3] = word_madd3(x[3], y, z[3], &carry);
+ z[4] = word_madd3(x[4], y, z[4], &carry);
+ z[5] = word_madd3(x[5], y, z[5], &carry);
+ z[6] = word_madd3(x[6], y, z[6], &carry);
+ z[7] = word_madd3(x[7], y, z[7], &carry);
+ return carry;
+#endif
+ }
+
+/*
+* Multiply-Add Accumulator
+* (w2,w1,w0) += x * y
+*/
+inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y)
+ {
+#if defined(BOTAN_MP_USE_X86_32_ASM)
+ asm(
+ ASM("mull %[y]")
+
+ ASM("addl %[x],%[w0]")
+ ASM("adcl %[y],%[w1]")
+ ASM("adcl $0,%[w2]")
+
+ : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
+ : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2)
+ : "cc");
+
+#elif defined(BOTAN_MP_USE_X86_64_ASM)
+
+ asm(
+ ASM("mulq %[y]")
+
+ ASM("addq %[x],%[w0]")
+ ASM("adcq %[y],%[w1]")
+ ASM("adcq $0,%[w2]")
+
+ : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
+ : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2)
+ : "cc");
+
+#else
+ word carry = *w0;
+ *w0 = word_madd2(x, y, &carry);
+ *w1 += carry;
+ *w2 += (*w1 < carry) ? 1 : 0;
+#endif
+ }
+
+/*
+* Multiply-Add Accumulator
+* (w2,w1,w0) += 2 * x * y
+*/
+inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y)
+ {
+#if defined(BOTAN_MP_USE_X86_32_ASM)
+ asm(
+ ASM("mull %[y]")
+
+ ASM("addl %[x],%[w0]")
+ ASM("adcl %[y],%[w1]")
+ ASM("adcl $0,%[w2]")
+
+ ASM("addl %[x],%[w0]")
+ ASM("adcl %[y],%[w1]")
+ ASM("adcl $0,%[w2]")
+
+ : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
+ : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2)
+ : "cc");
+
+#elif defined(BOTAN_MP_USE_X86_64_ASM)
+
+ asm(
+ ASM("mulq %[y]")
+
+ ASM("addq %[x],%[w0]")
+ ASM("adcq %[y],%[w1]")
+ ASM("adcq $0,%[w2]")
+
+ ASM("addq %[x],%[w0]")
+ ASM("adcq %[y],%[w1]")
+ ASM("adcq $0,%[w2]")
+
+ : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
+ : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2)
+ : "cc");
+
+#else
+ word carry = 0;
+ x = word_madd2(x, y, &carry);
+ y = carry;
+
+ word top = (y >> (BOTAN_MP_WORD_BITS-1));
+ y <<= 1;
+ y |= (x >> (BOTAN_MP_WORD_BITS-1));
+ x <<= 1;
+
+ carry = 0;
+ *w0 = word_add(*w0, x, &carry);
+ *w1 = word_add(*w1, y, &carry);
+ *w2 = word_add(*w2, top, &carry);
+#endif
+ }
+
+#if defined(ASM)
+ #undef ASM
+ #undef DO_8_TIMES
+ #undef ADD_OR_SUBTRACT
+ #undef ADDSUB2_OP
+ #undef ADDSUB3_OP
+ #undef LINMUL_OP
+ #undef MULADD_OP
+#endif
+
+}
+
+#endif
diff --git a/src/lib/math/mp/mp_asm.cpp b/src/lib/math/mp/mp_core.cpp
index cfbb027d7..2a0b08f67 100644
--- a/src/lib/math/mp/mp_asm.cpp
+++ b/src/lib/math/mp/mp_core.cpp
@@ -8,7 +8,6 @@
#include <botan/internal/mp_core.h>
#include <botan/internal/mp_asmi.h>
-#include <botan/internal/mp_core.h>
#include <botan/internal/ct_utils.h>
#include <botan/exceptn.h>
#include <botan/mem_ops.h>
@@ -253,4 +252,189 @@ void bigint_linmul3(word z[], const word x[], size_t x_size, word y)
z[x_size] = carry;
}
+/*
+* Single Operand Left Shift
+*/
+void bigint_shl1(word x[], size_t x_size, size_t word_shift, size_t bit_shift)
+ {
+ if(word_shift)
+ {
+ copy_mem(x + word_shift, x, x_size);
+ clear_mem(x, word_shift);
+ }
+
+ if(bit_shift)
+ {
+ word carry = 0;
+ for(size_t j = word_shift; j != x_size + word_shift + 1; ++j)
+ {
+ word temp = x[j];
+ x[j] = (temp << bit_shift) | carry;
+ carry = (temp >> (MP_WORD_BITS - bit_shift));
+ }
+ }
+ }
+
+/*
+* Single Operand Right Shift
+*/
+void bigint_shr1(word x[], size_t x_size, size_t word_shift, size_t bit_shift)
+ {
+ if(x_size < word_shift)
+ {
+ clear_mem(x, x_size);
+ return;
+ }
+
+ if(word_shift)
+ {
+ copy_mem(x, x + word_shift, x_size - word_shift);
+ clear_mem(x + x_size - word_shift, word_shift);
+ }
+
+ if(bit_shift)
+ {
+ word carry = 0;
+
+ size_t top = x_size - word_shift;
+
+ while(top >= 4)
+ {
+ word w = x[top-1];
+ x[top-1] = (w >> bit_shift) | carry;
+ carry = (w << (MP_WORD_BITS - bit_shift));
+
+ w = x[top-2];
+ x[top-2] = (w >> bit_shift) | carry;
+ carry = (w << (MP_WORD_BITS - bit_shift));
+
+ w = x[top-3];
+ x[top-3] = (w >> bit_shift) | carry;
+ carry = (w << (MP_WORD_BITS - bit_shift));
+
+ w = x[top-4];
+ x[top-4] = (w >> bit_shift) | carry;
+ carry = (w << (MP_WORD_BITS - bit_shift));
+
+ top -= 4;
+ }
+
+ while(top)
+ {
+ word w = x[top-1];
+ x[top-1] = (w >> bit_shift) | carry;
+ carry = (w << (MP_WORD_BITS - bit_shift));
+
+ top--;
+ }
+ }
+ }
+
+/*
+* Two Operand Left Shift
+*/
+void bigint_shl2(word y[], const word x[], size_t x_size,
+ size_t word_shift, size_t bit_shift)
+ {
+ for(size_t j = 0; j != x_size; ++j)
+ y[j + word_shift] = x[j];
+ if(bit_shift)
+ {
+ word carry = 0;
+ for(size_t j = word_shift; j != x_size + word_shift + 1; ++j)
+ {
+ word w = y[j];
+ y[j] = (w << bit_shift) | carry;
+ carry = (w >> (MP_WORD_BITS - bit_shift));
+ }
+ }
+ }
+
+/*
+* Two Operand Right Shift
+*/
+void bigint_shr2(word y[], const word x[], size_t x_size,
+ size_t word_shift, size_t bit_shift)
+ {
+ if(x_size < word_shift) return;
+
+ for(size_t j = 0; j != x_size - word_shift; ++j)
+ y[j] = x[j + word_shift];
+ if(bit_shift)
+ {
+ word carry = 0;
+ for(size_t j = x_size - word_shift; j > 0; --j)
+ {
+ word w = y[j-1];
+ y[j-1] = (w >> bit_shift) | carry;
+ carry = (w << (MP_WORD_BITS - bit_shift));
+ }
+ }
+ }
+
+/*
+* Compare two MP integers
+*/
+s32bit bigint_cmp(const word x[], size_t x_size,
+ const word y[], size_t y_size)
+ {
+ if(x_size < y_size) { return (-bigint_cmp(y, y_size, x, x_size)); }
+
+ while(x_size > y_size)
+ {
+ if(x[x_size-1])
+ return 1;
+ x_size--;
+ }
+
+ for(size_t i = x_size; i > 0; --i)
+ {
+ if(x[i-1] > y[i-1])
+ return 1;
+ if(x[i-1] < y[i-1])
+ return -1;
+ }
+
+ return 0;
+ }
+
+/*
+* Do a 2-word/1-word Division
+*/
+word bigint_divop(word n1, word n0, word d)
+ {
+ if(d == 0)
+ throw Invalid_Argument("bigint_divop divide by zero");
+
+ word high = n1 % d, quotient = 0;
+
+ for(size_t i = 0; i != MP_WORD_BITS; ++i)
+ {
+ word high_top_bit = (high & MP_WORD_TOP_BIT);
+
+ high <<= 1;
+ high |= (n0 >> (MP_WORD_BITS-1-i)) & 1;
+ quotient <<= 1;
+
+ if(high_top_bit || high >= d)
+ {
+ high -= d;
+ quotient |= 1;
+ }
+ }
+
+ return quotient;
+ }
+
+/*
+* Do a 2-word/1-word Modulo
+*/
+word bigint_modop(word n1, word n0, word d)
+ {
+ word z = bigint_divop(n1, n0, d);
+ word dummy = 0;
+ z = word_madd2(z, d, &dummy);
+ return (n0-z);
+ }
+
}
diff --git a/src/lib/math/mp/mp_generic/info.txt b/src/lib/math/mp/mp_generic/info.txt
deleted file mode 100644
index c87dd00ca..000000000
--- a/src/lib/math/mp/mp_generic/info.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-load_on dep
-
-<header:internal>
-mp_madd.h
-mp_asmi.h
-</header:internal>
diff --git a/src/lib/math/mp/mp_generic/mp_asmi.h b/src/lib/math/mp/mp_generic/mp_asmi.h
deleted file mode 100644
index 708afdfa0..000000000
--- a/src/lib/math/mp/mp_generic/mp_asmi.h
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
-* Lowest Level MPI Algorithms
-* (C) 1999-2010 Jack Lloyd
-* 2006 Luca Piccarreta
-*
-* Botan is released under the Simplified BSD License (see license.txt)
-*/
-
-#ifndef BOTAN_MP_ASM_INTERNAL_H__
-#define BOTAN_MP_ASM_INTERNAL_H__
-
-#include <botan/internal/mp_madd.h>
-
-namespace Botan {
-
-/*
-* Word Addition
-*/
-inline word word_add(word x, word y, word* carry)
- {
- word z = x + y;
- word c1 = (z < x);
- z += *carry;
- *carry = c1 | (z < *carry);
- return z;
- }
-
-/*
-* Eight Word Block Addition, Two Argument
-*/
-inline word word8_add2(word x[8], const word y[8], word carry)
- {
- x[0] = word_add(x[0], y[0], &carry);
- x[1] = word_add(x[1], y[1], &carry);
- x[2] = word_add(x[2], y[2], &carry);
- x[3] = word_add(x[3], y[3], &carry);
- x[4] = word_add(x[4], y[4], &carry);
- x[5] = word_add(x[5], y[5], &carry);
- x[6] = word_add(x[6], y[6], &carry);
- x[7] = word_add(x[7], y[7], &carry);
- return carry;
- }
-
-/*
-* Eight Word Block Addition, Three Argument
-*/
-inline word word8_add3(word z[8], const word x[8],
- const word y[8], word carry)
- {
- z[0] = word_add(x[0], y[0], &carry);
- z[1] = word_add(x[1], y[1], &carry);
- z[2] = word_add(x[2], y[2], &carry);
- z[3] = word_add(x[3], y[3], &carry);
- z[4] = word_add(x[4], y[4], &carry);
- z[5] = word_add(x[5], y[5], &carry);
- z[6] = word_add(x[6], y[6], &carry);
- z[7] = word_add(x[7], y[7], &carry);
- return carry;
- }
-
-/*
-* Word Subtraction
-*/
-inline word word_sub(word x, word y, word* carry)
- {
- word t0 = x - y;
- word c1 = (t0 > x);
- word z = t0 - *carry;
- *carry = c1 | (z > t0);
- return z;
- }
-
-/*
-* Eight Word Block Subtraction, Two Argument
-*/
-inline word word8_sub2(word x[8], const word y[8], word carry)
- {
- x[0] = word_sub(x[0], y[0], &carry);
- x[1] = word_sub(x[1], y[1], &carry);
- x[2] = word_sub(x[2], y[2], &carry);
- x[3] = word_sub(x[3], y[3], &carry);
- x[4] = word_sub(x[4], y[4], &carry);
- x[5] = word_sub(x[5], y[5], &carry);
- x[6] = word_sub(x[6], y[6], &carry);
- x[7] = word_sub(x[7], y[7], &carry);
- return carry;
- }
-
-/*
-* Eight Word Block Subtraction, Two Argument
-*/
-inline word word8_sub2_rev(word x[8], const word y[8], word carry)
- {
- x[0] = word_sub(y[0], x[0], &carry);
- x[1] = word_sub(y[1], x[1], &carry);
- x[2] = word_sub(y[2], x[2], &carry);
- x[3] = word_sub(y[3], x[3], &carry);
- x[4] = word_sub(y[4], x[4], &carry);
- x[5] = word_sub(y[5], x[5], &carry);
- x[6] = word_sub(y[6], x[6], &carry);
- x[7] = word_sub(y[7], x[7], &carry);
- return carry;
- }
-
-/*
-* Eight Word Block Subtraction, Three Argument
-*/
-inline word word8_sub3(word z[8], const word x[8],
- const word y[8], word carry)
- {
- z[0] = word_sub(x[0], y[0], &carry);
- z[1] = word_sub(x[1], y[1], &carry);
- z[2] = word_sub(x[2], y[2], &carry);
- z[3] = word_sub(x[3], y[3], &carry);
- z[4] = word_sub(x[4], y[4], &carry);
- z[5] = word_sub(x[5], y[5], &carry);
- z[6] = word_sub(x[6], y[6], &carry);
- z[7] = word_sub(x[7], y[7], &carry);
- return carry;
- }
-
-/*
-* Eight Word Block Linear Multiplication
-*/
-inline word word8_linmul2(word x[8], word y, word carry)
- {
- x[0] = word_madd2(x[0], y, &carry);
- x[1] = word_madd2(x[1], y, &carry);
- x[2] = word_madd2(x[2], y, &carry);
- x[3] = word_madd2(x[3], y, &carry);
- x[4] = word_madd2(x[4], y, &carry);
- x[5] = word_madd2(x[5], y, &carry);
- x[6] = word_madd2(x[6], y, &carry);
- x[7] = word_madd2(x[7], y, &carry);
- return carry;
- }
-
-/*
-* Eight Word Block Linear Multiplication
-*/
-inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
- {
- z[0] = word_madd2(x[0], y, &carry);
- z[1] = word_madd2(x[1], y, &carry);
- z[2] = word_madd2(x[2], y, &carry);
- z[3] = word_madd2(x[3], y, &carry);
- z[4] = word_madd2(x[4], y, &carry);
- z[5] = word_madd2(x[5], y, &carry);
- z[6] = word_madd2(x[6], y, &carry);
- z[7] = word_madd2(x[7], y, &carry);
- return carry;
- }
-
-/*
-* Eight Word Block Multiply/Add
-*/
-inline word word8_madd3(word z[8], const word x[8], word y, word carry)
- {
- z[0] = word_madd3(x[0], y, z[0], &carry);
- z[1] = word_madd3(x[1], y, z[1], &carry);
- z[2] = word_madd3(x[2], y, z[2], &carry);
- z[3] = word_madd3(x[3], y, z[3], &carry);
- z[4] = word_madd3(x[4], y, z[4], &carry);
- z[5] = word_madd3(x[5], y, z[5], &carry);
- z[6] = word_madd3(x[6], y, z[6], &carry);
- z[7] = word_madd3(x[7], y, z[7], &carry);
- return carry;
- }
-
-/*
-* Multiply-Add Accumulator
-*/
-inline void word3_muladd(word* w2, word* w1, word* w0, word a, word b)
- {
- word carry = *w0;
- *w0 = word_madd2(a, b, &carry);
- *w1 += carry;
- *w2 += (*w1 < carry) ? 1 : 0;
- }
-
-/*
-* Multiply-Add Accumulator
-*/
-inline void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b)
- {
- word carry = 0;
- a = word_madd2(a, b, &carry);
- b = carry;
-
- word top = (b >> (BOTAN_MP_WORD_BITS-1));
- b <<= 1;
- b |= (a >> (BOTAN_MP_WORD_BITS-1));
- a <<= 1;
-
- carry = 0;
- *w0 = word_add(*w0, a, &carry);
- *w1 = word_add(*w1, b, &carry);
- *w2 = word_add(*w2, top, &carry);
- }
-
-}
-
-#endif
diff --git a/src/lib/math/mp/mp_generic/mp_madd.h b/src/lib/math/mp/mp_madd.h
index 95a1069a4..0567622d9 100644
--- a/src/lib/math/mp/mp_generic/mp_madd.h
+++ b/src/lib/math/mp/mp_madd.h
@@ -35,12 +35,52 @@ namespace Botan {
#error BOTAN_MP_WORD_BITS must be 8, 16, 32, or 64
#endif
+#if defined(BOTAN_TARGET_ARCH_IS_X86_32) && (BOTAN_MP_WORD_BITS == 32)
+
+ #if defined(BOTAN_USE_GCC_INLINE_ASM)
+ #define BOTAN_MP_USE_X86_32_ASM
+ #define ASM(x) x "\n\t"
+ #elif defined(BOTAN_TARGET_COMPILER_IS_MSVC)
+ #define BOTAN_MP_USE_X86_32_MSVC_ASM
+ #endif
+
+#elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && (BOTAN_MP_WORD_BITS == 64) && (BOTAN_USE_GCC_INLINE_ASM)
+ #define BOTAN_MP_USE_X86_64_ASM
+ #define ASM(x) x "\n\t"
+#endif
+
+#if defined(BOTAN_MP_USE_X86_32_ASM) || defined(BOTAN_MP_USE_X86_64_ASM)
+ #define ASM(x) x "\n\t"
+#endif
+
/*
* Word Multiply/Add
*/
inline word word_madd2(word a, word b, word* c)
{
-#if defined(BOTAN_HAS_MP_DWORD)
+#if defined(BOTAN_MP_USE_X86_32_ASM)
+ asm(
+ ASM("mull %[b]")
+ ASM("addl %[c],%[a]")
+ ASM("adcl $0,%[carry]")
+
+ : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c)
+ : "0"(a), "1"(b), [c]"g"(*c) : "cc");
+
+ return a;
+
+#elif defined(BOTAN_MP_USE_X86_64_ASM)
+ asm(
+ ASM("mulq %[b]")
+ ASM("addq %[c],%[a]")
+ ASM("adcq $0,%[carry]")
+
+ : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c)
+ : "0"(a), "1"(b), [c]"g"(*c) : "cc");
+
+ return a;
+
+#elif defined(BOTAN_HAS_MP_DWORD)
const dword s = static_cast<dword>(a) * b + *c;
*c = static_cast<word>(s >> BOTAN_MP_WORD_BITS);
return static_cast<word>(s);
@@ -64,7 +104,37 @@ inline word word_madd2(word a, word b, word* c)
*/
inline word word_madd3(word a, word b, word c, word* d)
{
-#if defined(BOTAN_HAS_MP_DWORD)
+#if defined(BOTAN_MP_USE_X86_32_ASM)
+ asm(
+ ASM("mull %[b]")
+
+ ASM("addl %[c],%[a]")
+ ASM("adcl $0,%[carry]")
+
+ ASM("addl %[d],%[a]")
+ ASM("adcl $0,%[carry]")
+
+ : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d)
+ : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc");
+
+ return a;
+
+#elif defined(BOTAN_MP_USE_X86_64_ASM)
+ asm(
+ ASM("mulq %[b]")
+
+ ASM("addq %[c],%[a]")
+ ASM("adcq $0,%[carry]")
+
+ ASM("addq %[d],%[a]")
+ ASM("adcq $0,%[carry]")
+
+ : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d)
+ : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc");
+
+ return a;
+
+#elif defined(BOTAN_HAS_MP_DWORD)
const dword s = static_cast<dword>(a) * b + c + *d;
*d = static_cast<word>(s >> BOTAN_MP_WORD_BITS);
return static_cast<word>(s);
@@ -86,6 +156,10 @@ inline word word_madd3(word a, word b, word c, word* d)
#endif
}
+#if defined(ASM)
+ #undef ASM
+#endif
+
}
#endif
diff --git a/src/lib/math/mp/mp_misc.cpp b/src/lib/math/mp/mp_misc.cpp
deleted file mode 100644
index 768543a64..000000000
--- a/src/lib/math/mp/mp_misc.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
-* MP Misc Functions
-* (C) 1999-2008 Jack Lloyd
-*
-* Botan is released under the Simplified BSD License (see license.txt)
-*/
-
-#include <botan/internal/mp_core.h>
-#include <botan/internal/mp_madd.h>
-#include <botan/exceptn.h>
-
-namespace Botan {
-
-/*
-* Compare two MP integers
-*/
-s32bit bigint_cmp(const word x[], size_t x_size,
- const word y[], size_t y_size)
- {
- if(x_size < y_size) { return (-bigint_cmp(y, y_size, x, x_size)); }
-
- while(x_size > y_size)
- {
- if(x[x_size-1])
- return 1;
- x_size--;
- }
-
- for(size_t i = x_size; i > 0; --i)
- {
- if(x[i-1] > y[i-1])
- return 1;
- if(x[i-1] < y[i-1])
- return -1;
- }
-
- return 0;
- }
-
-/*
-* Do a 2-word/1-word Division
-*/
-word bigint_divop(word n1, word n0, word d)
- {
- if(d == 0)
- throw Invalid_Argument("bigint_divop divide by zero");
-
- word high = n1 % d, quotient = 0;
-
- for(size_t i = 0; i != MP_WORD_BITS; ++i)
- {
- word high_top_bit = (high & MP_WORD_TOP_BIT);
-
- high <<= 1;
- high |= (n0 >> (MP_WORD_BITS-1-i)) & 1;
- quotient <<= 1;
-
- if(high_top_bit || high >= d)
- {
- high -= d;
- quotient |= 1;
- }
- }
-
- return quotient;
- }
-
-/*
-* Do a 2-word/1-word Modulo
-*/
-word bigint_modop(word n1, word n0, word d)
- {
- word z = bigint_divop(n1, n0, d);
- word dummy = 0;
- z = word_madd2(z, d, &dummy);
- return (n0-z);
- }
-
-}
diff --git a/src/lib/math/mp/mp_shift.cpp b/src/lib/math/mp/mp_shift.cpp
deleted file mode 100644
index 1850888a0..000000000
--- a/src/lib/math/mp/mp_shift.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
-* MP Shift Algorithms
-* (C) 1999-2007,2014 Jack Lloyd
-*
-* Botan is released under the Simplified BSD License (see license.txt)
-*/
-
-#include <botan/internal/mp_core.h>
-#include <botan/mem_ops.h>
-
-namespace Botan {
-
-/*
-* Single Operand Left Shift
-*/
-void bigint_shl1(word x[], size_t x_size, size_t word_shift, size_t bit_shift)
- {
- if(word_shift)
- {
- copy_mem(x + word_shift, x, x_size);
- clear_mem(x, word_shift);
- }
-
- if(bit_shift)
- {
- word carry = 0;
- for(size_t j = word_shift; j != x_size + word_shift + 1; ++j)
- {
- word temp = x[j];
- x[j] = (temp << bit_shift) | carry;
- carry = (temp >> (MP_WORD_BITS - bit_shift));
- }
- }
- }
-
-/*
-* Single Operand Right Shift
-*/
-void bigint_shr1(word x[], size_t x_size, size_t word_shift, size_t bit_shift)
- {
- if(x_size < word_shift)
- {
- clear_mem(x, x_size);
- return;
- }
-
- if(word_shift)
- {
- copy_mem(x, x + word_shift, x_size - word_shift);
- clear_mem(x + x_size - word_shift, word_shift);
- }
-
- if(bit_shift)
- {
- word carry = 0;
-
- size_t top = x_size - word_shift;
-
- while(top >= 4)
- {
- word w = x[top-1];
- x[top-1] = (w >> bit_shift) | carry;
- carry = (w << (MP_WORD_BITS - bit_shift));
-
- w = x[top-2];
- x[top-2] = (w >> bit_shift) | carry;
- carry = (w << (MP_WORD_BITS - bit_shift));
-
- w = x[top-3];
- x[top-3] = (w >> bit_shift) | carry;
- carry = (w << (MP_WORD_BITS - bit_shift));
-
- w = x[top-4];
- x[top-4] = (w >> bit_shift) | carry;
- carry = (w << (MP_WORD_BITS - bit_shift));
-
- top -= 4;
- }
-
- while(top)
- {
- word w = x[top-1];
- x[top-1] = (w >> bit_shift) | carry;
- carry = (w << (MP_WORD_BITS - bit_shift));
-
- top--;
- }
- }
- }
-
-/*
-* Two Operand Left Shift
-*/
-void bigint_shl2(word y[], const word x[], size_t x_size,
- size_t word_shift, size_t bit_shift)
- {
- for(size_t j = 0; j != x_size; ++j)
- y[j + word_shift] = x[j];
- if(bit_shift)
- {
- word carry = 0;
- for(size_t j = word_shift; j != x_size + word_shift + 1; ++j)
- {
- word w = y[j];
- y[j] = (w << bit_shift) | carry;
- carry = (w >> (MP_WORD_BITS - bit_shift));
- }
- }
- }
-
-/*
-* Two Operand Right Shift
-*/
-void bigint_shr2(word y[], const word x[], size_t x_size,
- size_t word_shift, size_t bit_shift)
- {
- if(x_size < word_shift) return;
-
- for(size_t j = 0; j != x_size - word_shift; ++j)
- y[j] = x[j + word_shift];
- if(bit_shift)
- {
- word carry = 0;
- for(size_t j = x_size - word_shift; j > 0; --j)
- {
- word w = y[j-1];
- y[j-1] = (w >> bit_shift) | carry;
- carry = (w << (MP_WORD_BITS - bit_shift));
- }
- }
- }
-
-}
diff --git a/src/lib/math/mp/mp_x86_32/info.txt b/src/lib/math/mp/mp_x86_32/info.txt
deleted file mode 100644
index f36abaf62..000000000
--- a/src/lib/math/mp/mp_x86_32/info.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-load_on dep
-
-mp_bits 32
-
-<header:internal>
-mp_madd.h
-mp_asmi.h
-</header:internal>
-
-<arch>
-x86_32
-</arch>
-
-<cc>
-clang
-gcc
-icc
-</cc>
diff --git a/src/lib/math/mp/mp_x86_32/mp_asmi.h b/src/lib/math/mp/mp_x86_32/mp_asmi.h
deleted file mode 100644
index 95af89fc0..000000000
--- a/src/lib/math/mp/mp_x86_32/mp_asmi.h
+++ /dev/null
@@ -1,236 +0,0 @@
-/*
-* Lowest Level MPI Algorithms
-* (C) 1999-2010 Jack Lloyd
-* 2006 Luca Piccarreta
-*
-* Botan is released under the Simplified BSD License (see license.txt)
-*/
-
-#ifndef BOTAN_MP_ASM_INTERNAL_H__
-#define BOTAN_MP_ASM_INTERNAL_H__
-
-#include <botan/internal/mp_madd.h>
-
-namespace Botan {
-
-/*
-* Helper Macros for x86 Assembly
-*/
-#ifndef ASM
- #define ASM(x) x "\n\t"
-#endif
-
-#define ADDSUB2_OP(OPERATION, INDEX) \
- ASM("movl 4*" #INDEX "(%[y]), %[carry]") \
- ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])") \
-
-#define ADDSUB3_OP(OPERATION, INDEX) \
- ASM("movl 4*" #INDEX "(%[x]), %[carry]") \
- ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]") \
- ASM("movl %[carry], 4*" #INDEX "(%[z])") \
-
-#define LINMUL_OP(WRITE_TO, INDEX) \
- ASM("movl 4*" #INDEX "(%[x]),%%eax") \
- ASM("mull %[y]") \
- ASM("addl %[carry],%%eax") \
- ASM("adcl $0,%%edx") \
- ASM("movl %%edx,%[carry]") \
- ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])")
-
-#define MULADD_OP(IGNORED, INDEX) \
- ASM("movl 4*" #INDEX "(%[x]),%%eax") \
- ASM("mull %[y]") \
- ASM("addl %[carry],%%eax") \
- ASM("adcl $0,%%edx") \
- ASM("addl 4*" #INDEX "(%[z]),%%eax") \
- ASM("adcl $0,%%edx") \
- ASM("movl %%edx,%[carry]") \
- ASM("movl %%eax, 4*" #INDEX " (%[z])")
-
-#define DO_8_TIMES(MACRO, ARG) \
- MACRO(ARG, 0) \
- MACRO(ARG, 1) \
- MACRO(ARG, 2) \
- MACRO(ARG, 3) \
- MACRO(ARG, 4) \
- MACRO(ARG, 5) \
- MACRO(ARG, 6) \
- MACRO(ARG, 7)
-
-#define ADD_OR_SUBTRACT(CORE_CODE) \
- ASM("rorl %[carry]") \
- CORE_CODE \
- ASM("sbbl %[carry],%[carry]") \
- ASM("negl %[carry]")
-
-/*
-* Word Addition
-*/
-inline word word_add(word x, word y, word* carry)
- {
- asm(
- ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]"))
- : [x]"=r"(x), [carry]"=r"(*carry)
- : "0"(x), [y]"rm"(y), "1"(*carry)
- : "cc");
- return x;
- }
-
-/*
-* Eight Word Block Addition, Two Argument
-*/
-inline word word8_add2(word x[8], const word y[8], word carry)
- {
- asm(
- ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl"))
- : [carry]"=r"(carry)
- : [x]"r"(x), [y]"r"(y), "0"(carry)
- : "cc", "memory");
- return carry;
- }
-
-/*
-* Eight Word Block Addition, Three Argument
-*/
-inline word word8_add3(word z[8], const word x[8], const word y[8], word carry)
- {
- asm(
- ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl"))
- : [carry]"=r"(carry)
- : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
- : "cc", "memory");
- return carry;
- }
-
-/*
-* Word Subtraction
-*/
-inline word word_sub(word x, word y, word* carry)
- {
- asm(
- ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]"))
- : [x]"=r"(x), [carry]"=r"(*carry)
- : "0"(x), [y]"rm"(y), "1"(*carry)
- : "cc");
- return x;
- }
-
-/*
-* Eight Word Block Subtraction, Two Argument
-*/
-inline word word8_sub2(word x[8], const word y[8], word carry)
- {
- asm(
- ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl"))
- : [carry]"=r"(carry)
- : [x]"r"(x), [y]"r"(y), "0"(carry)
- : "cc", "memory");
- return carry;
- }
-
-/*
-* Eight Word Block Subtraction, Two Argument
-*/
-inline word word8_sub2_rev(word x[8], const word y[8], word carry)
- {
- asm(
- ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
- : [carry]"=r"(carry)
- : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
- : "cc", "memory");
- return carry;
- }
-
-/*
-* Eight Word Block Subtraction, Three Argument
-*/
-inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry)
- {
- asm(
- ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
- : [carry]"=r"(carry)
- : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
- : "cc", "memory");
- return carry;
- }
-
-/*
-* Eight Word Block Linear Multiplication
-*/
-inline word word8_linmul2(word x[8], word y, word carry)
- {
- asm(
- DO_8_TIMES(LINMUL_OP, "x")
- : [carry]"=r"(carry)
- : [x]"r"(x), [y]"rm"(y), "0"(carry)
- : "cc", "%eax", "%edx");
- return carry;
- }
-
-/*
-* Eight Word Block Linear Multiplication
-*/
-inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
- {
- asm(
- DO_8_TIMES(LINMUL_OP, "z")
- : [carry]"=r"(carry)
- : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
- : "cc", "%eax", "%edx");
- return carry;
- }
-
-/*
-* Eight Word Block Multiply/Add
-*/
-inline word word8_madd3(word z[8], const word x[8], word y, word carry)
- {
- asm(
- DO_8_TIMES(MULADD_OP, "")
- : [carry]"=r"(carry)
- : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
- : "cc", "%eax", "%edx");
- return carry;
- }
-
-/*
-* Multiply-Add Accumulator
-*/
-inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y)
- {
- asm(
- ASM("mull %[y]")
-
- ASM("addl %[x],%[w0]")
- ASM("adcl %[y],%[w1]")
- ASM("adcl $0,%[w2]")
-
- : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
- : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2)
- : "cc");
- }
-
-/*
-* Multiply-Add Accumulator
-*/
-inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y)
- {
- asm(
- ASM("mull %[y]")
-
- ASM("addl %[x],%[w0]")
- ASM("adcl %[y],%[w1]")
- ASM("adcl $0,%[w2]")
-
- ASM("addl %[x],%[w0]")
- ASM("adcl %[y],%[w1]")
- ASM("adcl $0,%[w2]")
-
- : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
- : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2)
- : "cc");
- }
-
-}
-
-#endif
diff --git a/src/lib/math/mp/mp_x86_32/mp_madd.h b/src/lib/math/mp/mp_x86_32/mp_madd.h
deleted file mode 100644
index 9c0990398..000000000
--- a/src/lib/math/mp/mp_x86_32/mp_madd.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
-* Lowest Level MPI Algorithms
-* (C) 1999-2008 Jack Lloyd
-* 2006 Luca Piccarreta
-*
-* Botan is released under the Simplified BSD License (see license.txt)
-*/
-
-#ifndef BOTAN_MP_WORD_MULADD_H__
-#define BOTAN_MP_WORD_MULADD_H__
-
-#include <botan/mp_types.h>
-
-#if (BOTAN_MP_WORD_BITS != 32)
- #error The mp_x86_32 module requires that BOTAN_MP_WORD_BITS == 32
-#endif
-
-namespace Botan {
-
-/*
-* Helper Macros for x86 Assembly
-*/
-#define ASM(x) x "\n\t"
-
-/*
-* Word Multiply
-*/
-inline word word_madd2(word a, word b, word* c)
- {
- asm(
- ASM("mull %[b]")
- ASM("addl %[c],%[a]")
- ASM("adcl $0,%[carry]")
-
- : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c)
- : "0"(a), "1"(b), [c]"g"(*c) : "cc");
-
- return a;
- }
-
-/*
-* Word Multiply/Add
-*/
-inline word word_madd3(word a, word b, word c, word* d)
- {
- asm(
- ASM("mull %[b]")
-
- ASM("addl %[c],%[a]")
- ASM("adcl $0,%[carry]")
-
- ASM("addl %[d],%[a]")
- ASM("adcl $0,%[carry]")
-
- : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d)
- : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc");
-
- return a;
- }
-
-}
-
-#endif
diff --git a/src/lib/math/mp/mp_x86_32_msvc/info.txt b/src/lib/math/mp/mp_x86_32_msvc/info.txt
deleted file mode 100644
index 3029d6a61..000000000
--- a/src/lib/math/mp/mp_x86_32_msvc/info.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-mp_bits 32
-
-load_on dep
-
-<header:internal>
-mp_generic:mp_madd.h
-mp_asmi.h
-</header:internal>
-
-<arch>
-x86_32
-</arch>
-
-<cc>
-msvc
-</cc>
diff --git a/src/lib/math/mp/mp_x86_32_msvc/mp_asmi.h b/src/lib/math/mp/mp_x86_32_msvc/mp_asmi.h
deleted file mode 100644
index 92bf7980d..000000000
--- a/src/lib/math/mp/mp_x86_32_msvc/mp_asmi.h
+++ /dev/null
@@ -1,454 +0,0 @@
-/*
-* Lowest Level MPI Algorithms
-* (C) 1999-2010 Jack Lloyd
-* 2006 Luca Piccarreta
-*
-* Botan is released under the Simplified BSD License (see license.txt)
-*/
-
-#ifndef BOTAN_MP_ASM_INTERNAL_H__
-#define BOTAN_MP_ASM_INTERNAL_H__
-
-#include <botan/internal/mp_madd.h>
-
-namespace Botan {
-
-/*
-* Word Addition
-*/
-inline word word_add(word x, word y, word* carry)
- {
- word z = x + y;
- word c1 = (z < x);
- z += *carry;
- *carry = c1 | (z < *carry);
- return z;
- }
-
-/*
-* Eight Word Block Addition, Two Argument
-*/
-inline word word8_add2(word x[8], const word y[8], word carry)
- {
- __asm {
- mov edx,[x]
- mov esi,[y]
- xor eax,eax
- sub eax,[carry] //force CF=1 iff *carry==1
- mov eax,[esi]
- adc [edx],eax
- mov eax,[esi+4]
- adc [edx+4],eax
- mov eax,[esi+8]
- adc [edx+8],eax
- mov eax,[esi+12]
- adc [edx+12],eax
- mov eax,[esi+16]
- adc [edx+16],eax
- mov eax,[esi+20]
- adc [edx+20],eax
- mov eax,[esi+24]
- adc [edx+24],eax
- mov eax,[esi+28]
- adc [edx+28],eax
- sbb eax,eax
- neg eax
- }
- }
-
-/*
-* Eight Word Block Addition, Three Argument
-*/
-inline word word8_add3(word z[8], const word x[8], const word y[8], word carry)
- {
- __asm {
- mov edi,[x]
- mov esi,[y]
- mov ebx,[z]
- xor eax,eax
- sub eax,[carry] //force CF=1 iff *carry==1
- mov eax,[edi]
- adc eax,[esi]
- mov [ebx],eax
-
- mov eax,[edi+4]
- adc eax,[esi+4]
- mov [ebx+4],eax
-
- mov eax,[edi+8]
- adc eax,[esi+8]
- mov [ebx+8],eax
-
- mov eax,[edi+12]
- adc eax,[esi+12]
- mov [ebx+12],eax
-
- mov eax,[edi+16]
- adc eax,[esi+16]
- mov [ebx+16],eax
-
- mov eax,[edi+20]
- adc eax,[esi+20]
- mov [ebx+20],eax
-
- mov eax,[edi+24]
- adc eax,[esi+24]
- mov [ebx+24],eax
-
- mov eax,[edi+28]
- adc eax,[esi+28]
- mov [ebx+28],eax
-
- sbb eax,eax
- neg eax
- }
- }
-
-/*
-* Word Subtraction
-*/
-inline word word_sub(word x, word y, word* carry)
- {
- word t0 = x - y;
- word c1 = (t0 > x);
- word z = t0 - *carry;
- *carry = c1 | (z > t0);
- return z;
- }
-
-/*
-* Eight Word Block Subtraction, Two Argument
-*/
-inline word word8_sub2(word x[8], const word y[8], word carry)
- {
- __asm {
- mov edi,[x]
- mov esi,[y]
- xor eax,eax
- sub eax,[carry] //force CF=1 iff *carry==1
- mov eax,[edi]
- sbb eax,[esi]
- mov [edi],eax
- mov eax,[edi+4]
- sbb eax,[esi+4]
- mov [edi+4],eax
- mov eax,[edi+8]
- sbb eax,[esi+8]
- mov [edi+8],eax
- mov eax,[edi+12]
- sbb eax,[esi+12]
- mov [edi+12],eax
- mov eax,[edi+16]
- sbb eax,[esi+16]
- mov [edi+16],eax
- mov eax,[edi+20]
- sbb eax,[esi+20]
- mov [edi+20],eax
- mov eax,[edi+24]
- sbb eax,[esi+24]
- mov [edi+24],eax
- mov eax,[edi+28]
- sbb eax,[esi+28]
- mov [edi+28],eax
- sbb eax,eax
- neg eax
- }
- }
-
-/*
-* Eight Word Block Subtraction, Two Argument
-*/
-inline word word8_sub2_rev(word x[8], const word y[8], word carry)
- {
- x[0] = word_sub(y[0], x[0], &carry);
- x[1] = word_sub(y[1], x[1], &carry);
- x[2] = word_sub(y[2], x[2], &carry);
- x[3] = word_sub(y[3], x[3], &carry);
- x[4] = word_sub(y[4], x[4], &carry);
- x[5] = word_sub(y[5], x[5], &carry);
- x[6] = word_sub(y[6], x[6], &carry);
- x[7] = word_sub(y[7], x[7], &carry);
- return carry;
- }
-
-
-/*
-* Eight Word Block Subtraction, Three Argument
-*/
-inline word word8_sub3(word z[8], const word x[8],
- const word y[8], word carry)
- {
- __asm {
- mov edi,[x]
- mov esi,[y]
- xor eax,eax
- sub eax,[carry] //force CF=1 iff *carry==1
- mov ebx,[z]
- mov eax,[edi]
- sbb eax,[esi]
- mov [ebx],eax
- mov eax,[edi+4]
- sbb eax,[esi+4]
- mov [ebx+4],eax
- mov eax,[edi+8]
- sbb eax,[esi+8]
- mov [ebx+8],eax
- mov eax,[edi+12]
- sbb eax,[esi+12]
- mov [ebx+12],eax
- mov eax,[edi+16]
- sbb eax,[esi+16]
- mov [ebx+16],eax
- mov eax,[edi+20]
- sbb eax,[esi+20]
- mov [ebx+20],eax
- mov eax,[edi+24]
- sbb eax,[esi+24]
- mov [ebx+24],eax
- mov eax,[edi+28]
- sbb eax,[esi+28]
- mov [ebx+28],eax
- sbb eax,eax
- neg eax
- }
- }
-
-/*
-* Eight Word Block Linear Multiplication
-*/
-inline word word8_linmul2(word x[8], word y, word carry)
- {
- __asm {
- mov esi,[x]
- mov eax,[esi] //load a
- mul [y] //edx(hi):eax(lo)=a*b
- add eax,[carry] //sum lo carry
- adc edx,0 //sum hi carry
- mov ecx,edx //store carry
- mov [esi],eax //load a
-
- mov eax,[esi+4] //load a
- mul [y] //edx(hi):eax(lo)=a*b
- add eax,ecx //sum lo carry
- adc edx,0 //sum hi carry
- mov ecx,edx //store carry
- mov [esi+4],eax //load a
-
- mov eax,[esi+8] //load a
- mul [y] //edx(hi):eax(lo)=a*b
- add eax,ecx //sum lo carry
- adc edx,0 //sum hi carry
- mov ecx,edx //store carry
- mov [esi+8],eax //load a
-
- mov eax,[esi+12] //load a
- mul [y] //edx(hi):eax(lo)=a*b
- add eax,ecx //sum lo carry
- adc edx,0 //sum hi carry
- mov ecx,edx //store carry
- mov [esi+12],eax //load a
-
- mov eax,[esi+16] //load a
- mul [y] //edx(hi):eax(lo)=a*b
- add eax,ecx //sum lo carry
- adc edx,0 //sum hi carry
- mov ecx,edx //store carry
- mov [esi+16],eax //load a
-
- mov eax,[esi+20] //load a
- mul [y] //edx(hi):eax(lo)=a*b
- add eax,ecx //sum lo carry
- adc edx,0 //sum hi carry
- mov ecx,edx //store carry
- mov [esi+20],eax //load a
-
- mov eax,[esi+24] //load a
- mul [y] //edx(hi):eax(lo)=a*b
- add eax,ecx //sum lo carry
- adc edx,0 //sum hi carry
- mov ecx,edx //store carry
- mov [esi+24],eax //load a
-
- mov eax,[esi+28] //load a
- mul [y] //edx(hi):eax(lo)=a*b
- add eax,ecx //sum lo carry
- adc edx,0 //sum hi carry
- mov [esi+28],eax //load a
-
- mov eax,edx //store carry
- }
- }
-
-inline word word8_linmul3(word z[4], const word x[4], word y, word carry)
- {
- __asm {
-#if 0
- //it's slower!!!
- mov edx,[z]
- mov eax,[x]
- movd mm7,[y]
-
- movd mm0,[eax]
- movd mm1,[eax+4]
- movd mm2,[eax+8]
- pmuludq mm0,mm7
- pmuludq mm1,mm7
- pmuludq mm2,mm7
-
- movd mm6,[carry]
- paddq mm0,mm6
- movd [edx],mm0
-
- psrlq mm0,32
- paddq mm1,mm0
- movd [edx+4],mm1
-
- movd mm3,[eax+12]
- psrlq mm1,32
- paddq mm2,mm1
- movd [edx+8],mm2
-
- pmuludq mm3,mm7
- movd mm4,[eax+16]
- psrlq mm2,32
- paddq mm3,mm2
- movd [edx+12],mm3
-
- pmuludq mm4,mm7
- movd mm5,[eax+20]
- psrlq mm3,32
- paddq mm4,mm3
- movd [edx+16],mm4
-
- pmuludq mm5,mm7
- movd mm0,[eax+24]
- psrlq mm4,32
- paddq mm5,mm4
- movd [edx+20],mm5
-
- pmuludq mm0,mm7
- movd mm1,[eax+28]
- psrlq mm5,32
- paddq mm0,mm5
- movd [edx+24],mm0
-
- pmuludq mm1,mm7
- psrlq mm0,32
- paddq mm1,mm0
- movd [edx+28],mm1
- psrlq mm1,32
-
- movd eax,mm1
- emms
-#else
- mov edi,[z]
- mov esi,[x]
- mov eax,[esi] //load a
- mul [y] //edx(hi):eax(lo)=a*b
- add eax,[carry] //sum lo carry
- adc edx,0 //sum hi carry
- mov ecx,edx //store carry
- mov [edi],eax //load a
-
- mov eax,[esi+4] //load a
- mul [y] //edx(hi):eax(lo)=a*b
- add eax,ecx //sum lo carry
- adc edx,0 //sum hi carry
- mov ecx,edx //store carry
- mov [edi+4],eax //load a
-
- mov eax,[esi+8] //load a
- mul [y] //edx(hi):eax(lo)=a*b
- add eax,ecx //sum lo carry
- adc edx,0 //sum hi carry
- mov ecx,edx //store carry
- mov [edi+8],eax //load a
-
- mov eax,[esi+12] //load a
- mul [y] //edx(hi):eax(lo)=a*b
- add eax,ecx //sum lo carry
- adc edx,0 //sum hi carry
- mov ecx,edx //store carry
- mov [edi+12],eax //load a
-
- mov eax,[esi+16] //load a
- mul [y] //edx(hi):eax(lo)=a*b
- add eax,ecx //sum lo carry
- adc edx,0 //sum hi carry
- mov ecx,edx //store carry
- mov [edi+16],eax //load a
-
- mov eax,[esi+20] //load a
- mul [y] //edx(hi):eax(lo)=a*b
- add eax,ecx //sum lo carry
- adc edx,0 //sum hi carry
- mov ecx,edx //store carry
- mov [edi+20],eax //load a
-
- mov eax,[esi+24] //load a
- mul [y] //edx(hi):eax(lo)=a*b
- add eax,ecx //sum lo carry
- adc edx,0 //sum hi carry
- mov ecx,edx //store carry
- mov [edi+24],eax //load a
-
- mov eax,[esi+28] //load a
- mul [y] //edx(hi):eax(lo)=a*b
- add eax,ecx //sum lo carry
- adc edx,0 //sum hi carry
- mov [edi+28],eax //load a
- mov eax,edx //store carry
-#endif
- }
- }
-
-/*
-* Eight Word Block Multiply/Add
-*/
-inline word word8_madd3(word z[8], const word x[8], word y, word carry)
- {
- z[0] = word_madd3(x[0], y, z[0], &carry);
- z[1] = word_madd3(x[1], y, z[1], &carry);
- z[2] = word_madd3(x[2], y, z[2], &carry);
- z[3] = word_madd3(x[3], y, z[3], &carry);
- z[4] = word_madd3(x[4], y, z[4], &carry);
- z[5] = word_madd3(x[5], y, z[5], &carry);
- z[6] = word_madd3(x[6], y, z[6], &carry);
- z[7] = word_madd3(x[7], y, z[7], &carry);
- return carry;
- }
-
-/*
-* Multiply-Add Accumulator
-*/
-inline void word3_muladd(word* w2, word* w1, word* w0, word a, word b)
- {
- word carry = *w0;
- *w0 = word_madd2(a, b, &carry);
- *w1 += carry;
- *w2 += (*w1 < carry) ? 1 : 0;
- }
-
-/*
-* Multiply-Add Accumulator
-*/
-inline void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b)
- {
- word carry = 0;
- a = word_madd2(a, b, &carry);
- b = carry;
-
- word top = (b >> (BOTAN_MP_WORD_BITS-1));
- b <<= 1;
- b |= (a >> (BOTAN_MP_WORD_BITS-1));
- a <<= 1;
-
- carry = 0;
- *w0 = word_add(*w0, a, &carry);
- *w1 = word_add(*w1, b, &carry);
- *w2 = word_add(*w2, top, &carry);
- }
-
-}
-
-#endif
diff --git a/src/lib/math/mp/mp_x86_64/info.txt b/src/lib/math/mp/mp_x86_64/info.txt
deleted file mode 100644
index 75c42ddc1..000000000
--- a/src/lib/math/mp/mp_x86_64/info.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-load_on dep
-
-mp_bits 64
-
-<header:internal>
-mp_madd.h
-mp_asmi.h
-</header:internal>
-
-<arch>
-x86_64
-</arch>
-
-<cc>
-clang
-gcc
-icc
-</cc>
diff --git a/src/lib/math/mp/mp_x86_64/mp_asmi.h b/src/lib/math/mp/mp_x86_64/mp_asmi.h
deleted file mode 100644
index cd5884867..000000000
--- a/src/lib/math/mp/mp_x86_64/mp_asmi.h
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
-* Lowest Level MPI Algorithms
-* (C) 1999-2010 Jack Lloyd
-* 2006 Luca Piccarreta
-*
-* Botan is released under the Simplified BSD License (see license.txt)
-*/
-
-#ifndef BOTAN_MP_ASM_INTERNAL_H__
-#define BOTAN_MP_ASM_INTERNAL_H__
-
-#include <botan/internal/mp_madd.h>
-
-namespace Botan {
-
-/*
-* Helper Macros for x86-64 Assembly
-*/
-#ifndef ASM
- #define ASM(x) x "\n\t"
-#endif
-
-#define ADDSUB2_OP(OPERATION, INDEX) \
- ASM("movq 8*" #INDEX "(%[y]), %[carry]") \
- ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])") \
-
-#define ADDSUB3_OP(OPERATION, INDEX) \
- ASM("movq 8*" #INDEX "(%[x]), %[carry]") \
- ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \
- ASM("movq %[carry], 8*" #INDEX "(%[z])") \
-
-#define LINMUL_OP(WRITE_TO, INDEX) \
- ASM("movq 8*" #INDEX "(%[x]),%%rax") \
- ASM("mulq %[y]") \
- ASM("addq %[carry],%%rax") \
- ASM("adcq $0,%%rdx") \
- ASM("movq %%rdx,%[carry]") \
- ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")
-
-#define MULADD_OP(IGNORED, INDEX) \
- ASM("movq 8*" #INDEX "(%[x]),%%rax") \
- ASM("mulq %[y]") \
- ASM("addq %[carry],%%rax") \
- ASM("adcq $0,%%rdx") \
- ASM("addq 8*" #INDEX "(%[z]),%%rax") \
- ASM("adcq $0,%%rdx") \
- ASM("movq %%rdx,%[carry]") \
- ASM("movq %%rax, 8*" #INDEX " (%[z])")
-
-#define DO_8_TIMES(MACRO, ARG) \
- MACRO(ARG, 0) \
- MACRO(ARG, 1) \
- MACRO(ARG, 2) \
- MACRO(ARG, 3) \
- MACRO(ARG, 4) \
- MACRO(ARG, 5) \
- MACRO(ARG, 6) \
- MACRO(ARG, 7)
-
-#define ADD_OR_SUBTRACT(CORE_CODE) \
- ASM("rorq %[carry]") \
- CORE_CODE \
- ASM("sbbq %[carry],%[carry]") \
- ASM("negq %[carry]")
-
-/*
-* Word Addition
-*/
-inline word word_add(word x, word y, word* carry)
- {
- asm(
- ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]"))
- : [x]"=r"(x), [carry]"=r"(*carry)
- : "0"(x), [y]"rm"(y), "1"(*carry)
- : "cc");
- return x;
- }
-
-/*
-* Eight Word Block Addition, Two Argument
-*/
-inline word word8_add2(word x[8], const word y[8], word carry)
- {
- asm(
- ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq"))
- : [carry]"=r"(carry)
- : [x]"r"(x), [y]"r"(y), "0"(carry)
- : "cc", "memory");
- return carry;
- }
-
-/*
-* Eight Word Block Addition, Three Argument
-*/
-inline word word8_add3(word z[8], const word x[8], const word y[8], word carry)
- {
- asm(
- ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq"))
- : [carry]"=r"(carry)
- : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
- : "cc", "memory");
- return carry;
- }
-
-/*
-* Word Subtraction
-*/
-inline word word_sub(word x, word y, word* carry)
- {
- asm(
- ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]"))
- : [x]"=r"(x), [carry]"=r"(*carry)
- : "0"(x), [y]"rm"(y), "1"(*carry)
- : "cc");
- return x;
- }
-
-/*
-* Eight Word Block Subtraction, Two Argument
-*/
-inline word word8_sub2(word x[8], const word y[8], word carry)
- {
- asm(
- ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq"))
- : [carry]"=r"(carry)
- : [x]"r"(x), [y]"r"(y), "0"(carry)
- : "cc", "memory");
- return carry;
- }
-
-/*
-* Eight Word Block Subtraction, Two Argument
-*/
-inline word word8_sub2_rev(word x[8], const word y[8], word carry)
- {
- asm(
- ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
- : [carry]"=r"(carry)
- : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
- : "cc", "memory");
- return carry;
- }
-
-/*
-* Eight Word Block Subtraction, Three Argument
-*/
-inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry)
- {
- asm(
- ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
- : [carry]"=r"(carry)
- : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
- : "cc", "memory");
- return carry;
- }
-
-/*
-* Eight Word Block Linear Multiplication
-*/
-inline word word8_linmul2(word x[8], word y, word carry)
- {
- asm(
- DO_8_TIMES(LINMUL_OP, "x")
- : [carry]"=r"(carry)
- : [x]"r"(x), [y]"rm"(y), "0"(carry)
- : "cc", "%rax", "%rdx");
- return carry;
- }
-
-/*
-* Eight Word Block Linear Multiplication
-*/
-inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
- {
- asm(
- DO_8_TIMES(LINMUL_OP, "z")
- : [carry]"=r"(carry)
- : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
- : "cc", "%rax", "%rdx");
- return carry;
- }
-
-/*
-* Eight Word Block Multiply/Add
-*/
-inline word word8_madd3(word z[8], const word x[8], word y, word carry)
- {
- asm(
- DO_8_TIMES(MULADD_OP, "")
- : [carry]"=r"(carry)
- : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
- : "cc", "%rax", "%rdx");
- return carry;
- }
-
-/*
-* Multiply-Add Accumulator
-*/
-inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y)
- {
- asm(
- ASM("mulq %[y]")
-
- ASM("addq %[x],%[w0]")
- ASM("adcq %[y],%[w1]")
- ASM("adcq $0,%[w2]")
-
- : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
- : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2)
- : "cc");
- }
-
-/*
-* Multiply-Add Accumulator
-*/
-inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y)
- {
- asm(
- ASM("mulq %[y]")
-
- ASM("addq %[x],%[w0]")
- ASM("adcq %[y],%[w1]")
- ASM("adcq $0,%[w2]")
-
- ASM("addq %[x],%[w0]")
- ASM("adcq %[y],%[w1]")
- ASM("adcq $0,%[w2]")
-
- : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
- : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2)
- : "cc");
- }
-
-#undef ASM
-#undef DO_8_TIMES
-#undef ADD_OR_SUBTRACT
-#undef ADDSUB2_OP
-#undef ADDSUB3_OP
-#undef LINMUL_OP
-#undef MULADD_OP
-
-}
-
-#endif
diff --git a/src/lib/math/mp/mp_x86_64/mp_madd.h b/src/lib/math/mp/mp_x86_64/mp_madd.h
deleted file mode 100644
index 6f9185dc0..000000000
--- a/src/lib/math/mp/mp_x86_64/mp_madd.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
-* Lowest Level MPI Algorithms
-* (C) 1999-2008 Jack Lloyd
-* 2006 Luca Piccarreta
-*
-* Botan is released under the Simplified BSD License (see license.txt)
-*/
-
-#ifndef BOTAN_MP_WORD_MULADD_H__
-#define BOTAN_MP_WORD_MULADD_H__
-
-#include <botan/mp_types.h>
-
-#if (BOTAN_MP_WORD_BITS != 64)
- #error The mp_x86_64 module requires that BOTAN_MP_WORD_BITS == 64
-#endif
-
-namespace Botan {
-
-/*
-* Helper Macros for x86-64 Assembly
-*/
-#define ASM(x) x "\n\t"
-
-/*
-* Word Multiply
-*/
-inline word word_madd2(word a, word b, word* c)
- {
- asm(
- ASM("mulq %[b]")
- ASM("addq %[c],%[a]")
- ASM("adcq $0,%[carry]")
-
- : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c)
- : "0"(a), "1"(b), [c]"g"(*c) : "cc");
-
- return a;
- }
-
-/*
-* Word Multiply/Add
-*/
-inline word word_madd3(word a, word b, word c, word* d)
- {
- asm(
- ASM("mulq %[b]")
-
- ASM("addq %[c],%[a]")
- ASM("adcq $0,%[carry]")
-
- ASM("addq %[d],%[a]")
- ASM("adcq $0,%[carry]")
-
- : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d)
- : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc");
-
- return a;
- }
-
-#undef ASM
-
-}
-
-#endif