diff options
author | Jack Lloyd <[email protected]> | 2016-07-20 20:29:22 -0400 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2016-07-21 14:46:19 -0400 |
commit | 6907e196cbc8fd4a80ebf2443f2a918bb4a19706 (patch) | |
tree | 2fb7ed81df1a1f899ea0136e6e872961b8468e46 /src/lib | |
parent | b4da573fc1c241a6badc83f4ec166cbdd5c70c5c (diff) |
Merge asm into single mp_madd.h and mp_asmi.h files
Avoids some cut and paste, also removes the need for special logic in
configure.py for handling mp module specially.
Merge SIMD classes into a single type SIMD_4x32
Diffstat (limited to 'src/lib')
25 files changed, 1546 insertions, 2183 deletions
diff --git a/src/lib/math/mp/info.txt b/src/lib/math/mp/info.txt index 6aa0142f3..b5db12648 100644 --- a/src/lib/math/mp/info.txt +++ b/src/lib/math/mp/info.txt @@ -1,12 +1,10 @@ define BIGINT_MP 20151225 <source> -mp_asm.cpp +mp_core.cpp mp_comba.cpp mp_karat.cpp mp_monty.cpp -mp_misc.cpp -mp_shift.cpp </source> <header:public> @@ -15,8 +13,6 @@ mp_types.h <header:internal> mp_core.h +mp_madd.h +mp_asmi.h </header:internal> - -<requires> -mp_x86_64|mp_x86_32|mp_x86_32_msvc|mp_generic -</requires> diff --git a/src/lib/math/mp/mp_asmi.h b/src/lib/math/mp/mp_asmi.h new file mode 100644 index 000000000..afb4d1407 --- /dev/null +++ b/src/lib/math/mp/mp_asmi.h @@ -0,0 +1,820 @@ +/* +* Lowest Level MPI Algorithms +* (C) 1999-2010 Jack Lloyd +* 2006 Luca Piccarreta +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_MP_ASM_INTERNAL_H__ +#define BOTAN_MP_ASM_INTERNAL_H__ + +#include <botan/internal/mp_madd.h> + +namespace Botan { + +#if defined(BOTAN_MP_USE_X86_32_ASM) + +#define ADDSUB2_OP(OPERATION, INDEX) \ + ASM("movl 4*" #INDEX "(%[y]), %[carry]") \ + ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])") \ + +#define ADDSUB3_OP(OPERATION, INDEX) \ + ASM("movl 4*" #INDEX "(%[x]), %[carry]") \ + ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]") \ + ASM("movl %[carry], 4*" #INDEX "(%[z])") \ + +#define LINMUL_OP(WRITE_TO, INDEX) \ + ASM("movl 4*" #INDEX "(%[x]),%%eax") \ + ASM("mull %[y]") \ + ASM("addl %[carry],%%eax") \ + ASM("adcl $0,%%edx") \ + ASM("movl %%edx,%[carry]") \ + ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])") + +#define MULADD_OP(IGNORED, INDEX) \ + ASM("movl 4*" #INDEX "(%[x]),%%eax") \ + ASM("mull %[y]") \ + ASM("addl %[carry],%%eax") \ + ASM("adcl $0,%%edx") \ + ASM("addl 4*" #INDEX "(%[z]),%%eax") \ + ASM("adcl $0,%%edx") \ + ASM("movl %%edx,%[carry]") \ + ASM("movl %%eax, 4*" #INDEX " (%[z])") + +#define ADD_OR_SUBTRACT(CORE_CODE) \ + ASM("rorl %[carry]") \ + CORE_CODE \ + ASM("sbbl %[carry],%[carry]") \ + ASM("negl %[carry]") + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + +#define ADDSUB2_OP(OPERATION, INDEX) \ + ASM("movq 8*" #INDEX "(%[y]), %[carry]") \ + ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])") \ + +#define ADDSUB3_OP(OPERATION, INDEX) \ + ASM("movq 8*" #INDEX "(%[x]), %[carry]") \ + ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \ + ASM("movq %[carry], 8*" #INDEX "(%[z])") \ + +#define LINMUL_OP(WRITE_TO, INDEX) \ + ASM("movq 8*" #INDEX "(%[x]),%%rax") \ + ASM("mulq %[y]") \ + ASM("addq %[carry],%%rax") \ + ASM("adcq $0,%%rdx") \ + ASM("movq %%rdx,%[carry]") \ + ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])") + +#define MULADD_OP(IGNORED, INDEX) \ + ASM("movq 8*" #INDEX "(%[x]),%%rax") \ + ASM("mulq %[y]") \ + ASM("addq %[carry],%%rax") \ + ASM("adcq $0,%%rdx") \ + ASM("addq 8*" #INDEX "(%[z]),%%rax") \ + ASM("adcq $0,%%rdx") \ + ASM("movq %%rdx,%[carry]") \ + ASM("movq %%rax, 8*" #INDEX " (%[z])") + +#define ADD_OR_SUBTRACT(CORE_CODE) \ + ASM("rorq %[carry]") \ + CORE_CODE \ + ASM("sbbq %[carry],%[carry]") \ + ASM("negq %[carry]") + +#endif + +#if defined(ADD_OR_SUBTRACT) + +#define ASM(x) x "\n\t" + +#define DO_8_TIMES(MACRO, ARG) \ + MACRO(ARG, 0) \ + MACRO(ARG, 1) \ + MACRO(ARG, 2) \ + MACRO(ARG, 3) \ + MACRO(ARG, 4) \ + MACRO(ARG, 5) \ + MACRO(ARG, 6) \ + MACRO(ARG, 7) + +#endif + +/* +* Word Addition +*/ +inline word word_add(word x, word y, word* carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + +#else + word z = x + y; + word c1 = (z < x); + z += *carry; + *carry = c1 | (z < *carry); + return z; +#endif + } + +/* +* Eight Word Block Addition, Two Argument +*/ +inline word word8_add2(word x[8], const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM) + + __asm { + mov edx,[x] + mov esi,[y] + xor eax,eax + sub eax,[carry] //force CF=1 iff *carry==1 + mov eax,[esi] + adc [edx],eax + mov eax,[esi+4] + adc [edx+4],eax + mov eax,[esi+8] + adc [edx+8],eax + mov eax,[esi+12] + adc [edx+12],eax + mov eax,[esi+16] + adc [edx+16],eax + mov eax,[esi+20] + adc [edx+20],eax + mov eax,[esi+24] + adc [edx+24],eax + mov eax,[esi+28] + adc [edx+28],eax + sbb eax,eax + neg eax + } + +#else + x[0] = word_add(x[0], y[0], &carry); + x[1] = word_add(x[1], y[1], &carry); + x[2] = word_add(x[2], y[2], &carry); + x[3] = word_add(x[3], y[3], &carry); + x[4] = word_add(x[4], y[4], &carry); + x[5] = word_add(x[5], y[5], &carry); + x[6] = word_add(x[6], y[6], &carry); + x[7] = word_add(x[7], y[7], &carry); + return carry; +#endif + } + +/* +* Eight Word Block Addition, Three Argument +*/ +inline word word8_add3(word z[8], const word x[8], + const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM) + + __asm { + mov edi,[x] + mov esi,[y] + mov ebx,[z] + xor eax,eax + sub eax,[carry] //force CF=1 iff *carry==1 + mov eax,[edi] + adc eax,[esi] + mov [ebx],eax + + mov eax,[edi+4] + adc eax,[esi+4] + mov [ebx+4],eax + + mov eax,[edi+8] + adc eax,[esi+8] + mov [ebx+8],eax + + mov eax,[edi+12] + adc eax,[esi+12] + mov [ebx+12],eax + + mov eax,[edi+16] + adc eax,[esi+16] + mov [ebx+16],eax + + mov eax,[edi+20] + adc eax,[esi+20] + mov [ebx+20],eax + + mov eax,[edi+24] + adc eax,[esi+24] + mov [ebx+24],eax + + mov eax,[edi+28] + adc eax,[esi+28] + mov [ebx+28],eax + + sbb eax,eax + neg eax + } + +#else + z[0] = word_add(x[0], y[0], &carry); + z[1] = word_add(x[1], y[1], &carry); + z[2] = word_add(x[2], y[2], &carry); + z[3] = word_add(x[3], y[3], &carry); + z[4] = word_add(x[4], y[4], &carry); + z[5] = word_add(x[5], y[5], &carry); + z[6] = word_add(x[6], y[6], &carry); + z[7] = word_add(x[7], y[7], &carry); + return carry; +#endif + } + +/* +* Word Subtraction +*/ +inline word word_sub(word x, word y, word* carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + +#else + word t0 = x - y; + word c1 = (t0 > x); + word z = t0 - *carry; + *carry = c1 | (z > t0); + return z; +#endif + } + +/* +* Eight Word Block Subtraction, Two Argument +*/ +inline word word8_sub2(word x[8], const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM) + + __asm { + mov edi,[x] + mov esi,[y] + xor eax,eax + sub eax,[carry] //force CF=1 iff *carry==1 + mov eax,[edi] + sbb eax,[esi] + mov [edi],eax + mov eax,[edi+4] + sbb eax,[esi+4] + mov [edi+4],eax + mov eax,[edi+8] + sbb eax,[esi+8] + mov [edi+8],eax + mov eax,[edi+12] + sbb eax,[esi+12] + mov [edi+12],eax + mov eax,[edi+16] + sbb eax,[esi+16] + mov [edi+16],eax + mov eax,[edi+20] + sbb eax,[esi+20] + mov [edi+20],eax + mov eax,[edi+24] + sbb eax,[esi+24] + mov [edi+24],eax + mov eax,[edi+28] + sbb eax,[esi+28] + mov [edi+28],eax + sbb eax,eax + neg eax + } + +#else + x[0] = word_sub(x[0], y[0], &carry); + x[1] = word_sub(x[1], y[1], &carry); + x[2] = word_sub(x[2], y[2], &carry); + x[3] = word_sub(x[3], y[3], &carry); + x[4] = word_sub(x[4], y[4], &carry); + x[5] = word_sub(x[5], y[5], &carry); + x[6] = word_sub(x[6], y[6], &carry); + x[7] = word_sub(x[7], y[7], &carry); + return carry; +#endif + } + +/* +* Eight Word Block Subtraction, Two Argument +*/ +inline word word8_sub2_rev(word x[8], const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) + : [carry]"=r"(carry) + : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) + : [carry]"=r"(carry) + : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) + : "cc", "memory"); + return carry; + +#else + x[0] = word_sub(y[0], x[0], &carry); + x[1] = word_sub(y[1], x[1], &carry); + x[2] = word_sub(y[2], x[2], &carry); + x[3] = word_sub(y[3], x[3], &carry); + x[4] = word_sub(y[4], x[4], &carry); + x[5] = word_sub(y[5], x[5], &carry); + x[6] = word_sub(y[6], x[6], &carry); + x[7] = word_sub(y[7], x[7], &carry); + return carry; +#endif + } + +/* +* Eight Word Block Subtraction, Three Argument +*/ +inline word word8_sub3(word z[8], const word x[8], + const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM) + + __asm { + mov edi,[x] + mov esi,[y] + xor eax,eax + sub eax,[carry] //force CF=1 iff *carry==1 + mov ebx,[z] + mov eax,[edi] + sbb eax,[esi] + mov [ebx],eax + mov eax,[edi+4] + sbb eax,[esi+4] + mov [ebx+4],eax + mov eax,[edi+8] + sbb eax,[esi+8] + mov [ebx+8],eax + mov eax,[edi+12] + sbb eax,[esi+12] + mov [ebx+12],eax + mov eax,[edi+16] + sbb eax,[esi+16] + mov [ebx+16],eax + mov eax,[edi+20] + sbb eax,[esi+20] + mov [ebx+20],eax + mov eax,[edi+24] + sbb eax,[esi+24] + mov [ebx+24],eax + mov eax,[edi+28] + sbb eax,[esi+28] + mov [ebx+28],eax + sbb eax,eax + neg eax + } + +#else + z[0] = word_sub(x[0], y[0], &carry); + z[1] = word_sub(x[1], y[1], &carry); + z[2] = word_sub(x[2], y[2], &carry); + z[3] = word_sub(x[3], y[3], &carry); + z[4] = word_sub(x[4], y[4], &carry); + z[5] = word_sub(x[5], y[5], &carry); + z[6] = word_sub(x[6], y[6], &carry); + z[7] = word_sub(x[7], y[7], &carry); + return carry; +#endif + } + +/* +* Eight Word Block Linear Multiplication +*/ +inline word word8_linmul2(word x[8], word y, word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + DO_8_TIMES(LINMUL_OP, "x") + : [carry]"=r"(carry) + : [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%eax", "%edx"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + DO_8_TIMES(LINMUL_OP, "x") + : [carry]"=r"(carry) + : [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%rax", "%rdx"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM) + + __asm { + mov esi,[x] + mov eax,[esi] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,[carry] //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi],eax //load a + + mov eax,[esi+4] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi+4],eax //load a + + mov eax,[esi+8] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi+8],eax //load a + + mov eax,[esi+12] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi+12],eax //load a + + mov eax,[esi+16] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi+16],eax //load a + + mov eax,[esi+20] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi+20],eax //load a + + mov eax,[esi+24] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi+24],eax //load a + + mov eax,[esi+28] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov [esi+28],eax //load a + + mov eax,edx //store carry + } + +#else + x[0] = word_madd2(x[0], y, &carry); + x[1] = word_madd2(x[1], y, &carry); + x[2] = word_madd2(x[2], y, &carry); + x[3] = word_madd2(x[3], y, &carry); + x[4] = word_madd2(x[4], y, &carry); + x[5] = word_madd2(x[5], y, &carry); + x[6] = word_madd2(x[6], y, &carry); + x[7] = word_madd2(x[7], y, &carry); + return carry; +#endif + } + +/* +* Eight Word Block Linear Multiplication +*/ +inline word word8_linmul3(word z[8], const word x[8], word y, word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + DO_8_TIMES(LINMUL_OP, "z") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%eax", "%edx"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + asm( + DO_8_TIMES(LINMUL_OP, "z") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%rax", "%rdx"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM) + + __asm { + mov edi,[z] + mov esi,[x] + mov eax,[esi] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,[carry] //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi],eax //load a + + mov eax,[esi+4] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi+4],eax //load a + + mov eax,[esi+8] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi+8],eax //load a + + mov eax,[esi+12] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi+12],eax //load a + + mov eax,[esi+16] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi+16],eax //load a + + mov eax,[esi+20] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi+20],eax //load a + + mov eax,[esi+24] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi+24],eax //load a + + mov eax,[esi+28] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov [edi+28],eax //load a + mov eax,edx //store carry + } + +#else + z[0] = word_madd2(x[0], y, &carry); + z[1] = word_madd2(x[1], y, &carry); + z[2] = word_madd2(x[2], y, &carry); + z[3] = word_madd2(x[3], y, &carry); + z[4] = word_madd2(x[4], y, &carry); + z[5] = word_madd2(x[5], y, &carry); + z[6] = word_madd2(x[6], y, &carry); + z[7] = word_madd2(x[7], y, &carry); + return carry; +#endif + } + +/* +* Eight Word Block Multiply/Add +*/ +inline word word8_madd3(word z[8], const word x[8], word y, word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + DO_8_TIMES(MULADD_OP, "") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%eax", "%edx"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + DO_8_TIMES(MULADD_OP, "") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%rax", "%rdx"); + return carry; + +#else + z[0] = word_madd3(x[0], y, z[0], &carry); + z[1] = word_madd3(x[1], y, z[1], &carry); + z[2] = word_madd3(x[2], y, z[2], &carry); + z[3] = word_madd3(x[3], y, z[3], &carry); + z[4] = word_madd3(x[4], y, z[4], &carry); + z[5] = word_madd3(x[5], y, z[5], &carry); + z[6] = word_madd3(x[6], y, z[6], &carry); + z[7] = word_madd3(x[7], y, z[7], &carry); + return carry; +#endif + } + +/* +* Multiply-Add Accumulator +* (w2,w1,w0) += x * y +*/ +inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ASM("mull %[y]") + + ASM("addl %[x],%[w0]") + ASM("adcl %[y],%[w1]") + ASM("adcl $0,%[w2]") + + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ASM("mulq %[y]") + + ASM("addq %[x],%[w0]") + ASM("adcq %[y],%[w1]") + ASM("adcq $0,%[w2]") + + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#else + word carry = *w0; + *w0 = word_madd2(x, y, &carry); + *w1 += carry; + *w2 += (*w1 < carry) ? 1 : 0; +#endif + } + +/* +* Multiply-Add Accumulator +* (w2,w1,w0) += 2 * x * y +*/ +inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ASM("mull %[y]") + + ASM("addl %[x],%[w0]") + ASM("adcl %[y],%[w1]") + ASM("adcl $0,%[w2]") + + ASM("addl %[x],%[w0]") + ASM("adcl %[y],%[w1]") + ASM("adcl $0,%[w2]") + + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ASM("mulq %[y]") + + ASM("addq %[x],%[w0]") + ASM("adcq %[y],%[w1]") + ASM("adcq $0,%[w2]") + + ASM("addq %[x],%[w0]") + ASM("adcq %[y],%[w1]") + ASM("adcq $0,%[w2]") + + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#else + word carry = 0; + x = word_madd2(x, y, &carry); + y = carry; + + word top = (y >> (BOTAN_MP_WORD_BITS-1)); + y <<= 1; + y |= (x >> (BOTAN_MP_WORD_BITS-1)); + x <<= 1; + + carry = 0; + *w0 = word_add(*w0, x, &carry); + *w1 = word_add(*w1, y, &carry); + *w2 = word_add(*w2, top, &carry); +#endif + } + +#if defined(ASM) + #undef ASM + #undef DO_8_TIMES + #undef ADD_OR_SUBTRACT + #undef ADDSUB2_OP + #undef ADDSUB3_OP + #undef LINMUL_OP + #undef MULADD_OP +#endif + +} + +#endif diff --git a/src/lib/math/mp/mp_asm.cpp b/src/lib/math/mp/mp_core.cpp index cfbb027d7..2a0b08f67 100644 --- a/src/lib/math/mp/mp_asm.cpp +++ b/src/lib/math/mp/mp_core.cpp @@ -8,7 +8,6 @@ #include <botan/internal/mp_core.h> #include <botan/internal/mp_asmi.h> -#include <botan/internal/mp_core.h> #include <botan/internal/ct_utils.h> #include <botan/exceptn.h> #include <botan/mem_ops.h> @@ -253,4 +252,189 @@ void bigint_linmul3(word z[], const word x[], size_t x_size, word y) z[x_size] = carry; } +/* +* Single Operand Left Shift +*/ +void bigint_shl1(word x[], size_t x_size, size_t word_shift, size_t bit_shift) + { + if(word_shift) + { + copy_mem(x + word_shift, x, x_size); + clear_mem(x, word_shift); + } + + if(bit_shift) + { + word carry = 0; + for(size_t j = word_shift; j != x_size + word_shift + 1; ++j) + { + word temp = x[j]; + x[j] = (temp << bit_shift) | carry; + carry = (temp >> (MP_WORD_BITS - bit_shift)); + } + } + } + +/* +* Single Operand Right Shift +*/ +void bigint_shr1(word x[], size_t x_size, size_t word_shift, size_t bit_shift) + { + if(x_size < word_shift) + { + clear_mem(x, x_size); + return; + } + + if(word_shift) + { + copy_mem(x, x + word_shift, x_size - word_shift); + clear_mem(x + x_size - word_shift, word_shift); + } + + if(bit_shift) + { + word carry = 0; + + size_t top = x_size - word_shift; + + while(top >= 4) + { + word w = x[top-1]; + x[top-1] = (w >> bit_shift) | carry; + carry = (w << (MP_WORD_BITS - bit_shift)); + + w = x[top-2]; + x[top-2] = (w >> bit_shift) | carry; + carry = (w << (MP_WORD_BITS - bit_shift)); + + w = x[top-3]; + x[top-3] = (w >> bit_shift) | carry; + carry = (w << (MP_WORD_BITS - bit_shift)); + + w = x[top-4]; + x[top-4] = (w >> bit_shift) | carry; + carry = (w << (MP_WORD_BITS - bit_shift)); + + top -= 4; + } + + while(top) + { + word w = x[top-1]; + x[top-1] = (w >> bit_shift) | carry; + carry = (w << (MP_WORD_BITS - bit_shift)); + + top--; + } + } + } + +/* +* Two Operand Left Shift +*/ +void bigint_shl2(word y[], const word x[], size_t x_size, + size_t word_shift, size_t bit_shift) + { + for(size_t j = 0; j != x_size; ++j) + y[j + word_shift] = x[j]; + if(bit_shift) + { + word carry = 0; + for(size_t j = word_shift; j != x_size + word_shift + 1; ++j) + { + word w = y[j]; + y[j] = (w << bit_shift) | carry; + carry = (w >> (MP_WORD_BITS - bit_shift)); + } + } + } + +/* +* Two Operand Right Shift +*/ +void bigint_shr2(word y[], const word x[], size_t x_size, + size_t word_shift, size_t bit_shift) + { + if(x_size < word_shift) return; + + for(size_t j = 0; j != x_size - word_shift; ++j) + y[j] = x[j + word_shift]; + if(bit_shift) + { + word carry = 0; + for(size_t j = x_size - word_shift; j > 0; --j) + { + word w = y[j-1]; + y[j-1] = (w >> bit_shift) | carry; + carry = (w << (MP_WORD_BITS - bit_shift)); + } + } + } + +/* +* Compare two MP integers +*/ +s32bit bigint_cmp(const word x[], size_t x_size, + const word y[], size_t y_size) + { + if(x_size < y_size) { return (-bigint_cmp(y, y_size, x, x_size)); } + + while(x_size > y_size) + { + if(x[x_size-1]) + return 1; + x_size--; + } + + for(size_t i = x_size; i > 0; --i) + { + if(x[i-1] > y[i-1]) + return 1; + if(x[i-1] < y[i-1]) + return -1; + } + + return 0; + } + +/* +* Do a 2-word/1-word Division +*/ +word bigint_divop(word n1, word n0, word d) + { + if(d == 0) + throw Invalid_Argument("bigint_divop divide by zero"); + + word high = n1 % d, quotient = 0; + + for(size_t i = 0; i != MP_WORD_BITS; ++i) + { + word high_top_bit = (high & MP_WORD_TOP_BIT); + + high <<= 1; + high |= (n0 >> (MP_WORD_BITS-1-i)) & 1; + quotient <<= 1; + + if(high_top_bit || high >= d) + { + high -= d; + quotient |= 1; + } + } + + return quotient; + } + +/* +* Do a 2-word/1-word Modulo +*/ +word bigint_modop(word n1, word n0, word d) + { + word z = bigint_divop(n1, n0, d); + word dummy = 0; + z = word_madd2(z, d, &dummy); + return (n0-z); + } + } diff --git a/src/lib/math/mp/mp_generic/info.txt b/src/lib/math/mp/mp_generic/info.txt deleted file mode 100644 index c87dd00ca..000000000 --- a/src/lib/math/mp/mp_generic/info.txt +++ /dev/null @@ -1,6 +0,0 @@ -load_on dep - -<header:internal> -mp_madd.h -mp_asmi.h -</header:internal> diff --git a/src/lib/math/mp/mp_generic/mp_asmi.h b/src/lib/math/mp/mp_generic/mp_asmi.h deleted file mode 100644 index 708afdfa0..000000000 --- a/src/lib/math/mp/mp_generic/mp_asmi.h +++ /dev/null @@ -1,203 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#ifndef BOTAN_MP_ASM_INTERNAL_H__ -#define BOTAN_MP_ASM_INTERNAL_H__ - -#include <botan/internal/mp_madd.h> - -namespace Botan { - -/* -* Word Addition -*/ -inline word word_add(word x, word y, word* carry) - { - word z = x + y; - word c1 = (z < x); - z += *carry; - *carry = c1 | (z < *carry); - return z; - } - -/* -* Eight Word Block Addition, Two Argument -*/ -inline word word8_add2(word x[8], const word y[8], word carry) - { - x[0] = word_add(x[0], y[0], &carry); - x[1] = word_add(x[1], y[1], &carry); - x[2] = word_add(x[2], y[2], &carry); - x[3] = word_add(x[3], y[3], &carry); - x[4] = word_add(x[4], y[4], &carry); - x[5] = word_add(x[5], y[5], &carry); - x[6] = word_add(x[6], y[6], &carry); - x[7] = word_add(x[7], y[7], &carry); - return carry; - } - -/* -* Eight Word Block Addition, Three Argument -*/ -inline word word8_add3(word z[8], const word x[8], - const word y[8], word carry) - { - z[0] = word_add(x[0], y[0], &carry); - z[1] = word_add(x[1], y[1], &carry); - z[2] = word_add(x[2], y[2], &carry); - z[3] = word_add(x[3], y[3], &carry); - z[4] = word_add(x[4], y[4], &carry); - z[5] = word_add(x[5], y[5], &carry); - z[6] = word_add(x[6], y[6], &carry); - z[7] = word_add(x[7], y[7], &carry); - return carry; - } - -/* -* Word Subtraction -*/ -inline word word_sub(word x, word y, word* carry) - { - word t0 = x - y; - word c1 = (t0 > x); - word z = t0 - *carry; - *carry = c1 | (z > t0); - return z; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2(word x[8], const word y[8], word carry) - { - x[0] = word_sub(x[0], y[0], &carry); - x[1] = word_sub(x[1], y[1], &carry); - x[2] = word_sub(x[2], y[2], &carry); - x[3] = word_sub(x[3], y[3], &carry); - x[4] = word_sub(x[4], y[4], &carry); - x[5] = word_sub(x[5], y[5], &carry); - x[6] = word_sub(x[6], y[6], &carry); - x[7] = word_sub(x[7], y[7], &carry); - return carry; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2_rev(word x[8], const word y[8], word carry) - { - x[0] = word_sub(y[0], x[0], &carry); - x[1] = word_sub(y[1], x[1], &carry); - x[2] = word_sub(y[2], x[2], &carry); - x[3] = word_sub(y[3], x[3], &carry); - x[4] = word_sub(y[4], x[4], &carry); - x[5] = word_sub(y[5], x[5], &carry); - x[6] = word_sub(y[6], x[6], &carry); - x[7] = word_sub(y[7], x[7], &carry); - return carry; - } - -/* -* Eight Word Block Subtraction, Three Argument -*/ -inline word word8_sub3(word z[8], const word x[8], - const word y[8], word carry) - { - z[0] = word_sub(x[0], y[0], &carry); - z[1] = word_sub(x[1], y[1], &carry); - z[2] = word_sub(x[2], y[2], &carry); - z[3] = word_sub(x[3], y[3], &carry); - z[4] = word_sub(x[4], y[4], &carry); - z[5] = word_sub(x[5], y[5], &carry); - z[6] = word_sub(x[6], y[6], &carry); - z[7] = word_sub(x[7], y[7], &carry); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul2(word x[8], word y, word carry) - { - x[0] = word_madd2(x[0], y, &carry); - x[1] = word_madd2(x[1], y, &carry); - x[2] = word_madd2(x[2], y, &carry); - x[3] = word_madd2(x[3], y, &carry); - x[4] = word_madd2(x[4], y, &carry); - x[5] = word_madd2(x[5], y, &carry); - x[6] = word_madd2(x[6], y, &carry); - x[7] = word_madd2(x[7], y, &carry); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul3(word z[8], const word x[8], word y, word carry) - { - z[0] = word_madd2(x[0], y, &carry); - z[1] = word_madd2(x[1], y, &carry); - z[2] = word_madd2(x[2], y, &carry); - z[3] = word_madd2(x[3], y, &carry); - z[4] = word_madd2(x[4], y, &carry); - z[5] = word_madd2(x[5], y, &carry); - z[6] = word_madd2(x[6], y, &carry); - z[7] = word_madd2(x[7], y, &carry); - return carry; - } - -/* -* Eight Word Block Multiply/Add -*/ -inline word word8_madd3(word z[8], const word x[8], word y, word carry) - { - z[0] = word_madd3(x[0], y, z[0], &carry); - z[1] = word_madd3(x[1], y, z[1], &carry); - z[2] = word_madd3(x[2], y, z[2], &carry); - z[3] = word_madd3(x[3], y, z[3], &carry); - z[4] = word_madd3(x[4], y, z[4], &carry); - z[5] = word_madd3(x[5], y, z[5], &carry); - z[6] = word_madd3(x[6], y, z[6], &carry); - z[7] = word_madd3(x[7], y, z[7], &carry); - return carry; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd(word* w2, word* w1, word* w0, word a, word b) - { - word carry = *w0; - *w0 = word_madd2(a, b, &carry); - *w1 += carry; - *w2 += (*w1 < carry) ? 1 : 0; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b) - { - word carry = 0; - a = word_madd2(a, b, &carry); - b = carry; - - word top = (b >> (BOTAN_MP_WORD_BITS-1)); - b <<= 1; - b |= (a >> (BOTAN_MP_WORD_BITS-1)); - a <<= 1; - - carry = 0; - *w0 = word_add(*w0, a, &carry); - *w1 = word_add(*w1, b, &carry); - *w2 = word_add(*w2, top, &carry); - } - -} - -#endif diff --git a/src/lib/math/mp/mp_generic/mp_madd.h b/src/lib/math/mp/mp_madd.h index 95a1069a4..0567622d9 100644 --- a/src/lib/math/mp/mp_generic/mp_madd.h +++ b/src/lib/math/mp/mp_madd.h @@ -35,12 +35,52 @@ namespace Botan { #error BOTAN_MP_WORD_BITS must be 8, 16, 32, or 64 #endif +#if defined(BOTAN_TARGET_ARCH_IS_X86_32) && (BOTAN_MP_WORD_BITS == 32) + + #if defined(BOTAN_USE_GCC_INLINE_ASM) + #define BOTAN_MP_USE_X86_32_ASM + #define ASM(x) x "\n\t" + #elif defined(BOTAN_TARGET_COMPILER_IS_MSVC) + #define BOTAN_MP_USE_X86_32_MSVC_ASM + #endif + +#elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && (BOTAN_MP_WORD_BITS == 64) && (BOTAN_USE_GCC_INLINE_ASM) + #define BOTAN_MP_USE_X86_64_ASM + #define ASM(x) x "\n\t" +#endif + +#if defined(BOTAN_MP_USE_X86_32_ASM) || defined(BOTAN_MP_USE_X86_64_ASM) + #define ASM(x) x "\n\t" +#endif + /* * Word Multiply/Add */ inline word word_madd2(word a, word b, word* c) { -#if defined(BOTAN_HAS_MP_DWORD) +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ASM("mull %[b]") + ASM("addl %[c],%[a]") + ASM("adcl $0,%[carry]") + + : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) + : "0"(a), "1"(b), [c]"g"(*c) : "cc"); + + return a; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + asm( + ASM("mulq %[b]") + ASM("addq %[c],%[a]") + ASM("adcq $0,%[carry]") + + : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) + : "0"(a), "1"(b), [c]"g"(*c) : "cc"); + + return a; + +#elif defined(BOTAN_HAS_MP_DWORD) const dword s = static_cast<dword>(a) * b + *c; *c = static_cast<word>(s >> BOTAN_MP_WORD_BITS); return static_cast<word>(s); @@ -64,7 +104,37 @@ inline word word_madd2(word a, word b, word* c) */ inline word word_madd3(word a, word b, word c, word* d) { -#if defined(BOTAN_HAS_MP_DWORD) +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ASM("mull %[b]") + + ASM("addl %[c],%[a]") + ASM("adcl $0,%[carry]") + + ASM("addl %[d],%[a]") + ASM("adcl $0,%[carry]") + + : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) + : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); + + return a; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + asm( + ASM("mulq %[b]") + + ASM("addq %[c],%[a]") + ASM("adcq $0,%[carry]") + + ASM("addq %[d],%[a]") + ASM("adcq $0,%[carry]") + + : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) + : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); + + return a; + +#elif defined(BOTAN_HAS_MP_DWORD) const dword s = static_cast<dword>(a) * b + c + *d; *d = static_cast<word>(s >> BOTAN_MP_WORD_BITS); return static_cast<word>(s); @@ -86,6 +156,10 @@ inline word word_madd3(word a, word b, word c, word* d) #endif } +#if defined(ASM) + #undef ASM +#endif + } #endif diff --git a/src/lib/math/mp/mp_misc.cpp b/src/lib/math/mp/mp_misc.cpp deleted file mode 100644 index 768543a64..000000000 --- a/src/lib/math/mp/mp_misc.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/* -* MP Misc Functions -* (C) 1999-2008 Jack Lloyd -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#include <botan/internal/mp_core.h> -#include <botan/internal/mp_madd.h> -#include <botan/exceptn.h> - -namespace Botan { - -/* -* Compare two MP integers -*/ -s32bit bigint_cmp(const word x[], size_t x_size, - const word y[], size_t y_size) - { - if(x_size < y_size) { return (-bigint_cmp(y, y_size, x, x_size)); } - - while(x_size > y_size) - { - if(x[x_size-1]) - return 1; - x_size--; - } - - for(size_t i = x_size; i > 0; --i) - { - if(x[i-1] > y[i-1]) - return 1; - if(x[i-1] < y[i-1]) - return -1; - } - - return 0; - } - -/* -* Do a 2-word/1-word Division -*/ -word bigint_divop(word n1, word n0, word d) - { - if(d == 0) - throw Invalid_Argument("bigint_divop divide by zero"); - - word high = n1 % d, quotient = 0; - - for(size_t i = 0; i != MP_WORD_BITS; ++i) - { - word high_top_bit = (high & MP_WORD_TOP_BIT); - - high <<= 1; - high |= (n0 >> (MP_WORD_BITS-1-i)) & 1; - quotient <<= 1; - - if(high_top_bit || high >= d) - { - high -= d; - quotient |= 1; - } - } - - return quotient; - } - -/* -* Do a 2-word/1-word Modulo -*/ -word bigint_modop(word n1, word n0, word d) - { - word z = bigint_divop(n1, n0, d); - word dummy = 0; - z = word_madd2(z, d, &dummy); - return (n0-z); - } - -} diff --git a/src/lib/math/mp/mp_shift.cpp b/src/lib/math/mp/mp_shift.cpp deleted file mode 100644 index 1850888a0..000000000 --- a/src/lib/math/mp/mp_shift.cpp +++ /dev/null @@ -1,133 +0,0 @@ -/* -* MP Shift Algorithms -* (C) 1999-2007,2014 Jack Lloyd -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#include <botan/internal/mp_core.h> -#include <botan/mem_ops.h> - -namespace Botan { - -/* -* Single Operand Left Shift -*/ -void bigint_shl1(word x[], size_t x_size, size_t word_shift, size_t bit_shift) - { - if(word_shift) - { - copy_mem(x + word_shift, x, x_size); - clear_mem(x, word_shift); - } - - if(bit_shift) - { - word carry = 0; - for(size_t j = word_shift; j != x_size + word_shift + 1; ++j) - { - word temp = x[j]; - x[j] = (temp << bit_shift) | carry; - carry = (temp >> (MP_WORD_BITS - bit_shift)); - } - } - } - -/* -* Single Operand Right Shift -*/ -void bigint_shr1(word x[], size_t x_size, size_t word_shift, size_t bit_shift) - { - if(x_size < word_shift) - { - clear_mem(x, x_size); - return; - } - - if(word_shift) - { - copy_mem(x, x + word_shift, x_size - word_shift); - clear_mem(x + x_size - word_shift, word_shift); - } - - if(bit_shift) - { - word carry = 0; - - size_t top = x_size - word_shift; - - while(top >= 4) - { - word w = x[top-1]; - x[top-1] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - w = x[top-2]; - x[top-2] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - w = x[top-3]; - x[top-3] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - w = x[top-4]; - x[top-4] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - top -= 4; - } - - while(top) - { - word w = x[top-1]; - x[top-1] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - top--; - } - } - } - -/* -* Two Operand Left Shift -*/ -void bigint_shl2(word y[], const word x[], size_t x_size, - size_t word_shift, size_t bit_shift) - { - for(size_t j = 0; j != x_size; ++j) - y[j + word_shift] = x[j]; - if(bit_shift) - { - word carry = 0; - for(size_t j = word_shift; j != x_size + word_shift + 1; ++j) - { - word w = y[j]; - y[j] = (w << bit_shift) | carry; - carry = (w >> (MP_WORD_BITS - bit_shift)); - } - } - } - -/* -* Two Operand Right Shift -*/ -void bigint_shr2(word y[], const word x[], size_t x_size, - size_t word_shift, size_t bit_shift) - { - if(x_size < word_shift) return; - - for(size_t j = 0; j != x_size - word_shift; ++j) - y[j] = x[j + word_shift]; - if(bit_shift) - { - word carry = 0; - for(size_t j = x_size - word_shift; j > 0; --j) - { - word w = y[j-1]; - y[j-1] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - } - } - } - -} diff --git a/src/lib/math/mp/mp_x86_32/info.txt b/src/lib/math/mp/mp_x86_32/info.txt deleted file mode 100644 index f36abaf62..000000000 --- a/src/lib/math/mp/mp_x86_32/info.txt +++ /dev/null @@ -1,18 +0,0 @@ -load_on dep - -mp_bits 32 - -<header:internal> -mp_madd.h -mp_asmi.h -</header:internal> - -<arch> -x86_32 -</arch> - -<cc> -clang -gcc -icc -</cc> diff --git a/src/lib/math/mp/mp_x86_32/mp_asmi.h b/src/lib/math/mp/mp_x86_32/mp_asmi.h deleted file mode 100644 index 95af89fc0..000000000 --- a/src/lib/math/mp/mp_x86_32/mp_asmi.h +++ /dev/null @@ -1,236 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#ifndef BOTAN_MP_ASM_INTERNAL_H__ -#define BOTAN_MP_ASM_INTERNAL_H__ - -#include <botan/internal/mp_madd.h> - -namespace Botan { - -/* -* Helper Macros for x86 Assembly -*/ -#ifndef ASM - #define ASM(x) x "\n\t" -#endif - -#define ADDSUB2_OP(OPERATION, INDEX) \ - ASM("movl 4*" #INDEX "(%[y]), %[carry]") \ - ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])") \ - -#define ADDSUB3_OP(OPERATION, INDEX) \ - ASM("movl 4*" #INDEX "(%[x]), %[carry]") \ - ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]") \ - ASM("movl %[carry], 4*" #INDEX "(%[z])") \ - -#define LINMUL_OP(WRITE_TO, INDEX) \ - ASM("movl 4*" #INDEX "(%[x]),%%eax") \ - ASM("mull %[y]") \ - ASM("addl %[carry],%%eax") \ - ASM("adcl $0,%%edx") \ - ASM("movl %%edx,%[carry]") \ - ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])") - -#define MULADD_OP(IGNORED, INDEX) \ - ASM("movl 4*" #INDEX "(%[x]),%%eax") \ - ASM("mull %[y]") \ - ASM("addl %[carry],%%eax") \ - ASM("adcl $0,%%edx") \ - ASM("addl 4*" #INDEX "(%[z]),%%eax") \ - ASM("adcl $0,%%edx") \ - ASM("movl %%edx,%[carry]") \ - ASM("movl %%eax, 4*" #INDEX " (%[z])") - -#define DO_8_TIMES(MACRO, ARG) \ - MACRO(ARG, 0) \ - MACRO(ARG, 1) \ - MACRO(ARG, 2) \ - MACRO(ARG, 3) \ - MACRO(ARG, 4) \ - MACRO(ARG, 5) \ - MACRO(ARG, 6) \ - MACRO(ARG, 7) - -#define ADD_OR_SUBTRACT(CORE_CODE) \ - ASM("rorl %[carry]") \ - CORE_CODE \ - ASM("sbbl %[carry],%[carry]") \ - ASM("negl %[carry]") - -/* -* Word Addition -*/ -inline word word_add(word x, word y, word* carry) - { - asm( - ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]")) - : [x]"=r"(x), [carry]"=r"(*carry) - : "0"(x), [y]"rm"(y), "1"(*carry) - : "cc"); - return x; - } - -/* -* Eight Word Block Addition, Two Argument -*/ -inline word word8_add2(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Addition, Three Argument -*/ -inline word word8_add3(word z[8], const word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Word Subtraction -*/ -inline word word_sub(word x, word y, word* carry) - { - asm( - ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]")) - : [x]"=r"(x), [carry]"=r"(*carry) - : "0"(x), [y]"rm"(y), "1"(*carry) - : "cc"); - return x; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2_rev(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) - : [carry]"=r"(carry) - : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Subtraction, Three Argument -*/ -inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul2(word x[8], word y, word carry) - { - asm( - DO_8_TIMES(LINMUL_OP, "x") - : [carry]"=r"(carry) - : [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%eax", "%edx"); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul3(word z[8], const word x[8], word y, word carry) - { - asm( - DO_8_TIMES(LINMUL_OP, "z") - : [carry]"=r"(carry) - : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%eax", "%edx"); - return carry; - } - -/* -* Eight Word Block Multiply/Add -*/ -inline word word8_madd3(word z[8], const word x[8], word y, word carry) - { - asm( - DO_8_TIMES(MULADD_OP, "") - : [carry]"=r"(carry) - : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%eax", "%edx"); - return carry; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) - { - asm( - ASM("mull %[y]") - - ASM("addl %[x],%[w0]") - ASM("adcl %[y],%[w1]") - ASM("adcl $0,%[w2]") - - : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) - : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) - : "cc"); - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) - { - asm( - ASM("mull %[y]") - - ASM("addl %[x],%[w0]") - ASM("adcl %[y],%[w1]") - ASM("adcl $0,%[w2]") - - ASM("addl %[x],%[w0]") - ASM("adcl %[y],%[w1]") - ASM("adcl $0,%[w2]") - - : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) - : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) - : "cc"); - } - -} - -#endif diff --git a/src/lib/math/mp/mp_x86_32/mp_madd.h b/src/lib/math/mp/mp_x86_32/mp_madd.h deleted file mode 100644 index 9c0990398..000000000 --- a/src/lib/math/mp/mp_x86_32/mp_madd.h +++ /dev/null @@ -1,63 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2008 Jack Lloyd -* 2006 Luca Piccarreta -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#ifndef BOTAN_MP_WORD_MULADD_H__ -#define BOTAN_MP_WORD_MULADD_H__ - -#include <botan/mp_types.h> - -#if (BOTAN_MP_WORD_BITS != 32) - #error The mp_x86_32 module requires that BOTAN_MP_WORD_BITS == 32 -#endif - -namespace Botan { - -/* -* Helper Macros for x86 Assembly -*/ -#define ASM(x) x "\n\t" - -/* -* Word Multiply -*/ -inline word word_madd2(word a, word b, word* c) - { - asm( - ASM("mull %[b]") - ASM("addl %[c],%[a]") - ASM("adcl $0,%[carry]") - - : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) - : "0"(a), "1"(b), [c]"g"(*c) : "cc"); - - return a; - } - -/* -* Word Multiply/Add -*/ -inline word word_madd3(word a, word b, word c, word* d) - { - asm( - ASM("mull %[b]") - - ASM("addl %[c],%[a]") - ASM("adcl $0,%[carry]") - - ASM("addl %[d],%[a]") - ASM("adcl $0,%[carry]") - - : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) - : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); - - return a; - } - -} - -#endif diff --git a/src/lib/math/mp/mp_x86_32_msvc/info.txt b/src/lib/math/mp/mp_x86_32_msvc/info.txt deleted file mode 100644 index 3029d6a61..000000000 --- a/src/lib/math/mp/mp_x86_32_msvc/info.txt +++ /dev/null @@ -1,16 +0,0 @@ -mp_bits 32 - -load_on dep - -<header:internal> -mp_generic:mp_madd.h -mp_asmi.h -</header:internal> - -<arch> -x86_32 -</arch> - -<cc> -msvc -</cc> diff --git a/src/lib/math/mp/mp_x86_32_msvc/mp_asmi.h b/src/lib/math/mp/mp_x86_32_msvc/mp_asmi.h deleted file mode 100644 index 92bf7980d..000000000 --- a/src/lib/math/mp/mp_x86_32_msvc/mp_asmi.h +++ /dev/null @@ -1,454 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#ifndef BOTAN_MP_ASM_INTERNAL_H__ -#define BOTAN_MP_ASM_INTERNAL_H__ - -#include <botan/internal/mp_madd.h> - -namespace Botan { - -/* -* Word Addition -*/ -inline word word_add(word x, word y, word* carry) - { - word z = x + y; - word c1 = (z < x); - z += *carry; - *carry = c1 | (z < *carry); - return z; - } - -/* -* Eight Word Block Addition, Two Argument -*/ -inline word word8_add2(word x[8], const word y[8], word carry) - { - __asm { - mov edx,[x] - mov esi,[y] - xor eax,eax - sub eax,[carry] //force CF=1 iff *carry==1 - mov eax,[esi] - adc [edx],eax - mov eax,[esi+4] - adc [edx+4],eax - mov eax,[esi+8] - adc [edx+8],eax - mov eax,[esi+12] - adc [edx+12],eax - mov eax,[esi+16] - adc [edx+16],eax - mov eax,[esi+20] - adc [edx+20],eax - mov eax,[esi+24] - adc [edx+24],eax - mov eax,[esi+28] - adc [edx+28],eax - sbb eax,eax - neg eax - } - } - -/* -* Eight Word Block Addition, Three Argument -*/ -inline word word8_add3(word z[8], const word x[8], const word y[8], word carry) - { - __asm { - mov edi,[x] - mov esi,[y] - mov ebx,[z] - xor eax,eax - sub eax,[carry] //force CF=1 iff *carry==1 - mov eax,[edi] - adc eax,[esi] - mov [ebx],eax - - mov eax,[edi+4] - adc eax,[esi+4] - mov [ebx+4],eax - - mov eax,[edi+8] - adc eax,[esi+8] - mov [ebx+8],eax - - mov eax,[edi+12] - adc eax,[esi+12] - mov [ebx+12],eax - - mov eax,[edi+16] - adc eax,[esi+16] - mov [ebx+16],eax - - mov eax,[edi+20] - adc eax,[esi+20] - mov [ebx+20],eax - - mov eax,[edi+24] - adc eax,[esi+24] - mov [ebx+24],eax - - mov eax,[edi+28] - adc eax,[esi+28] - mov [ebx+28],eax - - sbb eax,eax - neg eax - } - } - -/* -* Word Subtraction -*/ -inline word word_sub(word x, word y, word* carry) - { - word t0 = x - y; - word c1 = (t0 > x); - word z = t0 - *carry; - *carry = c1 | (z > t0); - return z; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2(word x[8], const word y[8], word carry) - { - __asm { - mov edi,[x] - mov esi,[y] - xor eax,eax - sub eax,[carry] //force CF=1 iff *carry==1 - mov eax,[edi] - sbb eax,[esi] - mov [edi],eax - mov eax,[edi+4] - sbb eax,[esi+4] - mov [edi+4],eax - mov eax,[edi+8] - sbb eax,[esi+8] - mov [edi+8],eax - mov eax,[edi+12] - sbb eax,[esi+12] - mov [edi+12],eax - mov eax,[edi+16] - sbb eax,[esi+16] - mov [edi+16],eax - mov eax,[edi+20] - sbb eax,[esi+20] - mov [edi+20],eax - mov eax,[edi+24] - sbb eax,[esi+24] - mov [edi+24],eax - mov eax,[edi+28] - sbb eax,[esi+28] - mov [edi+28],eax - sbb eax,eax - neg eax - } - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2_rev(word x[8], const word y[8], word carry) - { - x[0] = word_sub(y[0], x[0], &carry); - x[1] = word_sub(y[1], x[1], &carry); - x[2] = word_sub(y[2], x[2], &carry); - x[3] = word_sub(y[3], x[3], &carry); - x[4] = word_sub(y[4], x[4], &carry); - x[5] = word_sub(y[5], x[5], &carry); - x[6] = word_sub(y[6], x[6], &carry); - x[7] = word_sub(y[7], x[7], &carry); - return carry; - } - - -/* -* Eight Word Block Subtraction, Three Argument -*/ -inline word word8_sub3(word z[8], const word x[8], - const word y[8], word carry) - { - __asm { - mov edi,[x] - mov esi,[y] - xor eax,eax - sub eax,[carry] //force CF=1 iff *carry==1 - mov ebx,[z] - mov eax,[edi] - sbb eax,[esi] - mov [ebx],eax - mov eax,[edi+4] - sbb eax,[esi+4] - mov [ebx+4],eax - mov eax,[edi+8] - sbb eax,[esi+8] - mov [ebx+8],eax - mov eax,[edi+12] - sbb eax,[esi+12] - mov [ebx+12],eax - mov eax,[edi+16] - sbb eax,[esi+16] - mov [ebx+16],eax - mov eax,[edi+20] - sbb eax,[esi+20] - mov [ebx+20],eax - mov eax,[edi+24] - sbb eax,[esi+24] - mov [ebx+24],eax - mov eax,[edi+28] - sbb eax,[esi+28] - mov [ebx+28],eax - sbb eax,eax - neg eax - } - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul2(word x[8], word y, word carry) - { - __asm { - mov esi,[x] - mov eax,[esi] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,[carry] //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi],eax //load a - - mov eax,[esi+4] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+4],eax //load a - - mov eax,[esi+8] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+8],eax //load a - - mov eax,[esi+12] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+12],eax //load a - - mov eax,[esi+16] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+16],eax //load a - - mov eax,[esi+20] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+20],eax //load a - - mov eax,[esi+24] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+24],eax //load a - - mov eax,[esi+28] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov [esi+28],eax //load a - - mov eax,edx //store carry - } - } - -inline word word8_linmul3(word z[4], const word x[4], word y, word carry) - { - __asm { -#if 0 - //it's slower!!! - mov edx,[z] - mov eax,[x] - movd mm7,[y] - - movd mm0,[eax] - movd mm1,[eax+4] - movd mm2,[eax+8] - pmuludq mm0,mm7 - pmuludq mm1,mm7 - pmuludq mm2,mm7 - - movd mm6,[carry] - paddq mm0,mm6 - movd [edx],mm0 - - psrlq mm0,32 - paddq mm1,mm0 - movd [edx+4],mm1 - - movd mm3,[eax+12] - psrlq mm1,32 - paddq mm2,mm1 - movd [edx+8],mm2 - - pmuludq mm3,mm7 - movd mm4,[eax+16] - psrlq mm2,32 - paddq mm3,mm2 - movd [edx+12],mm3 - - pmuludq mm4,mm7 - movd mm5,[eax+20] - psrlq mm3,32 - paddq mm4,mm3 - movd [edx+16],mm4 - - pmuludq mm5,mm7 - movd mm0,[eax+24] - psrlq mm4,32 - paddq mm5,mm4 - movd [edx+20],mm5 - - pmuludq mm0,mm7 - movd mm1,[eax+28] - psrlq mm5,32 - paddq mm0,mm5 - movd [edx+24],mm0 - - pmuludq mm1,mm7 - psrlq mm0,32 - paddq mm1,mm0 - movd [edx+28],mm1 - psrlq mm1,32 - - movd eax,mm1 - emms -#else - mov edi,[z] - mov esi,[x] - mov eax,[esi] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,[carry] //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi],eax //load a - - mov eax,[esi+4] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+4],eax //load a - - mov eax,[esi+8] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+8],eax //load a - - mov eax,[esi+12] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+12],eax //load a - - mov eax,[esi+16] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+16],eax //load a - - mov eax,[esi+20] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+20],eax //load a - - mov eax,[esi+24] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+24],eax //load a - - mov eax,[esi+28] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov [edi+28],eax //load a - mov eax,edx //store carry -#endif - } - } - -/* -* Eight Word Block Multiply/Add -*/ -inline word word8_madd3(word z[8], const word x[8], word y, word carry) - { - z[0] = word_madd3(x[0], y, z[0], &carry); - z[1] = word_madd3(x[1], y, z[1], &carry); - z[2] = word_madd3(x[2], y, z[2], &carry); - z[3] = word_madd3(x[3], y, z[3], &carry); - z[4] = word_madd3(x[4], y, z[4], &carry); - z[5] = word_madd3(x[5], y, z[5], &carry); - z[6] = word_madd3(x[6], y, z[6], &carry); - z[7] = word_madd3(x[7], y, z[7], &carry); - return carry; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd(word* w2, word* w1, word* w0, word a, word b) - { - word carry = *w0; - *w0 = word_madd2(a, b, &carry); - *w1 += carry; - *w2 += (*w1 < carry) ? 1 : 0; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b) - { - word carry = 0; - a = word_madd2(a, b, &carry); - b = carry; - - word top = (b >> (BOTAN_MP_WORD_BITS-1)); - b <<= 1; - b |= (a >> (BOTAN_MP_WORD_BITS-1)); - a <<= 1; - - carry = 0; - *w0 = word_add(*w0, a, &carry); - *w1 = word_add(*w1, b, &carry); - *w2 = word_add(*w2, top, &carry); - } - -} - -#endif diff --git a/src/lib/math/mp/mp_x86_64/info.txt b/src/lib/math/mp/mp_x86_64/info.txt deleted file mode 100644 index 75c42ddc1..000000000 --- a/src/lib/math/mp/mp_x86_64/info.txt +++ /dev/null @@ -1,18 +0,0 @@ -load_on dep - -mp_bits 64 - -<header:internal> -mp_madd.h -mp_asmi.h -</header:internal> - -<arch> -x86_64 -</arch> - -<cc> -clang -gcc -icc -</cc> diff --git a/src/lib/math/mp/mp_x86_64/mp_asmi.h b/src/lib/math/mp/mp_x86_64/mp_asmi.h deleted file mode 100644 index cd5884867..000000000 --- a/src/lib/math/mp/mp_x86_64/mp_asmi.h +++ /dev/null @@ -1,244 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#ifndef BOTAN_MP_ASM_INTERNAL_H__ -#define BOTAN_MP_ASM_INTERNAL_H__ - -#include <botan/internal/mp_madd.h> - -namespace Botan { - -/* -* Helper Macros for x86-64 Assembly -*/ -#ifndef ASM - #define ASM(x) x "\n\t" -#endif - -#define ADDSUB2_OP(OPERATION, INDEX) \ - ASM("movq 8*" #INDEX "(%[y]), %[carry]") \ - ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])") \ - -#define ADDSUB3_OP(OPERATION, INDEX) \ - ASM("movq 8*" #INDEX "(%[x]), %[carry]") \ - ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \ - ASM("movq %[carry], 8*" #INDEX "(%[z])") \ - -#define LINMUL_OP(WRITE_TO, INDEX) \ - ASM("movq 8*" #INDEX "(%[x]),%%rax") \ - ASM("mulq %[y]") \ - ASM("addq %[carry],%%rax") \ - ASM("adcq $0,%%rdx") \ - ASM("movq %%rdx,%[carry]") \ - ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])") - -#define MULADD_OP(IGNORED, INDEX) \ - ASM("movq 8*" #INDEX "(%[x]),%%rax") \ - ASM("mulq %[y]") \ - ASM("addq %[carry],%%rax") \ - ASM("adcq $0,%%rdx") \ - ASM("addq 8*" #INDEX "(%[z]),%%rax") \ - ASM("adcq $0,%%rdx") \ - ASM("movq %%rdx,%[carry]") \ - ASM("movq %%rax, 8*" #INDEX " (%[z])") - -#define DO_8_TIMES(MACRO, ARG) \ - MACRO(ARG, 0) \ - MACRO(ARG, 1) \ - MACRO(ARG, 2) \ - MACRO(ARG, 3) \ - MACRO(ARG, 4) \ - MACRO(ARG, 5) \ - MACRO(ARG, 6) \ - MACRO(ARG, 7) - -#define ADD_OR_SUBTRACT(CORE_CODE) \ - ASM("rorq %[carry]") \ - CORE_CODE \ - ASM("sbbq %[carry],%[carry]") \ - ASM("negq %[carry]") - -/* -* Word Addition -*/ -inline word word_add(word x, word y, word* carry) - { - asm( - ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]")) - : [x]"=r"(x), [carry]"=r"(*carry) - : "0"(x), [y]"rm"(y), "1"(*carry) - : "cc"); - return x; - } - -/* -* Eight Word Block Addition, Two Argument -*/ -inline word word8_add2(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Addition, Three Argument -*/ -inline word word8_add3(word z[8], const word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Word Subtraction -*/ -inline word word_sub(word x, word y, word* carry) - { - asm( - ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]")) - : [x]"=r"(x), [carry]"=r"(*carry) - : "0"(x), [y]"rm"(y), "1"(*carry) - : "cc"); - return x; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2_rev(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) - : [carry]"=r"(carry) - : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Subtraction, Three Argument -*/ -inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul2(word x[8], word y, word carry) - { - asm( - DO_8_TIMES(LINMUL_OP, "x") - : [carry]"=r"(carry) - : [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%rax", "%rdx"); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul3(word z[8], const word x[8], word y, word carry) - { - asm( - DO_8_TIMES(LINMUL_OP, "z") - : [carry]"=r"(carry) - : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%rax", "%rdx"); - return carry; - } - -/* -* Eight Word Block Multiply/Add -*/ -inline word word8_madd3(word z[8], const word x[8], word y, word carry) - { - asm( - DO_8_TIMES(MULADD_OP, "") - : [carry]"=r"(carry) - : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%rax", "%rdx"); - return carry; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) - { - asm( - ASM("mulq %[y]") - - ASM("addq %[x],%[w0]") - ASM("adcq %[y],%[w1]") - ASM("adcq $0,%[w2]") - - : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) - : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) - : "cc"); - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) - { - asm( - ASM("mulq %[y]") - - ASM("addq %[x],%[w0]") - ASM("adcq %[y],%[w1]") - ASM("adcq $0,%[w2]") - - ASM("addq %[x],%[w0]") - ASM("adcq %[y],%[w1]") - ASM("adcq $0,%[w2]") - - : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) - : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) - : "cc"); - } - -#undef ASM -#undef DO_8_TIMES -#undef ADD_OR_SUBTRACT -#undef ADDSUB2_OP -#undef ADDSUB3_OP -#undef LINMUL_OP -#undef MULADD_OP - -} - -#endif diff --git a/src/lib/math/mp/mp_x86_64/mp_madd.h b/src/lib/math/mp/mp_x86_64/mp_madd.h deleted file mode 100644 index 6f9185dc0..000000000 --- a/src/lib/math/mp/mp_x86_64/mp_madd.h +++ /dev/null @@ -1,65 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2008 Jack Lloyd -* 2006 Luca Piccarreta -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#ifndef BOTAN_MP_WORD_MULADD_H__ -#define BOTAN_MP_WORD_MULADD_H__ - -#include <botan/mp_types.h> - -#if (BOTAN_MP_WORD_BITS != 64) - #error The mp_x86_64 module requires that BOTAN_MP_WORD_BITS == 64 -#endif - -namespace Botan { - -/* -* Helper Macros for x86-64 Assembly -*/ -#define ASM(x) x "\n\t" - -/* -* Word Multiply -*/ -inline word word_madd2(word a, word b, word* c) - { - asm( - ASM("mulq %[b]") - ASM("addq %[c],%[a]") - ASM("adcq $0,%[carry]") - - : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) - : "0"(a), "1"(b), [c]"g"(*c) : "cc"); - - return a; - } - -/* -* Word Multiply/Add -*/ -inline word word_madd3(word a, word b, word c, word* d) - { - asm( - ASM("mulq %[b]") - - ASM("addq %[c],%[a]") - ASM("adcq $0,%[carry]") - - ASM("addq %[d],%[a]") - ASM("adcq $0,%[carry]") - - : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) - : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); - - return a; - } - -#undef ASM - -} - -#endif diff --git a/src/lib/utils/cpuid.cpp b/src/lib/utils/cpuid.cpp index 695a28550..d3def91ed 100644 --- a/src/lib/utils/cpuid.cpp +++ b/src/lib/utils/cpuid.cpp @@ -159,14 +159,12 @@ bool altivec_check_pvr_emul() bool CPUID::has_simd_32() { -#if defined(BOTAN_HAS_SIMD_SSE2) +#if defined(BOTAN_TARGET_SUPPORTS_SSE2) return CPUID::has_sse2(); -#elif defined(BOTAN_HAS_SIMD_ALTIVEC) +#elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC) return CPUID::has_altivec(); -#elif defined(BOTAN_HAS_SIMD_SCALAR) - return true; #else - return false; + return true; #endif } diff --git a/src/lib/utils/simd/info.txt b/src/lib/utils/simd/info.txt index 35620c940..6b9e381fa 100644 --- a/src/lib/utils/simd/info.txt +++ b/src/lib/utils/simd/info.txt @@ -3,7 +3,3 @@ define SIMD_32 20131128 <header:internal> simd_32.h </header:internal> - -<requires> -simd_sse2|simd_altivec|simd_scalar -</requires> diff --git a/src/lib/utils/simd/simd_32.h b/src/lib/utils/simd/simd_32.h index 265e347a9..351146f22 100644 --- a/src/lib/utils/simd/simd_32.h +++ b/src/lib/utils/simd/simd_32.h @@ -1,6 +1,6 @@ /* * Lightweight wrappers for SIMD operations -* (C) 2009,2011 Jack Lloyd +* (C) 2009,2011,2016 Jack Lloyd * * Botan is released under the Simplified BSD License (see license.txt) */ @@ -9,22 +9,470 @@ #define BOTAN_SIMD_32_H__ #include <botan/types.h> +#include <botan/loadstor.h> +#include <botan/bswap.h> -#if defined(BOTAN_HAS_SIMD_SSE2) - #include <botan/internal/simd_sse2.h> - namespace Botan { typedef SIMD_SSE2 SIMD_32; } +#if defined(BOTAN_TARGET_SUPPORTS_SSE2) && 0 + #include <emmintrin.h> + #define BOTAN_SIMD_USE_SSE2 -#elif defined(BOTAN_HAS_SIMD_ALTIVEC) - #include <botan/internal/simd_altivec.h> - namespace Botan { typedef SIMD_Altivec SIMD_32; } +#elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC) + #include <altivec.h> + #undef vector + #undef bool + #define BOTAN_SIMD_USE_ALTIVEC +#endif + +// TODO: NEON support + +namespace Botan { + +/** +* This class is not a general purpose SIMD type, and only offers +* instructions needed for evaluation of specific crypto primitives. +* For example it does not currently have equality operators of any +* kind. +*/ +class SIMD_4x32 + { + public: -#elif defined(BOTAN_HAS_SIMD_SCALAR) - #include <botan/internal/simd_scalar.h> - namespace Botan { typedef SIMD_Scalar<u32bit,4> SIMD_32; } + SIMD_4x32() // zero initialized + { +#if defined(BOTAN_SIMD_USE_SSE2) || defined(BOTAN_SIMD_USE_ALTIVEC) + ::memset(&m_reg, 0, sizeof(m_reg)); +#else + ::memset(m_reg, 0, sizeof(m_reg)); +#endif + } + explicit SIMD_4x32(const u32bit B[4]) + { +#if defined(BOTAN_SIMD_USE_SSE2) + m_reg = _mm_loadu_si128(reinterpret_cast<const __m128i*>(B)); +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + m_reg = (__vector unsigned int){B[0], B[1], B[2], B[3]}; #else - #error "No SIMD module defined" + m_reg[0] = B[0]; + m_reg[1] = B[1]; + m_reg[2] = B[2]; + m_reg[3] = B[3]; +#endif + } + + SIMD_4x32(u32bit B0, u32bit B1, u32bit B2, u32bit B3) + { +#if defined(BOTAN_SIMD_USE_SSE2) + m_reg = _mm_set_epi32(B0, B1, B2, B3); +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + m_reg = (__vector unsigned int){B0, B1, B2, B3}; +#else + m_reg[0] = B0; + m_reg[1] = B1; + m_reg[2] = B2; + m_reg[3] = B3; +#endif + } + + explicit SIMD_4x32(u32bit B) + { +#if defined(BOTAN_SIMD_USE_SSE2) + m_reg = _mm_set1_epi32(B); +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + m_reg = (__vector unsigned int){B, B, B, B}; +#else + m_reg[0] = B; + m_reg[1] = B; + m_reg[2] = B; + m_reg[3] = B; +#endif + } + + static SIMD_4x32 load_le(const void* in) + { +#if defined(BOTAN_SIMD_USE_SSE2) + return SIMD_4x32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in))); +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + const u32bit* in_32 = static_cast<const u32bit*>(in); + __vector unsigned int R0 = vec_ld(0, in_32); + __vector unsigned int R1 = vec_ld(12, in_32); + + __vector unsigned char perm = vec_lvsl(0, in_32); + + perm = vec_xor(perm, vec_splat_u8(3)); + + R0 = vec_perm(R0, R1, perm); + + return SIMD_4x32(R0); +#else + SIMD_4x32 out; + Botan::load_le(out.m_reg, static_cast<const uint8_t*>(in), 4); + return out; +#endif + } + + static SIMD_4x32 load_be(const void* in) + { +#if defined(BOTAN_SIMD_USE_SSE2) + return load_le(in).bswap(); +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + const u32bit* in_32 = static_cast<const u32bit*>(in); + + __vector unsigned int R0 = vec_ld(0, in_32); + __vector unsigned int R1 = vec_ld(12, in_32); + + __vector unsigned char perm = vec_lvsl(0, in_32); + + R0 = vec_perm(R0, R1, perm); + + return SIMD_4x32(R0); + +#else + SIMD_4x32 out; + Botan::load_be(out.m_reg, static_cast<const uint8_t*>(in), 4); + return out; +#endif + } + + void store_le(uint8_t out[]) const + { +#if defined(BOTAN_SIMD_USE_SSE2) + _mm_storeu_si128(reinterpret_cast<__m128i*>(out), m_reg); +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + __vector unsigned char perm = vec_lvsl(0, static_cast<u32bit*>(nullptr)); + + perm = vec_xor(perm, vec_splat_u8(3)); // bswap vector + + union { + __vector unsigned int V; + u32bit R[4]; + } vec; + + vec.V = vec_perm(m_reg, m_reg, perm); + + Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]); +#else + Botan::store_le(out, m_reg[0], m_reg[1], m_reg[2], m_reg[3]); +#endif + } + + void store_be(uint8_t out[]) const + { +#if defined(BOTAN_SIMD_USE_SSE2) + bswap().store_le(out); + +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + + union { + __vector unsigned int V; + u32bit R[4]; + } vec; + + vec.V = m_reg; + + Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]); +#else + Botan::store_be(out, m_reg[0], m_reg[1], m_reg[2], m_reg[3]); +#endif + } + + void rotate_left(size_t rot) + { +#if defined(BOTAN_SIMD_USE_SSE2) + m_reg = _mm_or_si128(_mm_slli_epi32(m_reg, static_cast<int>(rot)), + _mm_srli_epi32(m_reg, static_cast<int>(32-rot))); + +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + const unsigned int r = static_cast<unsigned int>(rot); + m_reg = vec_rl(m_reg, (__vector unsigned int){r, r, r, r}); + +#else + m_reg[0] = Botan::rotate_left(m_reg[0], rot); + m_reg[1] = Botan::rotate_left(m_reg[1], rot); + m_reg[2] = Botan::rotate_left(m_reg[2], rot); + m_reg[3] = Botan::rotate_left(m_reg[3], rot); +#endif + } + + void rotate_right(size_t rot) + { + rotate_left(32 - rot); + } + + void operator+=(const SIMD_4x32& other) + { +#if defined(BOTAN_SIMD_USE_SSE2) + m_reg = _mm_add_epi32(m_reg, other.m_reg); +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + m_reg = vec_add(m_reg, other.m_reg); +#else + m_reg[0] += other.m_reg[0]; + m_reg[1] += other.m_reg[1]; + m_reg[2] += other.m_reg[2]; + m_reg[3] += other.m_reg[3]; +#endif + } + + SIMD_4x32 operator+(const SIMD_4x32& other) const + { +#if defined(BOTAN_SIMD_USE_SSE2) + return SIMD_4x32(_mm_add_epi32(m_reg, other.m_reg)); +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + return SIMD_4x32(vec_add(m_reg, other.m_reg)); +#else + return SIMD_4x32(m_reg[0] + other.m_reg[0], + m_reg[1] + other.m_reg[1], + m_reg[2] + other.m_reg[2], + m_reg[3] + other.m_reg[3]); +#endif + } + + void operator-=(const SIMD_4x32& other) + { +#if defined(BOTAN_SIMD_USE_SSE2) + m_reg = _mm_sub_epi32(m_reg, other.m_reg); +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + m_reg = vec_sub(m_reg, other.m_reg); +#else + m_reg[0] -= other.m_reg[0]; + m_reg[1] -= other.m_reg[1]; + m_reg[2] -= other.m_reg[2]; + m_reg[3] -= other.m_reg[3]; #endif + } + + SIMD_4x32 operator-(const SIMD_4x32& other) const + { +#if defined(BOTAN_SIMD_USE_SSE2) + return SIMD_4x32(_mm_sub_epi32(m_reg, other.m_reg)); +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + return SIMD_4x32(vec_sub(m_reg, other.m_reg)); +#else + return SIMD_4x32(m_reg[0] - other.m_reg[0], + m_reg[1] - other.m_reg[1], + m_reg[2] - other.m_reg[2], + m_reg[3] - other.m_reg[3]); +#endif + } + + void operator^=(const SIMD_4x32& other) + { +#if defined(BOTAN_SIMD_USE_SSE2) + m_reg = _mm_xor_si128(m_reg, other.m_reg); + +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + m_reg = vec_xor(m_reg, other.m_reg); +#else + m_reg[0] ^= other.m_reg[0]; + m_reg[1] ^= other.m_reg[1]; + m_reg[2] ^= other.m_reg[2]; + m_reg[3] ^= other.m_reg[3]; +#endif + } + + SIMD_4x32 operator^(const SIMD_4x32& other) const + { +#if defined(BOTAN_SIMD_USE_SSE2) + return SIMD_4x32(_mm_xor_si128(m_reg, other.m_reg)); +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + return SIMD_4x32(vec_xor(m_reg, other.m_reg)); +#else + return SIMD_4x32(m_reg[0] ^ other.m_reg[0], + m_reg[1] ^ other.m_reg[1], + m_reg[2] ^ other.m_reg[2], + m_reg[3] ^ other.m_reg[3]); +#endif + } + + void operator|=(const SIMD_4x32& other) + { +#if defined(BOTAN_SIMD_USE_SSE2) + m_reg = _mm_or_si128(m_reg, other.m_reg); +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + m_reg = vec_or(m_reg, other.m_reg); +#else + m_reg[0] |= other.m_reg[0]; + m_reg[1] |= other.m_reg[1]; + m_reg[2] |= other.m_reg[2]; + m_reg[3] |= other.m_reg[3]; +#endif + } + + SIMD_4x32 operator&(const SIMD_4x32& other) + { +#if defined(BOTAN_SIMD_USE_SSE2) + return SIMD_4x32(_mm_and_si128(m_reg, other.m_reg)); + +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + return SIMD_4x32(vec_and(m_reg, other.m_reg)); +#else + return SIMD_4x32(m_reg[0] & other.m_reg[0], + m_reg[1] & other.m_reg[1], + m_reg[2] & other.m_reg[2], + m_reg[3] & other.m_reg[3]); +#endif + } + + void operator&=(const SIMD_4x32& other) + { +#if defined(BOTAN_SIMD_USE_SSE2) + m_reg = _mm_and_si128(m_reg, other.m_reg); +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + m_reg = vec_and(m_reg, other.m_reg); +#else + m_reg[0] &= other.m_reg[0]; + m_reg[1] &= other.m_reg[1]; + m_reg[2] &= other.m_reg[2]; + m_reg[3] &= other.m_reg[3]; +#endif + } + + SIMD_4x32 operator<<(size_t shift) const + { +#if defined(BOTAN_SIMD_USE_SSE2) + return SIMD_4x32(_mm_slli_epi32(m_reg, static_cast<int>(shift))); + +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + const unsigned int s = static_cast<unsigned int>(shift); + return SIMD_4x32(vec_sl(m_reg, (__vector unsigned int){s, s, s, s})); +#else + return SIMD_4x32(m_reg[0] << shift, + m_reg[1] << shift, + m_reg[2] << shift, + m_reg[3] << shift); +#endif + } + + SIMD_4x32 operator>>(size_t shift) const + { +#if defined(BOTAN_SIMD_USE_SSE2) + return SIMD_4x32(_mm_srli_epi32(m_reg, static_cast<int>(shift))); + +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + const unsigned int s = static_cast<unsigned int>(shift); + return SIMD_4x32(vec_sr(m_reg, (__vector unsigned int){s, s, s, s})); +#else + return SIMD_4x32(m_reg[0] >> shift, + m_reg[1] >> shift, + m_reg[2] >> shift, + m_reg[3] >> shift); + +#endif + } + + SIMD_4x32 operator~() const + { +#if defined(BOTAN_SIMD_USE_SSE2) + return SIMD_4x32(_mm_xor_si128(m_reg, _mm_set1_epi32(0xFFFFFFFF))); +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + return SIMD_4x32(vec_nor(m_reg, m_reg)); +#else + return SIMD_4x32(~m_reg[0], + ~m_reg[1], + ~m_reg[2], + ~m_reg[3]); +#endif + } + + // (~reg) & other + SIMD_4x32 andc(const SIMD_4x32& other) + { +#if defined(BOTAN_SIMD_USE_SSE2) + return SIMD_4x32(_mm_andnot_si128(m_reg, other.m_reg)); +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + /* + AltiVec does arg1 & ~arg2 rather than SSE's ~arg1 & arg2 + so swap the arguments + */ + return SIMD_4x32(vec_andc(other.m_reg, m_reg)); +#else + return SIMD_4x32((~m_reg[0]) & other.m_reg[0], + (~m_reg[1]) & other.m_reg[1], + (~m_reg[2]) & other.m_reg[2], + (~m_reg[3]) & other.m_reg[3]); +#endif + } + + SIMD_4x32 bswap() const + { +#if defined(BOTAN_SIMD_USE_SSE2) + __m128i T = m_reg; + + T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1)); + T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1)); + + return SIMD_4x32(_mm_or_si128(_mm_srli_epi16(T, 8), + _mm_slli_epi16(T, 8))); + +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + + __vector unsigned char perm = vec_lvsl(0, static_cast<u32bit*>(nullptr)); + + perm = vec_xor(perm, vec_splat_u8(3)); + + return SIMD_4x32(vec_perm(m_reg, m_reg, perm)); +#else + return SIMD_4x32(reverse_bytes(m_reg[0]), + reverse_bytes(m_reg[1]), + reverse_bytes(m_reg[2]), + reverse_bytes(m_reg[3])); +#endif + } + + static void transpose(SIMD_4x32& B0, SIMD_4x32& B1, + SIMD_4x32& B2, SIMD_4x32& B3) + { +#if defined(BOTAN_SIMD_USE_SSE2) + __m128i T0 = _mm_unpacklo_epi32(B0.m_reg, B1.m_reg); + __m128i T1 = _mm_unpacklo_epi32(B2.m_reg, B3.m_reg); + __m128i T2 = _mm_unpackhi_epi32(B0.m_reg, B1.m_reg); + __m128i T3 = _mm_unpackhi_epi32(B2.m_reg, B3.m_reg); + B0.m_reg = _mm_unpacklo_epi64(T0, T1); + B1.m_reg = _mm_unpackhi_epi64(T0, T1); + B2.m_reg = _mm_unpacklo_epi64(T2, T3); + B3.m_reg = _mm_unpackhi_epi64(T2, T3); +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + __vector unsigned int T0 = vec_mergeh(B0.m_reg, B2.m_reg); + __vector unsigned int T1 = vec_mergel(B0.m_reg, B2.m_reg); + __vector unsigned int T2 = vec_mergeh(B1.m_reg, B3.m_reg); + __vector unsigned int T3 = vec_mergel(B1.m_reg, B3.m_reg); + + B0.m_reg = vec_mergeh(T0, T2); + B1.m_reg = vec_mergel(T0, T2); + B2.m_reg = vec_mergeh(T1, T3); + B3.m_reg = vec_mergel(T1, T3); +#else + SIMD_4x32 T0(B0.m_reg[0], B1.m_reg[0], B2.m_reg[0], B3.m_reg[0]); + SIMD_4x32 T1(B0.m_reg[1], B1.m_reg[1], B2.m_reg[1], B3.m_reg[1]); + SIMD_4x32 T2(B0.m_reg[2], B1.m_reg[2], B2.m_reg[2], B3.m_reg[2]); + SIMD_4x32 T3(B0.m_reg[3], B1.m_reg[3], B2.m_reg[3], B3.m_reg[3]); + + B0 = T0; + B1 = T1; + B2 = T2; + B3 = T3; +#endif + } + + private: +#if defined(BOTAN_SIMD_USE_SSE2) + explicit SIMD_4x32(__m128i in) { m_reg = in; } +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + explicit SIMD_4x32(__vector unsigned int input) { m_reg = input; } +#endif + +#if defined(BOTAN_SIMD_USE_SSE2) + __m128i m_reg; +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + __vector unsigned int m_reg; +#else + uint32_t m_reg[4]; +#endif + }; + +typedef SIMD_4x32 SIMD_32; + +} #endif diff --git a/src/lib/utils/simd/simd_altivec/info.txt b/src/lib/utils/simd/simd_altivec/info.txt deleted file mode 100644 index 19168a928..000000000 --- a/src/lib/utils/simd/simd_altivec/info.txt +++ /dev/null @@ -1,9 +0,0 @@ -define SIMD_ALTIVEC 20131128 - -need_isa altivec - -load_on dep - -<header:internal> -simd_altivec.h -</header:internal> diff --git a/src/lib/utils/simd/simd_altivec/simd_altivec.h b/src/lib/utils/simd/simd_altivec/simd_altivec.h deleted file mode 100644 index 3963f2817..000000000 --- a/src/lib/utils/simd/simd_altivec/simd_altivec.h +++ /dev/null @@ -1,213 +0,0 @@ -/* -* Lightweight wrappers around AltiVec for 32-bit operations -* (C) 2009 Jack Lloyd -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#ifndef BOTAN_SIMD_ALTIVEC_H__ -#define BOTAN_SIMD_ALTIVEC_H__ - -#if defined(BOTAN_TARGET_SUPPORTS_ALTIVEC) - -#include <botan/loadstor.h> -#include <botan/cpuid.h> - -#include <altivec.h> -#undef vector -#undef bool - -namespace Botan { - -class SIMD_Altivec - { - public: - SIMD_Altivec(const u32bit B[4]) - { - m_reg = (__vector unsigned int){B[0], B[1], B[2], B[3]}; - } - - SIMD_Altivec(u32bit B0, u32bit B1, u32bit B2, u32bit B3) - { - m_reg = (__vector unsigned int){B0, B1, B2, B3}; - } - - SIMD_Altivec(u32bit B) - { - m_reg = (__vector unsigned int){B, B, B, B}; - } - - static SIMD_Altivec load_le(const void* in) - { - const u32bit* in_32 = static_cast<const u32bit*>(in); - - __vector unsigned int R0 = vec_ld(0, in_32); - __vector unsigned int R1 = vec_ld(12, in_32); - - __vector unsigned char perm = vec_lvsl(0, in_32); - - perm = vec_xor(perm, vec_splat_u8(3)); - - R0 = vec_perm(R0, R1, perm); - - return SIMD_Altivec(R0); - } - - static SIMD_Altivec load_be(const void* in) - { - const u32bit* in_32 = static_cast<const u32bit*>(in); - - __vector unsigned int R0 = vec_ld(0, in_32); - __vector unsigned int R1 = vec_ld(12, in_32); - - __vector unsigned char perm = vec_lvsl(0, in_32); - - R0 = vec_perm(R0, R1, perm); - - return SIMD_Altivec(R0); - } - - void store_le(byte out[]) const - { - __vector unsigned char perm = vec_lvsl(0, static_cast<u32bit*>(nullptr)); - - perm = vec_xor(perm, vec_splat_u8(3)); - - union { - __vector unsigned int V; - u32bit R[4]; - } vec; - - vec.V = vec_perm(m_reg, m_reg, perm); - - Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]); - } - - void store_be(byte out[]) const - { - union { - __vector unsigned int V; - u32bit R[4]; - } vec; - - vec.V = m_reg; - - Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]); - } - - void rotate_left(size_t rot) - { - const unsigned int r = static_cast<unsigned int>(rot); - m_reg = vec_rl(m_reg, (__vector unsigned int){r, r, r, r}); - } - - void rotate_right(size_t rot) - { - rotate_left(32 - rot); - } - - void operator+=(const SIMD_Altivec& other) - { - m_reg = vec_add(m_reg, other.m_reg); - } - - SIMD_Altivec operator+(const SIMD_Altivec& other) const - { - return vec_add(m_reg, other.m_reg); - } - - void operator-=(const SIMD_Altivec& other) - { - m_reg = vec_sub(m_reg, other.m_reg); - } - - SIMD_Altivec operator-(const SIMD_Altivec& other) const - { - return vec_sub(m_reg, other.m_reg); - } - - void operator^=(const SIMD_Altivec& other) - { - m_reg = vec_xor(m_reg, other.m_reg); - } - - SIMD_Altivec operator^(const SIMD_Altivec& other) const - { - return vec_xor(m_reg, other.m_reg); - } - - void operator|=(const SIMD_Altivec& other) - { - m_reg = vec_or(m_reg, other.m_reg); - } - - SIMD_Altivec operator&(const SIMD_Altivec& other) - { - return vec_and(m_reg, other.m_reg); - } - - void operator&=(const SIMD_Altivec& other) - { - m_reg = vec_and(m_reg, other.m_reg); - } - - SIMD_Altivec operator<<(size_t shift) const - { - const unsigned int s = static_cast<unsigned int>(shift); - return vec_sl(m_reg, (__vector unsigned int){s, s, s, s}); - } - - SIMD_Altivec operator>>(size_t shift) const - { - const unsigned int s = static_cast<unsigned int>(shift); - return vec_sr(m_reg, (__vector unsigned int){s, s, s, s}); - } - - SIMD_Altivec operator~() const - { - return vec_nor(m_reg, m_reg); - } - - SIMD_Altivec andc(const SIMD_Altivec& other) - { - /* - AltiVec does arg1 & ~arg2 rather than SSE's ~arg1 & arg2 - so swap the arguments - */ - return vec_andc(other.m_reg, m_reg); - } - - SIMD_Altivec bswap() const - { - __vector unsigned char perm = vec_lvsl(0, static_cast<u32bit*>(nullptr)); - - perm = vec_xor(perm, vec_splat_u8(3)); - - return SIMD_Altivec(vec_perm(m_reg, m_reg, perm)); - } - - static void transpose(SIMD_Altivec& B0, SIMD_Altivec& B1, - SIMD_Altivec& B2, SIMD_Altivec& B3) - { - __vector unsigned int T0 = vec_mergeh(B0.m_reg, B2.m_reg); - __vector unsigned int T1 = vec_mergel(B0.m_reg, B2.m_reg); - __vector unsigned int T2 = vec_mergeh(B1.m_reg, B3.m_reg); - __vector unsigned int T3 = vec_mergel(B1.m_reg, B3.m_reg); - - B0.m_reg = vec_mergeh(T0, T2); - B1.m_reg = vec_mergel(T0, T2); - B2.m_reg = vec_mergeh(T1, T3); - B3.m_reg = vec_mergel(T1, T3); - } - - private: - SIMD_Altivec(__vector unsigned int input) { m_reg = input; } - - __vector unsigned int m_reg; - }; - -} - -#endif - -#endif diff --git a/src/lib/utils/simd/simd_scalar/info.txt b/src/lib/utils/simd/simd_scalar/info.txt deleted file mode 100644 index 26a9fbfee..000000000 --- a/src/lib/utils/simd/simd_scalar/info.txt +++ /dev/null @@ -1,7 +0,0 @@ -define SIMD_SCALAR 20131128 - -load_on dep - -<header:internal> -simd_scalar.h -</header:internal> diff --git a/src/lib/utils/simd/simd_scalar/simd_scalar.h b/src/lib/utils/simd/simd_scalar/simd_scalar.h deleted file mode 100644 index 28d72c615..000000000 --- a/src/lib/utils/simd/simd_scalar/simd_scalar.h +++ /dev/null @@ -1,213 +0,0 @@ -/* -* Scalar emulation of SIMD -* (C) 2009,2013 Jack Lloyd -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#ifndef BOTAN_SIMD_SCALAR_H__ -#define BOTAN_SIMD_SCALAR_H__ - -#include <botan/loadstor.h> -#include <botan/bswap.h> - -namespace Botan { - -/** -* Fake SIMD, using plain scalar operations -* Often still faster than iterative on superscalar machines -*/ -template<typename T, size_t N> -class SIMD_Scalar - { - public: - static size_t size() { return N; } - - SIMD_Scalar() { /* uninitialized */ } - - SIMD_Scalar(const T B[N]) - { - for(size_t i = 0; i != size(); ++i) - m_v[i] = B[i]; - } - - SIMD_Scalar(T B) - { - for(size_t i = 0; i != size(); ++i) - m_v[i] = B; - } - - static SIMD_Scalar<T,N> load_le(const void* in) - { - SIMD_Scalar<T,N> out; - const byte* in_b = static_cast<const byte*>(in); - - for(size_t i = 0; i != size(); ++i) - out.m_v[i] = Botan::load_le<T>(in_b, i); - - return out; - } - - static SIMD_Scalar<T,N> load_be(const void* in) - { - SIMD_Scalar<T,N> out; - const byte* in_b = static_cast<const byte*>(in); - - for(size_t i = 0; i != size(); ++i) - out.m_v[i] = Botan::load_be<T>(in_b, i); - - return out; - } - - void store_le(byte out[]) const - { - for(size_t i = 0; i != size(); ++i) - Botan::store_le(m_v[i], out + i*sizeof(T)); - } - - void store_be(byte out[]) const - { - for(size_t i = 0; i != size(); ++i) - Botan::store_be(m_v[i], out + i*sizeof(T)); - } - - void rotate_left(size_t rot) - { - for(size_t i = 0; i != size(); ++i) - m_v[i] = Botan::rotate_left(m_v[i], rot); - } - - void rotate_right(size_t rot) - { - for(size_t i = 0; i != size(); ++i) - m_v[i] = Botan::rotate_right(m_v[i], rot); - } - - void operator+=(const SIMD_Scalar<T,N>& other) - { - for(size_t i = 0; i != size(); ++i) - m_v[i] += other.m_v[i]; - } - - void operator-=(const SIMD_Scalar<T,N>& other) - { - for(size_t i = 0; i != size(); ++i) - m_v[i] -= other.m_v[i]; - } - - SIMD_Scalar<T,N> operator+(const SIMD_Scalar<T,N>& other) const - { - SIMD_Scalar<T,N> out = *this; - out += other; - return out; - } - - SIMD_Scalar<T,N> operator-(const SIMD_Scalar<T,N>& other) const - { - SIMD_Scalar<T,N> out = *this; - out -= other; - return out; - } - - void operator^=(const SIMD_Scalar<T,N>& other) - { - for(size_t i = 0; i != size(); ++i) - m_v[i] ^= other.m_v[i]; - } - - SIMD_Scalar<T,N> operator^(const SIMD_Scalar<T,N>& other) const - { - SIMD_Scalar<T,N> out = *this; - out ^= other; - return out; - } - - void operator|=(const SIMD_Scalar<T,N>& other) - { - for(size_t i = 0; i != size(); ++i) - m_v[i] |= other.m_v[i]; - } - - void operator&=(const SIMD_Scalar<T,N>& other) - { - for(size_t i = 0; i != size(); ++i) - m_v[i] &= other.m_v[i]; - } - - SIMD_Scalar<T,N> operator&(const SIMD_Scalar<T,N>& other) - { - SIMD_Scalar<T,N> out = *this; - out &= other; - return out; - } - - SIMD_Scalar<T,N> operator<<(size_t shift) const - { - SIMD_Scalar<T,N> out = *this; - for(size_t i = 0; i != size(); ++i) - out.m_v[i] <<= shift; - return out; - } - - SIMD_Scalar<T,N> operator>>(size_t shift) const - { - SIMD_Scalar<T,N> out = *this; - for(size_t i = 0; i != size(); ++i) - out.m_v[i] >>= shift; - return out; - } - - SIMD_Scalar<T,N> operator~() const - { - SIMD_Scalar<T,N> out = *this; - for(size_t i = 0; i != size(); ++i) - out.m_v[i] = ~out.m_v[i]; - return out; - } - - // (~reg) & other - SIMD_Scalar<T,N> andc(const SIMD_Scalar<T,N>& other) - { - SIMD_Scalar<T,N> out; - for(size_t i = 0; i != size(); ++i) - out.m_v[i] = (~m_v[i]) & other.m_v[i]; - return out; - } - - SIMD_Scalar<T,N> bswap() const - { - SIMD_Scalar<T,N> out; - for(size_t i = 0; i != size(); ++i) - out.m_v[i] = reverse_bytes(m_v[i]); - return out; - } - - static void transpose(SIMD_Scalar<T,N>& B0, SIMD_Scalar<T,N>& B1, - SIMD_Scalar<T,N>& B2, SIMD_Scalar<T,N>& B3) - { - static_assert(N == 4, "4x4 transpose"); - SIMD_Scalar<T,N> T0({B0.m_v[0], B1.m_v[0], B2.m_v[0], B3.m_v[0]}); - SIMD_Scalar<T,N> T1({B0.m_v[1], B1.m_v[1], B2.m_v[1], B3.m_v[1]}); - SIMD_Scalar<T,N> T2({B0.m_v[2], B1.m_v[2], B2.m_v[2], B3.m_v[2]}); - SIMD_Scalar<T,N> T3({B0.m_v[3], B1.m_v[3], B2.m_v[3], B3.m_v[3]}); - - B0 = T0; - B1 = T1; - B2 = T2; - B3 = T3; - } - - private: - SIMD_Scalar(std::initializer_list<T> B) - { - size_t i = 0; - for(auto v = B.begin(); v != B.end(); ++v) - m_v[i++] = *v; - } - - T m_v[N]; - }; - -} - -#endif diff --git a/src/lib/utils/simd/simd_sse2/info.txt b/src/lib/utils/simd/simd_sse2/info.txt deleted file mode 100644 index bd9e430cb..000000000 --- a/src/lib/utils/simd/simd_sse2/info.txt +++ /dev/null @@ -1,9 +0,0 @@ -define SIMD_SSE2 20131128 - -need_isa sse2 - -load_on dep - -<header:internal> -simd_sse2.h -</header:internal> diff --git a/src/lib/utils/simd/simd_sse2/simd_sse2.h b/src/lib/utils/simd/simd_sse2/simd_sse2.h deleted file mode 100644 index 551e9189c..000000000 --- a/src/lib/utils/simd/simd_sse2/simd_sse2.h +++ /dev/null @@ -1,167 +0,0 @@ -/* -* Lightweight wrappers for SSE2 intrinsics for 32-bit operations -* (C) 2009 Jack Lloyd -* -* Botan is released under the Simplified BSD License (see license.txt) -*/ - -#ifndef BOTAN_SIMD_SSE_H__ -#define BOTAN_SIMD_SSE_H__ - -#if defined(BOTAN_TARGET_SUPPORTS_SSE2) - -#include <botan/cpuid.h> -#include <emmintrin.h> - -namespace Botan { - -class SIMD_SSE2 - { - public: - explicit SIMD_SSE2(const u32bit B[4]) - { - m_reg = _mm_loadu_si128(reinterpret_cast<const __m128i*>(B)); - } - - SIMD_SSE2(u32bit B0, u32bit B1, u32bit B2, u32bit B3) - { - m_reg = _mm_set_epi32(B0, B1, B2, B3); - } - - explicit SIMD_SSE2(u32bit B) - { - m_reg = _mm_set1_epi32(B); - } - - static SIMD_SSE2 load_le(const void* in) - { - return SIMD_SSE2(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in))); - } - - static SIMD_SSE2 load_be(const void* in) - { - return load_le(in).bswap(); - } - - void store_le(byte out[]) const - { - _mm_storeu_si128(reinterpret_cast<__m128i*>(out), m_reg); - } - - void store_be(byte out[]) const - { - bswap().store_le(out); - } - - void rotate_left(size_t rot) - { - m_reg = _mm_or_si128(_mm_slli_epi32(m_reg, static_cast<int>(rot)), - _mm_srli_epi32(m_reg, static_cast<int>(32-rot))); - } - - void rotate_right(size_t rot) - { - rotate_left(32 - rot); - } - - void operator+=(const SIMD_SSE2& other) - { - m_reg = _mm_add_epi32(m_reg, other.m_reg); - } - - SIMD_SSE2 operator+(const SIMD_SSE2& other) const - { - return SIMD_SSE2(_mm_add_epi32(m_reg, other.m_reg)); - } - - void operator-=(const SIMD_SSE2& other) - { - m_reg = _mm_sub_epi32(m_reg, other.m_reg); - } - - SIMD_SSE2 operator-(const SIMD_SSE2& other) const - { - return SIMD_SSE2(_mm_sub_epi32(m_reg, other.m_reg)); - } - - void operator^=(const SIMD_SSE2& other) - { - m_reg = _mm_xor_si128(m_reg, other.m_reg); - } - - SIMD_SSE2 operator^(const SIMD_SSE2& other) const - { - return SIMD_SSE2(_mm_xor_si128(m_reg, other.m_reg)); - } - - void operator|=(const SIMD_SSE2& other) - { - m_reg = _mm_or_si128(m_reg, other.m_reg); - } - - SIMD_SSE2 operator&(const SIMD_SSE2& other) - { - return SIMD_SSE2(_mm_and_si128(m_reg, other.m_reg)); - } - - void operator&=(const SIMD_SSE2& other) - { - m_reg = _mm_and_si128(m_reg, other.m_reg); - } - - SIMD_SSE2 operator<<(size_t shift) const - { - return SIMD_SSE2(_mm_slli_epi32(m_reg, static_cast<int>(shift))); - } - - SIMD_SSE2 operator>>(size_t shift) const - { - return SIMD_SSE2(_mm_srli_epi32(m_reg, static_cast<int>(shift))); - } - - SIMD_SSE2 operator~() const - { - return SIMD_SSE2(_mm_xor_si128(m_reg, _mm_set1_epi32(0xFFFFFFFF))); - } - - // (~reg) & other - SIMD_SSE2 andc(const SIMD_SSE2& other) - { - return SIMD_SSE2(_mm_andnot_si128(m_reg, other.m_reg)); - } - - SIMD_SSE2 bswap() const - { - __m128i T = m_reg; - - T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1)); - T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1)); - - return SIMD_SSE2(_mm_or_si128(_mm_srli_epi16(T, 8), - _mm_slli_epi16(T, 8))); - } - - static void transpose(SIMD_SSE2& B0, SIMD_SSE2& B1, - SIMD_SSE2& B2, SIMD_SSE2& B3) - { - __m128i T0 = _mm_unpacklo_epi32(B0.m_reg, B1.m_reg); - __m128i T1 = _mm_unpacklo_epi32(B2.m_reg, B3.m_reg); - __m128i T2 = _mm_unpackhi_epi32(B0.m_reg, B1.m_reg); - __m128i T3 = _mm_unpackhi_epi32(B2.m_reg, B3.m_reg); - B0.m_reg = _mm_unpacklo_epi64(T0, T1); - B1.m_reg = _mm_unpackhi_epi64(T0, T1); - B2.m_reg = _mm_unpacklo_epi64(T2, T3); - B3.m_reg = _mm_unpackhi_epi64(T2, T3); - } - - private: - explicit SIMD_SSE2(__m128i in) { m_reg = in; } - - __m128i m_reg; - }; - -} - -#endif - -#endif |