diff options
Diffstat (limited to 'src/math/bigint')
27 files changed, 1 insertions, 3742 deletions
diff --git a/src/math/bigint/info.txt b/src/math/bigint/info.txt index 7892a6edf..6057c708e 100644 --- a/src/math/bigint/info.txt +++ b/src/math/bigint/info.txt @@ -5,13 +5,8 @@ define BIGINT <header:public> bigint.h divide.h -mp_types.h </header:public> -<header:internal> -mp_core.h -</header:internal> - <source> big_code.cpp big_io.cpp @@ -20,18 +15,11 @@ big_ops3.cpp big_rand.cpp bigint.cpp divide.cpp -mp_asm.cpp -mp_comba.cpp -mp_karat.cpp -mp_misc.cpp -mp_shift.cpp </source> <requires> alloc +mp hex -mp_amd64|mp_msvc64|mp_asm64|mp_ia32|mp_ia32_msvc|mp_generic -monty_generic -mulop_generic rng </requires> diff --git a/src/math/bigint/monty_generic/info.txt b/src/math/bigint/monty_generic/info.txt deleted file mode 100644 index cd05ccdc0..000000000 --- a/src/math/bigint/monty_generic/info.txt +++ /dev/null @@ -1,5 +0,0 @@ -load_on dep - -<source> -mp_monty.cpp -</source> diff --git a/src/math/bigint/monty_generic/mp_monty.cpp b/src/math/bigint/monty_generic/mp_monty.cpp deleted file mode 100644 index bce35259a..000000000 --- a/src/math/bigint/monty_generic/mp_monty.cpp +++ /dev/null @@ -1,72 +0,0 @@ -/* -* Montgomery Reduction -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Distributed under the terms of the Botan license -*/ - -#include <botan/internal/mp_core.h> -#include <botan/internal/mp_asm.h> -#include <botan/internal/mp_asmi.h> -#include <botan/mem_ops.h> - -namespace Botan { - -extern "C" { - -/* -* Montgomery Reduction Algorithm -*/ -void bigint_monty_redc(word z[], u32bit z_size, - word ws[], - const word x[], u32bit x_size, - word u) - { - const u32bit blocks_of_8 = x_size - (x_size % 8); - - for(u32bit i = 0; i != x_size; ++i) - { - word* z_i = z + i; - - const word y = z_i[0] * u; - - /* - bigint_linmul3(ws, x, x_size, y); - bigint_add2(z_i, z_size - i, ws, x_size+1); - */ - word carry = 0; - - for(u32bit j = 0; j != blocks_of_8; j += 8) - carry = word8_madd3(z_i + j, x + j, y, carry); - - for(u32bit j = blocks_of_8; j != x_size; ++j) - z_i[j] = word_madd3(x[j], y, z_i[j], &carry); - - word z_sum = z_i[x_size] + carry; - carry = (z_sum < z_i[x_size]); - z_i[x_size] = z_sum; - - // Note: not constant time - for(u32bit j = x_size + 1; carry && j != z_size - i; ++j) - { - ++z_i[j]; - carry = !z_i[j]; - } - } - - word borrow = 0; - for(u32bit i = 0; i != x_size; ++i) - ws[i] = word_sub(z[x_size + i], x[i], &borrow); - - ws[x_size] = word_sub(z[x_size+x_size], 0, &borrow); - - copy_mem(ws + x_size + 1, z + x_size, x_size + 1); - - copy_mem(z, ws + borrow*(x_size+1), x_size + 1); - clear_mem(z + x_size + 1, z_size - x_size - 1); - } - -} - -} diff --git a/src/math/bigint/mp_amd64/info.txt b/src/math/bigint/mp_amd64/info.txt deleted file mode 100644 index 11cc380e2..000000000 --- a/src/math/bigint/mp_amd64/info.txt +++ /dev/null @@ -1,18 +0,0 @@ -load_on dep - -mp_bits 64 - -<header:internal> -mp_asm.h -mp_asmi.h -</header:internal> - -<arch> -amd64 -</arch> - -<cc> -clang -gcc -icc -</cc> diff --git a/src/math/bigint/mp_amd64/mp_asm.h b/src/math/bigint/mp_amd64/mp_asm.h deleted file mode 100644 index fa66d04f3..000000000 --- a/src/math/bigint/mp_amd64/mp_asm.h +++ /dev/null @@ -1,69 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2008 Jack Lloyd -* 2006 Luca Piccarreta -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_ASM_H__ -#define BOTAN_MP_ASM_H__ - -#include <botan/mp_types.h> - -#if (BOTAN_MP_WORD_BITS != 64) - #error The mp_amd64 module requires that BOTAN_MP_WORD_BITS == 64 -#endif - -namespace Botan { - -extern "C" { - -/* -* Helper Macros for amd64 Assembly -*/ -#define ASM(x) x "\n\t" - -/* -* Word Multiply -*/ -inline word word_madd2(word a, word b, word* c) - { - asm( - ASM("mulq %[b]") - ASM("addq %[c],%[a]") - ASM("adcq $0,%[carry]") - - : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) - : "0"(a), "1"(b), [c]"g"(*c) : "cc"); - - return a; - } - -/* -* Word Multiply/Add -*/ -inline word word_madd3(word a, word b, word c, word* d) - { - asm( - ASM("mulq %[b]") - - ASM("addq %[c],%[a]") - ASM("adcq $0,%[carry]") - - ASM("addq %[d],%[a]") - ASM("adcq $0,%[carry]") - - : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) - : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); - - return a; - } - -#undef ASM - -} - -} - -#endif diff --git a/src/math/bigint/mp_amd64/mp_asmi.h b/src/math/bigint/mp_amd64/mp_asmi.h deleted file mode 100644 index adf7774ef..000000000 --- a/src/math/bigint/mp_amd64/mp_asmi.h +++ /dev/null @@ -1,248 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_ASM_INTERNAL_H__ -#define BOTAN_MP_ASM_INTERNAL_H__ - -#include <botan/internal/mp_asm.h> - -namespace Botan { - -extern "C" { - -/* -* Helper Macros for amd64 Assembly -*/ -#ifndef ASM - #define ASM(x) x "\n\t" -#endif - -#define ADDSUB2_OP(OPERATION, INDEX) \ - ASM("movq 8*" #INDEX "(%[y]), %[carry]") \ - ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])") \ - -#define ADDSUB3_OP(OPERATION, INDEX) \ - ASM("movq 8*" #INDEX "(%[x]), %[carry]") \ - ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \ - ASM("movq %[carry], 8*" #INDEX "(%[z])") \ - -#define LINMUL_OP(WRITE_TO, INDEX) \ - ASM("movq 8*" #INDEX "(%[x]),%%rax") \ - ASM("mulq %[y]") \ - ASM("addq %[carry],%%rax") \ - ASM("adcq $0,%%rdx") \ - ASM("movq %%rdx,%[carry]") \ - ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])") - -#define MULADD_OP(IGNORED, INDEX) \ - ASM("movq 8*" #INDEX "(%[x]),%%rax") \ - ASM("mulq %[y]") \ - ASM("addq %[carry],%%rax") \ - ASM("adcq $0,%%rdx") \ - ASM("addq 8*" #INDEX "(%[z]),%%rax") \ - ASM("adcq $0,%%rdx") \ - ASM("movq %%rdx,%[carry]") \ - ASM("movq %%rax, 8*" #INDEX " (%[z])") - -#define DO_8_TIMES(MACRO, ARG) \ - MACRO(ARG, 0) \ - MACRO(ARG, 1) \ - MACRO(ARG, 2) \ - MACRO(ARG, 3) \ - MACRO(ARG, 4) \ - MACRO(ARG, 5) \ - MACRO(ARG, 6) \ - MACRO(ARG, 7) - -#define ADD_OR_SUBTRACT(CORE_CODE) \ - ASM("rorq %[carry]") \ - CORE_CODE \ - ASM("sbbq %[carry],%[carry]") \ - ASM("negq %[carry]") - -/* -* Word Addition -*/ -inline word word_add(word x, word y, word* carry) - { - asm( - ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]")) - : [x]"=r"(x), [carry]"=r"(*carry) - : "0"(x), [y]"rm"(y), "1"(*carry) - : "cc"); - return x; - } - -/* -* Eight Word Block Addition, Two Argument -*/ -inline word word8_add2(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Addition, Three Argument -*/ -inline word word8_add3(word z[8], const word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Word Subtraction -*/ -inline word word_sub(word x, word y, word* carry) - { - asm( - ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]")) - : [x]"=r"(x), [carry]"=r"(*carry) - : "0"(x), [y]"rm"(y), "1"(*carry) - : "cc"); - return x; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2_rev(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) - : [carry]"=r"(carry) - : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Subtraction, Three Argument -*/ -inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul2(word x[8], word y, word carry) - { - asm( - DO_8_TIMES(LINMUL_OP, "x") - : [carry]"=r"(carry) - : [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%rax", "%rdx"); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul3(word z[8], const word x[8], word y, word carry) - { - asm( - DO_8_TIMES(LINMUL_OP, "z") - : [carry]"=r"(carry) - : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%rax", "%rdx"); - return carry; - } - -/* -* Eight Word Block Multiply/Add -*/ -inline word word8_madd3(word z[8], const word x[8], word y, word carry) - { - asm( - DO_8_TIMES(MULADD_OP, "") - : [carry]"=r"(carry) - : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%rax", "%rdx"); - return carry; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) - { - asm( - ASM("mulq %[y]") - - ASM("addq %[x],%[w0]") - ASM("adcq %[y],%[w1]") - ASM("adcq $0,%[w2]") - - : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) - : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) - : "cc"); - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) - { - asm( - ASM("mulq %[y]") - - ASM("addq %[x],%[w0]") - ASM("adcq %[y],%[w1]") - ASM("adcq $0,%[w2]") - - ASM("addq %[x],%[w0]") - ASM("adcq %[y],%[w1]") - ASM("adcq $0,%[w2]") - - : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) - : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) - : "cc"); - } - - -#undef ASM -#undef DO_8_TIMES -#undef ADD_OR_SUBTRACT -#undef ADDSUB2_OP -#undef ADDSUB3_OP -#undef LINMUL_OP -#undef MULADD_OP - -} - -} -#endif diff --git a/src/math/bigint/mp_asm.cpp b/src/math/bigint/mp_asm.cpp deleted file mode 100644 index 4fcdee7a4..000000000 --- a/src/math/bigint/mp_asm.cpp +++ /dev/null @@ -1,183 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Distributed under the terms of the Botan license -*/ - -#include <botan/internal/mp_asm.h> -#include <botan/internal/mp_asmi.h> -#include <botan/internal/mp_core.h> -#include <botan/exceptn.h> -#include <botan/mem_ops.h> - -namespace Botan { - -extern "C" { - -/* -* Two Operand Addition, No Carry -*/ -word bigint_add2_nc(word x[], u32bit x_size, const word y[], u32bit y_size) - { - word carry = 0; - - const u32bit blocks = y_size - (y_size % 8); - - for(u32bit i = 0; i != blocks; i += 8) - carry = word8_add2(x + i, y + i, carry); - - for(u32bit i = blocks; i != y_size; ++i) - x[i] = word_add(x[i], y[i], &carry); - - for(u32bit i = y_size; i != x_size; ++i) - x[i] = word_add(x[i], 0, &carry); - - return carry; - } - -/* -* Three Operand Addition, No Carry -*/ -word bigint_add3_nc(word z[], const word x[], u32bit x_size, - const word y[], u32bit y_size) - { - if(x_size < y_size) - { return bigint_add3_nc(z, y, y_size, x, x_size); } - - word carry = 0; - - const u32bit blocks = y_size - (y_size % 8); - - for(u32bit i = 0; i != blocks; i += 8) - carry = word8_add3(z + i, x + i, y + i, carry); - - for(u32bit i = blocks; i != y_size; ++i) - z[i] = word_add(x[i], y[i], &carry); - - for(u32bit i = y_size; i != x_size; ++i) - z[i] = word_add(x[i], 0, &carry); - - return carry; - } - -/* -* Two Operand Addition -*/ -void bigint_add2(word x[], u32bit x_size, const word y[], u32bit y_size) - { - x[x_size] += bigint_add2_nc(x, x_size, y, y_size); - } - -/* -* Three Operand Addition -*/ -void bigint_add3(word z[], const word x[], u32bit x_size, - const word y[], u32bit y_size) - { - z[(x_size > y_size ? x_size : y_size)] += - bigint_add3_nc(z, x, x_size, y, y_size); - } - -/* -* Two Operand Subtraction -*/ -word bigint_sub2(word x[], u32bit x_size, const word y[], u32bit y_size) - { - word borrow = 0; - - const u32bit blocks = y_size - (y_size % 8); - - for(u32bit i = 0; i != blocks; i += 8) - borrow = word8_sub2(x + i, y + i, borrow); - - for(u32bit i = blocks; i != y_size; ++i) - x[i] = word_sub(x[i], y[i], &borrow); - - for(u32bit i = y_size; i != x_size; ++i) - x[i] = word_sub(x[i], 0, &borrow); - - return borrow; - } - -/* -* Two Operand Subtraction x = y - x -*/ -void bigint_sub2_rev(word x[], const word y[], u32bit y_size) - { - word borrow = 0; - - const u32bit blocks = y_size - (y_size % 8); - - for(u32bit i = 0; i != blocks; i += 8) - borrow = word8_sub2_rev(x + i, y + i, borrow); - - for(u32bit i = blocks; i != y_size; ++i) - x[i] = word_sub(y[i], x[i], &borrow); - - if(borrow) - throw Internal_Error("bigint_sub2_rev: x >= y"); - } - -/* -* Three Operand Subtraction -*/ -word bigint_sub3(word z[], const word x[], u32bit x_size, - const word y[], u32bit y_size) - { - word borrow = 0; - - const u32bit blocks = y_size - (y_size % 8); - - for(u32bit i = 0; i != blocks; i += 8) - borrow = word8_sub3(z + i, x + i, y + i, borrow); - - for(u32bit i = blocks; i != y_size; ++i) - z[i] = word_sub(x[i], y[i], &borrow); - - for(u32bit i = y_size; i != x_size; ++i) - z[i] = word_sub(x[i], 0, &borrow); - - return borrow; - } - -/* -* Two Operand Linear Multiply -*/ -void bigint_linmul2(word x[], u32bit x_size, word y) - { - const u32bit blocks = x_size - (x_size % 8); - - word carry = 0; - - for(u32bit i = 0; i != blocks; i += 8) - carry = word8_linmul2(x + i, y, carry); - - for(u32bit i = blocks; i != x_size; ++i) - x[i] = word_madd2(x[i], y, &carry); - - x[x_size] = carry; - } - -/* -* Three Operand Linear Multiply -*/ -void bigint_linmul3(word z[], const word x[], u32bit x_size, word y) - { - const u32bit blocks = x_size - (x_size % 8); - - word carry = 0; - - for(u32bit i = 0; i != blocks; i += 8) - carry = word8_linmul3(z + i, x + i, y, carry); - - for(u32bit i = blocks; i != x_size; ++i) - z[i] = word_madd2(x[i], y, &carry); - - z[x_size] = carry; - } - -} - -} diff --git a/src/math/bigint/mp_asm64/info.txt b/src/math/bigint/mp_asm64/info.txt deleted file mode 100644 index fd0242a7a..000000000 --- a/src/math/bigint/mp_asm64/info.txt +++ /dev/null @@ -1,25 +0,0 @@ -mp_bits 64 - -load_on dep - -<header:internal> -mp_asm.h -mp_generic:mp_asmi.h -</header:internal> - -<arch> -#amd64 -alpha -ia64 -mips64 -ppc64 -sparc64 -</arch> - -# The inline asm only works with gcc, but it looks like (at least on -# UltraSPARC), using 64-bit words and the sythensized multiply is a 5 to 25% -# win, so it's probably worth using elsewhere. -<cc> -gcc -sunwspro -</cc> diff --git a/src/math/bigint/mp_asm64/mp_asm.h b/src/math/bigint/mp_asm64/mp_asm.h deleted file mode 100644 index b0906095d..000000000 --- a/src/math/bigint/mp_asm64/mp_asm.h +++ /dev/null @@ -1,122 +0,0 @@ -/* -* MPI Multiply-Add Core -* (C) 1999-2007 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_MADD_H__ -#define BOTAN_MP_MADD_H__ - -#include <botan/mp_types.h> - -namespace Botan { - -#if (BOTAN_MP_WORD_BITS != 64) - #error The mp_asm64 module requires that BOTAN_MP_WORD_BITS == 64 -#endif - -#if defined(BOTAN_TARGET_ARCH_IS_ALPHA) - -#define BOTAN_WORD_MUL(a,b,z1,z0) do { \ - asm("umulh %1,%2,%0" : "=r" (z0) : "r" (a), "r" (b)); \ - z1 = a * b; \ -} while(0); - -#elif defined(BOTAN_TARGET_ARCH_IS_AMD64) - -#define BOTAN_WORD_MUL(a,b,z1,z0) do { \ - asm("mulq %3" : "=d" (z0), "=a" (z1) : \ - "a" (a), "rm" (b) : "cc"); \ -} while(0); - -#elif defined(BOTAN_TARGET_ARCH_IS_IA64) - -#define BOTAN_WORD_MUL(a,b,z1,z0) do { \ - asm("xmpy.hu %0=%1,%2" : "=f" (z0) : "f" (a), "f" (b)); \ - z1 = a * b; \ -} while(0); - -#elif defined(BOTAN_TARGET_ARCH_IS_PPC64) - -#define BOTAN_WORD_MUL(a,b,z1,z0) do { \ - asm("mulhdu %0,%1,%2" : "=r" (z0) : "r" (a), "r" (b) : "cc"); \ - z1 = a * b; \ -} while(0); - -#elif defined(BOTAN_TARGET_ARCH_IS_MIPS64) - -#define BOTAN_WORD_MUL(a,b,z1,z0) do { \ - typedef unsigned int uint128_t __attribute__((mode(TI))); \ - uint128_t r = (uint128_t)a * b; \ - z0 = (r >> 64) & 0xFFFFFFFFFFFFFFFF; \ - z1 = (r ) & 0xFFFFFFFFFFFFFFFF; \ -} while(0); - -#else - -// Do a 64x64->128 multiply using four 64x64->64 multiplies -// plus some adds and shifts. Last resort for CPUs like UltraSPARC, -// with 64-bit registers/ALU, but no 64x64->128 multiply. -inline void bigint_2word_mul(word a, word b, word* z1, word* z0) - { - const u32bit MP_HWORD_BITS = BOTAN_MP_WORD_BITS / 2; - const word MP_HWORD_MASK = ((word)1 << MP_HWORD_BITS) - 1; - - const word a_hi = (a >> MP_HWORD_BITS); - const word a_lo = (a & MP_HWORD_MASK); - const word b_hi = (b >> MP_HWORD_BITS); - const word b_lo = (b & MP_HWORD_MASK); - - word x0 = a_hi * b_hi; - word x1 = a_lo * b_hi; - word x2 = a_hi * b_lo; - word x3 = a_lo * b_lo; - - x2 += x3 >> (MP_HWORD_BITS); - x2 += x1; - if(x2 < x1) - x0 += ((word)1 << MP_HWORD_BITS); - - *z0 = x0 + (x2 >> MP_HWORD_BITS); - *z1 = ((x2 & MP_HWORD_MASK) << MP_HWORD_BITS) + (x3 & MP_HWORD_MASK); - } - -#define BOTAN_WORD_MUL(a,b,z1,z0) bigint_2word_mul(a, b, &z1, &z0) - -#endif - -/* -* Word Multiply/Add -*/ -inline word word_madd2(word a, word b, word* c) - { - word z0 = 0, z1 = 0; - - BOTAN_WORD_MUL(a, b, z1, z0); - - z1 += *c; if(z1 < *c) z0++; - - *c = z0; - return z1; - } - -/* -* Word Multiply/Add -*/ -inline word word_madd3(word a, word b, word c, word* d) - { - word z0 = 0, z1 = 0; - - BOTAN_WORD_MUL(a, b, z1, z0); - - z1 += c; if(z1 < c) z0++; - z1 += *d; if(z1 < *d) z0++; - - *d = z0; - return z1; - } - -} - -#endif diff --git a/src/math/bigint/mp_comba.cpp b/src/math/bigint/mp_comba.cpp deleted file mode 100644 index 2770d3f0a..000000000 --- a/src/math/bigint/mp_comba.cpp +++ /dev/null @@ -1,920 +0,0 @@ -/* -* Comba Multiplication and Squaring -* (C) 1999-2007 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#include <botan/internal/mp_core.h> -#include <botan/internal/mp_asmi.h> - -namespace Botan { - -extern "C" { - -/* -* Comba 4x4 Squaring -*/ -void bigint_comba_sqr4(word z[8], const word x[4]) - { - word w2 = 0, w1 = 0, w0 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], x[0]); - z[0] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[1]); - z[1] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[2]); - word3_muladd(&w2, &w1, &w0, x[1], x[1]); - z[2] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[3]); - word3_muladd_2(&w2, &w1, &w0, x[1], x[2]); - z[3] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[1], x[3]); - word3_muladd(&w2, &w1, &w0, x[2], x[2]); - z[4] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[2], x[3]); - z[5] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[3], x[3]); - z[6] = w0; - z[7] = w1; - } - -/* -* Comba 4x4 Multiplication -*/ -void bigint_comba_mul4(word z[8], const word x[4], const word y[4]) - { - word w2 = 0, w1 = 0, w0 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[0]); - z[0] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[1]); - word3_muladd(&w2, &w1, &w0, x[1], y[0]); - z[1] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[2]); - word3_muladd(&w2, &w1, &w0, x[1], y[1]); - word3_muladd(&w2, &w1, &w0, x[2], y[0]); - z[2] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[3]); - word3_muladd(&w2, &w1, &w0, x[1], y[2]); - word3_muladd(&w2, &w1, &w0, x[2], y[1]); - word3_muladd(&w2, &w1, &w0, x[3], y[0]); - z[3] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[1], y[3]); - word3_muladd(&w2, &w1, &w0, x[2], y[2]); - word3_muladd(&w2, &w1, &w0, x[3], y[1]); - z[4] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[2], y[3]); - word3_muladd(&w2, &w1, &w0, x[3], y[2]); - z[5] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[3], y[3]); - z[6] = w0; - z[7] = w1; - } - -/* -* Comba 6x6 Squaring -*/ -void bigint_comba_sqr6(word z[12], const word x[6]) - { - word w2 = 0, w1 = 0, w0 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], x[0]); - z[0] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[1]); - z[1] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[2]); - word3_muladd(&w2, &w1, &w0, x[1], x[1]); - z[2] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[3]); - word3_muladd_2(&w2, &w1, &w0, x[1], x[2]); - z[3] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[4]); - word3_muladd_2(&w2, &w1, &w0, x[1], x[3]); - word3_muladd(&w2, &w1, &w0, x[2], x[2]); - z[4] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[5]); - word3_muladd_2(&w2, &w1, &w0, x[1], x[4]); - word3_muladd_2(&w2, &w1, &w0, x[2], x[3]); - z[5] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[1], x[5]); - word3_muladd_2(&w2, &w1, &w0, x[2], x[4]); - word3_muladd(&w2, &w1, &w0, x[3], x[3]); - z[6] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[2], x[5]); - word3_muladd_2(&w2, &w1, &w0, x[3], x[4]); - z[7] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[3], x[5]); - word3_muladd(&w2, &w1, &w0, x[4], x[4]); - z[8] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[4], x[5]); - z[9] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[5], x[5]); - z[10] = w0; - z[11] = w1; - } - -/* -* Comba 6x6 Multiplication -*/ -void bigint_comba_mul6(word z[12], const word x[6], const word y[6]) - { - word w2 = 0, w1 = 0, w0 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[0]); - z[0] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[1]); - word3_muladd(&w2, &w1, &w0, x[1], y[0]); - z[1] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[2]); - word3_muladd(&w2, &w1, &w0, x[1], y[1]); - word3_muladd(&w2, &w1, &w0, x[2], y[0]); - z[2] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[3]); - word3_muladd(&w2, &w1, &w0, x[1], y[2]); - word3_muladd(&w2, &w1, &w0, x[2], y[1]); - word3_muladd(&w2, &w1, &w0, x[3], y[0]); - z[3] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[4]); - word3_muladd(&w2, &w1, &w0, x[1], y[3]); - word3_muladd(&w2, &w1, &w0, x[2], y[2]); - word3_muladd(&w2, &w1, &w0, x[3], y[1]); - word3_muladd(&w2, &w1, &w0, x[4], y[0]); - z[4] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[5]); - word3_muladd(&w2, &w1, &w0, x[1], y[4]); - word3_muladd(&w2, &w1, &w0, x[2], y[3]); - word3_muladd(&w2, &w1, &w0, x[3], y[2]); - word3_muladd(&w2, &w1, &w0, x[4], y[1]); - word3_muladd(&w2, &w1, &w0, x[5], y[0]); - z[5] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[1], y[5]); - word3_muladd(&w2, &w1, &w0, x[2], y[4]); - word3_muladd(&w2, &w1, &w0, x[3], y[3]); - word3_muladd(&w2, &w1, &w0, x[4], y[2]); - word3_muladd(&w2, &w1, &w0, x[5], y[1]); - z[6] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[2], y[5]); - word3_muladd(&w2, &w1, &w0, x[3], y[4]); - word3_muladd(&w2, &w1, &w0, x[4], y[3]); - word3_muladd(&w2, &w1, &w0, x[5], y[2]); - z[7] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[3], y[5]); - word3_muladd(&w2, &w1, &w0, x[4], y[4]); - word3_muladd(&w2, &w1, &w0, x[5], y[3]); - z[8] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[4], y[5]); - word3_muladd(&w2, &w1, &w0, x[5], y[4]); - z[9] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[5], y[5]); - z[10] = w0; - z[11] = w1; - } - -/* -* Comba 8x8 Squaring -*/ -void bigint_comba_sqr8(word z[16], const word x[8]) - { - word w2 = 0, w1 = 0, w0 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], x[0]); - z[0] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[1]); - z[1] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[2]); - word3_muladd(&w2, &w1, &w0, x[1], x[1]); - z[2] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[3]); - word3_muladd_2(&w2, &w1, &w0, x[1], x[2]); - z[3] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[4]); - word3_muladd_2(&w2, &w1, &w0, x[1], x[3]); - word3_muladd(&w2, &w1, &w0, x[2], x[2]); - z[4] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[5]); - word3_muladd_2(&w2, &w1, &w0, x[1], x[4]); - word3_muladd_2(&w2, &w1, &w0, x[2], x[3]); - z[5] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[6]); - word3_muladd_2(&w2, &w1, &w0, x[1], x[5]); - word3_muladd_2(&w2, &w1, &w0, x[2], x[4]); - word3_muladd(&w2, &w1, &w0, x[3], x[3]); - z[6] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[7]); - word3_muladd_2(&w2, &w1, &w0, x[1], x[6]); - word3_muladd_2(&w2, &w1, &w0, x[2], x[5]); - word3_muladd_2(&w2, &w1, &w0, x[3], x[4]); - z[7] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[1], x[7]); - word3_muladd_2(&w2, &w1, &w0, x[2], x[6]); - word3_muladd_2(&w2, &w1, &w0, x[3], x[5]); - word3_muladd(&w2, &w1, &w0, x[4], x[4]); - z[8] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[2], x[7]); - word3_muladd_2(&w2, &w1, &w0, x[3], x[6]); - word3_muladd_2(&w2, &w1, &w0, x[4], x[5]); - z[9] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[3], x[7]); - word3_muladd_2(&w2, &w1, &w0, x[4], x[6]); - word3_muladd(&w2, &w1, &w0, x[5], x[5]); - z[10] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[4], x[7]); - word3_muladd_2(&w2, &w1, &w0, x[5], x[6]); - z[11] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[5], x[7]); - word3_muladd(&w2, &w1, &w0, x[6], x[6]); - z[12] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[6], x[7]); - z[13] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[7], x[7]); - z[14] = w0; - z[15] = w1; - } - -/* -* Comba 8x8 Multiplication -*/ -void bigint_comba_mul8(word z[16], const word x[8], const word y[8]) - { - word w2 = 0, w1 = 0, w0 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[0]); - z[0] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[1]); - word3_muladd(&w2, &w1, &w0, x[1], y[0]); - z[1] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[2]); - word3_muladd(&w2, &w1, &w0, x[1], y[1]); - word3_muladd(&w2, &w1, &w0, x[2], y[0]); - z[2] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[3]); - word3_muladd(&w2, &w1, &w0, x[1], y[2]); - word3_muladd(&w2, &w1, &w0, x[2], y[1]); - word3_muladd(&w2, &w1, &w0, x[3], y[0]); - z[3] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[4]); - word3_muladd(&w2, &w1, &w0, x[1], y[3]); - word3_muladd(&w2, &w1, &w0, x[2], y[2]); - word3_muladd(&w2, &w1, &w0, x[3], y[1]); - word3_muladd(&w2, &w1, &w0, x[4], y[0]); - z[4] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[5]); - word3_muladd(&w2, &w1, &w0, x[1], y[4]); - word3_muladd(&w2, &w1, &w0, x[2], y[3]); - word3_muladd(&w2, &w1, &w0, x[3], y[2]); - word3_muladd(&w2, &w1, &w0, x[4], y[1]); - word3_muladd(&w2, &w1, &w0, x[5], y[0]); - z[5] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[6]); - word3_muladd(&w2, &w1, &w0, x[1], y[5]); - word3_muladd(&w2, &w1, &w0, x[2], y[4]); - word3_muladd(&w2, &w1, &w0, x[3], y[3]); - word3_muladd(&w2, &w1, &w0, x[4], y[2]); - word3_muladd(&w2, &w1, &w0, x[5], y[1]); - word3_muladd(&w2, &w1, &w0, x[6], y[0]); - z[6] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[7]); - word3_muladd(&w2, &w1, &w0, x[1], y[6]); - word3_muladd(&w2, &w1, &w0, x[2], y[5]); - word3_muladd(&w2, &w1, &w0, x[3], y[4]); - word3_muladd(&w2, &w1, &w0, x[4], y[3]); - word3_muladd(&w2, &w1, &w0, x[5], y[2]); - word3_muladd(&w2, &w1, &w0, x[6], y[1]); - word3_muladd(&w2, &w1, &w0, x[7], y[0]); - z[7] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[1], y[7]); - word3_muladd(&w2, &w1, &w0, x[2], y[6]); - word3_muladd(&w2, &w1, &w0, x[3], y[5]); - word3_muladd(&w2, &w1, &w0, x[4], y[4]); - word3_muladd(&w2, &w1, &w0, x[5], y[3]); - word3_muladd(&w2, &w1, &w0, x[6], y[2]); - word3_muladd(&w2, &w1, &w0, x[7], y[1]); - z[8] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[2], y[7]); - word3_muladd(&w2, &w1, &w0, x[3], y[6]); - word3_muladd(&w2, &w1, &w0, x[4], y[5]); - word3_muladd(&w2, &w1, &w0, x[5], y[4]); - word3_muladd(&w2, &w1, &w0, x[6], y[3]); - word3_muladd(&w2, &w1, &w0, x[7], y[2]); - z[9] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[3], y[7]); - word3_muladd(&w2, &w1, &w0, x[4], y[6]); - word3_muladd(&w2, &w1, &w0, x[5], y[5]); - word3_muladd(&w2, &w1, &w0, x[6], y[4]); - word3_muladd(&w2, &w1, &w0, x[7], y[3]); - z[10] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[4], y[7]); - word3_muladd(&w2, &w1, &w0, x[5], y[6]); - word3_muladd(&w2, &w1, &w0, x[6], y[5]); - word3_muladd(&w2, &w1, &w0, x[7], y[4]); - z[11] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[5], y[7]); - word3_muladd(&w2, &w1, &w0, x[6], y[6]); - word3_muladd(&w2, &w1, &w0, x[7], y[5]); - z[12] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[6], y[7]); - word3_muladd(&w2, &w1, &w0, x[7], y[6]); - z[13] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[7], y[7]); - z[14] = w0; - z[15] = w1; - } - -/* -* Comba 16x16 Squaring -*/ -void bigint_comba_sqr16(word z[32], const word x[16]) - { - word w2 = 0, w1 = 0, w0 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], x[ 0]); - z[ 0] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 1]); - z[ 1] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 1], x[ 1]); - z[ 2] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 3]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 2]); - z[ 3] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 4]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 2], x[ 2]); - z[ 4] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 5]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 4]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 3]); - z[ 5] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 6]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 5]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 3], x[ 3]); - z[ 6] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 7]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 6]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 5]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 4]); - z[ 7] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 8]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 7]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 6]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 4], x[ 4]); - z[ 8] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 9]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 8]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 7]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 6]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 5]); - z[ 9] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[10]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 9]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 8]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 7]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 5], x[ 5]); - z[10] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[11]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[10]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 9]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 8]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 7]); - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 6]); - z[11] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[11]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[10]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 9]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 8]); - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 6], x[ 6]); - z[12] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[11]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[10]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 9]); - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 8]); - word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 7]); - z[13] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[11]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[10]); - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 9]); - word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 7], x[ 7]); - z[14] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[11]); - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[10]); - word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 9]); - word3_muladd_2(&w2, &w1, &w0, x[ 7], x[ 8]); - z[15] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[11]); - word3_muladd_2(&w2, &w1, &w0, x[ 6], x[10]); - word3_muladd_2(&w2, &w1, &w0, x[ 7], x[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 8], x[ 8]); - z[16] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[ 6], x[11]); - word3_muladd_2(&w2, &w1, &w0, x[ 7], x[10]); - word3_muladd_2(&w2, &w1, &w0, x[ 8], x[ 9]); - z[17] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[ 6], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[ 7], x[11]); - word3_muladd_2(&w2, &w1, &w0, x[ 8], x[10]); - word3_muladd(&w2, &w1, &w0, x[ 9], x[ 9]); - z[18] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[ 6], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[ 7], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[ 8], x[11]); - word3_muladd_2(&w2, &w1, &w0, x[ 9], x[10]); - z[19] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[ 6], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[ 7], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[ 8], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[ 9], x[11]); - word3_muladd(&w2, &w1, &w0, x[10], x[10]); - z[20] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 6], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[ 7], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[ 8], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[ 9], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[10], x[11]); - z[21] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 7], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[ 8], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[ 9], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[10], x[12]); - word3_muladd(&w2, &w1, &w0, x[11], x[11]); - z[22] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 8], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[ 9], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[10], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[11], x[12]); - z[23] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 9], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[10], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[11], x[13]); - word3_muladd(&w2, &w1, &w0, x[12], x[12]); - z[24] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[10], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[11], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[12], x[13]); - z[25] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[11], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[12], x[14]); - word3_muladd(&w2, &w1, &w0, x[13], x[13]); - z[26] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[12], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[13], x[14]); - z[27] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[13], x[15]); - word3_muladd(&w2, &w1, &w0, x[14], x[14]); - z[28] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[14], x[15]); - z[29] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[15], x[15]); - z[30] = w0; - z[31] = w1; - } - -/* -* Comba 16x16 Multiplication -*/ -void bigint_comba_mul16(word z[32], const word x[16], const word y[16]) - { - word w2 = 0, w1 = 0, w0 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 0]); - z[0] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 0]); - z[1] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 0]); - z[2] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 0]); - z[3] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 0]); - z[4] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 0]); - z[5] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 0]); - z[6] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 0]); - z[7] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 0]); - z[8] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 0]); - z[9] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[10]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 0]); - z[10] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[11]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[10]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 0]); - z[11] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[12]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[11]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[10]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 0]); - z[12] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[13]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[12]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[11]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[10]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 0]); - z[13] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[14]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[13]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[12]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[11]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[10]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 0]); - z[14] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[15]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[14]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[13]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[12]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[11]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[10]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 0]); - z[15] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 1], y[15]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[14]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[13]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[12]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[11]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[10]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 1]); - z[16] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 2], y[15]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[14]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[13]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[12]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[11]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[10]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 2]); - z[17] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 3], y[15]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[14]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[13]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[12]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[11]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[10]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 3]); - z[18] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 4], y[15]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[14]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[13]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[12]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[11]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[10]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 4]); - z[19] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 5], y[15]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[14]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[13]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[12]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[11]); - word3_muladd(&w2, &w1, &w0, x[10], y[10]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 5]); - z[20] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 6], y[15]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[14]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[13]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[12]); - word3_muladd(&w2, &w1, &w0, x[10], y[11]); - word3_muladd(&w2, &w1, &w0, x[11], y[10]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 6]); - z[21] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 7], y[15]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[14]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[13]); - word3_muladd(&w2, &w1, &w0, x[10], y[12]); - word3_muladd(&w2, &w1, &w0, x[11], y[11]); - word3_muladd(&w2, &w1, &w0, x[12], y[10]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 7]); - z[22] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 8], y[15]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[14]); - word3_muladd(&w2, &w1, &w0, x[10], y[13]); - word3_muladd(&w2, &w1, &w0, x[11], y[12]); - word3_muladd(&w2, &w1, &w0, x[12], y[11]); - word3_muladd(&w2, &w1, &w0, x[13], y[10]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 8]); - z[23] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 9], y[15]); - word3_muladd(&w2, &w1, &w0, x[10], y[14]); - word3_muladd(&w2, &w1, &w0, x[11], y[13]); - word3_muladd(&w2, &w1, &w0, x[12], y[12]); - word3_muladd(&w2, &w1, &w0, x[13], y[11]); - word3_muladd(&w2, &w1, &w0, x[14], y[10]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 9]); - z[24] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[10], y[15]); - word3_muladd(&w2, &w1, &w0, x[11], y[14]); - word3_muladd(&w2, &w1, &w0, x[12], y[13]); - word3_muladd(&w2, &w1, &w0, x[13], y[12]); - word3_muladd(&w2, &w1, &w0, x[14], y[11]); - word3_muladd(&w2, &w1, &w0, x[15], y[10]); - z[25] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[11], y[15]); - word3_muladd(&w2, &w1, &w0, x[12], y[14]); - word3_muladd(&w2, &w1, &w0, x[13], y[13]); - word3_muladd(&w2, &w1, &w0, x[14], y[12]); - word3_muladd(&w2, &w1, &w0, x[15], y[11]); - z[26] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[12], y[15]); - word3_muladd(&w2, &w1, &w0, x[13], y[14]); - word3_muladd(&w2, &w1, &w0, x[14], y[13]); - word3_muladd(&w2, &w1, &w0, x[15], y[12]); - z[27] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[13], y[15]); - word3_muladd(&w2, &w1, &w0, x[14], y[14]); - word3_muladd(&w2, &w1, &w0, x[15], y[13]); - z[28] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[14], y[15]); - word3_muladd(&w2, &w1, &w0, x[15], y[14]); - z[29] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[15], y[15]); - z[30] = w0; - z[31] = w1; - } - -} - -} diff --git a/src/math/bigint/mp_core.h b/src/math/bigint/mp_core.h deleted file mode 100644 index 63082795f..000000000 --- a/src/math/bigint/mp_core.h +++ /dev/null @@ -1,144 +0,0 @@ -/* -* MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_CORE_H__ -#define BOTAN_MP_CORE_H__ - -#include <botan/mp_types.h> - -namespace Botan { - -/* -* The size of the word type, in bits -*/ -const u32bit MP_WORD_BITS = BOTAN_MP_WORD_BITS; - -extern "C" { - -/* -* Addition/Subtraction Operations -*/ -void bigint_add2(word x[], u32bit x_size, - const word y[], u32bit y_size); - -void bigint_add3(word z[], - const word x[], u32bit x_size, - const word y[], u32bit y_size); - -word bigint_add2_nc(word x[], u32bit x_size, const word y[], u32bit y_size); - -word bigint_add3_nc(word z[], - const word x[], u32bit x_size, - const word y[], u32bit y_size); - -word bigint_sub2(word x[], u32bit x_size, - const word y[], u32bit y_size); - -/** -* x = y - x; assumes y >= x -*/ -void bigint_sub2_rev(word x[], const word y[], u32bit y_size); - -word bigint_sub3(word z[], - const word x[], u32bit x_size, - const word y[], u32bit y_size); - -/* -* Shift Operations -*/ -void bigint_shl1(word x[], u32bit x_size, - u32bit word_shift, u32bit bit_shift); - -void bigint_shr1(word x[], u32bit x_size, - u32bit word_shift, u32bit bit_shift); - -void bigint_shl2(word y[], const word x[], u32bit x_size, - u32bit word_shift, u32bit bit_shift); - -void bigint_shr2(word y[], const word x[], u32bit x_size, - u32bit word_shift, u32bit bit_shift); - -/* -* Simple O(N^2) Multiplication and Squaring -*/ -void bigint_simple_mul(word z[], - const word x[], u32bit x_size, - const word y[], u32bit y_size); - -void bigint_simple_sqr(word z[], const word x[], u32bit x_size); - -/* -* Linear Multiply -*/ -void bigint_linmul2(word x[], u32bit x_size, word y); -void bigint_linmul3(word z[], const word x[], u32bit x_size, word y); - -/* -* Montgomery Reduction -* @param z integer to reduce (also output in first x_size+1 words) -* @param z_size size of z (should be >= 2*x_size+1) -* @param workspace array of at least 2*(x_size+1) words -* @param x modulus -* @param x_size size of x -* @param u Montgomery value -*/ -void bigint_monty_redc(word z[], u32bit z_size, - word workspace[], - const word x[], u32bit x_size, - word u); - -/* -* Division operation -*/ -u32bit bigint_divcore(word q, word y2, word y1, - word x3, word x2, word x1); - -/** -* Compare x and y -*/ -s32bit bigint_cmp(const word x[], u32bit x_size, - const word y[], u32bit y_size); - -/** -* Compute ((n1<<bits) + n0) / d -*/ -word bigint_divop(word n1, word n0, word d); - -/** -* Compute ((n1<<bits) + n0) % d -*/ -word bigint_modop(word n1, word n0, word d); - -/* -* Comba Multiplication / Squaring -*/ -void bigint_comba_mul4(word z[8], const word x[4], const word y[4]); -void bigint_comba_mul6(word z[12], const word x[6], const word y[6]); -void bigint_comba_mul8(word z[16], const word x[8], const word y[8]); -void bigint_comba_mul16(word z[32], const word x[16], const word y[16]); - -void bigint_comba_sqr4(word out[8], const word in[4]); -void bigint_comba_sqr6(word out[12], const word in[6]); -void bigint_comba_sqr8(word out[16], const word in[8]); -void bigint_comba_sqr8(word out[32], const word in[16]); -void bigint_comba_sqr16(word out[64], const word in[32]); - -} - -/* -* High Level Multiplication/Squaring Interfaces -*/ -void bigint_mul(word z[], u32bit z_size, word workspace[], - const word x[], u32bit x_size, u32bit x_sw, - const word y[], u32bit y_size, u32bit y_sw); - -void bigint_sqr(word z[], u32bit z_size, word workspace[], - const word x[], u32bit x_size, u32bit x_sw); - -} - -#endif diff --git a/src/math/bigint/mp_generic/info.txt b/src/math/bigint/mp_generic/info.txt deleted file mode 100644 index ab4d7406a..000000000 --- a/src/math/bigint/mp_generic/info.txt +++ /dev/null @@ -1,6 +0,0 @@ -load_on dep - -<header:internal> -mp_asm.h -mp_asmi.h -</header:internal> diff --git a/src/math/bigint/mp_generic/mp_asm.h b/src/math/bigint/mp_generic/mp_asm.h deleted file mode 100644 index 7c18343ef..000000000 --- a/src/math/bigint/mp_generic/mp_asm.h +++ /dev/null @@ -1,54 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2008 Jack Lloyd -* 2006 Luca Piccarreta -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_ASM_H__ -#define BOTAN_MP_ASM_H__ - -#include <botan/mp_types.h> - -#if (BOTAN_MP_WORD_BITS == 8) - typedef Botan::u16bit dword; -#elif (BOTAN_MP_WORD_BITS == 16) - typedef Botan::u32bit dword; -#elif (BOTAN_MP_WORD_BITS == 32) - typedef Botan::u64bit dword; -#elif (BOTAN_MP_WORD_BITS == 64) - #error BOTAN_MP_WORD_BITS can be 64 only with assembly support -#else - #error BOTAN_MP_WORD_BITS must be 8, 16, 32, or 64 -#endif - -namespace Botan { - -extern "C" { - -/* -* Word Multiply/Add -*/ -inline word word_madd2(word a, word b, word* c) - { - dword z = (dword)a * b + *c; - *c = (word)(z >> BOTAN_MP_WORD_BITS); - return (word)z; - } - -/* -* Word Multiply/Add -*/ -inline word word_madd3(word a, word b, word c, word* d) - { - dword z = (dword)a * b + c + *d; - *d = (word)(z >> BOTAN_MP_WORD_BITS); - return (word)z; - } - -} - -} - -#endif diff --git a/src/math/bigint/mp_generic/mp_asmi.h b/src/math/bigint/mp_generic/mp_asmi.h deleted file mode 100644 index 8225f372d..000000000 --- a/src/math/bigint/mp_generic/mp_asmi.h +++ /dev/null @@ -1,207 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_ASM_INTERNAL_H__ -#define BOTAN_MP_ASM_INTERNAL_H__ - -#include <botan/internal/mp_asm.h> - -namespace Botan { - -extern "C" { - -/* -* Word Addition -*/ -inline word word_add(word x, word y, word* carry) - { - word z = x + y; - word c1 = (z < x); - z += *carry; - *carry = c1 | (z < *carry); - return z; - } - -/* -* Eight Word Block Addition, Two Argument -*/ -inline word word8_add2(word x[8], const word y[8], word carry) - { - x[0] = word_add(x[0], y[0], &carry); - x[1] = word_add(x[1], y[1], &carry); - x[2] = word_add(x[2], y[2], &carry); - x[3] = word_add(x[3], y[3], &carry); - x[4] = word_add(x[4], y[4], &carry); - x[5] = word_add(x[5], y[5], &carry); - x[6] = word_add(x[6], y[6], &carry); - x[7] = word_add(x[7], y[7], &carry); - return carry; - } - -/* -* Eight Word Block Addition, Three Argument -*/ -inline word word8_add3(word z[8], const word x[8], - const word y[8], word carry) - { - z[0] = word_add(x[0], y[0], &carry); - z[1] = word_add(x[1], y[1], &carry); - z[2] = word_add(x[2], y[2], &carry); - z[3] = word_add(x[3], y[3], &carry); - z[4] = word_add(x[4], y[4], &carry); - z[5] = word_add(x[5], y[5], &carry); - z[6] = word_add(x[6], y[6], &carry); - z[7] = word_add(x[7], y[7], &carry); - return carry; - } - -/* -* Word Subtraction -*/ -inline word word_sub(word x, word y, word* carry) - { - word t0 = x - y; - word c1 = (t0 > x); - word z = t0 - *carry; - *carry = c1 | (z > t0); - return z; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2(word x[8], const word y[8], word carry) - { - x[0] = word_sub(x[0], y[0], &carry); - x[1] = word_sub(x[1], y[1], &carry); - x[2] = word_sub(x[2], y[2], &carry); - x[3] = word_sub(x[3], y[3], &carry); - x[4] = word_sub(x[4], y[4], &carry); - x[5] = word_sub(x[5], y[5], &carry); - x[6] = word_sub(x[6], y[6], &carry); - x[7] = word_sub(x[7], y[7], &carry); - return carry; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2_rev(word x[8], const word y[8], word carry) - { - x[0] = word_sub(y[0], x[0], &carry); - x[1] = word_sub(y[1], x[1], &carry); - x[2] = word_sub(y[2], x[2], &carry); - x[3] = word_sub(y[3], x[3], &carry); - x[4] = word_sub(y[4], x[4], &carry); - x[5] = word_sub(y[5], x[5], &carry); - x[6] = word_sub(y[6], x[6], &carry); - x[7] = word_sub(y[7], x[7], &carry); - return carry; - } - -/* -* Eight Word Block Subtraction, Three Argument -*/ -inline word word8_sub3(word z[8], const word x[8], - const word y[8], word carry) - { - z[0] = word_sub(x[0], y[0], &carry); - z[1] = word_sub(x[1], y[1], &carry); - z[2] = word_sub(x[2], y[2], &carry); - z[3] = word_sub(x[3], y[3], &carry); - z[4] = word_sub(x[4], y[4], &carry); - z[5] = word_sub(x[5], y[5], &carry); - z[6] = word_sub(x[6], y[6], &carry); - z[7] = word_sub(x[7], y[7], &carry); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul2(word x[8], word y, word carry) - { - x[0] = word_madd2(x[0], y, &carry); - x[1] = word_madd2(x[1], y, &carry); - x[2] = word_madd2(x[2], y, &carry); - x[3] = word_madd2(x[3], y, &carry); - x[4] = word_madd2(x[4], y, &carry); - x[5] = word_madd2(x[5], y, &carry); - x[6] = word_madd2(x[6], y, &carry); - x[7] = word_madd2(x[7], y, &carry); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul3(word z[8], const word x[8], word y, word carry) - { - z[0] = word_madd2(x[0], y, &carry); - z[1] = word_madd2(x[1], y, &carry); - z[2] = word_madd2(x[2], y, &carry); - z[3] = word_madd2(x[3], y, &carry); - z[4] = word_madd2(x[4], y, &carry); - z[5] = word_madd2(x[5], y, &carry); - z[6] = word_madd2(x[6], y, &carry); - z[7] = word_madd2(x[7], y, &carry); - return carry; - } - -/* -* Eight Word Block Multiply/Add -*/ -inline word word8_madd3(word z[8], const word x[8], word y, word carry) - { - z[0] = word_madd3(x[0], y, z[0], &carry); - z[1] = word_madd3(x[1], y, z[1], &carry); - z[2] = word_madd3(x[2], y, z[2], &carry); - z[3] = word_madd3(x[3], y, z[3], &carry); - z[4] = word_madd3(x[4], y, z[4], &carry); - z[5] = word_madd3(x[5], y, z[5], &carry); - z[6] = word_madd3(x[6], y, z[6], &carry); - z[7] = word_madd3(x[7], y, z[7], &carry); - return carry; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd(word* w2, word* w1, word* w0, word a, word b) - { - word carry = *w0; - *w0 = word_madd2(a, b, &carry); - *w1 += carry; - *w2 += (*w1 < carry) ? 1 : 0; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b) - { - word carry = 0; - a = word_madd2(a, b, &carry); - b = carry; - - word top = (b >> (BOTAN_MP_WORD_BITS-1)); - b <<= 1; - b |= (a >> (BOTAN_MP_WORD_BITS-1)); - a <<= 1; - - carry = 0; - *w0 = word_add(*w0, a, &carry); - *w1 = word_add(*w1, b, &carry); - *w2 = word_add(*w2, top, &carry); - } - -} - -} - -#endif diff --git a/src/math/bigint/mp_ia32/info.txt b/src/math/bigint/mp_ia32/info.txt deleted file mode 100644 index 1659f74cf..000000000 --- a/src/math/bigint/mp_ia32/info.txt +++ /dev/null @@ -1,18 +0,0 @@ -load_on dep - -mp_bits 32 - -<header:internal> -mp_asm.h -mp_asmi.h -</header:internal> - -<arch> -ia32 -</arch> - -<cc> -clang -gcc -icc -</cc> diff --git a/src/math/bigint/mp_ia32/mp_asm.h b/src/math/bigint/mp_ia32/mp_asm.h deleted file mode 100644 index 4d3afc992..000000000 --- a/src/math/bigint/mp_ia32/mp_asm.h +++ /dev/null @@ -1,67 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2008 Jack Lloyd -* 2006 Luca Piccarreta -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_ASM_H__ -#define BOTAN_MP_ASM_H__ - -#include <botan/mp_types.h> - -#if (BOTAN_MP_WORD_BITS != 32) - #error The mp_ia32 module requires that BOTAN_MP_WORD_BITS == 32 -#endif - -namespace Botan { - -extern "C" { - -/* -* Helper Macros for x86 Assembly -*/ -#define ASM(x) x "\n\t" - -/* -* Word Multiply -*/ -inline word word_madd2(word a, word b, word* c) - { - asm( - ASM("mull %[b]") - ASM("addl %[c],%[a]") - ASM("adcl $0,%[carry]") - - : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) - : "0"(a), "1"(b), [c]"g"(*c) : "cc"); - - return a; - } - -/* -* Word Multiply/Add -*/ -inline word word_madd3(word a, word b, word c, word* d) - { - asm( - ASM("mull %[b]") - - ASM("addl %[c],%[a]") - ASM("adcl $0,%[carry]") - - ASM("addl %[d],%[a]") - ASM("adcl $0,%[carry]") - - : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) - : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); - - return a; - } - -} - -} - -#endif diff --git a/src/math/bigint/mp_ia32/mp_asmi.h b/src/math/bigint/mp_ia32/mp_asmi.h deleted file mode 100644 index c7b679e80..000000000 --- a/src/math/bigint/mp_ia32/mp_asmi.h +++ /dev/null @@ -1,240 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_ASM_INTERNAL_H__ -#define BOTAN_MP_ASM_INTERNAL_H__ - -#include <botan/internal/mp_asm.h> - -namespace Botan { - -extern "C" { - -/* -* Helper Macros for x86 Assembly -*/ -#ifndef ASM - #define ASM(x) x "\n\t" -#endif - -#define ADDSUB2_OP(OPERATION, INDEX) \ - ASM("movl 4*" #INDEX "(%[y]), %[carry]") \ - ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])") \ - -#define ADDSUB3_OP(OPERATION, INDEX) \ - ASM("movl 4*" #INDEX "(%[x]), %[carry]") \ - ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]") \ - ASM("movl %[carry], 4*" #INDEX "(%[z])") \ - -#define LINMUL_OP(WRITE_TO, INDEX) \ - ASM("movl 4*" #INDEX "(%[x]),%%eax") \ - ASM("mull %[y]") \ - ASM("addl %[carry],%%eax") \ - ASM("adcl $0,%%edx") \ - ASM("movl %%edx,%[carry]") \ - ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])") - -#define MULADD_OP(IGNORED, INDEX) \ - ASM("movl 4*" #INDEX "(%[x]),%%eax") \ - ASM("mull %[y]") \ - ASM("addl %[carry],%%eax") \ - ASM("adcl $0,%%edx") \ - ASM("addl 4*" #INDEX "(%[z]),%%eax") \ - ASM("adcl $0,%%edx") \ - ASM("movl %%edx,%[carry]") \ - ASM("movl %%eax, 4*" #INDEX " (%[z])") - -#define DO_8_TIMES(MACRO, ARG) \ - MACRO(ARG, 0) \ - MACRO(ARG, 1) \ - MACRO(ARG, 2) \ - MACRO(ARG, 3) \ - MACRO(ARG, 4) \ - MACRO(ARG, 5) \ - MACRO(ARG, 6) \ - MACRO(ARG, 7) - -#define ADD_OR_SUBTRACT(CORE_CODE) \ - ASM("rorl %[carry]") \ - CORE_CODE \ - ASM("sbbl %[carry],%[carry]") \ - ASM("negl %[carry]") - -/* -* Word Addition -*/ -inline word word_add(word x, word y, word* carry) - { - asm( - ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]")) - : [x]"=r"(x), [carry]"=r"(*carry) - : "0"(x), [y]"rm"(y), "1"(*carry) - : "cc"); - return x; - } - -/* -* Eight Word Block Addition, Two Argument -*/ -inline word word8_add2(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Addition, Three Argument -*/ -inline word word8_add3(word z[8], const word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Word Subtraction -*/ -inline word word_sub(word x, word y, word* carry) - { - asm( - ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]")) - : [x]"=r"(x), [carry]"=r"(*carry) - : "0"(x), [y]"rm"(y), "1"(*carry) - : "cc"); - return x; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2_rev(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) - : [carry]"=r"(carry) - : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Subtraction, Three Argument -*/ -inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul2(word x[8], word y, word carry) - { - asm( - DO_8_TIMES(LINMUL_OP, "x") - : [carry]"=r"(carry) - : [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%eax", "%edx"); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul3(word z[8], const word x[8], word y, word carry) - { - asm( - DO_8_TIMES(LINMUL_OP, "z") - : [carry]"=r"(carry) - : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%eax", "%edx"); - return carry; - } - -/* -* Eight Word Block Multiply/Add -*/ -inline word word8_madd3(word z[8], const word x[8], word y, word carry) - { - asm( - DO_8_TIMES(MULADD_OP, "") - : [carry]"=r"(carry) - : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%eax", "%edx"); - return carry; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) - { - asm( - ASM("mull %[y]") - - ASM("addl %[x],%[w0]") - ASM("adcl %[y],%[w1]") - ASM("adcl $0,%[w2]") - - : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) - : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) - : "cc"); - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) - { - asm( - ASM("mull %[y]") - - ASM("addl %[x],%[w0]") - ASM("adcl %[y],%[w1]") - ASM("adcl $0,%[w2]") - - ASM("addl %[x],%[w0]") - ASM("adcl %[y],%[w1]") - ASM("adcl $0,%[w2]") - - : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) - : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) - : "cc"); - } - -} - -} - -#endif diff --git a/src/math/bigint/mp_ia32_msvc/info.txt b/src/math/bigint/mp_ia32_msvc/info.txt deleted file mode 100644 index 55a42c310..000000000 --- a/src/math/bigint/mp_ia32_msvc/info.txt +++ /dev/null @@ -1,16 +0,0 @@ -mp_bits 32 - -load_on dep - -<header:internal> -mp_generic:mp_asm.h -mp_asmi.h -</header:internal> - -<arch> -ia32 -</arch> - -<cc> -msvc -</cc> diff --git a/src/math/bigint/mp_ia32_msvc/mp_asmi.h b/src/math/bigint/mp_ia32_msvc/mp_asmi.h deleted file mode 100644 index aee457d65..000000000 --- a/src/math/bigint/mp_ia32_msvc/mp_asmi.h +++ /dev/null @@ -1,542 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_ASM_INTERNAL_H__ -#define BOTAN_MP_ASM_INTERNAL_H__ - -#include <botan/internal/mp_asm.h> - -namespace Botan { - -extern "C" { - -/* -* Word Addition -*/ -inline word word_add(word x, word y, word* carry) - { - word z = x + y; - word c1 = (z < x); - z += *carry; - *carry = c1 | (z < *carry); - return z; - } - -/* -* Eight Word Block Addition, Two Argument -*/ -inline word word8_add2(word x[8], const word y[8], word carry) - { - __asm { - mov edx,[x] - mov esi,[y] - xor eax,eax - sub eax,[carry] //force CF=1 iff *carry==1 - mov eax,[esi] - adc [edx],eax - mov eax,[esi+4] - adc [edx+4],eax - mov eax,[esi+8] - adc [edx+8],eax - mov eax,[esi+12] - adc [edx+12],eax - mov eax,[esi+16] - adc [edx+16],eax - mov eax,[esi+20] - adc [edx+20],eax - mov eax,[esi+24] - adc [edx+24],eax - mov eax,[esi+28] - adc [edx+28],eax - sbb eax,eax - neg eax - } - } - -/* -* Eight Word Block Addition, Three Argument -*/ -inline word word8_add3(word z[8], const word x[8], const word y[8], word carry) - { - __asm { - mov edi,[x] - mov esi,[y] - mov ebx,[z] - xor eax,eax - sub eax,[carry] //force CF=1 iff *carry==1 - mov eax,[edi] - adc eax,[esi] - mov [ebx],eax - - mov eax,[edi+4] - adc eax,[esi+4] - mov [ebx+4],eax - - mov eax,[edi+8] - adc eax,[esi+8] - mov [ebx+8],eax - - mov eax,[edi+12] - adc eax,[esi+12] - mov [ebx+12],eax - - mov eax,[edi+16] - adc eax,[esi+16] - mov [ebx+16],eax - - mov eax,[edi+20] - adc eax,[esi+20] - mov [ebx+20],eax - - mov eax,[edi+24] - adc eax,[esi+24] - mov [ebx+24],eax - - mov eax,[edi+28] - adc eax,[esi+28] - mov [ebx+28],eax - - sbb eax,eax - neg eax - } - } - -/* -* Word Subtraction -*/ -inline word word_sub(word x, word y, word* carry) - { - word t0 = x - y; - word c1 = (t0 > x); - word z = t0 - *carry; - *carry = c1 | (z > t0); - return z; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2(word x[8], const word y[8], word carry) - { - __asm { - mov edi,[x] - mov esi,[y] - xor eax,eax - sub eax,[carry] //force CF=1 iff *carry==1 - mov eax,[edi] - sbb eax,[esi] - mov [edi],eax - mov eax,[edi+4] - sbb eax,[esi+4] - mov [edi+4],eax - mov eax,[edi+8] - sbb eax,[esi+8] - mov [edi+8],eax - mov eax,[edi+12] - sbb eax,[esi+12] - mov [edi+12],eax - mov eax,[edi+16] - sbb eax,[esi+16] - mov [edi+16],eax - mov eax,[edi+20] - sbb eax,[esi+20] - mov [edi+20],eax - mov eax,[edi+24] - sbb eax,[esi+24] - mov [edi+24],eax - mov eax,[edi+28] - sbb eax,[esi+28] - mov [edi+28],eax - sbb eax,eax - neg eax - } - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2_rev(word x[8], const word y[8], word carry) - { - x[0] = word_sub(y[0], x[0], &carry); - x[1] = word_sub(y[1], x[1], &carry); - x[2] = word_sub(y[2], x[2], &carry); - x[3] = word_sub(y[3], x[3], &carry); - x[4] = word_sub(y[4], x[4], &carry); - x[5] = word_sub(y[5], x[5], &carry); - x[6] = word_sub(y[6], x[6], &carry); - x[7] = word_sub(y[7], x[7], &carry); - return carry; - } - - -/* -* Eight Word Block Subtraction, Three Argument -*/ -inline word word8_sub3(word z[8], const word x[8], - const word y[8], word carry) - { - __asm { - mov edi,[x] - mov esi,[y] - xor eax,eax - sub eax,[carry] //force CF=1 iff *carry==1 - mov ebx,[z] - mov eax,[edi] - sbb eax,[esi] - mov [ebx],eax - mov eax,[edi+4] - sbb eax,[esi+4] - mov [ebx+4],eax - mov eax,[edi+8] - sbb eax,[esi+8] - mov [ebx+8],eax - mov eax,[edi+12] - sbb eax,[esi+12] - mov [ebx+12],eax - mov eax,[edi+16] - sbb eax,[esi+16] - mov [ebx+16],eax - mov eax,[edi+20] - sbb eax,[esi+20] - mov [ebx+20],eax - mov eax,[edi+24] - sbb eax,[esi+24] - mov [ebx+24],eax - mov eax,[edi+28] - sbb eax,[esi+28] - mov [ebx+28],eax - sbb eax,eax - neg eax - } - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul2(word x[8], word y, word carry) - { - __asm { - mov esi,[x] - mov eax,[esi] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,[carry] //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi],eax //load a - - mov eax,[esi+4] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+4],eax //load a - - mov eax,[esi+8] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+8],eax //load a - - mov eax,[esi+12] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+12],eax //load a - - mov eax,[esi+16] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+16],eax //load a - - mov eax,[esi+20] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+20],eax //load a - - mov eax,[esi+24] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+24],eax //load a - - mov eax,[esi+28] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov [esi+28],eax //load a - - mov eax,edx //store carry - } - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_muladd(word z[8], const word x[8], - word y, word carry) - { - __asm { - mov esi,[x] - mov ebx,[y] - mov edi,[z] - mov eax,[esi] //load a - mul ebx //edx(hi):eax(lo)=a*b - add eax,[carry] //sum lo carry - adc edx,0 //sum hi carry - add eax,[edi] //sum lo z - adc edx,0 //sum hi z - mov ecx,edx //carry for next block = hi z - mov [edi],eax //save lo z - - mov eax,[esi+4] - mul ebx - add eax,ecx - adc edx,0 - add eax,[edi+4] - adc edx,0 - mov ecx,edx - mov [edi+4],eax - - mov eax,[esi+8] - mul ebx - add eax,ecx - adc edx,0 - add eax,[edi+8] - adc edx,0 - mov ecx,edx - mov [edi+8],eax - - mov eax,[esi+12] - mul ebx - add eax,ecx - adc edx,0 - add eax,[edi+12] - adc edx,0 - mov ecx,edx - mov [edi+12],eax - - mov eax,[esi+16] - mul ebx - add eax,ecx - adc edx,0 - add eax,[edi+16] - adc edx,0 - mov ecx,edx - mov [edi+16],eax - - mov eax,[esi+20] - mul ebx - add eax,ecx - adc edx,0 - add eax,[edi+20] - adc edx,0 - mov ecx,edx - mov [edi+20],eax - - mov eax,[esi+24] - mul ebx - add eax,ecx - adc edx,0 - add eax,[edi+24] - adc edx,0 - mov ecx,edx - mov [edi+24],eax - - mov eax,[esi+28] - mul ebx - add eax,ecx - adc edx,0 - add eax,[edi+28] - adc edx,0 - mov [edi+28],eax - mov eax,edx - } - } - -inline word word8_linmul3(word z[4], const word x[4], word y, word carry) - { - __asm { -#if 0 - //it's slower!!! - mov edx,[z] - mov eax,[x] - movd mm7,[y] - - movd mm0,[eax] - movd mm1,[eax+4] - movd mm2,[eax+8] - pmuludq mm0,mm7 - pmuludq mm1,mm7 - pmuludq mm2,mm7 - - movd mm6,[carry] - paddq mm0,mm6 - movd [edx],mm0 - - psrlq mm0,32 - paddq mm1,mm0 - movd [edx+4],mm1 - - movd mm3,[eax+12] - psrlq mm1,32 - paddq mm2,mm1 - movd [edx+8],mm2 - - pmuludq mm3,mm7 - movd mm4,[eax+16] - psrlq mm2,32 - paddq mm3,mm2 - movd [edx+12],mm3 - - pmuludq mm4,mm7 - movd mm5,[eax+20] - psrlq mm3,32 - paddq mm4,mm3 - movd [edx+16],mm4 - - pmuludq mm5,mm7 - movd mm0,[eax+24] - psrlq mm4,32 - paddq mm5,mm4 - movd [edx+20],mm5 - - pmuludq mm0,mm7 - movd mm1,[eax+28] - psrlq mm5,32 - paddq mm0,mm5 - movd [edx+24],mm0 - - pmuludq mm1,mm7 - psrlq mm0,32 - paddq mm1,mm0 - movd [edx+28],mm1 - psrlq mm1,32 - - movd eax,mm1 - emms -#else - mov edi,[z] - mov esi,[x] - mov eax,[esi] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,[carry] //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi],eax //load a - - mov eax,[esi+4] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+4],eax //load a - - mov eax,[esi+8] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+8],eax //load a - - mov eax,[esi+12] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+12],eax //load a - - mov eax,[esi+16] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+16],eax //load a - - mov eax,[esi+20] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+20],eax //load a - - mov eax,[esi+24] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+24],eax //load a - - mov eax,[esi+28] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov [edi+28],eax //load a - mov eax,edx //store carry -#endif - } - } - -/* -* Eight Word Block Multiply/Add -*/ -inline word word8_madd3(word z[8], const word x[8], word y, word carry) - { - z[0] = word_madd3(x[0], y, z[0], &carry); - z[1] = word_madd3(x[1], y, z[1], &carry); - z[2] = word_madd3(x[2], y, z[2], &carry); - z[3] = word_madd3(x[3], y, z[3], &carry); - z[4] = word_madd3(x[4], y, z[4], &carry); - z[5] = word_madd3(x[5], y, z[5], &carry); - z[6] = word_madd3(x[6], y, z[6], &carry); - z[7] = word_madd3(x[7], y, z[7], &carry); - return carry; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd(word* w2, word* w1, word* w0, word a, word b) - { - word carry = *w0; - *w0 = word_madd2(a, b, &carry); - *w1 += carry; - *w2 += (*w1 < carry) ? 1 : 0; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b) - { - word carry = 0; - a = word_madd2(a, b, &carry); - b = carry; - - word top = (b >> (BOTAN_MP_WORD_BITS-1)); - b <<= 1; - b |= (a >> (BOTAN_MP_WORD_BITS-1)); - a <<= 1; - - carry = 0; - *w0 = word_add(*w0, a, &carry); - *w1 = word_add(*w1, b, &carry); - *w2 = word_add(*w2, top, &carry); - } - -} - -} - -#endif diff --git a/src/math/bigint/mp_karat.cpp b/src/math/bigint/mp_karat.cpp deleted file mode 100644 index 8ae346f1e..000000000 --- a/src/math/bigint/mp_karat.cpp +++ /dev/null @@ -1,340 +0,0 @@ -/* -* Karatsuba Multiplication/Squaring -* (C) 1999-2008 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#include <botan/internal/mp_core.h> -#include <botan/mem_ops.h> -#include <botan/internal/mp_asmi.h> - -namespace Botan { - -namespace { - -/* -* Karatsuba Multiplication Operation -*/ -void karatsuba_mul(word z[], const word x[], const word y[], u32bit N, - word workspace[]) - { - if(N == 6) - bigint_comba_mul6(z, x, y); - else if(N == 8) - bigint_comba_mul8(z, x, y); - else if(N == 16) - bigint_comba_mul16(z, x, y); - else if(N < BOTAN_KARAT_MUL_THRESHOLD || N % 2) - bigint_simple_mul(z, x, N, y, N); - else - { - const u32bit N2 = N / 2; - - const word* x0 = x; - const word* x1 = x + N2; - const word* y0 = y; - const word* y1 = y + N2; - word* z0 = z; - word* z1 = z + N; - - const s32bit cmp0 = bigint_cmp(x0, N2, x1, N2); - const s32bit cmp1 = bigint_cmp(y1, N2, y0, N2); - - clear_mem(workspace, 2*N); - - if(cmp0 && cmp1) - { - if(cmp0 > 0) - bigint_sub3(z0, x0, N2, x1, N2); - else - bigint_sub3(z0, x1, N2, x0, N2); - - if(cmp1 > 0) - bigint_sub3(z1, y1, N2, y0, N2); - else - bigint_sub3(z1, y0, N2, y1, N2); - - karatsuba_mul(workspace, z0, z1, N2, workspace+N); - } - - karatsuba_mul(z0, x0, y0, N2, workspace+N); - karatsuba_mul(z1, x1, y1, N2, workspace+N); - - const u32bit blocks_of_8 = N - (N % 8); - - word carry = 0; - - for(u32bit j = 0; j != blocks_of_8; j += 8) - carry = word8_add3(workspace + N + j, z0 + j, z1 + j, carry); - - for(u32bit j = blocks_of_8; j != N; ++j) - workspace[N + j] = word_add(z0[j], z1[j], &carry); - - word carry2 = 0; - - for(u32bit j = 0; j != blocks_of_8; j += 8) - carry2 = word8_add2(z + N2 + j, workspace + N + j, carry2); - - for(u32bit j = blocks_of_8; j != N; ++j) - z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &carry2); - - z[N + N2] = word_add(z[N + N2], carry2, &carry); - - if(carry) - for(u32bit j = 1; j != N2; ++j) - if(++z[N + N2 + j]) - break; - - if((cmp0 == cmp1) || (cmp0 == 0) || (cmp1 == 0)) - bigint_add2(z + N2, 2*N-N2, workspace, N); - else - bigint_sub2(z + N2, 2*N-N2, workspace, N); - } - } - -/* -* Karatsuba Squaring Operation -*/ -void karatsuba_sqr(word z[], const word x[], u32bit N, word workspace[]) - { - if(N == 6) - bigint_comba_sqr6(z, x); - else if(N == 8) - bigint_comba_sqr8(z, x); - else if(N == 16) - bigint_comba_sqr16(z, x); - else if(N < BOTAN_KARAT_SQR_THRESHOLD || N % 2) - bigint_simple_sqr(z, x, N); - else - { - const u32bit N2 = N / 2; - - const word* x0 = x; - const word* x1 = x + N2; - word* z0 = z; - word* z1 = z + N; - - const s32bit cmp = bigint_cmp(x0, N2, x1, N2); - - clear_mem(workspace, 2*N); - - if(cmp) - { - if(cmp > 0) - bigint_sub3(z0, x0, N2, x1, N2); - else - bigint_sub3(z0, x1, N2, x0, N2); - - karatsuba_sqr(workspace, z0, N2, workspace+N); - } - - karatsuba_sqr(z0, x0, N2, workspace+N); - karatsuba_sqr(z1, x1, N2, workspace+N); - - const u32bit blocks_of_8 = N - (N % 8); - - word carry = 0; - - for(u32bit j = 0; j != blocks_of_8; j += 8) - carry = word8_add3(workspace + N + j, z0 + j, z1 + j, carry); - - for(u32bit j = blocks_of_8; j != N; ++j) - workspace[N + j] = word_add(z0[j], z1[j], &carry); - - word carry2 = 0; - - for(u32bit j = 0; j != blocks_of_8; j += 8) - carry2 = word8_add2(z + N2 + j, workspace + N + j, carry2); - - for(u32bit j = blocks_of_8; j != N; ++j) - z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &carry2); - - z[N + N2] = word_add(z[N + N2], carry2, &carry); - - if(carry) - for(u32bit j = 1; j != N2; ++j) - if(++z[N + N2 + j]) - break; - - if(cmp == 0) - bigint_add2(z + N2, 2*N-N2, workspace, N); - else - bigint_sub2(z + N2, 2*N-N2, workspace, N); - } - } - -/* -* Pick a good size for the Karatsuba multiply -*/ -u32bit karatsuba_size(u32bit z_size, - u32bit x_size, u32bit x_sw, - u32bit y_size, u32bit y_sw) - { - if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size) - return 0; - - if(((x_size == x_sw) && (x_size % 2)) || - ((y_size == y_sw) && (y_size % 2))) - return 0; - - const u32bit start = (x_sw > y_sw) ? x_sw : y_sw; - const u32bit end = (x_size < y_size) ? x_size : y_size; - - if(start == end) - { - if(start % 2) - return 0; - return start; - } - - for(u32bit j = start; j <= end; ++j) - { - if(j % 2) - continue; - - if(2*j > z_size) - return 0; - - if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size) - { - if(j % 4 == 2 && - (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size) - return j+2; - return j; - } - } - - return 0; - } - -/* -* Pick a good size for the Karatsuba squaring -*/ -u32bit karatsuba_size(u32bit z_size, u32bit x_size, u32bit x_sw) - { - if(x_sw == x_size) - { - if(x_sw % 2) - return 0; - return x_sw; - } - - for(u32bit j = x_sw; j <= x_size; ++j) - { - if(j % 2) - continue; - - if(2*j > z_size) - return 0; - - if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size) - return j+2; - return j; - } - - return 0; - } - -} - -/* -* Multiplication Algorithm Dispatcher -*/ -void bigint_mul(word z[], u32bit z_size, word workspace[], - const word x[], u32bit x_size, u32bit x_sw, - const word y[], u32bit y_size, u32bit y_sw) - { - if(x_sw == 1) - { - bigint_linmul3(z, y, y_sw, x[0]); - } - else if(y_sw == 1) - { - bigint_linmul3(z, x, x_sw, y[0]); - } - else if(x_sw <= 4 && x_size >= 4 && - y_sw <= 4 && y_size >= 4 && z_size >= 8) - { - bigint_comba_mul4(z, x, y); - } - else if(x_sw <= 6 && x_size >= 6 && - y_sw <= 6 && y_size >= 6 && z_size >= 12) - { - bigint_comba_mul6(z, x, y); - } - else if(x_sw <= 8 && x_size >= 8 && - y_sw <= 8 && y_size >= 8 && z_size >= 16) - { - bigint_comba_mul8(z, x, y); - } - else if(x_sw <= 16 && x_size >= 16 && - y_sw <= 16 && y_size >= 16 && z_size >= 32) - { - bigint_comba_mul16(z, x, y); - } - else if(x_sw < BOTAN_KARAT_MUL_THRESHOLD || - y_sw < BOTAN_KARAT_MUL_THRESHOLD || - !workspace) - { - bigint_simple_mul(z, x, x_sw, y, y_sw); - } - else - { - const u32bit N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw); - - if(N) - { - clear_mem(workspace, 2*N); - karatsuba_mul(z, x, y, N, workspace); - } - else - bigint_simple_mul(z, x, x_sw, y, y_sw); - } - } - -/* -* Squaring Algorithm Dispatcher -*/ -void bigint_sqr(word z[], u32bit z_size, word workspace[], - const word x[], u32bit x_size, u32bit x_sw) - { - if(x_sw == 1) - { - bigint_linmul3(z, x, x_sw, x[0]); - } - else if(x_sw <= 4 && x_size >= 4 && z_size >= 8) - { - bigint_comba_sqr4(z, x); - } - else if(x_sw <= 6 && x_size >= 6 && z_size >= 12) - { - bigint_comba_sqr6(z, x); - } - else if(x_sw <= 8 && x_size >= 8 && z_size >= 16) - { - bigint_comba_sqr8(z, x); - } - else if(x_sw <= 16 && x_size >= 16 && z_size >= 32) - { - bigint_comba_sqr16(z, x); - } - else if(x_size < BOTAN_KARAT_SQR_THRESHOLD || !workspace) - { - bigint_simple_sqr(z, x, x_sw); - } - else - { - const u32bit N = karatsuba_size(z_size, x_size, x_sw); - - if(N) - { - clear_mem(workspace, 2*N); - karatsuba_sqr(z, x, N, workspace); - } - else - bigint_simple_sqr(z, x, x_sw); - } - } - -} diff --git a/src/math/bigint/mp_misc.cpp b/src/math/bigint/mp_misc.cpp deleted file mode 100644 index 77b8e6f51..000000000 --- a/src/math/bigint/mp_misc.cpp +++ /dev/null @@ -1,102 +0,0 @@ -/* -* MP Misc Functions -* (C) 1999-2008 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#include <botan/internal/mp_core.h> -#include <botan/internal/mp_asm.h> - -namespace Botan { - -extern "C" { - -/* -* Core Division Operation -*/ -u32bit bigint_divcore(word q, word y2, word y1, - word x3, word x2, word x1) - { - // Compute (y2,y1) * q - - word y3 = 0; - y1 = word_madd2(q, y1, &y3); - y2 = word_madd2(q, y2, &y3); - - // Return (y3,y2,y1) >? (x3,x2,x1) - - if(y3 > x3) return 1; - if(y3 < x3) return 0; - if(y2 > x2) return 1; - if(y2 < x2) return 0; - if(y1 > x1) return 1; - if(y1 < x1) return 0; - return 0; - } - -/* -* Compare two MP integers -*/ -s32bit bigint_cmp(const word x[], u32bit x_size, - const word y[], u32bit y_size) - { - if(x_size < y_size) { return (-bigint_cmp(y, y_size, x, x_size)); } - - while(x_size > y_size) - { - if(x[x_size-1]) - return 1; - x_size--; - } - - for(u32bit j = x_size; j > 0; --j) - { - if(x[j-1] > y[j-1]) - return 1; - if(x[j-1] < y[j-1]) - return -1; - } - - return 0; - } - -/* -* Do a 2-word/1-word Division -*/ -word bigint_divop(word n1, word n0, word d) - { - word high = n1 % d, quotient = 0; - - for(u32bit j = 0; j != MP_WORD_BITS; ++j) - { - word high_top_bit = (high & MP_WORD_TOP_BIT); - - high <<= 1; - high |= (n0 >> (MP_WORD_BITS-1-j)) & 1; - quotient <<= 1; - - if(high_top_bit || high >= d) - { - high -= d; - quotient |= 1; - } - } - - return quotient; - } - -/* -* Do a 2-word/1-word Modulo -*/ -word bigint_modop(word n1, word n0, word d) - { - word z = bigint_divop(n1, n0, d); - word dummy = 0; - z = word_madd2(z, d, &dummy); - return (n0-z); - } - -} - -} diff --git a/src/math/bigint/mp_msvc64/info.txt b/src/math/bigint/mp_msvc64/info.txt deleted file mode 100644 index 56ae05927..000000000 --- a/src/math/bigint/mp_msvc64/info.txt +++ /dev/null @@ -1,17 +0,0 @@ -load_on dep - -mp_bits 64 - -<header:internal> -mp_asm.h -mp_generic:mp_asmi.h -</header:internal> - -<arch> -amd64 -ia64 -</arch> - -<cc> -msvc -</cc> diff --git a/src/math/bigint/mp_msvc64/mp_asm.h b/src/math/bigint/mp_msvc64/mp_asm.h deleted file mode 100644 index 8e4535c35..000000000 --- a/src/math/bigint/mp_msvc64/mp_asm.h +++ /dev/null @@ -1,61 +0,0 @@ -/* -* Multiply-Add for 64-bit MSVC -* (C) 2010 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_ASM_H__ -#define BOTAN_MP_ASM_H__ - -#include <botan/mp_types.h> -#include <intrin.h> - -#if (BOTAN_MP_WORD_BITS != 64) - #error The mp_msvc64 module requires that BOTAN_MP_WORD_BITS == 64 -#endif - -#pragma intrinsic(_umul128) - -namespace Botan { - -extern "C" { - -/* -* Word Multiply -*/ -inline word word_madd2(word a, word b, word* c) - { - word hi, lo; - lo = _umul128(a, b, &hi); - - lo += *c; - hi += (lo < *c); // carry? - - *c = hi; - return lo; - } - -/* -* Word Multiply/Add -*/ -inline word word_madd3(word a, word b, word c, word* d) - { - word hi, lo; - lo = _umul128(a, b, &hi); - - lo += c; - hi += (lo < c); // carry? - - lo += *d; - hi += (lo < *d); // carry? - - *d = hi; - return lo; - } - -} - -} - -#endif diff --git a/src/math/bigint/mp_shift.cpp b/src/math/bigint/mp_shift.cpp deleted file mode 100644 index f1d609bfb..000000000 --- a/src/math/bigint/mp_shift.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/* -* MP Shift Algorithms -* (C) 1999-2007 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#include <botan/internal/mp_core.h> -#include <botan/mem_ops.h> - -namespace Botan { - -extern "C" { - -/* -* Single Operand Left Shift -*/ -void bigint_shl1(word x[], u32bit x_size, u32bit word_shift, u32bit bit_shift) - { - if(word_shift) - { - for(u32bit j = 1; j != x_size + 1; ++j) - x[(x_size - j) + word_shift] = x[x_size - j]; - clear_mem(x, word_shift); - } - - if(bit_shift) - { - word carry = 0; - for(u32bit j = word_shift; j != x_size + word_shift + 1; ++j) - { - word temp = x[j]; - x[j] = (temp << bit_shift) | carry; - carry = (temp >> (MP_WORD_BITS - bit_shift)); - } - } - } - -/* -* Single Operand Right Shift -*/ -void bigint_shr1(word x[], u32bit x_size, u32bit word_shift, u32bit bit_shift) - { - if(x_size < word_shift) - { - clear_mem(x, x_size); - return; - } - - if(word_shift) - { - copy_mem(x, x + word_shift, x_size - word_shift); - clear_mem(x + x_size - word_shift, word_shift); - } - - if(bit_shift) - { - word carry = 0; - - u32bit top = x_size - word_shift; - - while(top >= 4) - { - word w = x[top-1]; - x[top-1] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - w = x[top-2]; - x[top-2] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - w = x[top-3]; - x[top-3] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - w = x[top-4]; - x[top-4] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - top -= 4; - } - - while(top) - { - word w = x[top-1]; - x[top-1] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - top--; - } - } - } - -/* -* Two Operand Left Shift -*/ -void bigint_shl2(word y[], const word x[], u32bit x_size, - u32bit word_shift, u32bit bit_shift) - { - for(u32bit j = 0; j != x_size; ++j) - y[j + word_shift] = x[j]; - if(bit_shift) - { - word carry = 0; - for(u32bit j = word_shift; j != x_size + word_shift + 1; ++j) - { - word w = y[j]; - y[j] = (w << bit_shift) | carry; - carry = (w >> (MP_WORD_BITS - bit_shift)); - } - } - } - -/* -* Two Operand Right Shift -*/ -void bigint_shr2(word y[], const word x[], u32bit x_size, - u32bit word_shift, u32bit bit_shift) - { - if(x_size < word_shift) return; - - for(u32bit j = 0; j != x_size - word_shift; ++j) - y[j] = x[j + word_shift]; - if(bit_shift) - { - word carry = 0; - for(u32bit j = x_size - word_shift; j > 0; --j) - { - word w = y[j-1]; - y[j-1] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - } - } - } - -} - -} diff --git a/src/math/bigint/mp_types.h b/src/math/bigint/mp_types.h deleted file mode 100644 index 1648713ed..000000000 --- a/src/math/bigint/mp_types.h +++ /dev/null @@ -1,33 +0,0 @@ -/* -* Low Level MPI Types -* (C) 1999-2007 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MPI_TYPES_H__ -#define BOTAN_MPI_TYPES_H__ - -#include <botan/types.h> - -namespace Botan { - -#if (BOTAN_MP_WORD_BITS == 8) - typedef byte word; -#elif (BOTAN_MP_WORD_BITS == 16) - typedef u16bit word; -#elif (BOTAN_MP_WORD_BITS == 32) - typedef u32bit word; -#elif (BOTAN_MP_WORD_BITS == 64) - typedef u64bit word; -#else - #error BOTAN_MP_WORD_BITS must be 8, 16, 32, or 64 -#endif - -const word MP_WORD_MASK = ~static_cast<word>(0); -const word MP_WORD_TOP_BIT = static_cast<word>(1) << (8*sizeof(word) - 1); -const word MP_WORD_MAX = MP_WORD_MASK; - -} - -#endif diff --git a/src/math/bigint/mulop_generic/info.txt b/src/math/bigint/mulop_generic/info.txt deleted file mode 100644 index 548d0f44b..000000000 --- a/src/math/bigint/mulop_generic/info.txt +++ /dev/null @@ -1,5 +0,0 @@ -load_on dep - -<source> -mp_mulop.cpp -</source> diff --git a/src/math/bigint/mulop_generic/mp_mulop.cpp b/src/math/bigint/mulop_generic/mp_mulop.cpp deleted file mode 100644 index 33ee2af32..000000000 --- a/src/math/bigint/mulop_generic/mp_mulop.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* -* Simple O(N^2) Multiplication and Squaring -* (C) 1999-2008 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#include <botan/internal/mp_asm.h> -#include <botan/internal/mp_asmi.h> -#include <botan/internal/mp_core.h> -#include <botan/mem_ops.h> - -namespace Botan { - -extern "C" { - -/* -* Simple O(N^2) Multiplication -*/ -void bigint_simple_mul(word z[], const word x[], u32bit x_size, - const word y[], u32bit y_size) - { - const u32bit x_size_8 = x_size - (x_size % 8); - - clear_mem(z, x_size + y_size); - - for(u32bit i = 0; i != y_size; ++i) - { - const word y_i = y[i]; - - word carry = 0; - - for(u32bit j = 0; j != x_size_8; j += 8) - carry = word8_madd3(z + i + j, x + j, y_i, carry); - - for(u32bit j = x_size_8; j != x_size; ++j) - z[i+j] = word_madd3(x[j], y_i, z[i+j], &carry); - - z[x_size+i] = carry; - } - } - -/* -* Simple O(N^2) Squaring - -This is exactly the same algorithm as bigint_simple_mul, -however because C/C++ compilers suck at alias analysis it -is good to have the version where the compiler knows -that x == y - -There is an O(n^1.5) squaring algorithm specified in Handbook of -Applied Cryptography, chapter 14 -*/ -void bigint_simple_sqr(word z[], const word x[], u32bit x_size) - { - const u32bit x_size_8 = x_size - (x_size % 8); - - clear_mem(z, 2*x_size); - - for(u32bit i = 0; i != x_size; ++i) - { - const word x_i = x[i]; - word carry = 0; - - for(u32bit j = 0; j != x_size_8; j += 8) - carry = word8_madd3(z + i + j, x + j, x_i, carry); - - for(u32bit j = x_size_8; j != x_size; ++j) - z[i+j] = word_madd3(x[j], x_i, z[i+j], &carry); - - z[x_size+i] = carry; - } - } - -} - -} |