From 3d0ac39eab74c6f74fe41eda9e5f057d1b396f10 Mon Sep 17 00:00:00 2001 From: lloyd Date: Fri, 24 Sep 2010 21:58:47 +0000 Subject: Move the core MPI functions to src/math/mp, leaving src/math/bigint just for the implementation of the BigInt class --- src/math/bigint/info.txt | 14 +- src/math/bigint/monty_generic/info.txt | 5 - src/math/bigint/monty_generic/mp_monty.cpp | 72 --- src/math/bigint/mp_amd64/info.txt | 18 - src/math/bigint/mp_amd64/mp_asm.h | 69 --- src/math/bigint/mp_amd64/mp_asmi.h | 248 -------- src/math/bigint/mp_asm.cpp | 183 ------ src/math/bigint/mp_asm64/info.txt | 25 - src/math/bigint/mp_asm64/mp_asm.h | 122 ---- src/math/bigint/mp_comba.cpp | 920 ----------------------------- src/math/bigint/mp_core.h | 144 ----- src/math/bigint/mp_generic/info.txt | 6 - src/math/bigint/mp_generic/mp_asm.h | 54 -- src/math/bigint/mp_generic/mp_asmi.h | 207 ------- src/math/bigint/mp_ia32/info.txt | 18 - src/math/bigint/mp_ia32/mp_asm.h | 67 --- src/math/bigint/mp_ia32/mp_asmi.h | 240 -------- src/math/bigint/mp_ia32_msvc/info.txt | 16 - src/math/bigint/mp_ia32_msvc/mp_asmi.h | 542 ----------------- src/math/bigint/mp_karat.cpp | 340 ----------- src/math/bigint/mp_misc.cpp | 102 ---- src/math/bigint/mp_msvc64/info.txt | 17 - src/math/bigint/mp_msvc64/mp_asm.h | 61 -- src/math/bigint/mp_shift.cpp | 138 ----- src/math/bigint/mp_types.h | 33 -- src/math/bigint/mulop_generic/info.txt | 5 - src/math/bigint/mulop_generic/mp_mulop.cpp | 77 --- src/math/mp/info.txt | 23 + src/math/mp/monty_generic/info.txt | 5 + src/math/mp/monty_generic/mp_monty.cpp | 72 +++ src/math/mp/mp_amd64/info.txt | 18 + src/math/mp/mp_amd64/mp_asm.h | 69 +++ src/math/mp/mp_amd64/mp_asmi.h | 248 ++++++++ src/math/mp/mp_asm.cpp | 183 ++++++ src/math/mp/mp_asm64/info.txt | 25 + src/math/mp/mp_asm64/mp_asm.h | 122 ++++ src/math/mp/mp_comba.cpp | 920 +++++++++++++++++++++++++++++ src/math/mp/mp_core.h | 144 +++++ src/math/mp/mp_generic/info.txt | 6 + src/math/mp/mp_generic/mp_asm.h | 54 ++ src/math/mp/mp_generic/mp_asmi.h | 207 +++++++ src/math/mp/mp_ia32/info.txt | 18 + src/math/mp/mp_ia32/mp_asm.h | 67 +++ src/math/mp/mp_ia32/mp_asmi.h | 240 ++++++++ src/math/mp/mp_ia32_msvc/info.txt | 16 + src/math/mp/mp_ia32_msvc/mp_asmi.h | 542 +++++++++++++++++ src/math/mp/mp_karat.cpp | 340 +++++++++++ src/math/mp/mp_misc.cpp | 102 ++++ src/math/mp/mp_msvc64/info.txt | 17 + src/math/mp/mp_msvc64/mp_asm.h | 61 ++ src/math/mp/mp_shift.cpp | 138 +++++ src/math/mp/mp_types.h | 33 ++ src/math/mp/mulop_generic/info.txt | 5 + src/math/mp/mulop_generic/mp_mulop.cpp | 77 +++ 54 files changed, 3753 insertions(+), 3742 deletions(-) delete mode 100644 src/math/bigint/monty_generic/info.txt delete mode 100644 src/math/bigint/monty_generic/mp_monty.cpp delete mode 100644 src/math/bigint/mp_amd64/info.txt delete mode 100644 src/math/bigint/mp_amd64/mp_asm.h delete mode 100644 src/math/bigint/mp_amd64/mp_asmi.h delete mode 100644 src/math/bigint/mp_asm.cpp delete mode 100644 src/math/bigint/mp_asm64/info.txt delete mode 100644 src/math/bigint/mp_asm64/mp_asm.h delete mode 100644 src/math/bigint/mp_comba.cpp delete mode 100644 src/math/bigint/mp_core.h delete mode 100644 src/math/bigint/mp_generic/info.txt delete mode 100644 src/math/bigint/mp_generic/mp_asm.h delete mode 100644 src/math/bigint/mp_generic/mp_asmi.h delete mode 100644 src/math/bigint/mp_ia32/info.txt delete mode 100644 src/math/bigint/mp_ia32/mp_asm.h delete mode 100644 src/math/bigint/mp_ia32/mp_asmi.h delete mode 100644 src/math/bigint/mp_ia32_msvc/info.txt delete mode 100644 src/math/bigint/mp_ia32_msvc/mp_asmi.h delete mode 100644 src/math/bigint/mp_karat.cpp delete mode 100644 src/math/bigint/mp_misc.cpp delete mode 100644 src/math/bigint/mp_msvc64/info.txt delete mode 100644 src/math/bigint/mp_msvc64/mp_asm.h delete mode 100644 src/math/bigint/mp_shift.cpp delete mode 100644 src/math/bigint/mp_types.h delete mode 100644 src/math/bigint/mulop_generic/info.txt delete mode 100644 src/math/bigint/mulop_generic/mp_mulop.cpp create mode 100644 src/math/mp/info.txt create mode 100644 src/math/mp/monty_generic/info.txt create mode 100644 src/math/mp/monty_generic/mp_monty.cpp create mode 100644 src/math/mp/mp_amd64/info.txt create mode 100644 src/math/mp/mp_amd64/mp_asm.h create mode 100644 src/math/mp/mp_amd64/mp_asmi.h create mode 100644 src/math/mp/mp_asm.cpp create mode 100644 src/math/mp/mp_asm64/info.txt create mode 100644 src/math/mp/mp_asm64/mp_asm.h create mode 100644 src/math/mp/mp_comba.cpp create mode 100644 src/math/mp/mp_core.h create mode 100644 src/math/mp/mp_generic/info.txt create mode 100644 src/math/mp/mp_generic/mp_asm.h create mode 100644 src/math/mp/mp_generic/mp_asmi.h create mode 100644 src/math/mp/mp_ia32/info.txt create mode 100644 src/math/mp/mp_ia32/mp_asm.h create mode 100644 src/math/mp/mp_ia32/mp_asmi.h create mode 100644 src/math/mp/mp_ia32_msvc/info.txt create mode 100644 src/math/mp/mp_ia32_msvc/mp_asmi.h create mode 100644 src/math/mp/mp_karat.cpp create mode 100644 src/math/mp/mp_misc.cpp create mode 100644 src/math/mp/mp_msvc64/info.txt create mode 100644 src/math/mp/mp_msvc64/mp_asm.h create mode 100644 src/math/mp/mp_shift.cpp create mode 100644 src/math/mp/mp_types.h create mode 100644 src/math/mp/mulop_generic/info.txt create mode 100644 src/math/mp/mulop_generic/mp_mulop.cpp (limited to 'src') diff --git a/src/math/bigint/info.txt b/src/math/bigint/info.txt index 7892a6edf..6057c708e 100644 --- a/src/math/bigint/info.txt +++ b/src/math/bigint/info.txt @@ -5,13 +5,8 @@ define BIGINT bigint.h divide.h -mp_types.h - -mp_core.h - - big_code.cpp big_io.cpp @@ -20,18 +15,11 @@ big_ops3.cpp big_rand.cpp bigint.cpp divide.cpp -mp_asm.cpp -mp_comba.cpp -mp_karat.cpp -mp_misc.cpp -mp_shift.cpp alloc +mp hex -mp_amd64|mp_msvc64|mp_asm64|mp_ia32|mp_ia32_msvc|mp_generic -monty_generic -mulop_generic rng diff --git a/src/math/bigint/monty_generic/info.txt b/src/math/bigint/monty_generic/info.txt deleted file mode 100644 index cd05ccdc0..000000000 --- a/src/math/bigint/monty_generic/info.txt +++ /dev/null @@ -1,5 +0,0 @@ -load_on dep - - -mp_monty.cpp - diff --git a/src/math/bigint/monty_generic/mp_monty.cpp b/src/math/bigint/monty_generic/mp_monty.cpp deleted file mode 100644 index bce35259a..000000000 --- a/src/math/bigint/monty_generic/mp_monty.cpp +++ /dev/null @@ -1,72 +0,0 @@ -/* -* Montgomery Reduction -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Distributed under the terms of the Botan license -*/ - -#include -#include -#include -#include - -namespace Botan { - -extern "C" { - -/* -* Montgomery Reduction Algorithm -*/ -void bigint_monty_redc(word z[], u32bit z_size, - word ws[], - const word x[], u32bit x_size, - word u) - { - const u32bit blocks_of_8 = x_size - (x_size % 8); - - for(u32bit i = 0; i != x_size; ++i) - { - word* z_i = z + i; - - const word y = z_i[0] * u; - - /* - bigint_linmul3(ws, x, x_size, y); - bigint_add2(z_i, z_size - i, ws, x_size+1); - */ - word carry = 0; - - for(u32bit j = 0; j != blocks_of_8; j += 8) - carry = word8_madd3(z_i + j, x + j, y, carry); - - for(u32bit j = blocks_of_8; j != x_size; ++j) - z_i[j] = word_madd3(x[j], y, z_i[j], &carry); - - word z_sum = z_i[x_size] + carry; - carry = (z_sum < z_i[x_size]); - z_i[x_size] = z_sum; - - // Note: not constant time - for(u32bit j = x_size + 1; carry && j != z_size - i; ++j) - { - ++z_i[j]; - carry = !z_i[j]; - } - } - - word borrow = 0; - for(u32bit i = 0; i != x_size; ++i) - ws[i] = word_sub(z[x_size + i], x[i], &borrow); - - ws[x_size] = word_sub(z[x_size+x_size], 0, &borrow); - - copy_mem(ws + x_size + 1, z + x_size, x_size + 1); - - copy_mem(z, ws + borrow*(x_size+1), x_size + 1); - clear_mem(z + x_size + 1, z_size - x_size - 1); - } - -} - -} diff --git a/src/math/bigint/mp_amd64/info.txt b/src/math/bigint/mp_amd64/info.txt deleted file mode 100644 index 11cc380e2..000000000 --- a/src/math/bigint/mp_amd64/info.txt +++ /dev/null @@ -1,18 +0,0 @@ -load_on dep - -mp_bits 64 - - -mp_asm.h -mp_asmi.h - - - -amd64 - - - -clang -gcc -icc - diff --git a/src/math/bigint/mp_amd64/mp_asm.h b/src/math/bigint/mp_amd64/mp_asm.h deleted file mode 100644 index fa66d04f3..000000000 --- a/src/math/bigint/mp_amd64/mp_asm.h +++ /dev/null @@ -1,69 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2008 Jack Lloyd -* 2006 Luca Piccarreta -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_ASM_H__ -#define BOTAN_MP_ASM_H__ - -#include - -#if (BOTAN_MP_WORD_BITS != 64) - #error The mp_amd64 module requires that BOTAN_MP_WORD_BITS == 64 -#endif - -namespace Botan { - -extern "C" { - -/* -* Helper Macros for amd64 Assembly -*/ -#define ASM(x) x "\n\t" - -/* -* Word Multiply -*/ -inline word word_madd2(word a, word b, word* c) - { - asm( - ASM("mulq %[b]") - ASM("addq %[c],%[a]") - ASM("adcq $0,%[carry]") - - : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) - : "0"(a), "1"(b), [c]"g"(*c) : "cc"); - - return a; - } - -/* -* Word Multiply/Add -*/ -inline word word_madd3(word a, word b, word c, word* d) - { - asm( - ASM("mulq %[b]") - - ASM("addq %[c],%[a]") - ASM("adcq $0,%[carry]") - - ASM("addq %[d],%[a]") - ASM("adcq $0,%[carry]") - - : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) - : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); - - return a; - } - -#undef ASM - -} - -} - -#endif diff --git a/src/math/bigint/mp_amd64/mp_asmi.h b/src/math/bigint/mp_amd64/mp_asmi.h deleted file mode 100644 index adf7774ef..000000000 --- a/src/math/bigint/mp_amd64/mp_asmi.h +++ /dev/null @@ -1,248 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_ASM_INTERNAL_H__ -#define BOTAN_MP_ASM_INTERNAL_H__ - -#include - -namespace Botan { - -extern "C" { - -/* -* Helper Macros for amd64 Assembly -*/ -#ifndef ASM - #define ASM(x) x "\n\t" -#endif - -#define ADDSUB2_OP(OPERATION, INDEX) \ - ASM("movq 8*" #INDEX "(%[y]), %[carry]") \ - ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])") \ - -#define ADDSUB3_OP(OPERATION, INDEX) \ - ASM("movq 8*" #INDEX "(%[x]), %[carry]") \ - ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \ - ASM("movq %[carry], 8*" #INDEX "(%[z])") \ - -#define LINMUL_OP(WRITE_TO, INDEX) \ - ASM("movq 8*" #INDEX "(%[x]),%%rax") \ - ASM("mulq %[y]") \ - ASM("addq %[carry],%%rax") \ - ASM("adcq $0,%%rdx") \ - ASM("movq %%rdx,%[carry]") \ - ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])") - -#define MULADD_OP(IGNORED, INDEX) \ - ASM("movq 8*" #INDEX "(%[x]),%%rax") \ - ASM("mulq %[y]") \ - ASM("addq %[carry],%%rax") \ - ASM("adcq $0,%%rdx") \ - ASM("addq 8*" #INDEX "(%[z]),%%rax") \ - ASM("adcq $0,%%rdx") \ - ASM("movq %%rdx,%[carry]") \ - ASM("movq %%rax, 8*" #INDEX " (%[z])") - -#define DO_8_TIMES(MACRO, ARG) \ - MACRO(ARG, 0) \ - MACRO(ARG, 1) \ - MACRO(ARG, 2) \ - MACRO(ARG, 3) \ - MACRO(ARG, 4) \ - MACRO(ARG, 5) \ - MACRO(ARG, 6) \ - MACRO(ARG, 7) - -#define ADD_OR_SUBTRACT(CORE_CODE) \ - ASM("rorq %[carry]") \ - CORE_CODE \ - ASM("sbbq %[carry],%[carry]") \ - ASM("negq %[carry]") - -/* -* Word Addition -*/ -inline word word_add(word x, word y, word* carry) - { - asm( - ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]")) - : [x]"=r"(x), [carry]"=r"(*carry) - : "0"(x), [y]"rm"(y), "1"(*carry) - : "cc"); - return x; - } - -/* -* Eight Word Block Addition, Two Argument -*/ -inline word word8_add2(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Addition, Three Argument -*/ -inline word word8_add3(word z[8], const word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Word Subtraction -*/ -inline word word_sub(word x, word y, word* carry) - { - asm( - ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]")) - : [x]"=r"(x), [carry]"=r"(*carry) - : "0"(x), [y]"rm"(y), "1"(*carry) - : "cc"); - return x; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2_rev(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) - : [carry]"=r"(carry) - : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Subtraction, Three Argument -*/ -inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul2(word x[8], word y, word carry) - { - asm( - DO_8_TIMES(LINMUL_OP, "x") - : [carry]"=r"(carry) - : [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%rax", "%rdx"); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul3(word z[8], const word x[8], word y, word carry) - { - asm( - DO_8_TIMES(LINMUL_OP, "z") - : [carry]"=r"(carry) - : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%rax", "%rdx"); - return carry; - } - -/* -* Eight Word Block Multiply/Add -*/ -inline word word8_madd3(word z[8], const word x[8], word y, word carry) - { - asm( - DO_8_TIMES(MULADD_OP, "") - : [carry]"=r"(carry) - : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%rax", "%rdx"); - return carry; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) - { - asm( - ASM("mulq %[y]") - - ASM("addq %[x],%[w0]") - ASM("adcq %[y],%[w1]") - ASM("adcq $0,%[w2]") - - : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) - : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) - : "cc"); - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) - { - asm( - ASM("mulq %[y]") - - ASM("addq %[x],%[w0]") - ASM("adcq %[y],%[w1]") - ASM("adcq $0,%[w2]") - - ASM("addq %[x],%[w0]") - ASM("adcq %[y],%[w1]") - ASM("adcq $0,%[w2]") - - : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) - : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) - : "cc"); - } - - -#undef ASM -#undef DO_8_TIMES -#undef ADD_OR_SUBTRACT -#undef ADDSUB2_OP -#undef ADDSUB3_OP -#undef LINMUL_OP -#undef MULADD_OP - -} - -} -#endif diff --git a/src/math/bigint/mp_asm.cpp b/src/math/bigint/mp_asm.cpp deleted file mode 100644 index 4fcdee7a4..000000000 --- a/src/math/bigint/mp_asm.cpp +++ /dev/null @@ -1,183 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Distributed under the terms of the Botan license -*/ - -#include -#include -#include -#include -#include - -namespace Botan { - -extern "C" { - -/* -* Two Operand Addition, No Carry -*/ -word bigint_add2_nc(word x[], u32bit x_size, const word y[], u32bit y_size) - { - word carry = 0; - - const u32bit blocks = y_size - (y_size % 8); - - for(u32bit i = 0; i != blocks; i += 8) - carry = word8_add2(x + i, y + i, carry); - - for(u32bit i = blocks; i != y_size; ++i) - x[i] = word_add(x[i], y[i], &carry); - - for(u32bit i = y_size; i != x_size; ++i) - x[i] = word_add(x[i], 0, &carry); - - return carry; - } - -/* -* Three Operand Addition, No Carry -*/ -word bigint_add3_nc(word z[], const word x[], u32bit x_size, - const word y[], u32bit y_size) - { - if(x_size < y_size) - { return bigint_add3_nc(z, y, y_size, x, x_size); } - - word carry = 0; - - const u32bit blocks = y_size - (y_size % 8); - - for(u32bit i = 0; i != blocks; i += 8) - carry = word8_add3(z + i, x + i, y + i, carry); - - for(u32bit i = blocks; i != y_size; ++i) - z[i] = word_add(x[i], y[i], &carry); - - for(u32bit i = y_size; i != x_size; ++i) - z[i] = word_add(x[i], 0, &carry); - - return carry; - } - -/* -* Two Operand Addition -*/ -void bigint_add2(word x[], u32bit x_size, const word y[], u32bit y_size) - { - x[x_size] += bigint_add2_nc(x, x_size, y, y_size); - } - -/* -* Three Operand Addition -*/ -void bigint_add3(word z[], const word x[], u32bit x_size, - const word y[], u32bit y_size) - { - z[(x_size > y_size ? x_size : y_size)] += - bigint_add3_nc(z, x, x_size, y, y_size); - } - -/* -* Two Operand Subtraction -*/ -word bigint_sub2(word x[], u32bit x_size, const word y[], u32bit y_size) - { - word borrow = 0; - - const u32bit blocks = y_size - (y_size % 8); - - for(u32bit i = 0; i != blocks; i += 8) - borrow = word8_sub2(x + i, y + i, borrow); - - for(u32bit i = blocks; i != y_size; ++i) - x[i] = word_sub(x[i], y[i], &borrow); - - for(u32bit i = y_size; i != x_size; ++i) - x[i] = word_sub(x[i], 0, &borrow); - - return borrow; - } - -/* -* Two Operand Subtraction x = y - x -*/ -void bigint_sub2_rev(word x[], const word y[], u32bit y_size) - { - word borrow = 0; - - const u32bit blocks = y_size - (y_size % 8); - - for(u32bit i = 0; i != blocks; i += 8) - borrow = word8_sub2_rev(x + i, y + i, borrow); - - for(u32bit i = blocks; i != y_size; ++i) - x[i] = word_sub(y[i], x[i], &borrow); - - if(borrow) - throw Internal_Error("bigint_sub2_rev: x >= y"); - } - -/* -* Three Operand Subtraction -*/ -word bigint_sub3(word z[], const word x[], u32bit x_size, - const word y[], u32bit y_size) - { - word borrow = 0; - - const u32bit blocks = y_size - (y_size % 8); - - for(u32bit i = 0; i != blocks; i += 8) - borrow = word8_sub3(z + i, x + i, y + i, borrow); - - for(u32bit i = blocks; i != y_size; ++i) - z[i] = word_sub(x[i], y[i], &borrow); - - for(u32bit i = y_size; i != x_size; ++i) - z[i] = word_sub(x[i], 0, &borrow); - - return borrow; - } - -/* -* Two Operand Linear Multiply -*/ -void bigint_linmul2(word x[], u32bit x_size, word y) - { - const u32bit blocks = x_size - (x_size % 8); - - word carry = 0; - - for(u32bit i = 0; i != blocks; i += 8) - carry = word8_linmul2(x + i, y, carry); - - for(u32bit i = blocks; i != x_size; ++i) - x[i] = word_madd2(x[i], y, &carry); - - x[x_size] = carry; - } - -/* -* Three Operand Linear Multiply -*/ -void bigint_linmul3(word z[], const word x[], u32bit x_size, word y) - { - const u32bit blocks = x_size - (x_size % 8); - - word carry = 0; - - for(u32bit i = 0; i != blocks; i += 8) - carry = word8_linmul3(z + i, x + i, y, carry); - - for(u32bit i = blocks; i != x_size; ++i) - z[i] = word_madd2(x[i], y, &carry); - - z[x_size] = carry; - } - -} - -} diff --git a/src/math/bigint/mp_asm64/info.txt b/src/math/bigint/mp_asm64/info.txt deleted file mode 100644 index fd0242a7a..000000000 --- a/src/math/bigint/mp_asm64/info.txt +++ /dev/null @@ -1,25 +0,0 @@ -mp_bits 64 - -load_on dep - - -mp_asm.h -mp_generic:mp_asmi.h - - - -#amd64 -alpha -ia64 -mips64 -ppc64 -sparc64 - - -# The inline asm only works with gcc, but it looks like (at least on -# UltraSPARC), using 64-bit words and the sythensized multiply is a 5 to 25% -# win, so it's probably worth using elsewhere. - -gcc -sunwspro - diff --git a/src/math/bigint/mp_asm64/mp_asm.h b/src/math/bigint/mp_asm64/mp_asm.h deleted file mode 100644 index b0906095d..000000000 --- a/src/math/bigint/mp_asm64/mp_asm.h +++ /dev/null @@ -1,122 +0,0 @@ -/* -* MPI Multiply-Add Core -* (C) 1999-2007 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_MADD_H__ -#define BOTAN_MP_MADD_H__ - -#include - -namespace Botan { - -#if (BOTAN_MP_WORD_BITS != 64) - #error The mp_asm64 module requires that BOTAN_MP_WORD_BITS == 64 -#endif - -#if defined(BOTAN_TARGET_ARCH_IS_ALPHA) - -#define BOTAN_WORD_MUL(a,b,z1,z0) do { \ - asm("umulh %1,%2,%0" : "=r" (z0) : "r" (a), "r" (b)); \ - z1 = a * b; \ -} while(0); - -#elif defined(BOTAN_TARGET_ARCH_IS_AMD64) - -#define BOTAN_WORD_MUL(a,b,z1,z0) do { \ - asm("mulq %3" : "=d" (z0), "=a" (z1) : \ - "a" (a), "rm" (b) : "cc"); \ -} while(0); - -#elif defined(BOTAN_TARGET_ARCH_IS_IA64) - -#define BOTAN_WORD_MUL(a,b,z1,z0) do { \ - asm("xmpy.hu %0=%1,%2" : "=f" (z0) : "f" (a), "f" (b)); \ - z1 = a * b; \ -} while(0); - -#elif defined(BOTAN_TARGET_ARCH_IS_PPC64) - -#define BOTAN_WORD_MUL(a,b,z1,z0) do { \ - asm("mulhdu %0,%1,%2" : "=r" (z0) : "r" (a), "r" (b) : "cc"); \ - z1 = a * b; \ -} while(0); - -#elif defined(BOTAN_TARGET_ARCH_IS_MIPS64) - -#define BOTAN_WORD_MUL(a,b,z1,z0) do { \ - typedef unsigned int uint128_t __attribute__((mode(TI))); \ - uint128_t r = (uint128_t)a * b; \ - z0 = (r >> 64) & 0xFFFFFFFFFFFFFFFF; \ - z1 = (r ) & 0xFFFFFFFFFFFFFFFF; \ -} while(0); - -#else - -// Do a 64x64->128 multiply using four 64x64->64 multiplies -// plus some adds and shifts. Last resort for CPUs like UltraSPARC, -// with 64-bit registers/ALU, but no 64x64->128 multiply. -inline void bigint_2word_mul(word a, word b, word* z1, word* z0) - { - const u32bit MP_HWORD_BITS = BOTAN_MP_WORD_BITS / 2; - const word MP_HWORD_MASK = ((word)1 << MP_HWORD_BITS) - 1; - - const word a_hi = (a >> MP_HWORD_BITS); - const word a_lo = (a & MP_HWORD_MASK); - const word b_hi = (b >> MP_HWORD_BITS); - const word b_lo = (b & MP_HWORD_MASK); - - word x0 = a_hi * b_hi; - word x1 = a_lo * b_hi; - word x2 = a_hi * b_lo; - word x3 = a_lo * b_lo; - - x2 += x3 >> (MP_HWORD_BITS); - x2 += x1; - if(x2 < x1) - x0 += ((word)1 << MP_HWORD_BITS); - - *z0 = x0 + (x2 >> MP_HWORD_BITS); - *z1 = ((x2 & MP_HWORD_MASK) << MP_HWORD_BITS) + (x3 & MP_HWORD_MASK); - } - -#define BOTAN_WORD_MUL(a,b,z1,z0) bigint_2word_mul(a, b, &z1, &z0) - -#endif - -/* -* Word Multiply/Add -*/ -inline word word_madd2(word a, word b, word* c) - { - word z0 = 0, z1 = 0; - - BOTAN_WORD_MUL(a, b, z1, z0); - - z1 += *c; if(z1 < *c) z0++; - - *c = z0; - return z1; - } - -/* -* Word Multiply/Add -*/ -inline word word_madd3(word a, word b, word c, word* d) - { - word z0 = 0, z1 = 0; - - BOTAN_WORD_MUL(a, b, z1, z0); - - z1 += c; if(z1 < c) z0++; - z1 += *d; if(z1 < *d) z0++; - - *d = z0; - return z1; - } - -} - -#endif diff --git a/src/math/bigint/mp_comba.cpp b/src/math/bigint/mp_comba.cpp deleted file mode 100644 index 2770d3f0a..000000000 --- a/src/math/bigint/mp_comba.cpp +++ /dev/null @@ -1,920 +0,0 @@ -/* -* Comba Multiplication and Squaring -* (C) 1999-2007 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#include -#include - -namespace Botan { - -extern "C" { - -/* -* Comba 4x4 Squaring -*/ -void bigint_comba_sqr4(word z[8], const word x[4]) - { - word w2 = 0, w1 = 0, w0 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], x[0]); - z[0] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[1]); - z[1] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[2]); - word3_muladd(&w2, &w1, &w0, x[1], x[1]); - z[2] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[3]); - word3_muladd_2(&w2, &w1, &w0, x[1], x[2]); - z[3] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[1], x[3]); - word3_muladd(&w2, &w1, &w0, x[2], x[2]); - z[4] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[2], x[3]); - z[5] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[3], x[3]); - z[6] = w0; - z[7] = w1; - } - -/* -* Comba 4x4 Multiplication -*/ -void bigint_comba_mul4(word z[8], const word x[4], const word y[4]) - { - word w2 = 0, w1 = 0, w0 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[0]); - z[0] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[1]); - word3_muladd(&w2, &w1, &w0, x[1], y[0]); - z[1] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[2]); - word3_muladd(&w2, &w1, &w0, x[1], y[1]); - word3_muladd(&w2, &w1, &w0, x[2], y[0]); - z[2] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[3]); - word3_muladd(&w2, &w1, &w0, x[1], y[2]); - word3_muladd(&w2, &w1, &w0, x[2], y[1]); - word3_muladd(&w2, &w1, &w0, x[3], y[0]); - z[3] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[1], y[3]); - word3_muladd(&w2, &w1, &w0, x[2], y[2]); - word3_muladd(&w2, &w1, &w0, x[3], y[1]); - z[4] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[2], y[3]); - word3_muladd(&w2, &w1, &w0, x[3], y[2]); - z[5] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[3], y[3]); - z[6] = w0; - z[7] = w1; - } - -/* -* Comba 6x6 Squaring -*/ -void bigint_comba_sqr6(word z[12], const word x[6]) - { - word w2 = 0, w1 = 0, w0 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], x[0]); - z[0] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[1]); - z[1] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[2]); - word3_muladd(&w2, &w1, &w0, x[1], x[1]); - z[2] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[3]); - word3_muladd_2(&w2, &w1, &w0, x[1], x[2]); - z[3] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[4]); - word3_muladd_2(&w2, &w1, &w0, x[1], x[3]); - word3_muladd(&w2, &w1, &w0, x[2], x[2]); - z[4] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[5]); - word3_muladd_2(&w2, &w1, &w0, x[1], x[4]); - word3_muladd_2(&w2, &w1, &w0, x[2], x[3]); - z[5] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[1], x[5]); - word3_muladd_2(&w2, &w1, &w0, x[2], x[4]); - word3_muladd(&w2, &w1, &w0, x[3], x[3]); - z[6] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[2], x[5]); - word3_muladd_2(&w2, &w1, &w0, x[3], x[4]); - z[7] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[3], x[5]); - word3_muladd(&w2, &w1, &w0, x[4], x[4]); - z[8] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[4], x[5]); - z[9] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[5], x[5]); - z[10] = w0; - z[11] = w1; - } - -/* -* Comba 6x6 Multiplication -*/ -void bigint_comba_mul6(word z[12], const word x[6], const word y[6]) - { - word w2 = 0, w1 = 0, w0 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[0]); - z[0] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[1]); - word3_muladd(&w2, &w1, &w0, x[1], y[0]); - z[1] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[2]); - word3_muladd(&w2, &w1, &w0, x[1], y[1]); - word3_muladd(&w2, &w1, &w0, x[2], y[0]); - z[2] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[3]); - word3_muladd(&w2, &w1, &w0, x[1], y[2]); - word3_muladd(&w2, &w1, &w0, x[2], y[1]); - word3_muladd(&w2, &w1, &w0, x[3], y[0]); - z[3] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[4]); - word3_muladd(&w2, &w1, &w0, x[1], y[3]); - word3_muladd(&w2, &w1, &w0, x[2], y[2]); - word3_muladd(&w2, &w1, &w0, x[3], y[1]); - word3_muladd(&w2, &w1, &w0, x[4], y[0]); - z[4] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[5]); - word3_muladd(&w2, &w1, &w0, x[1], y[4]); - word3_muladd(&w2, &w1, &w0, x[2], y[3]); - word3_muladd(&w2, &w1, &w0, x[3], y[2]); - word3_muladd(&w2, &w1, &w0, x[4], y[1]); - word3_muladd(&w2, &w1, &w0, x[5], y[0]); - z[5] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[1], y[5]); - word3_muladd(&w2, &w1, &w0, x[2], y[4]); - word3_muladd(&w2, &w1, &w0, x[3], y[3]); - word3_muladd(&w2, &w1, &w0, x[4], y[2]); - word3_muladd(&w2, &w1, &w0, x[5], y[1]); - z[6] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[2], y[5]); - word3_muladd(&w2, &w1, &w0, x[3], y[4]); - word3_muladd(&w2, &w1, &w0, x[4], y[3]); - word3_muladd(&w2, &w1, &w0, x[5], y[2]); - z[7] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[3], y[5]); - word3_muladd(&w2, &w1, &w0, x[4], y[4]); - word3_muladd(&w2, &w1, &w0, x[5], y[3]); - z[8] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[4], y[5]); - word3_muladd(&w2, &w1, &w0, x[5], y[4]); - z[9] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[5], y[5]); - z[10] = w0; - z[11] = w1; - } - -/* -* Comba 8x8 Squaring -*/ -void bigint_comba_sqr8(word z[16], const word x[8]) - { - word w2 = 0, w1 = 0, w0 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], x[0]); - z[0] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[1]); - z[1] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[2]); - word3_muladd(&w2, &w1, &w0, x[1], x[1]); - z[2] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[3]); - word3_muladd_2(&w2, &w1, &w0, x[1], x[2]); - z[3] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[4]); - word3_muladd_2(&w2, &w1, &w0, x[1], x[3]); - word3_muladd(&w2, &w1, &w0, x[2], x[2]); - z[4] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[5]); - word3_muladd_2(&w2, &w1, &w0, x[1], x[4]); - word3_muladd_2(&w2, &w1, &w0, x[2], x[3]); - z[5] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[6]); - word3_muladd_2(&w2, &w1, &w0, x[1], x[5]); - word3_muladd_2(&w2, &w1, &w0, x[2], x[4]); - word3_muladd(&w2, &w1, &w0, x[3], x[3]); - z[6] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[0], x[7]); - word3_muladd_2(&w2, &w1, &w0, x[1], x[6]); - word3_muladd_2(&w2, &w1, &w0, x[2], x[5]); - word3_muladd_2(&w2, &w1, &w0, x[3], x[4]); - z[7] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[1], x[7]); - word3_muladd_2(&w2, &w1, &w0, x[2], x[6]); - word3_muladd_2(&w2, &w1, &w0, x[3], x[5]); - word3_muladd(&w2, &w1, &w0, x[4], x[4]); - z[8] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[2], x[7]); - word3_muladd_2(&w2, &w1, &w0, x[3], x[6]); - word3_muladd_2(&w2, &w1, &w0, x[4], x[5]); - z[9] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[3], x[7]); - word3_muladd_2(&w2, &w1, &w0, x[4], x[6]); - word3_muladd(&w2, &w1, &w0, x[5], x[5]); - z[10] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[4], x[7]); - word3_muladd_2(&w2, &w1, &w0, x[5], x[6]); - z[11] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[5], x[7]); - word3_muladd(&w2, &w1, &w0, x[6], x[6]); - z[12] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[6], x[7]); - z[13] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[7], x[7]); - z[14] = w0; - z[15] = w1; - } - -/* -* Comba 8x8 Multiplication -*/ -void bigint_comba_mul8(word z[16], const word x[8], const word y[8]) - { - word w2 = 0, w1 = 0, w0 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[0]); - z[0] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[1]); - word3_muladd(&w2, &w1, &w0, x[1], y[0]); - z[1] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[2]); - word3_muladd(&w2, &w1, &w0, x[1], y[1]); - word3_muladd(&w2, &w1, &w0, x[2], y[0]); - z[2] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[3]); - word3_muladd(&w2, &w1, &w0, x[1], y[2]); - word3_muladd(&w2, &w1, &w0, x[2], y[1]); - word3_muladd(&w2, &w1, &w0, x[3], y[0]); - z[3] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[4]); - word3_muladd(&w2, &w1, &w0, x[1], y[3]); - word3_muladd(&w2, &w1, &w0, x[2], y[2]); - word3_muladd(&w2, &w1, &w0, x[3], y[1]); - word3_muladd(&w2, &w1, &w0, x[4], y[0]); - z[4] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[5]); - word3_muladd(&w2, &w1, &w0, x[1], y[4]); - word3_muladd(&w2, &w1, &w0, x[2], y[3]); - word3_muladd(&w2, &w1, &w0, x[3], y[2]); - word3_muladd(&w2, &w1, &w0, x[4], y[1]); - word3_muladd(&w2, &w1, &w0, x[5], y[0]); - z[5] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[6]); - word3_muladd(&w2, &w1, &w0, x[1], y[5]); - word3_muladd(&w2, &w1, &w0, x[2], y[4]); - word3_muladd(&w2, &w1, &w0, x[3], y[3]); - word3_muladd(&w2, &w1, &w0, x[4], y[2]); - word3_muladd(&w2, &w1, &w0, x[5], y[1]); - word3_muladd(&w2, &w1, &w0, x[6], y[0]); - z[6] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[0], y[7]); - word3_muladd(&w2, &w1, &w0, x[1], y[6]); - word3_muladd(&w2, &w1, &w0, x[2], y[5]); - word3_muladd(&w2, &w1, &w0, x[3], y[4]); - word3_muladd(&w2, &w1, &w0, x[4], y[3]); - word3_muladd(&w2, &w1, &w0, x[5], y[2]); - word3_muladd(&w2, &w1, &w0, x[6], y[1]); - word3_muladd(&w2, &w1, &w0, x[7], y[0]); - z[7] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[1], y[7]); - word3_muladd(&w2, &w1, &w0, x[2], y[6]); - word3_muladd(&w2, &w1, &w0, x[3], y[5]); - word3_muladd(&w2, &w1, &w0, x[4], y[4]); - word3_muladd(&w2, &w1, &w0, x[5], y[3]); - word3_muladd(&w2, &w1, &w0, x[6], y[2]); - word3_muladd(&w2, &w1, &w0, x[7], y[1]); - z[8] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[2], y[7]); - word3_muladd(&w2, &w1, &w0, x[3], y[6]); - word3_muladd(&w2, &w1, &w0, x[4], y[5]); - word3_muladd(&w2, &w1, &w0, x[5], y[4]); - word3_muladd(&w2, &w1, &w0, x[6], y[3]); - word3_muladd(&w2, &w1, &w0, x[7], y[2]); - z[9] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[3], y[7]); - word3_muladd(&w2, &w1, &w0, x[4], y[6]); - word3_muladd(&w2, &w1, &w0, x[5], y[5]); - word3_muladd(&w2, &w1, &w0, x[6], y[4]); - word3_muladd(&w2, &w1, &w0, x[7], y[3]); - z[10] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[4], y[7]); - word3_muladd(&w2, &w1, &w0, x[5], y[6]); - word3_muladd(&w2, &w1, &w0, x[6], y[5]); - word3_muladd(&w2, &w1, &w0, x[7], y[4]); - z[11] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[5], y[7]); - word3_muladd(&w2, &w1, &w0, x[6], y[6]); - word3_muladd(&w2, &w1, &w0, x[7], y[5]); - z[12] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[6], y[7]); - word3_muladd(&w2, &w1, &w0, x[7], y[6]); - z[13] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[7], y[7]); - z[14] = w0; - z[15] = w1; - } - -/* -* Comba 16x16 Squaring -*/ -void bigint_comba_sqr16(word z[32], const word x[16]) - { - word w2 = 0, w1 = 0, w0 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], x[ 0]); - z[ 0] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 1]); - z[ 1] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 1], x[ 1]); - z[ 2] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 3]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 2]); - z[ 3] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 4]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 2], x[ 2]); - z[ 4] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 5]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 4]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 3]); - z[ 5] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 6]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 5]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 3], x[ 3]); - z[ 6] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 7]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 6]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 5]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 4]); - z[ 7] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 8]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 7]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 6]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 4], x[ 4]); - z[ 8] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 9]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 8]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 7]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 6]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 5]); - z[ 9] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[10]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 9]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 8]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 7]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 5], x[ 5]); - z[10] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[11]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[10]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 9]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 8]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 7]); - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 6]); - z[11] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[11]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[10]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 9]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 8]); - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 6], x[ 6]); - z[12] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[11]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[10]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 9]); - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 8]); - word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 7]); - z[13] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[11]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[10]); - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 9]); - word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 7], x[ 7]); - z[14] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 0], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[11]); - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[10]); - word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 9]); - word3_muladd_2(&w2, &w1, &w0, x[ 7], x[ 8]); - z[15] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 1], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[11]); - word3_muladd_2(&w2, &w1, &w0, x[ 6], x[10]); - word3_muladd_2(&w2, &w1, &w0, x[ 7], x[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 8], x[ 8]); - z[16] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 2], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[ 6], x[11]); - word3_muladd_2(&w2, &w1, &w0, x[ 7], x[10]); - word3_muladd_2(&w2, &w1, &w0, x[ 8], x[ 9]); - z[17] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 3], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[ 6], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[ 7], x[11]); - word3_muladd_2(&w2, &w1, &w0, x[ 8], x[10]); - word3_muladd(&w2, &w1, &w0, x[ 9], x[ 9]); - z[18] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 4], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[ 6], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[ 7], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[ 8], x[11]); - word3_muladd_2(&w2, &w1, &w0, x[ 9], x[10]); - z[19] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 5], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[ 6], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[ 7], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[ 8], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[ 9], x[11]); - word3_muladd(&w2, &w1, &w0, x[10], x[10]); - z[20] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 6], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[ 7], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[ 8], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[ 9], x[12]); - word3_muladd_2(&w2, &w1, &w0, x[10], x[11]); - z[21] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 7], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[ 8], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[ 9], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[10], x[12]); - word3_muladd(&w2, &w1, &w0, x[11], x[11]); - z[22] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 8], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[ 9], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[10], x[13]); - word3_muladd_2(&w2, &w1, &w0, x[11], x[12]); - z[23] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[ 9], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[10], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[11], x[13]); - word3_muladd(&w2, &w1, &w0, x[12], x[12]); - z[24] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[10], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[11], x[14]); - word3_muladd_2(&w2, &w1, &w0, x[12], x[13]); - z[25] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[11], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[12], x[14]); - word3_muladd(&w2, &w1, &w0, x[13], x[13]); - z[26] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[12], x[15]); - word3_muladd_2(&w2, &w1, &w0, x[13], x[14]); - z[27] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[13], x[15]); - word3_muladd(&w2, &w1, &w0, x[14], x[14]); - z[28] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd_2(&w2, &w1, &w0, x[14], x[15]); - z[29] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[15], x[15]); - z[30] = w0; - z[31] = w1; - } - -/* -* Comba 16x16 Multiplication -*/ -void bigint_comba_mul16(word z[32], const word x[16], const word y[16]) - { - word w2 = 0, w1 = 0, w0 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 0]); - z[0] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 0]); - z[1] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 0]); - z[2] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 0]); - z[3] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 0]); - z[4] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 0]); - z[5] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 0]); - z[6] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 0]); - z[7] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 0]); - z[8] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 0]); - z[9] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[10]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 0]); - z[10] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[11]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[10]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 0]); - z[11] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[12]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[11]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[10]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 0]); - z[12] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[13]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[12]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[11]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[10]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 0]); - z[13] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[14]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[13]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[12]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[11]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[10]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 0]); - z[14] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 0], y[15]); - word3_muladd(&w2, &w1, &w0, x[ 1], y[14]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[13]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[12]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[11]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[10]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 1]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 0]); - z[15] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 1], y[15]); - word3_muladd(&w2, &w1, &w0, x[ 2], y[14]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[13]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[12]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[11]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[10]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 2]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 1]); - z[16] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 2], y[15]); - word3_muladd(&w2, &w1, &w0, x[ 3], y[14]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[13]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[12]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[11]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[10]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 3]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 2]); - z[17] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 3], y[15]); - word3_muladd(&w2, &w1, &w0, x[ 4], y[14]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[13]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[12]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[11]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[10]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 4]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 3]); - z[18] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 4], y[15]); - word3_muladd(&w2, &w1, &w0, x[ 5], y[14]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[13]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[12]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[11]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[10]); - word3_muladd(&w2, &w1, &w0, x[10], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 5]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 4]); - z[19] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 5], y[15]); - word3_muladd(&w2, &w1, &w0, x[ 6], y[14]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[13]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[12]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[11]); - word3_muladd(&w2, &w1, &w0, x[10], y[10]); - word3_muladd(&w2, &w1, &w0, x[11], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 6]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 5]); - z[20] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 6], y[15]); - word3_muladd(&w2, &w1, &w0, x[ 7], y[14]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[13]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[12]); - word3_muladd(&w2, &w1, &w0, x[10], y[11]); - word3_muladd(&w2, &w1, &w0, x[11], y[10]); - word3_muladd(&w2, &w1, &w0, x[12], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 7]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 6]); - z[21] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 7], y[15]); - word3_muladd(&w2, &w1, &w0, x[ 8], y[14]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[13]); - word3_muladd(&w2, &w1, &w0, x[10], y[12]); - word3_muladd(&w2, &w1, &w0, x[11], y[11]); - word3_muladd(&w2, &w1, &w0, x[12], y[10]); - word3_muladd(&w2, &w1, &w0, x[13], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 8]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 7]); - z[22] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 8], y[15]); - word3_muladd(&w2, &w1, &w0, x[ 9], y[14]); - word3_muladd(&w2, &w1, &w0, x[10], y[13]); - word3_muladd(&w2, &w1, &w0, x[11], y[12]); - word3_muladd(&w2, &w1, &w0, x[12], y[11]); - word3_muladd(&w2, &w1, &w0, x[13], y[10]); - word3_muladd(&w2, &w1, &w0, x[14], y[ 9]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 8]); - z[23] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[ 9], y[15]); - word3_muladd(&w2, &w1, &w0, x[10], y[14]); - word3_muladd(&w2, &w1, &w0, x[11], y[13]); - word3_muladd(&w2, &w1, &w0, x[12], y[12]); - word3_muladd(&w2, &w1, &w0, x[13], y[11]); - word3_muladd(&w2, &w1, &w0, x[14], y[10]); - word3_muladd(&w2, &w1, &w0, x[15], y[ 9]); - z[24] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[10], y[15]); - word3_muladd(&w2, &w1, &w0, x[11], y[14]); - word3_muladd(&w2, &w1, &w0, x[12], y[13]); - word3_muladd(&w2, &w1, &w0, x[13], y[12]); - word3_muladd(&w2, &w1, &w0, x[14], y[11]); - word3_muladd(&w2, &w1, &w0, x[15], y[10]); - z[25] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[11], y[15]); - word3_muladd(&w2, &w1, &w0, x[12], y[14]); - word3_muladd(&w2, &w1, &w0, x[13], y[13]); - word3_muladd(&w2, &w1, &w0, x[14], y[12]); - word3_muladd(&w2, &w1, &w0, x[15], y[11]); - z[26] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[12], y[15]); - word3_muladd(&w2, &w1, &w0, x[13], y[14]); - word3_muladd(&w2, &w1, &w0, x[14], y[13]); - word3_muladd(&w2, &w1, &w0, x[15], y[12]); - z[27] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[13], y[15]); - word3_muladd(&w2, &w1, &w0, x[14], y[14]); - word3_muladd(&w2, &w1, &w0, x[15], y[13]); - z[28] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[14], y[15]); - word3_muladd(&w2, &w1, &w0, x[15], y[14]); - z[29] = w0; w0 = w1; w1 = w2; w2 = 0; - - word3_muladd(&w2, &w1, &w0, x[15], y[15]); - z[30] = w0; - z[31] = w1; - } - -} - -} diff --git a/src/math/bigint/mp_core.h b/src/math/bigint/mp_core.h deleted file mode 100644 index 63082795f..000000000 --- a/src/math/bigint/mp_core.h +++ /dev/null @@ -1,144 +0,0 @@ -/* -* MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_CORE_H__ -#define BOTAN_MP_CORE_H__ - -#include - -namespace Botan { - -/* -* The size of the word type, in bits -*/ -const u32bit MP_WORD_BITS = BOTAN_MP_WORD_BITS; - -extern "C" { - -/* -* Addition/Subtraction Operations -*/ -void bigint_add2(word x[], u32bit x_size, - const word y[], u32bit y_size); - -void bigint_add3(word z[], - const word x[], u32bit x_size, - const word y[], u32bit y_size); - -word bigint_add2_nc(word x[], u32bit x_size, const word y[], u32bit y_size); - -word bigint_add3_nc(word z[], - const word x[], u32bit x_size, - const word y[], u32bit y_size); - -word bigint_sub2(word x[], u32bit x_size, - const word y[], u32bit y_size); - -/** -* x = y - x; assumes y >= x -*/ -void bigint_sub2_rev(word x[], const word y[], u32bit y_size); - -word bigint_sub3(word z[], - const word x[], u32bit x_size, - const word y[], u32bit y_size); - -/* -* Shift Operations -*/ -void bigint_shl1(word x[], u32bit x_size, - u32bit word_shift, u32bit bit_shift); - -void bigint_shr1(word x[], u32bit x_size, - u32bit word_shift, u32bit bit_shift); - -void bigint_shl2(word y[], const word x[], u32bit x_size, - u32bit word_shift, u32bit bit_shift); - -void bigint_shr2(word y[], const word x[], u32bit x_size, - u32bit word_shift, u32bit bit_shift); - -/* -* Simple O(N^2) Multiplication and Squaring -*/ -void bigint_simple_mul(word z[], - const word x[], u32bit x_size, - const word y[], u32bit y_size); - -void bigint_simple_sqr(word z[], const word x[], u32bit x_size); - -/* -* Linear Multiply -*/ -void bigint_linmul2(word x[], u32bit x_size, word y); -void bigint_linmul3(word z[], const word x[], u32bit x_size, word y); - -/* -* Montgomery Reduction -* @param z integer to reduce (also output in first x_size+1 words) -* @param z_size size of z (should be >= 2*x_size+1) -* @param workspace array of at least 2*(x_size+1) words -* @param x modulus -* @param x_size size of x -* @param u Montgomery value -*/ -void bigint_monty_redc(word z[], u32bit z_size, - word workspace[], - const word x[], u32bit x_size, - word u); - -/* -* Division operation -*/ -u32bit bigint_divcore(word q, word y2, word y1, - word x3, word x2, word x1); - -/** -* Compare x and y -*/ -s32bit bigint_cmp(const word x[], u32bit x_size, - const word y[], u32bit y_size); - -/** -* Compute ((n1< -mp_asm.h -mp_asmi.h - diff --git a/src/math/bigint/mp_generic/mp_asm.h b/src/math/bigint/mp_generic/mp_asm.h deleted file mode 100644 index 7c18343ef..000000000 --- a/src/math/bigint/mp_generic/mp_asm.h +++ /dev/null @@ -1,54 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2008 Jack Lloyd -* 2006 Luca Piccarreta -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_ASM_H__ -#define BOTAN_MP_ASM_H__ - -#include - -#if (BOTAN_MP_WORD_BITS == 8) - typedef Botan::u16bit dword; -#elif (BOTAN_MP_WORD_BITS == 16) - typedef Botan::u32bit dword; -#elif (BOTAN_MP_WORD_BITS == 32) - typedef Botan::u64bit dword; -#elif (BOTAN_MP_WORD_BITS == 64) - #error BOTAN_MP_WORD_BITS can be 64 only with assembly support -#else - #error BOTAN_MP_WORD_BITS must be 8, 16, 32, or 64 -#endif - -namespace Botan { - -extern "C" { - -/* -* Word Multiply/Add -*/ -inline word word_madd2(word a, word b, word* c) - { - dword z = (dword)a * b + *c; - *c = (word)(z >> BOTAN_MP_WORD_BITS); - return (word)z; - } - -/* -* Word Multiply/Add -*/ -inline word word_madd3(word a, word b, word c, word* d) - { - dword z = (dword)a * b + c + *d; - *d = (word)(z >> BOTAN_MP_WORD_BITS); - return (word)z; - } - -} - -} - -#endif diff --git a/src/math/bigint/mp_generic/mp_asmi.h b/src/math/bigint/mp_generic/mp_asmi.h deleted file mode 100644 index 8225f372d..000000000 --- a/src/math/bigint/mp_generic/mp_asmi.h +++ /dev/null @@ -1,207 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_ASM_INTERNAL_H__ -#define BOTAN_MP_ASM_INTERNAL_H__ - -#include - -namespace Botan { - -extern "C" { - -/* -* Word Addition -*/ -inline word word_add(word x, word y, word* carry) - { - word z = x + y; - word c1 = (z < x); - z += *carry; - *carry = c1 | (z < *carry); - return z; - } - -/* -* Eight Word Block Addition, Two Argument -*/ -inline word word8_add2(word x[8], const word y[8], word carry) - { - x[0] = word_add(x[0], y[0], &carry); - x[1] = word_add(x[1], y[1], &carry); - x[2] = word_add(x[2], y[2], &carry); - x[3] = word_add(x[3], y[3], &carry); - x[4] = word_add(x[4], y[4], &carry); - x[5] = word_add(x[5], y[5], &carry); - x[6] = word_add(x[6], y[6], &carry); - x[7] = word_add(x[7], y[7], &carry); - return carry; - } - -/* -* Eight Word Block Addition, Three Argument -*/ -inline word word8_add3(word z[8], const word x[8], - const word y[8], word carry) - { - z[0] = word_add(x[0], y[0], &carry); - z[1] = word_add(x[1], y[1], &carry); - z[2] = word_add(x[2], y[2], &carry); - z[3] = word_add(x[3], y[3], &carry); - z[4] = word_add(x[4], y[4], &carry); - z[5] = word_add(x[5], y[5], &carry); - z[6] = word_add(x[6], y[6], &carry); - z[7] = word_add(x[7], y[7], &carry); - return carry; - } - -/* -* Word Subtraction -*/ -inline word word_sub(word x, word y, word* carry) - { - word t0 = x - y; - word c1 = (t0 > x); - word z = t0 - *carry; - *carry = c1 | (z > t0); - return z; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2(word x[8], const word y[8], word carry) - { - x[0] = word_sub(x[0], y[0], &carry); - x[1] = word_sub(x[1], y[1], &carry); - x[2] = word_sub(x[2], y[2], &carry); - x[3] = word_sub(x[3], y[3], &carry); - x[4] = word_sub(x[4], y[4], &carry); - x[5] = word_sub(x[5], y[5], &carry); - x[6] = word_sub(x[6], y[6], &carry); - x[7] = word_sub(x[7], y[7], &carry); - return carry; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2_rev(word x[8], const word y[8], word carry) - { - x[0] = word_sub(y[0], x[0], &carry); - x[1] = word_sub(y[1], x[1], &carry); - x[2] = word_sub(y[2], x[2], &carry); - x[3] = word_sub(y[3], x[3], &carry); - x[4] = word_sub(y[4], x[4], &carry); - x[5] = word_sub(y[5], x[5], &carry); - x[6] = word_sub(y[6], x[6], &carry); - x[7] = word_sub(y[7], x[7], &carry); - return carry; - } - -/* -* Eight Word Block Subtraction, Three Argument -*/ -inline word word8_sub3(word z[8], const word x[8], - const word y[8], word carry) - { - z[0] = word_sub(x[0], y[0], &carry); - z[1] = word_sub(x[1], y[1], &carry); - z[2] = word_sub(x[2], y[2], &carry); - z[3] = word_sub(x[3], y[3], &carry); - z[4] = word_sub(x[4], y[4], &carry); - z[5] = word_sub(x[5], y[5], &carry); - z[6] = word_sub(x[6], y[6], &carry); - z[7] = word_sub(x[7], y[7], &carry); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul2(word x[8], word y, word carry) - { - x[0] = word_madd2(x[0], y, &carry); - x[1] = word_madd2(x[1], y, &carry); - x[2] = word_madd2(x[2], y, &carry); - x[3] = word_madd2(x[3], y, &carry); - x[4] = word_madd2(x[4], y, &carry); - x[5] = word_madd2(x[5], y, &carry); - x[6] = word_madd2(x[6], y, &carry); - x[7] = word_madd2(x[7], y, &carry); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul3(word z[8], const word x[8], word y, word carry) - { - z[0] = word_madd2(x[0], y, &carry); - z[1] = word_madd2(x[1], y, &carry); - z[2] = word_madd2(x[2], y, &carry); - z[3] = word_madd2(x[3], y, &carry); - z[4] = word_madd2(x[4], y, &carry); - z[5] = word_madd2(x[5], y, &carry); - z[6] = word_madd2(x[6], y, &carry); - z[7] = word_madd2(x[7], y, &carry); - return carry; - } - -/* -* Eight Word Block Multiply/Add -*/ -inline word word8_madd3(word z[8], const word x[8], word y, word carry) - { - z[0] = word_madd3(x[0], y, z[0], &carry); - z[1] = word_madd3(x[1], y, z[1], &carry); - z[2] = word_madd3(x[2], y, z[2], &carry); - z[3] = word_madd3(x[3], y, z[3], &carry); - z[4] = word_madd3(x[4], y, z[4], &carry); - z[5] = word_madd3(x[5], y, z[5], &carry); - z[6] = word_madd3(x[6], y, z[6], &carry); - z[7] = word_madd3(x[7], y, z[7], &carry); - return carry; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd(word* w2, word* w1, word* w0, word a, word b) - { - word carry = *w0; - *w0 = word_madd2(a, b, &carry); - *w1 += carry; - *w2 += (*w1 < carry) ? 1 : 0; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b) - { - word carry = 0; - a = word_madd2(a, b, &carry); - b = carry; - - word top = (b >> (BOTAN_MP_WORD_BITS-1)); - b <<= 1; - b |= (a >> (BOTAN_MP_WORD_BITS-1)); - a <<= 1; - - carry = 0; - *w0 = word_add(*w0, a, &carry); - *w1 = word_add(*w1, b, &carry); - *w2 = word_add(*w2, top, &carry); - } - -} - -} - -#endif diff --git a/src/math/bigint/mp_ia32/info.txt b/src/math/bigint/mp_ia32/info.txt deleted file mode 100644 index 1659f74cf..000000000 --- a/src/math/bigint/mp_ia32/info.txt +++ /dev/null @@ -1,18 +0,0 @@ -load_on dep - -mp_bits 32 - - -mp_asm.h -mp_asmi.h - - - -ia32 - - - -clang -gcc -icc - diff --git a/src/math/bigint/mp_ia32/mp_asm.h b/src/math/bigint/mp_ia32/mp_asm.h deleted file mode 100644 index 4d3afc992..000000000 --- a/src/math/bigint/mp_ia32/mp_asm.h +++ /dev/null @@ -1,67 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2008 Jack Lloyd -* 2006 Luca Piccarreta -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_ASM_H__ -#define BOTAN_MP_ASM_H__ - -#include - -#if (BOTAN_MP_WORD_BITS != 32) - #error The mp_ia32 module requires that BOTAN_MP_WORD_BITS == 32 -#endif - -namespace Botan { - -extern "C" { - -/* -* Helper Macros for x86 Assembly -*/ -#define ASM(x) x "\n\t" - -/* -* Word Multiply -*/ -inline word word_madd2(word a, word b, word* c) - { - asm( - ASM("mull %[b]") - ASM("addl %[c],%[a]") - ASM("adcl $0,%[carry]") - - : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) - : "0"(a), "1"(b), [c]"g"(*c) : "cc"); - - return a; - } - -/* -* Word Multiply/Add -*/ -inline word word_madd3(word a, word b, word c, word* d) - { - asm( - ASM("mull %[b]") - - ASM("addl %[c],%[a]") - ASM("adcl $0,%[carry]") - - ASM("addl %[d],%[a]") - ASM("adcl $0,%[carry]") - - : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) - : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); - - return a; - } - -} - -} - -#endif diff --git a/src/math/bigint/mp_ia32/mp_asmi.h b/src/math/bigint/mp_ia32/mp_asmi.h deleted file mode 100644 index c7b679e80..000000000 --- a/src/math/bigint/mp_ia32/mp_asmi.h +++ /dev/null @@ -1,240 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_ASM_INTERNAL_H__ -#define BOTAN_MP_ASM_INTERNAL_H__ - -#include - -namespace Botan { - -extern "C" { - -/* -* Helper Macros for x86 Assembly -*/ -#ifndef ASM - #define ASM(x) x "\n\t" -#endif - -#define ADDSUB2_OP(OPERATION, INDEX) \ - ASM("movl 4*" #INDEX "(%[y]), %[carry]") \ - ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])") \ - -#define ADDSUB3_OP(OPERATION, INDEX) \ - ASM("movl 4*" #INDEX "(%[x]), %[carry]") \ - ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]") \ - ASM("movl %[carry], 4*" #INDEX "(%[z])") \ - -#define LINMUL_OP(WRITE_TO, INDEX) \ - ASM("movl 4*" #INDEX "(%[x]),%%eax") \ - ASM("mull %[y]") \ - ASM("addl %[carry],%%eax") \ - ASM("adcl $0,%%edx") \ - ASM("movl %%edx,%[carry]") \ - ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])") - -#define MULADD_OP(IGNORED, INDEX) \ - ASM("movl 4*" #INDEX "(%[x]),%%eax") \ - ASM("mull %[y]") \ - ASM("addl %[carry],%%eax") \ - ASM("adcl $0,%%edx") \ - ASM("addl 4*" #INDEX "(%[z]),%%eax") \ - ASM("adcl $0,%%edx") \ - ASM("movl %%edx,%[carry]") \ - ASM("movl %%eax, 4*" #INDEX " (%[z])") - -#define DO_8_TIMES(MACRO, ARG) \ - MACRO(ARG, 0) \ - MACRO(ARG, 1) \ - MACRO(ARG, 2) \ - MACRO(ARG, 3) \ - MACRO(ARG, 4) \ - MACRO(ARG, 5) \ - MACRO(ARG, 6) \ - MACRO(ARG, 7) - -#define ADD_OR_SUBTRACT(CORE_CODE) \ - ASM("rorl %[carry]") \ - CORE_CODE \ - ASM("sbbl %[carry],%[carry]") \ - ASM("negl %[carry]") - -/* -* Word Addition -*/ -inline word word_add(word x, word y, word* carry) - { - asm( - ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]")) - : [x]"=r"(x), [carry]"=r"(*carry) - : "0"(x), [y]"rm"(y), "1"(*carry) - : "cc"); - return x; - } - -/* -* Eight Word Block Addition, Two Argument -*/ -inline word word8_add2(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Addition, Three Argument -*/ -inline word word8_add3(word z[8], const word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Word Subtraction -*/ -inline word word_sub(word x, word y, word* carry) - { - asm( - ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]")) - : [x]"=r"(x), [carry]"=r"(*carry) - : "0"(x), [y]"rm"(y), "1"(*carry) - : "cc"); - return x; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2_rev(word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) - : [carry]"=r"(carry) - : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Subtraction, Three Argument -*/ -inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry) - { - asm( - ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) - : [carry]"=r"(carry) - : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) - : "cc", "memory"); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul2(word x[8], word y, word carry) - { - asm( - DO_8_TIMES(LINMUL_OP, "x") - : [carry]"=r"(carry) - : [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%eax", "%edx"); - return carry; - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul3(word z[8], const word x[8], word y, word carry) - { - asm( - DO_8_TIMES(LINMUL_OP, "z") - : [carry]"=r"(carry) - : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%eax", "%edx"); - return carry; - } - -/* -* Eight Word Block Multiply/Add -*/ -inline word word8_madd3(word z[8], const word x[8], word y, word carry) - { - asm( - DO_8_TIMES(MULADD_OP, "") - : [carry]"=r"(carry) - : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) - : "cc", "%eax", "%edx"); - return carry; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) - { - asm( - ASM("mull %[y]") - - ASM("addl %[x],%[w0]") - ASM("adcl %[y],%[w1]") - ASM("adcl $0,%[w2]") - - : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) - : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) - : "cc"); - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) - { - asm( - ASM("mull %[y]") - - ASM("addl %[x],%[w0]") - ASM("adcl %[y],%[w1]") - ASM("adcl $0,%[w2]") - - ASM("addl %[x],%[w0]") - ASM("adcl %[y],%[w1]") - ASM("adcl $0,%[w2]") - - : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) - : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) - : "cc"); - } - -} - -} - -#endif diff --git a/src/math/bigint/mp_ia32_msvc/info.txt b/src/math/bigint/mp_ia32_msvc/info.txt deleted file mode 100644 index 55a42c310..000000000 --- a/src/math/bigint/mp_ia32_msvc/info.txt +++ /dev/null @@ -1,16 +0,0 @@ -mp_bits 32 - -load_on dep - - -mp_generic:mp_asm.h -mp_asmi.h - - - -ia32 - - - -msvc - diff --git a/src/math/bigint/mp_ia32_msvc/mp_asmi.h b/src/math/bigint/mp_ia32_msvc/mp_asmi.h deleted file mode 100644 index aee457d65..000000000 --- a/src/math/bigint/mp_ia32_msvc/mp_asmi.h +++ /dev/null @@ -1,542 +0,0 @@ -/* -* Lowest Level MPI Algorithms -* (C) 1999-2010 Jack Lloyd -* 2006 Luca Piccarreta -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_ASM_INTERNAL_H__ -#define BOTAN_MP_ASM_INTERNAL_H__ - -#include - -namespace Botan { - -extern "C" { - -/* -* Word Addition -*/ -inline word word_add(word x, word y, word* carry) - { - word z = x + y; - word c1 = (z < x); - z += *carry; - *carry = c1 | (z < *carry); - return z; - } - -/* -* Eight Word Block Addition, Two Argument -*/ -inline word word8_add2(word x[8], const word y[8], word carry) - { - __asm { - mov edx,[x] - mov esi,[y] - xor eax,eax - sub eax,[carry] //force CF=1 iff *carry==1 - mov eax,[esi] - adc [edx],eax - mov eax,[esi+4] - adc [edx+4],eax - mov eax,[esi+8] - adc [edx+8],eax - mov eax,[esi+12] - adc [edx+12],eax - mov eax,[esi+16] - adc [edx+16],eax - mov eax,[esi+20] - adc [edx+20],eax - mov eax,[esi+24] - adc [edx+24],eax - mov eax,[esi+28] - adc [edx+28],eax - sbb eax,eax - neg eax - } - } - -/* -* Eight Word Block Addition, Three Argument -*/ -inline word word8_add3(word z[8], const word x[8], const word y[8], word carry) - { - __asm { - mov edi,[x] - mov esi,[y] - mov ebx,[z] - xor eax,eax - sub eax,[carry] //force CF=1 iff *carry==1 - mov eax,[edi] - adc eax,[esi] - mov [ebx],eax - - mov eax,[edi+4] - adc eax,[esi+4] - mov [ebx+4],eax - - mov eax,[edi+8] - adc eax,[esi+8] - mov [ebx+8],eax - - mov eax,[edi+12] - adc eax,[esi+12] - mov [ebx+12],eax - - mov eax,[edi+16] - adc eax,[esi+16] - mov [ebx+16],eax - - mov eax,[edi+20] - adc eax,[esi+20] - mov [ebx+20],eax - - mov eax,[edi+24] - adc eax,[esi+24] - mov [ebx+24],eax - - mov eax,[edi+28] - adc eax,[esi+28] - mov [ebx+28],eax - - sbb eax,eax - neg eax - } - } - -/* -* Word Subtraction -*/ -inline word word_sub(word x, word y, word* carry) - { - word t0 = x - y; - word c1 = (t0 > x); - word z = t0 - *carry; - *carry = c1 | (z > t0); - return z; - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2(word x[8], const word y[8], word carry) - { - __asm { - mov edi,[x] - mov esi,[y] - xor eax,eax - sub eax,[carry] //force CF=1 iff *carry==1 - mov eax,[edi] - sbb eax,[esi] - mov [edi],eax - mov eax,[edi+4] - sbb eax,[esi+4] - mov [edi+4],eax - mov eax,[edi+8] - sbb eax,[esi+8] - mov [edi+8],eax - mov eax,[edi+12] - sbb eax,[esi+12] - mov [edi+12],eax - mov eax,[edi+16] - sbb eax,[esi+16] - mov [edi+16],eax - mov eax,[edi+20] - sbb eax,[esi+20] - mov [edi+20],eax - mov eax,[edi+24] - sbb eax,[esi+24] - mov [edi+24],eax - mov eax,[edi+28] - sbb eax,[esi+28] - mov [edi+28],eax - sbb eax,eax - neg eax - } - } - -/* -* Eight Word Block Subtraction, Two Argument -*/ -inline word word8_sub2_rev(word x[8], const word y[8], word carry) - { - x[0] = word_sub(y[0], x[0], &carry); - x[1] = word_sub(y[1], x[1], &carry); - x[2] = word_sub(y[2], x[2], &carry); - x[3] = word_sub(y[3], x[3], &carry); - x[4] = word_sub(y[4], x[4], &carry); - x[5] = word_sub(y[5], x[5], &carry); - x[6] = word_sub(y[6], x[6], &carry); - x[7] = word_sub(y[7], x[7], &carry); - return carry; - } - - -/* -* Eight Word Block Subtraction, Three Argument -*/ -inline word word8_sub3(word z[8], const word x[8], - const word y[8], word carry) - { - __asm { - mov edi,[x] - mov esi,[y] - xor eax,eax - sub eax,[carry] //force CF=1 iff *carry==1 - mov ebx,[z] - mov eax,[edi] - sbb eax,[esi] - mov [ebx],eax - mov eax,[edi+4] - sbb eax,[esi+4] - mov [ebx+4],eax - mov eax,[edi+8] - sbb eax,[esi+8] - mov [ebx+8],eax - mov eax,[edi+12] - sbb eax,[esi+12] - mov [ebx+12],eax - mov eax,[edi+16] - sbb eax,[esi+16] - mov [ebx+16],eax - mov eax,[edi+20] - sbb eax,[esi+20] - mov [ebx+20],eax - mov eax,[edi+24] - sbb eax,[esi+24] - mov [ebx+24],eax - mov eax,[edi+28] - sbb eax,[esi+28] - mov [ebx+28],eax - sbb eax,eax - neg eax - } - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_linmul2(word x[8], word y, word carry) - { - __asm { - mov esi,[x] - mov eax,[esi] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,[carry] //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi],eax //load a - - mov eax,[esi+4] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+4],eax //load a - - mov eax,[esi+8] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+8],eax //load a - - mov eax,[esi+12] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+12],eax //load a - - mov eax,[esi+16] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+16],eax //load a - - mov eax,[esi+20] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+20],eax //load a - - mov eax,[esi+24] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [esi+24],eax //load a - - mov eax,[esi+28] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov [esi+28],eax //load a - - mov eax,edx //store carry - } - } - -/* -* Eight Word Block Linear Multiplication -*/ -inline word word8_muladd(word z[8], const word x[8], - word y, word carry) - { - __asm { - mov esi,[x] - mov ebx,[y] - mov edi,[z] - mov eax,[esi] //load a - mul ebx //edx(hi):eax(lo)=a*b - add eax,[carry] //sum lo carry - adc edx,0 //sum hi carry - add eax,[edi] //sum lo z - adc edx,0 //sum hi z - mov ecx,edx //carry for next block = hi z - mov [edi],eax //save lo z - - mov eax,[esi+4] - mul ebx - add eax,ecx - adc edx,0 - add eax,[edi+4] - adc edx,0 - mov ecx,edx - mov [edi+4],eax - - mov eax,[esi+8] - mul ebx - add eax,ecx - adc edx,0 - add eax,[edi+8] - adc edx,0 - mov ecx,edx - mov [edi+8],eax - - mov eax,[esi+12] - mul ebx - add eax,ecx - adc edx,0 - add eax,[edi+12] - adc edx,0 - mov ecx,edx - mov [edi+12],eax - - mov eax,[esi+16] - mul ebx - add eax,ecx - adc edx,0 - add eax,[edi+16] - adc edx,0 - mov ecx,edx - mov [edi+16],eax - - mov eax,[esi+20] - mul ebx - add eax,ecx - adc edx,0 - add eax,[edi+20] - adc edx,0 - mov ecx,edx - mov [edi+20],eax - - mov eax,[esi+24] - mul ebx - add eax,ecx - adc edx,0 - add eax,[edi+24] - adc edx,0 - mov ecx,edx - mov [edi+24],eax - - mov eax,[esi+28] - mul ebx - add eax,ecx - adc edx,0 - add eax,[edi+28] - adc edx,0 - mov [edi+28],eax - mov eax,edx - } - } - -inline word word8_linmul3(word z[4], const word x[4], word y, word carry) - { - __asm { -#if 0 - //it's slower!!! - mov edx,[z] - mov eax,[x] - movd mm7,[y] - - movd mm0,[eax] - movd mm1,[eax+4] - movd mm2,[eax+8] - pmuludq mm0,mm7 - pmuludq mm1,mm7 - pmuludq mm2,mm7 - - movd mm6,[carry] - paddq mm0,mm6 - movd [edx],mm0 - - psrlq mm0,32 - paddq mm1,mm0 - movd [edx+4],mm1 - - movd mm3,[eax+12] - psrlq mm1,32 - paddq mm2,mm1 - movd [edx+8],mm2 - - pmuludq mm3,mm7 - movd mm4,[eax+16] - psrlq mm2,32 - paddq mm3,mm2 - movd [edx+12],mm3 - - pmuludq mm4,mm7 - movd mm5,[eax+20] - psrlq mm3,32 - paddq mm4,mm3 - movd [edx+16],mm4 - - pmuludq mm5,mm7 - movd mm0,[eax+24] - psrlq mm4,32 - paddq mm5,mm4 - movd [edx+20],mm5 - - pmuludq mm0,mm7 - movd mm1,[eax+28] - psrlq mm5,32 - paddq mm0,mm5 - movd [edx+24],mm0 - - pmuludq mm1,mm7 - psrlq mm0,32 - paddq mm1,mm0 - movd [edx+28],mm1 - psrlq mm1,32 - - movd eax,mm1 - emms -#else - mov edi,[z] - mov esi,[x] - mov eax,[esi] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,[carry] //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi],eax //load a - - mov eax,[esi+4] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+4],eax //load a - - mov eax,[esi+8] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+8],eax //load a - - mov eax,[esi+12] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+12],eax //load a - - mov eax,[esi+16] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+16],eax //load a - - mov eax,[esi+20] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+20],eax //load a - - mov eax,[esi+24] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov ecx,edx //store carry - mov [edi+24],eax //load a - - mov eax,[esi+28] //load a - mul [y] //edx(hi):eax(lo)=a*b - add eax,ecx //sum lo carry - adc edx,0 //sum hi carry - mov [edi+28],eax //load a - mov eax,edx //store carry -#endif - } - } - -/* -* Eight Word Block Multiply/Add -*/ -inline word word8_madd3(word z[8], const word x[8], word y, word carry) - { - z[0] = word_madd3(x[0], y, z[0], &carry); - z[1] = word_madd3(x[1], y, z[1], &carry); - z[2] = word_madd3(x[2], y, z[2], &carry); - z[3] = word_madd3(x[3], y, z[3], &carry); - z[4] = word_madd3(x[4], y, z[4], &carry); - z[5] = word_madd3(x[5], y, z[5], &carry); - z[6] = word_madd3(x[6], y, z[6], &carry); - z[7] = word_madd3(x[7], y, z[7], &carry); - return carry; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd(word* w2, word* w1, word* w0, word a, word b) - { - word carry = *w0; - *w0 = word_madd2(a, b, &carry); - *w1 += carry; - *w2 += (*w1 < carry) ? 1 : 0; - } - -/* -* Multiply-Add Accumulator -*/ -inline void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b) - { - word carry = 0; - a = word_madd2(a, b, &carry); - b = carry; - - word top = (b >> (BOTAN_MP_WORD_BITS-1)); - b <<= 1; - b |= (a >> (BOTAN_MP_WORD_BITS-1)); - a <<= 1; - - carry = 0; - *w0 = word_add(*w0, a, &carry); - *w1 = word_add(*w1, b, &carry); - *w2 = word_add(*w2, top, &carry); - } - -} - -} - -#endif diff --git a/src/math/bigint/mp_karat.cpp b/src/math/bigint/mp_karat.cpp deleted file mode 100644 index 8ae346f1e..000000000 --- a/src/math/bigint/mp_karat.cpp +++ /dev/null @@ -1,340 +0,0 @@ -/* -* Karatsuba Multiplication/Squaring -* (C) 1999-2008 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#include -#include -#include - -namespace Botan { - -namespace { - -/* -* Karatsuba Multiplication Operation -*/ -void karatsuba_mul(word z[], const word x[], const word y[], u32bit N, - word workspace[]) - { - if(N == 6) - bigint_comba_mul6(z, x, y); - else if(N == 8) - bigint_comba_mul8(z, x, y); - else if(N == 16) - bigint_comba_mul16(z, x, y); - else if(N < BOTAN_KARAT_MUL_THRESHOLD || N % 2) - bigint_simple_mul(z, x, N, y, N); - else - { - const u32bit N2 = N / 2; - - const word* x0 = x; - const word* x1 = x + N2; - const word* y0 = y; - const word* y1 = y + N2; - word* z0 = z; - word* z1 = z + N; - - const s32bit cmp0 = bigint_cmp(x0, N2, x1, N2); - const s32bit cmp1 = bigint_cmp(y1, N2, y0, N2); - - clear_mem(workspace, 2*N); - - if(cmp0 && cmp1) - { - if(cmp0 > 0) - bigint_sub3(z0, x0, N2, x1, N2); - else - bigint_sub3(z0, x1, N2, x0, N2); - - if(cmp1 > 0) - bigint_sub3(z1, y1, N2, y0, N2); - else - bigint_sub3(z1, y0, N2, y1, N2); - - karatsuba_mul(workspace, z0, z1, N2, workspace+N); - } - - karatsuba_mul(z0, x0, y0, N2, workspace+N); - karatsuba_mul(z1, x1, y1, N2, workspace+N); - - const u32bit blocks_of_8 = N - (N % 8); - - word carry = 0; - - for(u32bit j = 0; j != blocks_of_8; j += 8) - carry = word8_add3(workspace + N + j, z0 + j, z1 + j, carry); - - for(u32bit j = blocks_of_8; j != N; ++j) - workspace[N + j] = word_add(z0[j], z1[j], &carry); - - word carry2 = 0; - - for(u32bit j = 0; j != blocks_of_8; j += 8) - carry2 = word8_add2(z + N2 + j, workspace + N + j, carry2); - - for(u32bit j = blocks_of_8; j != N; ++j) - z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &carry2); - - z[N + N2] = word_add(z[N + N2], carry2, &carry); - - if(carry) - for(u32bit j = 1; j != N2; ++j) - if(++z[N + N2 + j]) - break; - - if((cmp0 == cmp1) || (cmp0 == 0) || (cmp1 == 0)) - bigint_add2(z + N2, 2*N-N2, workspace, N); - else - bigint_sub2(z + N2, 2*N-N2, workspace, N); - } - } - -/* -* Karatsuba Squaring Operation -*/ -void karatsuba_sqr(word z[], const word x[], u32bit N, word workspace[]) - { - if(N == 6) - bigint_comba_sqr6(z, x); - else if(N == 8) - bigint_comba_sqr8(z, x); - else if(N == 16) - bigint_comba_sqr16(z, x); - else if(N < BOTAN_KARAT_SQR_THRESHOLD || N % 2) - bigint_simple_sqr(z, x, N); - else - { - const u32bit N2 = N / 2; - - const word* x0 = x; - const word* x1 = x + N2; - word* z0 = z; - word* z1 = z + N; - - const s32bit cmp = bigint_cmp(x0, N2, x1, N2); - - clear_mem(workspace, 2*N); - - if(cmp) - { - if(cmp > 0) - bigint_sub3(z0, x0, N2, x1, N2); - else - bigint_sub3(z0, x1, N2, x0, N2); - - karatsuba_sqr(workspace, z0, N2, workspace+N); - } - - karatsuba_sqr(z0, x0, N2, workspace+N); - karatsuba_sqr(z1, x1, N2, workspace+N); - - const u32bit blocks_of_8 = N - (N % 8); - - word carry = 0; - - for(u32bit j = 0; j != blocks_of_8; j += 8) - carry = word8_add3(workspace + N + j, z0 + j, z1 + j, carry); - - for(u32bit j = blocks_of_8; j != N; ++j) - workspace[N + j] = word_add(z0[j], z1[j], &carry); - - word carry2 = 0; - - for(u32bit j = 0; j != blocks_of_8; j += 8) - carry2 = word8_add2(z + N2 + j, workspace + N + j, carry2); - - for(u32bit j = blocks_of_8; j != N; ++j) - z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &carry2); - - z[N + N2] = word_add(z[N + N2], carry2, &carry); - - if(carry) - for(u32bit j = 1; j != N2; ++j) - if(++z[N + N2 + j]) - break; - - if(cmp == 0) - bigint_add2(z + N2, 2*N-N2, workspace, N); - else - bigint_sub2(z + N2, 2*N-N2, workspace, N); - } - } - -/* -* Pick a good size for the Karatsuba multiply -*/ -u32bit karatsuba_size(u32bit z_size, - u32bit x_size, u32bit x_sw, - u32bit y_size, u32bit y_sw) - { - if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size) - return 0; - - if(((x_size == x_sw) && (x_size % 2)) || - ((y_size == y_sw) && (y_size % 2))) - return 0; - - const u32bit start = (x_sw > y_sw) ? x_sw : y_sw; - const u32bit end = (x_size < y_size) ? x_size : y_size; - - if(start == end) - { - if(start % 2) - return 0; - return start; - } - - for(u32bit j = start; j <= end; ++j) - { - if(j % 2) - continue; - - if(2*j > z_size) - return 0; - - if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size) - { - if(j % 4 == 2 && - (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size) - return j+2; - return j; - } - } - - return 0; - } - -/* -* Pick a good size for the Karatsuba squaring -*/ -u32bit karatsuba_size(u32bit z_size, u32bit x_size, u32bit x_sw) - { - if(x_sw == x_size) - { - if(x_sw % 2) - return 0; - return x_sw; - } - - for(u32bit j = x_sw; j <= x_size; ++j) - { - if(j % 2) - continue; - - if(2*j > z_size) - return 0; - - if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size) - return j+2; - return j; - } - - return 0; - } - -} - -/* -* Multiplication Algorithm Dispatcher -*/ -void bigint_mul(word z[], u32bit z_size, word workspace[], - const word x[], u32bit x_size, u32bit x_sw, - const word y[], u32bit y_size, u32bit y_sw) - { - if(x_sw == 1) - { - bigint_linmul3(z, y, y_sw, x[0]); - } - else if(y_sw == 1) - { - bigint_linmul3(z, x, x_sw, y[0]); - } - else if(x_sw <= 4 && x_size >= 4 && - y_sw <= 4 && y_size >= 4 && z_size >= 8) - { - bigint_comba_mul4(z, x, y); - } - else if(x_sw <= 6 && x_size >= 6 && - y_sw <= 6 && y_size >= 6 && z_size >= 12) - { - bigint_comba_mul6(z, x, y); - } - else if(x_sw <= 8 && x_size >= 8 && - y_sw <= 8 && y_size >= 8 && z_size >= 16) - { - bigint_comba_mul8(z, x, y); - } - else if(x_sw <= 16 && x_size >= 16 && - y_sw <= 16 && y_size >= 16 && z_size >= 32) - { - bigint_comba_mul16(z, x, y); - } - else if(x_sw < BOTAN_KARAT_MUL_THRESHOLD || - y_sw < BOTAN_KARAT_MUL_THRESHOLD || - !workspace) - { - bigint_simple_mul(z, x, x_sw, y, y_sw); - } - else - { - const u32bit N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw); - - if(N) - { - clear_mem(workspace, 2*N); - karatsuba_mul(z, x, y, N, workspace); - } - else - bigint_simple_mul(z, x, x_sw, y, y_sw); - } - } - -/* -* Squaring Algorithm Dispatcher -*/ -void bigint_sqr(word z[], u32bit z_size, word workspace[], - const word x[], u32bit x_size, u32bit x_sw) - { - if(x_sw == 1) - { - bigint_linmul3(z, x, x_sw, x[0]); - } - else if(x_sw <= 4 && x_size >= 4 && z_size >= 8) - { - bigint_comba_sqr4(z, x); - } - else if(x_sw <= 6 && x_size >= 6 && z_size >= 12) - { - bigint_comba_sqr6(z, x); - } - else if(x_sw <= 8 && x_size >= 8 && z_size >= 16) - { - bigint_comba_sqr8(z, x); - } - else if(x_sw <= 16 && x_size >= 16 && z_size >= 32) - { - bigint_comba_sqr16(z, x); - } - else if(x_size < BOTAN_KARAT_SQR_THRESHOLD || !workspace) - { - bigint_simple_sqr(z, x, x_sw); - } - else - { - const u32bit N = karatsuba_size(z_size, x_size, x_sw); - - if(N) - { - clear_mem(workspace, 2*N); - karatsuba_sqr(z, x, N, workspace); - } - else - bigint_simple_sqr(z, x, x_sw); - } - } - -} diff --git a/src/math/bigint/mp_misc.cpp b/src/math/bigint/mp_misc.cpp deleted file mode 100644 index 77b8e6f51..000000000 --- a/src/math/bigint/mp_misc.cpp +++ /dev/null @@ -1,102 +0,0 @@ -/* -* MP Misc Functions -* (C) 1999-2008 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#include -#include - -namespace Botan { - -extern "C" { - -/* -* Core Division Operation -*/ -u32bit bigint_divcore(word q, word y2, word y1, - word x3, word x2, word x1) - { - // Compute (y2,y1) * q - - word y3 = 0; - y1 = word_madd2(q, y1, &y3); - y2 = word_madd2(q, y2, &y3); - - // Return (y3,y2,y1) >? (x3,x2,x1) - - if(y3 > x3) return 1; - if(y3 < x3) return 0; - if(y2 > x2) return 1; - if(y2 < x2) return 0; - if(y1 > x1) return 1; - if(y1 < x1) return 0; - return 0; - } - -/* -* Compare two MP integers -*/ -s32bit bigint_cmp(const word x[], u32bit x_size, - const word y[], u32bit y_size) - { - if(x_size < y_size) { return (-bigint_cmp(y, y_size, x, x_size)); } - - while(x_size > y_size) - { - if(x[x_size-1]) - return 1; - x_size--; - } - - for(u32bit j = x_size; j > 0; --j) - { - if(x[j-1] > y[j-1]) - return 1; - if(x[j-1] < y[j-1]) - return -1; - } - - return 0; - } - -/* -* Do a 2-word/1-word Division -*/ -word bigint_divop(word n1, word n0, word d) - { - word high = n1 % d, quotient = 0; - - for(u32bit j = 0; j != MP_WORD_BITS; ++j) - { - word high_top_bit = (high & MP_WORD_TOP_BIT); - - high <<= 1; - high |= (n0 >> (MP_WORD_BITS-1-j)) & 1; - quotient <<= 1; - - if(high_top_bit || high >= d) - { - high -= d; - quotient |= 1; - } - } - - return quotient; - } - -/* -* Do a 2-word/1-word Modulo -*/ -word bigint_modop(word n1, word n0, word d) - { - word z = bigint_divop(n1, n0, d); - word dummy = 0; - z = word_madd2(z, d, &dummy); - return (n0-z); - } - -} - -} diff --git a/src/math/bigint/mp_msvc64/info.txt b/src/math/bigint/mp_msvc64/info.txt deleted file mode 100644 index 56ae05927..000000000 --- a/src/math/bigint/mp_msvc64/info.txt +++ /dev/null @@ -1,17 +0,0 @@ -load_on dep - -mp_bits 64 - - -mp_asm.h -mp_generic:mp_asmi.h - - - -amd64 -ia64 - - - -msvc - diff --git a/src/math/bigint/mp_msvc64/mp_asm.h b/src/math/bigint/mp_msvc64/mp_asm.h deleted file mode 100644 index 8e4535c35..000000000 --- a/src/math/bigint/mp_msvc64/mp_asm.h +++ /dev/null @@ -1,61 +0,0 @@ -/* -* Multiply-Add for 64-bit MSVC -* (C) 2010 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MP_ASM_H__ -#define BOTAN_MP_ASM_H__ - -#include -#include - -#if (BOTAN_MP_WORD_BITS != 64) - #error The mp_msvc64 module requires that BOTAN_MP_WORD_BITS == 64 -#endif - -#pragma intrinsic(_umul128) - -namespace Botan { - -extern "C" { - -/* -* Word Multiply -*/ -inline word word_madd2(word a, word b, word* c) - { - word hi, lo; - lo = _umul128(a, b, &hi); - - lo += *c; - hi += (lo < *c); // carry? - - *c = hi; - return lo; - } - -/* -* Word Multiply/Add -*/ -inline word word_madd3(word a, word b, word c, word* d) - { - word hi, lo; - lo = _umul128(a, b, &hi); - - lo += c; - hi += (lo < c); // carry? - - lo += *d; - hi += (lo < *d); // carry? - - *d = hi; - return lo; - } - -} - -} - -#endif diff --git a/src/math/bigint/mp_shift.cpp b/src/math/bigint/mp_shift.cpp deleted file mode 100644 index f1d609bfb..000000000 --- a/src/math/bigint/mp_shift.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/* -* MP Shift Algorithms -* (C) 1999-2007 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#include -#include - -namespace Botan { - -extern "C" { - -/* -* Single Operand Left Shift -*/ -void bigint_shl1(word x[], u32bit x_size, u32bit word_shift, u32bit bit_shift) - { - if(word_shift) - { - for(u32bit j = 1; j != x_size + 1; ++j) - x[(x_size - j) + word_shift] = x[x_size - j]; - clear_mem(x, word_shift); - } - - if(bit_shift) - { - word carry = 0; - for(u32bit j = word_shift; j != x_size + word_shift + 1; ++j) - { - word temp = x[j]; - x[j] = (temp << bit_shift) | carry; - carry = (temp >> (MP_WORD_BITS - bit_shift)); - } - } - } - -/* -* Single Operand Right Shift -*/ -void bigint_shr1(word x[], u32bit x_size, u32bit word_shift, u32bit bit_shift) - { - if(x_size < word_shift) - { - clear_mem(x, x_size); - return; - } - - if(word_shift) - { - copy_mem(x, x + word_shift, x_size - word_shift); - clear_mem(x + x_size - word_shift, word_shift); - } - - if(bit_shift) - { - word carry = 0; - - u32bit top = x_size - word_shift; - - while(top >= 4) - { - word w = x[top-1]; - x[top-1] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - w = x[top-2]; - x[top-2] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - w = x[top-3]; - x[top-3] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - w = x[top-4]; - x[top-4] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - top -= 4; - } - - while(top) - { - word w = x[top-1]; - x[top-1] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - - top--; - } - } - } - -/* -* Two Operand Left Shift -*/ -void bigint_shl2(word y[], const word x[], u32bit x_size, - u32bit word_shift, u32bit bit_shift) - { - for(u32bit j = 0; j != x_size; ++j) - y[j + word_shift] = x[j]; - if(bit_shift) - { - word carry = 0; - for(u32bit j = word_shift; j != x_size + word_shift + 1; ++j) - { - word w = y[j]; - y[j] = (w << bit_shift) | carry; - carry = (w >> (MP_WORD_BITS - bit_shift)); - } - } - } - -/* -* Two Operand Right Shift -*/ -void bigint_shr2(word y[], const word x[], u32bit x_size, - u32bit word_shift, u32bit bit_shift) - { - if(x_size < word_shift) return; - - for(u32bit j = 0; j != x_size - word_shift; ++j) - y[j] = x[j + word_shift]; - if(bit_shift) - { - word carry = 0; - for(u32bit j = x_size - word_shift; j > 0; --j) - { - word w = y[j-1]; - y[j-1] = (w >> bit_shift) | carry; - carry = (w << (MP_WORD_BITS - bit_shift)); - } - } - } - -} - -} diff --git a/src/math/bigint/mp_types.h b/src/math/bigint/mp_types.h deleted file mode 100644 index 1648713ed..000000000 --- a/src/math/bigint/mp_types.h +++ /dev/null @@ -1,33 +0,0 @@ -/* -* Low Level MPI Types -* (C) 1999-2007 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#ifndef BOTAN_MPI_TYPES_H__ -#define BOTAN_MPI_TYPES_H__ - -#include - -namespace Botan { - -#if (BOTAN_MP_WORD_BITS == 8) - typedef byte word; -#elif (BOTAN_MP_WORD_BITS == 16) - typedef u16bit word; -#elif (BOTAN_MP_WORD_BITS == 32) - typedef u32bit word; -#elif (BOTAN_MP_WORD_BITS == 64) - typedef u64bit word; -#else - #error BOTAN_MP_WORD_BITS must be 8, 16, 32, or 64 -#endif - -const word MP_WORD_MASK = ~static_cast(0); -const word MP_WORD_TOP_BIT = static_cast(1) << (8*sizeof(word) - 1); -const word MP_WORD_MAX = MP_WORD_MASK; - -} - -#endif diff --git a/src/math/bigint/mulop_generic/info.txt b/src/math/bigint/mulop_generic/info.txt deleted file mode 100644 index 548d0f44b..000000000 --- a/src/math/bigint/mulop_generic/info.txt +++ /dev/null @@ -1,5 +0,0 @@ -load_on dep - - -mp_mulop.cpp - diff --git a/src/math/bigint/mulop_generic/mp_mulop.cpp b/src/math/bigint/mulop_generic/mp_mulop.cpp deleted file mode 100644 index 33ee2af32..000000000 --- a/src/math/bigint/mulop_generic/mp_mulop.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* -* Simple O(N^2) Multiplication and Squaring -* (C) 1999-2008 Jack Lloyd -* -* Distributed under the terms of the Botan license -*/ - -#include -#include -#include -#include - -namespace Botan { - -extern "C" { - -/* -* Simple O(N^2) Multiplication -*/ -void bigint_simple_mul(word z[], const word x[], u32bit x_size, - const word y[], u32bit y_size) - { - const u32bit x_size_8 = x_size - (x_size % 8); - - clear_mem(z, x_size + y_size); - - for(u32bit i = 0; i != y_size; ++i) - { - const word y_i = y[i]; - - word carry = 0; - - for(u32bit j = 0; j != x_size_8; j += 8) - carry = word8_madd3(z + i + j, x + j, y_i, carry); - - for(u32bit j = x_size_8; j != x_size; ++j) - z[i+j] = word_madd3(x[j], y_i, z[i+j], &carry); - - z[x_size+i] = carry; - } - } - -/* -* Simple O(N^2) Squaring - -This is exactly the same algorithm as bigint_simple_mul, -however because C/C++ compilers suck at alias analysis it -is good to have the version where the compiler knows -that x == y - -There is an O(n^1.5) squaring algorithm specified in Handbook of -Applied Cryptography, chapter 14 -*/ -void bigint_simple_sqr(word z[], const word x[], u32bit x_size) - { - const u32bit x_size_8 = x_size - (x_size % 8); - - clear_mem(z, 2*x_size); - - for(u32bit i = 0; i != x_size; ++i) - { - const word x_i = x[i]; - word carry = 0; - - for(u32bit j = 0; j != x_size_8; j += 8) - carry = word8_madd3(z + i + j, x + j, x_i, carry); - - for(u32bit j = x_size_8; j != x_size; ++j) - z[i+j] = word_madd3(x[j], x_i, z[i+j], &carry); - - z[x_size+i] = carry; - } - } - -} - -} diff --git a/src/math/mp/info.txt b/src/math/mp/info.txt new file mode 100644 index 000000000..a3c994d8b --- /dev/null +++ b/src/math/mp/info.txt @@ -0,0 +1,23 @@ +define BIGINT_MP + + +mp_asm.cpp +mp_comba.cpp +mp_karat.cpp +mp_misc.cpp +mp_shift.cpp + + + +mp_types.h + + + +mp_core.h + + + +mp_amd64|mp_msvc64|mp_asm64|mp_ia32|mp_ia32_msvc|mp_generic +monty_generic +mulop_generic + diff --git a/src/math/mp/monty_generic/info.txt b/src/math/mp/monty_generic/info.txt new file mode 100644 index 000000000..cd05ccdc0 --- /dev/null +++ b/src/math/mp/monty_generic/info.txt @@ -0,0 +1,5 @@ +load_on dep + + +mp_monty.cpp + diff --git a/src/math/mp/monty_generic/mp_monty.cpp b/src/math/mp/monty_generic/mp_monty.cpp new file mode 100644 index 000000000..bce35259a --- /dev/null +++ b/src/math/mp/monty_generic/mp_monty.cpp @@ -0,0 +1,72 @@ +/* +* Montgomery Reduction +* (C) 1999-2010 Jack Lloyd +* 2006 Luca Piccarreta +* +* Distributed under the terms of the Botan license +*/ + +#include +#include +#include +#include + +namespace Botan { + +extern "C" { + +/* +* Montgomery Reduction Algorithm +*/ +void bigint_monty_redc(word z[], u32bit z_size, + word ws[], + const word x[], u32bit x_size, + word u) + { + const u32bit blocks_of_8 = x_size - (x_size % 8); + + for(u32bit i = 0; i != x_size; ++i) + { + word* z_i = z + i; + + const word y = z_i[0] * u; + + /* + bigint_linmul3(ws, x, x_size, y); + bigint_add2(z_i, z_size - i, ws, x_size+1); + */ + word carry = 0; + + for(u32bit j = 0; j != blocks_of_8; j += 8) + carry = word8_madd3(z_i + j, x + j, y, carry); + + for(u32bit j = blocks_of_8; j != x_size; ++j) + z_i[j] = word_madd3(x[j], y, z_i[j], &carry); + + word z_sum = z_i[x_size] + carry; + carry = (z_sum < z_i[x_size]); + z_i[x_size] = z_sum; + + // Note: not constant time + for(u32bit j = x_size + 1; carry && j != z_size - i; ++j) + { + ++z_i[j]; + carry = !z_i[j]; + } + } + + word borrow = 0; + for(u32bit i = 0; i != x_size; ++i) + ws[i] = word_sub(z[x_size + i], x[i], &borrow); + + ws[x_size] = word_sub(z[x_size+x_size], 0, &borrow); + + copy_mem(ws + x_size + 1, z + x_size, x_size + 1); + + copy_mem(z, ws + borrow*(x_size+1), x_size + 1); + clear_mem(z + x_size + 1, z_size - x_size - 1); + } + +} + +} diff --git a/src/math/mp/mp_amd64/info.txt b/src/math/mp/mp_amd64/info.txt new file mode 100644 index 000000000..11cc380e2 --- /dev/null +++ b/src/math/mp/mp_amd64/info.txt @@ -0,0 +1,18 @@ +load_on dep + +mp_bits 64 + + +mp_asm.h +mp_asmi.h + + + +amd64 + + + +clang +gcc +icc + diff --git a/src/math/mp/mp_amd64/mp_asm.h b/src/math/mp/mp_amd64/mp_asm.h new file mode 100644 index 000000000..fa66d04f3 --- /dev/null +++ b/src/math/mp/mp_amd64/mp_asm.h @@ -0,0 +1,69 @@ +/* +* Lowest Level MPI Algorithms +* (C) 1999-2008 Jack Lloyd +* 2006 Luca Piccarreta +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_MP_ASM_H__ +#define BOTAN_MP_ASM_H__ + +#include + +#if (BOTAN_MP_WORD_BITS != 64) + #error The mp_amd64 module requires that BOTAN_MP_WORD_BITS == 64 +#endif + +namespace Botan { + +extern "C" { + +/* +* Helper Macros for amd64 Assembly +*/ +#define ASM(x) x "\n\t" + +/* +* Word Multiply +*/ +inline word word_madd2(word a, word b, word* c) + { + asm( + ASM("mulq %[b]") + ASM("addq %[c],%[a]") + ASM("adcq $0,%[carry]") + + : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) + : "0"(a), "1"(b), [c]"g"(*c) : "cc"); + + return a; + } + +/* +* Word Multiply/Add +*/ +inline word word_madd3(word a, word b, word c, word* d) + { + asm( + ASM("mulq %[b]") + + ASM("addq %[c],%[a]") + ASM("adcq $0,%[carry]") + + ASM("addq %[d],%[a]") + ASM("adcq $0,%[carry]") + + : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) + : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); + + return a; + } + +#undef ASM + +} + +} + +#endif diff --git a/src/math/mp/mp_amd64/mp_asmi.h b/src/math/mp/mp_amd64/mp_asmi.h new file mode 100644 index 000000000..adf7774ef --- /dev/null +++ b/src/math/mp/mp_amd64/mp_asmi.h @@ -0,0 +1,248 @@ +/* +* Lowest Level MPI Algorithms +* (C) 1999-2010 Jack Lloyd +* 2006 Luca Piccarreta +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_MP_ASM_INTERNAL_H__ +#define BOTAN_MP_ASM_INTERNAL_H__ + +#include + +namespace Botan { + +extern "C" { + +/* +* Helper Macros for amd64 Assembly +*/ +#ifndef ASM + #define ASM(x) x "\n\t" +#endif + +#define ADDSUB2_OP(OPERATION, INDEX) \ + ASM("movq 8*" #INDEX "(%[y]), %[carry]") \ + ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])") \ + +#define ADDSUB3_OP(OPERATION, INDEX) \ + ASM("movq 8*" #INDEX "(%[x]), %[carry]") \ + ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \ + ASM("movq %[carry], 8*" #INDEX "(%[z])") \ + +#define LINMUL_OP(WRITE_TO, INDEX) \ + ASM("movq 8*" #INDEX "(%[x]),%%rax") \ + ASM("mulq %[y]") \ + ASM("addq %[carry],%%rax") \ + ASM("adcq $0,%%rdx") \ + ASM("movq %%rdx,%[carry]") \ + ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])") + +#define MULADD_OP(IGNORED, INDEX) \ + ASM("movq 8*" #INDEX "(%[x]),%%rax") \ + ASM("mulq %[y]") \ + ASM("addq %[carry],%%rax") \ + ASM("adcq $0,%%rdx") \ + ASM("addq 8*" #INDEX "(%[z]),%%rax") \ + ASM("adcq $0,%%rdx") \ + ASM("movq %%rdx,%[carry]") \ + ASM("movq %%rax, 8*" #INDEX " (%[z])") + +#define DO_8_TIMES(MACRO, ARG) \ + MACRO(ARG, 0) \ + MACRO(ARG, 1) \ + MACRO(ARG, 2) \ + MACRO(ARG, 3) \ + MACRO(ARG, 4) \ + MACRO(ARG, 5) \ + MACRO(ARG, 6) \ + MACRO(ARG, 7) + +#define ADD_OR_SUBTRACT(CORE_CODE) \ + ASM("rorq %[carry]") \ + CORE_CODE \ + ASM("sbbq %[carry],%[carry]") \ + ASM("negq %[carry]") + +/* +* Word Addition +*/ +inline word word_add(word x, word y, word* carry) + { + asm( + ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + } + +/* +* Eight Word Block Addition, Two Argument +*/ +inline word word8_add2(word x[8], const word y[8], word carry) + { + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + } + +/* +* Eight Word Block Addition, Three Argument +*/ +inline word word8_add3(word z[8], const word x[8], const word y[8], word carry) + { + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + } + +/* +* Word Subtraction +*/ +inline word word_sub(word x, word y, word* carry) + { + asm( + ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + } + +/* +* Eight Word Block Subtraction, Two Argument +*/ +inline word word8_sub2(word x[8], const word y[8], word carry) + { + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + } + +/* +* Eight Word Block Subtraction, Two Argument +*/ +inline word word8_sub2_rev(word x[8], const word y[8], word carry) + { + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) + : [carry]"=r"(carry) + : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) + : "cc", "memory"); + return carry; + } + +/* +* Eight Word Block Subtraction, Three Argument +*/ +inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry) + { + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + } + +/* +* Eight Word Block Linear Multiplication +*/ +inline word word8_linmul2(word x[8], word y, word carry) + { + asm( + DO_8_TIMES(LINMUL_OP, "x") + : [carry]"=r"(carry) + : [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%rax", "%rdx"); + return carry; + } + +/* +* Eight Word Block Linear Multiplication +*/ +inline word word8_linmul3(word z[8], const word x[8], word y, word carry) + { + asm( + DO_8_TIMES(LINMUL_OP, "z") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%rax", "%rdx"); + return carry; + } + +/* +* Eight Word Block Multiply/Add +*/ +inline word word8_madd3(word z[8], const word x[8], word y, word carry) + { + asm( + DO_8_TIMES(MULADD_OP, "") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%rax", "%rdx"); + return carry; + } + +/* +* Multiply-Add Accumulator +*/ +inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) + { + asm( + ASM("mulq %[y]") + + ASM("addq %[x],%[w0]") + ASM("adcq %[y],%[w1]") + ASM("adcq $0,%[w2]") + + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + } + +/* +* Multiply-Add Accumulator +*/ +inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) + { + asm( + ASM("mulq %[y]") + + ASM("addq %[x],%[w0]") + ASM("adcq %[y],%[w1]") + ASM("adcq $0,%[w2]") + + ASM("addq %[x],%[w0]") + ASM("adcq %[y],%[w1]") + ASM("adcq $0,%[w2]") + + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + } + + +#undef ASM +#undef DO_8_TIMES +#undef ADD_OR_SUBTRACT +#undef ADDSUB2_OP +#undef ADDSUB3_OP +#undef LINMUL_OP +#undef MULADD_OP + +} + +} +#endif diff --git a/src/math/mp/mp_asm.cpp b/src/math/mp/mp_asm.cpp new file mode 100644 index 000000000..4fcdee7a4 --- /dev/null +++ b/src/math/mp/mp_asm.cpp @@ -0,0 +1,183 @@ +/* +* Lowest Level MPI Algorithms +* (C) 1999-2010 Jack Lloyd +* 2006 Luca Piccarreta +* +* Distributed under the terms of the Botan license +*/ + +#include +#include +#include +#include +#include + +namespace Botan { + +extern "C" { + +/* +* Two Operand Addition, No Carry +*/ +word bigint_add2_nc(word x[], u32bit x_size, const word y[], u32bit y_size) + { + word carry = 0; + + const u32bit blocks = y_size - (y_size % 8); + + for(u32bit i = 0; i != blocks; i += 8) + carry = word8_add2(x + i, y + i, carry); + + for(u32bit i = blocks; i != y_size; ++i) + x[i] = word_add(x[i], y[i], &carry); + + for(u32bit i = y_size; i != x_size; ++i) + x[i] = word_add(x[i], 0, &carry); + + return carry; + } + +/* +* Three Operand Addition, No Carry +*/ +word bigint_add3_nc(word z[], const word x[], u32bit x_size, + const word y[], u32bit y_size) + { + if(x_size < y_size) + { return bigint_add3_nc(z, y, y_size, x, x_size); } + + word carry = 0; + + const u32bit blocks = y_size - (y_size % 8); + + for(u32bit i = 0; i != blocks; i += 8) + carry = word8_add3(z + i, x + i, y + i, carry); + + for(u32bit i = blocks; i != y_size; ++i) + z[i] = word_add(x[i], y[i], &carry); + + for(u32bit i = y_size; i != x_size; ++i) + z[i] = word_add(x[i], 0, &carry); + + return carry; + } + +/* +* Two Operand Addition +*/ +void bigint_add2(word x[], u32bit x_size, const word y[], u32bit y_size) + { + x[x_size] += bigint_add2_nc(x, x_size, y, y_size); + } + +/* +* Three Operand Addition +*/ +void bigint_add3(word z[], const word x[], u32bit x_size, + const word y[], u32bit y_size) + { + z[(x_size > y_size ? x_size : y_size)] += + bigint_add3_nc(z, x, x_size, y, y_size); + } + +/* +* Two Operand Subtraction +*/ +word bigint_sub2(word x[], u32bit x_size, const word y[], u32bit y_size) + { + word borrow = 0; + + const u32bit blocks = y_size - (y_size % 8); + + for(u32bit i = 0; i != blocks; i += 8) + borrow = word8_sub2(x + i, y + i, borrow); + + for(u32bit i = blocks; i != y_size; ++i) + x[i] = word_sub(x[i], y[i], &borrow); + + for(u32bit i = y_size; i != x_size; ++i) + x[i] = word_sub(x[i], 0, &borrow); + + return borrow; + } + +/* +* Two Operand Subtraction x = y - x +*/ +void bigint_sub2_rev(word x[], const word y[], u32bit y_size) + { + word borrow = 0; + + const u32bit blocks = y_size - (y_size % 8); + + for(u32bit i = 0; i != blocks; i += 8) + borrow = word8_sub2_rev(x + i, y + i, borrow); + + for(u32bit i = blocks; i != y_size; ++i) + x[i] = word_sub(y[i], x[i], &borrow); + + if(borrow) + throw Internal_Error("bigint_sub2_rev: x >= y"); + } + +/* +* Three Operand Subtraction +*/ +word bigint_sub3(word z[], const word x[], u32bit x_size, + const word y[], u32bit y_size) + { + word borrow = 0; + + const u32bit blocks = y_size - (y_size % 8); + + for(u32bit i = 0; i != blocks; i += 8) + borrow = word8_sub3(z + i, x + i, y + i, borrow); + + for(u32bit i = blocks; i != y_size; ++i) + z[i] = word_sub(x[i], y[i], &borrow); + + for(u32bit i = y_size; i != x_size; ++i) + z[i] = word_sub(x[i], 0, &borrow); + + return borrow; + } + +/* +* Two Operand Linear Multiply +*/ +void bigint_linmul2(word x[], u32bit x_size, word y) + { + const u32bit blocks = x_size - (x_size % 8); + + word carry = 0; + + for(u32bit i = 0; i != blocks; i += 8) + carry = word8_linmul2(x + i, y, carry); + + for(u32bit i = blocks; i != x_size; ++i) + x[i] = word_madd2(x[i], y, &carry); + + x[x_size] = carry; + } + +/* +* Three Operand Linear Multiply +*/ +void bigint_linmul3(word z[], const word x[], u32bit x_size, word y) + { + const u32bit blocks = x_size - (x_size % 8); + + word carry = 0; + + for(u32bit i = 0; i != blocks; i += 8) + carry = word8_linmul3(z + i, x + i, y, carry); + + for(u32bit i = blocks; i != x_size; ++i) + z[i] = word_madd2(x[i], y, &carry); + + z[x_size] = carry; + } + +} + +} diff --git a/src/math/mp/mp_asm64/info.txt b/src/math/mp/mp_asm64/info.txt new file mode 100644 index 000000000..fd0242a7a --- /dev/null +++ b/src/math/mp/mp_asm64/info.txt @@ -0,0 +1,25 @@ +mp_bits 64 + +load_on dep + + +mp_asm.h +mp_generic:mp_asmi.h + + + +#amd64 +alpha +ia64 +mips64 +ppc64 +sparc64 + + +# The inline asm only works with gcc, but it looks like (at least on +# UltraSPARC), using 64-bit words and the sythensized multiply is a 5 to 25% +# win, so it's probably worth using elsewhere. + +gcc +sunwspro + diff --git a/src/math/mp/mp_asm64/mp_asm.h b/src/math/mp/mp_asm64/mp_asm.h new file mode 100644 index 000000000..b0906095d --- /dev/null +++ b/src/math/mp/mp_asm64/mp_asm.h @@ -0,0 +1,122 @@ +/* +* MPI Multiply-Add Core +* (C) 1999-2007 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_MP_MADD_H__ +#define BOTAN_MP_MADD_H__ + +#include + +namespace Botan { + +#if (BOTAN_MP_WORD_BITS != 64) + #error The mp_asm64 module requires that BOTAN_MP_WORD_BITS == 64 +#endif + +#if defined(BOTAN_TARGET_ARCH_IS_ALPHA) + +#define BOTAN_WORD_MUL(a,b,z1,z0) do { \ + asm("umulh %1,%2,%0" : "=r" (z0) : "r" (a), "r" (b)); \ + z1 = a * b; \ +} while(0); + +#elif defined(BOTAN_TARGET_ARCH_IS_AMD64) + +#define BOTAN_WORD_MUL(a,b,z1,z0) do { \ + asm("mulq %3" : "=d" (z0), "=a" (z1) : \ + "a" (a), "rm" (b) : "cc"); \ +} while(0); + +#elif defined(BOTAN_TARGET_ARCH_IS_IA64) + +#define BOTAN_WORD_MUL(a,b,z1,z0) do { \ + asm("xmpy.hu %0=%1,%2" : "=f" (z0) : "f" (a), "f" (b)); \ + z1 = a * b; \ +} while(0); + +#elif defined(BOTAN_TARGET_ARCH_IS_PPC64) + +#define BOTAN_WORD_MUL(a,b,z1,z0) do { \ + asm("mulhdu %0,%1,%2" : "=r" (z0) : "r" (a), "r" (b) : "cc"); \ + z1 = a * b; \ +} while(0); + +#elif defined(BOTAN_TARGET_ARCH_IS_MIPS64) + +#define BOTAN_WORD_MUL(a,b,z1,z0) do { \ + typedef unsigned int uint128_t __attribute__((mode(TI))); \ + uint128_t r = (uint128_t)a * b; \ + z0 = (r >> 64) & 0xFFFFFFFFFFFFFFFF; \ + z1 = (r ) & 0xFFFFFFFFFFFFFFFF; \ +} while(0); + +#else + +// Do a 64x64->128 multiply using four 64x64->64 multiplies +// plus some adds and shifts. Last resort for CPUs like UltraSPARC, +// with 64-bit registers/ALU, but no 64x64->128 multiply. +inline void bigint_2word_mul(word a, word b, word* z1, word* z0) + { + const u32bit MP_HWORD_BITS = BOTAN_MP_WORD_BITS / 2; + const word MP_HWORD_MASK = ((word)1 << MP_HWORD_BITS) - 1; + + const word a_hi = (a >> MP_HWORD_BITS); + const word a_lo = (a & MP_HWORD_MASK); + const word b_hi = (b >> MP_HWORD_BITS); + const word b_lo = (b & MP_HWORD_MASK); + + word x0 = a_hi * b_hi; + word x1 = a_lo * b_hi; + word x2 = a_hi * b_lo; + word x3 = a_lo * b_lo; + + x2 += x3 >> (MP_HWORD_BITS); + x2 += x1; + if(x2 < x1) + x0 += ((word)1 << MP_HWORD_BITS); + + *z0 = x0 + (x2 >> MP_HWORD_BITS); + *z1 = ((x2 & MP_HWORD_MASK) << MP_HWORD_BITS) + (x3 & MP_HWORD_MASK); + } + +#define BOTAN_WORD_MUL(a,b,z1,z0) bigint_2word_mul(a, b, &z1, &z0) + +#endif + +/* +* Word Multiply/Add +*/ +inline word word_madd2(word a, word b, word* c) + { + word z0 = 0, z1 = 0; + + BOTAN_WORD_MUL(a, b, z1, z0); + + z1 += *c; if(z1 < *c) z0++; + + *c = z0; + return z1; + } + +/* +* Word Multiply/Add +*/ +inline word word_madd3(word a, word b, word c, word* d) + { + word z0 = 0, z1 = 0; + + BOTAN_WORD_MUL(a, b, z1, z0); + + z1 += c; if(z1 < c) z0++; + z1 += *d; if(z1 < *d) z0++; + + *d = z0; + return z1; + } + +} + +#endif diff --git a/src/math/mp/mp_comba.cpp b/src/math/mp/mp_comba.cpp new file mode 100644 index 000000000..2770d3f0a --- /dev/null +++ b/src/math/mp/mp_comba.cpp @@ -0,0 +1,920 @@ +/* +* Comba Multiplication and Squaring +* (C) 1999-2007 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#include +#include + +namespace Botan { + +extern "C" { + +/* +* Comba 4x4 Squaring +*/ +void bigint_comba_sqr4(word z[8], const word x[4]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], x[0]); + z[0] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[0], x[1]); + z[1] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[0], x[2]); + word3_muladd(&w2, &w1, &w0, x[1], x[1]); + z[2] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[0], x[3]); + word3_muladd_2(&w2, &w1, &w0, x[1], x[2]); + z[3] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[1], x[3]); + word3_muladd(&w2, &w1, &w0, x[2], x[2]); + z[4] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[2], x[3]); + z[5] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[3], x[3]); + z[6] = w0; + z[7] = w1; + } + +/* +* Comba 4x4 Multiplication +*/ +void bigint_comba_mul4(word z[8], const word x[4], const word y[4]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], y[0]); + z[0] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], y[1]); + word3_muladd(&w2, &w1, &w0, x[1], y[0]); + z[1] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], y[2]); + word3_muladd(&w2, &w1, &w0, x[1], y[1]); + word3_muladd(&w2, &w1, &w0, x[2], y[0]); + z[2] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], y[3]); + word3_muladd(&w2, &w1, &w0, x[1], y[2]); + word3_muladd(&w2, &w1, &w0, x[2], y[1]); + word3_muladd(&w2, &w1, &w0, x[3], y[0]); + z[3] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[1], y[3]); + word3_muladd(&w2, &w1, &w0, x[2], y[2]); + word3_muladd(&w2, &w1, &w0, x[3], y[1]); + z[4] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[2], y[3]); + word3_muladd(&w2, &w1, &w0, x[3], y[2]); + z[5] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[3], y[3]); + z[6] = w0; + z[7] = w1; + } + +/* +* Comba 6x6 Squaring +*/ +void bigint_comba_sqr6(word z[12], const word x[6]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], x[0]); + z[0] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[0], x[1]); + z[1] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[0], x[2]); + word3_muladd(&w2, &w1, &w0, x[1], x[1]); + z[2] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[0], x[3]); + word3_muladd_2(&w2, &w1, &w0, x[1], x[2]); + z[3] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[0], x[4]); + word3_muladd_2(&w2, &w1, &w0, x[1], x[3]); + word3_muladd(&w2, &w1, &w0, x[2], x[2]); + z[4] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[0], x[5]); + word3_muladd_2(&w2, &w1, &w0, x[1], x[4]); + word3_muladd_2(&w2, &w1, &w0, x[2], x[3]); + z[5] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[1], x[5]); + word3_muladd_2(&w2, &w1, &w0, x[2], x[4]); + word3_muladd(&w2, &w1, &w0, x[3], x[3]); + z[6] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[2], x[5]); + word3_muladd_2(&w2, &w1, &w0, x[3], x[4]); + z[7] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[3], x[5]); + word3_muladd(&w2, &w1, &w0, x[4], x[4]); + z[8] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[4], x[5]); + z[9] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[5], x[5]); + z[10] = w0; + z[11] = w1; + } + +/* +* Comba 6x6 Multiplication +*/ +void bigint_comba_mul6(word z[12], const word x[6], const word y[6]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], y[0]); + z[0] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], y[1]); + word3_muladd(&w2, &w1, &w0, x[1], y[0]); + z[1] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], y[2]); + word3_muladd(&w2, &w1, &w0, x[1], y[1]); + word3_muladd(&w2, &w1, &w0, x[2], y[0]); + z[2] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], y[3]); + word3_muladd(&w2, &w1, &w0, x[1], y[2]); + word3_muladd(&w2, &w1, &w0, x[2], y[1]); + word3_muladd(&w2, &w1, &w0, x[3], y[0]); + z[3] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], y[4]); + word3_muladd(&w2, &w1, &w0, x[1], y[3]); + word3_muladd(&w2, &w1, &w0, x[2], y[2]); + word3_muladd(&w2, &w1, &w0, x[3], y[1]); + word3_muladd(&w2, &w1, &w0, x[4], y[0]); + z[4] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], y[5]); + word3_muladd(&w2, &w1, &w0, x[1], y[4]); + word3_muladd(&w2, &w1, &w0, x[2], y[3]); + word3_muladd(&w2, &w1, &w0, x[3], y[2]); + word3_muladd(&w2, &w1, &w0, x[4], y[1]); + word3_muladd(&w2, &w1, &w0, x[5], y[0]); + z[5] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[1], y[5]); + word3_muladd(&w2, &w1, &w0, x[2], y[4]); + word3_muladd(&w2, &w1, &w0, x[3], y[3]); + word3_muladd(&w2, &w1, &w0, x[4], y[2]); + word3_muladd(&w2, &w1, &w0, x[5], y[1]); + z[6] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[2], y[5]); + word3_muladd(&w2, &w1, &w0, x[3], y[4]); + word3_muladd(&w2, &w1, &w0, x[4], y[3]); + word3_muladd(&w2, &w1, &w0, x[5], y[2]); + z[7] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[3], y[5]); + word3_muladd(&w2, &w1, &w0, x[4], y[4]); + word3_muladd(&w2, &w1, &w0, x[5], y[3]); + z[8] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[4], y[5]); + word3_muladd(&w2, &w1, &w0, x[5], y[4]); + z[9] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[5], y[5]); + z[10] = w0; + z[11] = w1; + } + +/* +* Comba 8x8 Squaring +*/ +void bigint_comba_sqr8(word z[16], const word x[8]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], x[0]); + z[0] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[0], x[1]); + z[1] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[0], x[2]); + word3_muladd(&w2, &w1, &w0, x[1], x[1]); + z[2] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[0], x[3]); + word3_muladd_2(&w2, &w1, &w0, x[1], x[2]); + z[3] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[0], x[4]); + word3_muladd_2(&w2, &w1, &w0, x[1], x[3]); + word3_muladd(&w2, &w1, &w0, x[2], x[2]); + z[4] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[0], x[5]); + word3_muladd_2(&w2, &w1, &w0, x[1], x[4]); + word3_muladd_2(&w2, &w1, &w0, x[2], x[3]); + z[5] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[0], x[6]); + word3_muladd_2(&w2, &w1, &w0, x[1], x[5]); + word3_muladd_2(&w2, &w1, &w0, x[2], x[4]); + word3_muladd(&w2, &w1, &w0, x[3], x[3]); + z[6] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[0], x[7]); + word3_muladd_2(&w2, &w1, &w0, x[1], x[6]); + word3_muladd_2(&w2, &w1, &w0, x[2], x[5]); + word3_muladd_2(&w2, &w1, &w0, x[3], x[4]); + z[7] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[1], x[7]); + word3_muladd_2(&w2, &w1, &w0, x[2], x[6]); + word3_muladd_2(&w2, &w1, &w0, x[3], x[5]); + word3_muladd(&w2, &w1, &w0, x[4], x[4]); + z[8] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[2], x[7]); + word3_muladd_2(&w2, &w1, &w0, x[3], x[6]); + word3_muladd_2(&w2, &w1, &w0, x[4], x[5]); + z[9] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[3], x[7]); + word3_muladd_2(&w2, &w1, &w0, x[4], x[6]); + word3_muladd(&w2, &w1, &w0, x[5], x[5]); + z[10] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[4], x[7]); + word3_muladd_2(&w2, &w1, &w0, x[5], x[6]); + z[11] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[5], x[7]); + word3_muladd(&w2, &w1, &w0, x[6], x[6]); + z[12] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[6], x[7]); + z[13] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[7], x[7]); + z[14] = w0; + z[15] = w1; + } + +/* +* Comba 8x8 Multiplication +*/ +void bigint_comba_mul8(word z[16], const word x[8], const word y[8]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], y[0]); + z[0] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], y[1]); + word3_muladd(&w2, &w1, &w0, x[1], y[0]); + z[1] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], y[2]); + word3_muladd(&w2, &w1, &w0, x[1], y[1]); + word3_muladd(&w2, &w1, &w0, x[2], y[0]); + z[2] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], y[3]); + word3_muladd(&w2, &w1, &w0, x[1], y[2]); + word3_muladd(&w2, &w1, &w0, x[2], y[1]); + word3_muladd(&w2, &w1, &w0, x[3], y[0]); + z[3] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], y[4]); + word3_muladd(&w2, &w1, &w0, x[1], y[3]); + word3_muladd(&w2, &w1, &w0, x[2], y[2]); + word3_muladd(&w2, &w1, &w0, x[3], y[1]); + word3_muladd(&w2, &w1, &w0, x[4], y[0]); + z[4] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], y[5]); + word3_muladd(&w2, &w1, &w0, x[1], y[4]); + word3_muladd(&w2, &w1, &w0, x[2], y[3]); + word3_muladd(&w2, &w1, &w0, x[3], y[2]); + word3_muladd(&w2, &w1, &w0, x[4], y[1]); + word3_muladd(&w2, &w1, &w0, x[5], y[0]); + z[5] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], y[6]); + word3_muladd(&w2, &w1, &w0, x[1], y[5]); + word3_muladd(&w2, &w1, &w0, x[2], y[4]); + word3_muladd(&w2, &w1, &w0, x[3], y[3]); + word3_muladd(&w2, &w1, &w0, x[4], y[2]); + word3_muladd(&w2, &w1, &w0, x[5], y[1]); + word3_muladd(&w2, &w1, &w0, x[6], y[0]); + z[6] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[0], y[7]); + word3_muladd(&w2, &w1, &w0, x[1], y[6]); + word3_muladd(&w2, &w1, &w0, x[2], y[5]); + word3_muladd(&w2, &w1, &w0, x[3], y[4]); + word3_muladd(&w2, &w1, &w0, x[4], y[3]); + word3_muladd(&w2, &w1, &w0, x[5], y[2]); + word3_muladd(&w2, &w1, &w0, x[6], y[1]); + word3_muladd(&w2, &w1, &w0, x[7], y[0]); + z[7] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[1], y[7]); + word3_muladd(&w2, &w1, &w0, x[2], y[6]); + word3_muladd(&w2, &w1, &w0, x[3], y[5]); + word3_muladd(&w2, &w1, &w0, x[4], y[4]); + word3_muladd(&w2, &w1, &w0, x[5], y[3]); + word3_muladd(&w2, &w1, &w0, x[6], y[2]); + word3_muladd(&w2, &w1, &w0, x[7], y[1]); + z[8] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[2], y[7]); + word3_muladd(&w2, &w1, &w0, x[3], y[6]); + word3_muladd(&w2, &w1, &w0, x[4], y[5]); + word3_muladd(&w2, &w1, &w0, x[5], y[4]); + word3_muladd(&w2, &w1, &w0, x[6], y[3]); + word3_muladd(&w2, &w1, &w0, x[7], y[2]); + z[9] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[3], y[7]); + word3_muladd(&w2, &w1, &w0, x[4], y[6]); + word3_muladd(&w2, &w1, &w0, x[5], y[5]); + word3_muladd(&w2, &w1, &w0, x[6], y[4]); + word3_muladd(&w2, &w1, &w0, x[7], y[3]); + z[10] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[4], y[7]); + word3_muladd(&w2, &w1, &w0, x[5], y[6]); + word3_muladd(&w2, &w1, &w0, x[6], y[5]); + word3_muladd(&w2, &w1, &w0, x[7], y[4]); + z[11] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[5], y[7]); + word3_muladd(&w2, &w1, &w0, x[6], y[6]); + word3_muladd(&w2, &w1, &w0, x[7], y[5]); + z[12] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[6], y[7]); + word3_muladd(&w2, &w1, &w0, x[7], y[6]); + z[13] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[7], y[7]); + z[14] = w0; + z[15] = w1; + } + +/* +* Comba 16x16 Squaring +*/ +void bigint_comba_sqr16(word z[32], const word x[16]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], x[ 0]); + z[ 0] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 1]); + z[ 1] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 1], x[ 1]); + z[ 2] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 3]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 2]); + z[ 3] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 4]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 2], x[ 2]); + z[ 4] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 5]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 4]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 3]); + z[ 5] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 6]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 5]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 3], x[ 3]); + z[ 6] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 7]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 6]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 5]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 4]); + z[ 7] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 8]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 7]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 6]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 4], x[ 4]); + z[ 8] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 9]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 8]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 7]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 6]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 5]); + z[ 9] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[10]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 9]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 8]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 7]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 5], x[ 5]); + z[10] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[11]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[10]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 9]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 8]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 7]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 6]); + z[11] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[12]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[11]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[10]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 9]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 8]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 6], x[ 6]); + z[12] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[13]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[12]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[11]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[10]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 9]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 8]); + word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 7]); + z[13] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[13]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[12]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[11]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[10]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 9]); + word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 7], x[ 7]); + z[14] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[13]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[12]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[11]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[10]); + word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 9]); + word3_muladd_2(&w2, &w1, &w0, x[ 7], x[ 8]); + z[15] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[13]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[12]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[11]); + word3_muladd_2(&w2, &w1, &w0, x[ 6], x[10]); + word3_muladd_2(&w2, &w1, &w0, x[ 7], x[ 9]); + word3_muladd(&w2, &w1, &w0, x[ 8], x[ 8]); + z[16] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[13]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[12]); + word3_muladd_2(&w2, &w1, &w0, x[ 6], x[11]); + word3_muladd_2(&w2, &w1, &w0, x[ 7], x[10]); + word3_muladd_2(&w2, &w1, &w0, x[ 8], x[ 9]); + z[17] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[13]); + word3_muladd_2(&w2, &w1, &w0, x[ 6], x[12]); + word3_muladd_2(&w2, &w1, &w0, x[ 7], x[11]); + word3_muladd_2(&w2, &w1, &w0, x[ 8], x[10]); + word3_muladd(&w2, &w1, &w0, x[ 9], x[ 9]); + z[18] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[ 6], x[13]); + word3_muladd_2(&w2, &w1, &w0, x[ 7], x[12]); + word3_muladd_2(&w2, &w1, &w0, x[ 8], x[11]); + word3_muladd_2(&w2, &w1, &w0, x[ 9], x[10]); + z[19] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[ 6], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[ 7], x[13]); + word3_muladd_2(&w2, &w1, &w0, x[ 8], x[12]); + word3_muladd_2(&w2, &w1, &w0, x[ 9], x[11]); + word3_muladd(&w2, &w1, &w0, x[10], x[10]); + z[20] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 6], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[ 7], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[ 8], x[13]); + word3_muladd_2(&w2, &w1, &w0, x[ 9], x[12]); + word3_muladd_2(&w2, &w1, &w0, x[10], x[11]); + z[21] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 7], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[ 8], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[ 9], x[13]); + word3_muladd_2(&w2, &w1, &w0, x[10], x[12]); + word3_muladd(&w2, &w1, &w0, x[11], x[11]); + z[22] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 8], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[ 9], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[10], x[13]); + word3_muladd_2(&w2, &w1, &w0, x[11], x[12]); + z[23] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 9], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[10], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[11], x[13]); + word3_muladd(&w2, &w1, &w0, x[12], x[12]); + z[24] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[10], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[11], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[12], x[13]); + z[25] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[11], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[12], x[14]); + word3_muladd(&w2, &w1, &w0, x[13], x[13]); + z[26] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[12], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[13], x[14]); + z[27] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[13], x[15]); + word3_muladd(&w2, &w1, &w0, x[14], x[14]); + z[28] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[14], x[15]); + z[29] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[15], x[15]); + z[30] = w0; + z[31] = w1; + } + +/* +* Comba 16x16 Multiplication +*/ +void bigint_comba_mul16(word z[32], const word x[16], const word y[16]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 0]); + z[0] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 0]); + z[1] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 0]); + z[2] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 0]); + z[3] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 0]); + z[4] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 0]); + z[5] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 0]); + z[6] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 0]); + z[7] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 0]); + z[8] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[ 0]); + z[9] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[10]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[10], y[ 0]); + z[10] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[11]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[10]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[10], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[11], y[ 0]); + z[11] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[12]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[11]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[10]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[10], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[11], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[12], y[ 0]); + z[12] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[13]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[12]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[11]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[10]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[10], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[11], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[12], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[13], y[ 0]); + z[13] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[14]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[13]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[12]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[11]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[10]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[10], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[11], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[12], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[13], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[14], y[ 0]); + z[14] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[15]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[14]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[13]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[12]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[11]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[10]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[10], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[11], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[12], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[13], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[14], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[15], y[ 0]); + z[15] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 1], y[15]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[14]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[13]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[12]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[11]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[10]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[10], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[11], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[12], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[13], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[14], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[15], y[ 1]); + z[16] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 2], y[15]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[14]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[13]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[12]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[11]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[10]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[10], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[11], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[12], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[13], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[14], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[15], y[ 2]); + z[17] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 3], y[15]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[14]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[13]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[12]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[11]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[10]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[10], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[11], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[12], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[13], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[14], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[15], y[ 3]); + z[18] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 4], y[15]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[14]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[13]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[12]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[11]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[10]); + word3_muladd(&w2, &w1, &w0, x[10], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[11], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[12], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[13], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[14], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[15], y[ 4]); + z[19] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 5], y[15]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[14]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[13]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[12]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[11]); + word3_muladd(&w2, &w1, &w0, x[10], y[10]); + word3_muladd(&w2, &w1, &w0, x[11], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[12], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[13], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[14], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[15], y[ 5]); + z[20] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 6], y[15]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[14]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[13]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[12]); + word3_muladd(&w2, &w1, &w0, x[10], y[11]); + word3_muladd(&w2, &w1, &w0, x[11], y[10]); + word3_muladd(&w2, &w1, &w0, x[12], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[13], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[14], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[15], y[ 6]); + z[21] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 7], y[15]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[14]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[13]); + word3_muladd(&w2, &w1, &w0, x[10], y[12]); + word3_muladd(&w2, &w1, &w0, x[11], y[11]); + word3_muladd(&w2, &w1, &w0, x[12], y[10]); + word3_muladd(&w2, &w1, &w0, x[13], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[14], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[15], y[ 7]); + z[22] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 8], y[15]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[14]); + word3_muladd(&w2, &w1, &w0, x[10], y[13]); + word3_muladd(&w2, &w1, &w0, x[11], y[12]); + word3_muladd(&w2, &w1, &w0, x[12], y[11]); + word3_muladd(&w2, &w1, &w0, x[13], y[10]); + word3_muladd(&w2, &w1, &w0, x[14], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[15], y[ 8]); + z[23] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 9], y[15]); + word3_muladd(&w2, &w1, &w0, x[10], y[14]); + word3_muladd(&w2, &w1, &w0, x[11], y[13]); + word3_muladd(&w2, &w1, &w0, x[12], y[12]); + word3_muladd(&w2, &w1, &w0, x[13], y[11]); + word3_muladd(&w2, &w1, &w0, x[14], y[10]); + word3_muladd(&w2, &w1, &w0, x[15], y[ 9]); + z[24] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[10], y[15]); + word3_muladd(&w2, &w1, &w0, x[11], y[14]); + word3_muladd(&w2, &w1, &w0, x[12], y[13]); + word3_muladd(&w2, &w1, &w0, x[13], y[12]); + word3_muladd(&w2, &w1, &w0, x[14], y[11]); + word3_muladd(&w2, &w1, &w0, x[15], y[10]); + z[25] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[11], y[15]); + word3_muladd(&w2, &w1, &w0, x[12], y[14]); + word3_muladd(&w2, &w1, &w0, x[13], y[13]); + word3_muladd(&w2, &w1, &w0, x[14], y[12]); + word3_muladd(&w2, &w1, &w0, x[15], y[11]); + z[26] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[12], y[15]); + word3_muladd(&w2, &w1, &w0, x[13], y[14]); + word3_muladd(&w2, &w1, &w0, x[14], y[13]); + word3_muladd(&w2, &w1, &w0, x[15], y[12]); + z[27] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[13], y[15]); + word3_muladd(&w2, &w1, &w0, x[14], y[14]); + word3_muladd(&w2, &w1, &w0, x[15], y[13]); + z[28] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[14], y[15]); + word3_muladd(&w2, &w1, &w0, x[15], y[14]); + z[29] = w0; w0 = w1; w1 = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[15], y[15]); + z[30] = w0; + z[31] = w1; + } + +} + +} diff --git a/src/math/mp/mp_core.h b/src/math/mp/mp_core.h new file mode 100644 index 000000000..63082795f --- /dev/null +++ b/src/math/mp/mp_core.h @@ -0,0 +1,144 @@ +/* +* MPI Algorithms +* (C) 1999-2010 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_MP_CORE_H__ +#define BOTAN_MP_CORE_H__ + +#include + +namespace Botan { + +/* +* The size of the word type, in bits +*/ +const u32bit MP_WORD_BITS = BOTAN_MP_WORD_BITS; + +extern "C" { + +/* +* Addition/Subtraction Operations +*/ +void bigint_add2(word x[], u32bit x_size, + const word y[], u32bit y_size); + +void bigint_add3(word z[], + const word x[], u32bit x_size, + const word y[], u32bit y_size); + +word bigint_add2_nc(word x[], u32bit x_size, const word y[], u32bit y_size); + +word bigint_add3_nc(word z[], + const word x[], u32bit x_size, + const word y[], u32bit y_size); + +word bigint_sub2(word x[], u32bit x_size, + const word y[], u32bit y_size); + +/** +* x = y - x; assumes y >= x +*/ +void bigint_sub2_rev(word x[], const word y[], u32bit y_size); + +word bigint_sub3(word z[], + const word x[], u32bit x_size, + const word y[], u32bit y_size); + +/* +* Shift Operations +*/ +void bigint_shl1(word x[], u32bit x_size, + u32bit word_shift, u32bit bit_shift); + +void bigint_shr1(word x[], u32bit x_size, + u32bit word_shift, u32bit bit_shift); + +void bigint_shl2(word y[], const word x[], u32bit x_size, + u32bit word_shift, u32bit bit_shift); + +void bigint_shr2(word y[], const word x[], u32bit x_size, + u32bit word_shift, u32bit bit_shift); + +/* +* Simple O(N^2) Multiplication and Squaring +*/ +void bigint_simple_mul(word z[], + const word x[], u32bit x_size, + const word y[], u32bit y_size); + +void bigint_simple_sqr(word z[], const word x[], u32bit x_size); + +/* +* Linear Multiply +*/ +void bigint_linmul2(word x[], u32bit x_size, word y); +void bigint_linmul3(word z[], const word x[], u32bit x_size, word y); + +/* +* Montgomery Reduction +* @param z integer to reduce (also output in first x_size+1 words) +* @param z_size size of z (should be >= 2*x_size+1) +* @param workspace array of at least 2*(x_size+1) words +* @param x modulus +* @param x_size size of x +* @param u Montgomery value +*/ +void bigint_monty_redc(word z[], u32bit z_size, + word workspace[], + const word x[], u32bit x_size, + word u); + +/* +* Division operation +*/ +u32bit bigint_divcore(word q, word y2, word y1, + word x3, word x2, word x1); + +/** +* Compare x and y +*/ +s32bit bigint_cmp(const word x[], u32bit x_size, + const word y[], u32bit y_size); + +/** +* Compute ((n1< +mp_asm.h +mp_asmi.h + diff --git a/src/math/mp/mp_generic/mp_asm.h b/src/math/mp/mp_generic/mp_asm.h new file mode 100644 index 000000000..7c18343ef --- /dev/null +++ b/src/math/mp/mp_generic/mp_asm.h @@ -0,0 +1,54 @@ +/* +* Lowest Level MPI Algorithms +* (C) 1999-2008 Jack Lloyd +* 2006 Luca Piccarreta +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_MP_ASM_H__ +#define BOTAN_MP_ASM_H__ + +#include + +#if (BOTAN_MP_WORD_BITS == 8) + typedef Botan::u16bit dword; +#elif (BOTAN_MP_WORD_BITS == 16) + typedef Botan::u32bit dword; +#elif (BOTAN_MP_WORD_BITS == 32) + typedef Botan::u64bit dword; +#elif (BOTAN_MP_WORD_BITS == 64) + #error BOTAN_MP_WORD_BITS can be 64 only with assembly support +#else + #error BOTAN_MP_WORD_BITS must be 8, 16, 32, or 64 +#endif + +namespace Botan { + +extern "C" { + +/* +* Word Multiply/Add +*/ +inline word word_madd2(word a, word b, word* c) + { + dword z = (dword)a * b + *c; + *c = (word)(z >> BOTAN_MP_WORD_BITS); + return (word)z; + } + +/* +* Word Multiply/Add +*/ +inline word word_madd3(word a, word b, word c, word* d) + { + dword z = (dword)a * b + c + *d; + *d = (word)(z >> BOTAN_MP_WORD_BITS); + return (word)z; + } + +} + +} + +#endif diff --git a/src/math/mp/mp_generic/mp_asmi.h b/src/math/mp/mp_generic/mp_asmi.h new file mode 100644 index 000000000..8225f372d --- /dev/null +++ b/src/math/mp/mp_generic/mp_asmi.h @@ -0,0 +1,207 @@ +/* +* Lowest Level MPI Algorithms +* (C) 1999-2010 Jack Lloyd +* 2006 Luca Piccarreta +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_MP_ASM_INTERNAL_H__ +#define BOTAN_MP_ASM_INTERNAL_H__ + +#include + +namespace Botan { + +extern "C" { + +/* +* Word Addition +*/ +inline word word_add(word x, word y, word* carry) + { + word z = x + y; + word c1 = (z < x); + z += *carry; + *carry = c1 | (z < *carry); + return z; + } + +/* +* Eight Word Block Addition, Two Argument +*/ +inline word word8_add2(word x[8], const word y[8], word carry) + { + x[0] = word_add(x[0], y[0], &carry); + x[1] = word_add(x[1], y[1], &carry); + x[2] = word_add(x[2], y[2], &carry); + x[3] = word_add(x[3], y[3], &carry); + x[4] = word_add(x[4], y[4], &carry); + x[5] = word_add(x[5], y[5], &carry); + x[6] = word_add(x[6], y[6], &carry); + x[7] = word_add(x[7], y[7], &carry); + return carry; + } + +/* +* Eight Word Block Addition, Three Argument +*/ +inline word word8_add3(word z[8], const word x[8], + const word y[8], word carry) + { + z[0] = word_add(x[0], y[0], &carry); + z[1] = word_add(x[1], y[1], &carry); + z[2] = word_add(x[2], y[2], &carry); + z[3] = word_add(x[3], y[3], &carry); + z[4] = word_add(x[4], y[4], &carry); + z[5] = word_add(x[5], y[5], &carry); + z[6] = word_add(x[6], y[6], &carry); + z[7] = word_add(x[7], y[7], &carry); + return carry; + } + +/* +* Word Subtraction +*/ +inline word word_sub(word x, word y, word* carry) + { + word t0 = x - y; + word c1 = (t0 > x); + word z = t0 - *carry; + *carry = c1 | (z > t0); + return z; + } + +/* +* Eight Word Block Subtraction, Two Argument +*/ +inline word word8_sub2(word x[8], const word y[8], word carry) + { + x[0] = word_sub(x[0], y[0], &carry); + x[1] = word_sub(x[1], y[1], &carry); + x[2] = word_sub(x[2], y[2], &carry); + x[3] = word_sub(x[3], y[3], &carry); + x[4] = word_sub(x[4], y[4], &carry); + x[5] = word_sub(x[5], y[5], &carry); + x[6] = word_sub(x[6], y[6], &carry); + x[7] = word_sub(x[7], y[7], &carry); + return carry; + } + +/* +* Eight Word Block Subtraction, Two Argument +*/ +inline word word8_sub2_rev(word x[8], const word y[8], word carry) + { + x[0] = word_sub(y[0], x[0], &carry); + x[1] = word_sub(y[1], x[1], &carry); + x[2] = word_sub(y[2], x[2], &carry); + x[3] = word_sub(y[3], x[3], &carry); + x[4] = word_sub(y[4], x[4], &carry); + x[5] = word_sub(y[5], x[5], &carry); + x[6] = word_sub(y[6], x[6], &carry); + x[7] = word_sub(y[7], x[7], &carry); + return carry; + } + +/* +* Eight Word Block Subtraction, Three Argument +*/ +inline word word8_sub3(word z[8], const word x[8], + const word y[8], word carry) + { + z[0] = word_sub(x[0], y[0], &carry); + z[1] = word_sub(x[1], y[1], &carry); + z[2] = word_sub(x[2], y[2], &carry); + z[3] = word_sub(x[3], y[3], &carry); + z[4] = word_sub(x[4], y[4], &carry); + z[5] = word_sub(x[5], y[5], &carry); + z[6] = word_sub(x[6], y[6], &carry); + z[7] = word_sub(x[7], y[7], &carry); + return carry; + } + +/* +* Eight Word Block Linear Multiplication +*/ +inline word word8_linmul2(word x[8], word y, word carry) + { + x[0] = word_madd2(x[0], y, &carry); + x[1] = word_madd2(x[1], y, &carry); + x[2] = word_madd2(x[2], y, &carry); + x[3] = word_madd2(x[3], y, &carry); + x[4] = word_madd2(x[4], y, &carry); + x[5] = word_madd2(x[5], y, &carry); + x[6] = word_madd2(x[6], y, &carry); + x[7] = word_madd2(x[7], y, &carry); + return carry; + } + +/* +* Eight Word Block Linear Multiplication +*/ +inline word word8_linmul3(word z[8], const word x[8], word y, word carry) + { + z[0] = word_madd2(x[0], y, &carry); + z[1] = word_madd2(x[1], y, &carry); + z[2] = word_madd2(x[2], y, &carry); + z[3] = word_madd2(x[3], y, &carry); + z[4] = word_madd2(x[4], y, &carry); + z[5] = word_madd2(x[5], y, &carry); + z[6] = word_madd2(x[6], y, &carry); + z[7] = word_madd2(x[7], y, &carry); + return carry; + } + +/* +* Eight Word Block Multiply/Add +*/ +inline word word8_madd3(word z[8], const word x[8], word y, word carry) + { + z[0] = word_madd3(x[0], y, z[0], &carry); + z[1] = word_madd3(x[1], y, z[1], &carry); + z[2] = word_madd3(x[2], y, z[2], &carry); + z[3] = word_madd3(x[3], y, z[3], &carry); + z[4] = word_madd3(x[4], y, z[4], &carry); + z[5] = word_madd3(x[5], y, z[5], &carry); + z[6] = word_madd3(x[6], y, z[6], &carry); + z[7] = word_madd3(x[7], y, z[7], &carry); + return carry; + } + +/* +* Multiply-Add Accumulator +*/ +inline void word3_muladd(word* w2, word* w1, word* w0, word a, word b) + { + word carry = *w0; + *w0 = word_madd2(a, b, &carry); + *w1 += carry; + *w2 += (*w1 < carry) ? 1 : 0; + } + +/* +* Multiply-Add Accumulator +*/ +inline void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b) + { + word carry = 0; + a = word_madd2(a, b, &carry); + b = carry; + + word top = (b >> (BOTAN_MP_WORD_BITS-1)); + b <<= 1; + b |= (a >> (BOTAN_MP_WORD_BITS-1)); + a <<= 1; + + carry = 0; + *w0 = word_add(*w0, a, &carry); + *w1 = word_add(*w1, b, &carry); + *w2 = word_add(*w2, top, &carry); + } + +} + +} + +#endif diff --git a/src/math/mp/mp_ia32/info.txt b/src/math/mp/mp_ia32/info.txt new file mode 100644 index 000000000..1659f74cf --- /dev/null +++ b/src/math/mp/mp_ia32/info.txt @@ -0,0 +1,18 @@ +load_on dep + +mp_bits 32 + + +mp_asm.h +mp_asmi.h + + + +ia32 + + + +clang +gcc +icc + diff --git a/src/math/mp/mp_ia32/mp_asm.h b/src/math/mp/mp_ia32/mp_asm.h new file mode 100644 index 000000000..4d3afc992 --- /dev/null +++ b/src/math/mp/mp_ia32/mp_asm.h @@ -0,0 +1,67 @@ +/* +* Lowest Level MPI Algorithms +* (C) 1999-2008 Jack Lloyd +* 2006 Luca Piccarreta +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_MP_ASM_H__ +#define BOTAN_MP_ASM_H__ + +#include + +#if (BOTAN_MP_WORD_BITS != 32) + #error The mp_ia32 module requires that BOTAN_MP_WORD_BITS == 32 +#endif + +namespace Botan { + +extern "C" { + +/* +* Helper Macros for x86 Assembly +*/ +#define ASM(x) x "\n\t" + +/* +* Word Multiply +*/ +inline word word_madd2(word a, word b, word* c) + { + asm( + ASM("mull %[b]") + ASM("addl %[c],%[a]") + ASM("adcl $0,%[carry]") + + : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) + : "0"(a), "1"(b), [c]"g"(*c) : "cc"); + + return a; + } + +/* +* Word Multiply/Add +*/ +inline word word_madd3(word a, word b, word c, word* d) + { + asm( + ASM("mull %[b]") + + ASM("addl %[c],%[a]") + ASM("adcl $0,%[carry]") + + ASM("addl %[d],%[a]") + ASM("adcl $0,%[carry]") + + : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) + : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); + + return a; + } + +} + +} + +#endif diff --git a/src/math/mp/mp_ia32/mp_asmi.h b/src/math/mp/mp_ia32/mp_asmi.h new file mode 100644 index 000000000..c7b679e80 --- /dev/null +++ b/src/math/mp/mp_ia32/mp_asmi.h @@ -0,0 +1,240 @@ +/* +* Lowest Level MPI Algorithms +* (C) 1999-2010 Jack Lloyd +* 2006 Luca Piccarreta +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_MP_ASM_INTERNAL_H__ +#define BOTAN_MP_ASM_INTERNAL_H__ + +#include + +namespace Botan { + +extern "C" { + +/* +* Helper Macros for x86 Assembly +*/ +#ifndef ASM + #define ASM(x) x "\n\t" +#endif + +#define ADDSUB2_OP(OPERATION, INDEX) \ + ASM("movl 4*" #INDEX "(%[y]), %[carry]") \ + ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])") \ + +#define ADDSUB3_OP(OPERATION, INDEX) \ + ASM("movl 4*" #INDEX "(%[x]), %[carry]") \ + ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]") \ + ASM("movl %[carry], 4*" #INDEX "(%[z])") \ + +#define LINMUL_OP(WRITE_TO, INDEX) \ + ASM("movl 4*" #INDEX "(%[x]),%%eax") \ + ASM("mull %[y]") \ + ASM("addl %[carry],%%eax") \ + ASM("adcl $0,%%edx") \ + ASM("movl %%edx,%[carry]") \ + ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])") + +#define MULADD_OP(IGNORED, INDEX) \ + ASM("movl 4*" #INDEX "(%[x]),%%eax") \ + ASM("mull %[y]") \ + ASM("addl %[carry],%%eax") \ + ASM("adcl $0,%%edx") \ + ASM("addl 4*" #INDEX "(%[z]),%%eax") \ + ASM("adcl $0,%%edx") \ + ASM("movl %%edx,%[carry]") \ + ASM("movl %%eax, 4*" #INDEX " (%[z])") + +#define DO_8_TIMES(MACRO, ARG) \ + MACRO(ARG, 0) \ + MACRO(ARG, 1) \ + MACRO(ARG, 2) \ + MACRO(ARG, 3) \ + MACRO(ARG, 4) \ + MACRO(ARG, 5) \ + MACRO(ARG, 6) \ + MACRO(ARG, 7) + +#define ADD_OR_SUBTRACT(CORE_CODE) \ + ASM("rorl %[carry]") \ + CORE_CODE \ + ASM("sbbl %[carry],%[carry]") \ + ASM("negl %[carry]") + +/* +* Word Addition +*/ +inline word word_add(word x, word y, word* carry) + { + asm( + ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + } + +/* +* Eight Word Block Addition, Two Argument +*/ +inline word word8_add2(word x[8], const word y[8], word carry) + { + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + } + +/* +* Eight Word Block Addition, Three Argument +*/ +inline word word8_add3(word z[8], const word x[8], const word y[8], word carry) + { + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + } + +/* +* Word Subtraction +*/ +inline word word_sub(word x, word y, word* carry) + { + asm( + ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + } + +/* +* Eight Word Block Subtraction, Two Argument +*/ +inline word word8_sub2(word x[8], const word y[8], word carry) + { + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + } + +/* +* Eight Word Block Subtraction, Two Argument +*/ +inline word word8_sub2_rev(word x[8], const word y[8], word carry) + { + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) + : [carry]"=r"(carry) + : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) + : "cc", "memory"); + return carry; + } + +/* +* Eight Word Block Subtraction, Three Argument +*/ +inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry) + { + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + } + +/* +* Eight Word Block Linear Multiplication +*/ +inline word word8_linmul2(word x[8], word y, word carry) + { + asm( + DO_8_TIMES(LINMUL_OP, "x") + : [carry]"=r"(carry) + : [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%eax", "%edx"); + return carry; + } + +/* +* Eight Word Block Linear Multiplication +*/ +inline word word8_linmul3(word z[8], const word x[8], word y, word carry) + { + asm( + DO_8_TIMES(LINMUL_OP, "z") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%eax", "%edx"); + return carry; + } + +/* +* Eight Word Block Multiply/Add +*/ +inline word word8_madd3(word z[8], const word x[8], word y, word carry) + { + asm( + DO_8_TIMES(MULADD_OP, "") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%eax", "%edx"); + return carry; + } + +/* +* Multiply-Add Accumulator +*/ +inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) + { + asm( + ASM("mull %[y]") + + ASM("addl %[x],%[w0]") + ASM("adcl %[y],%[w1]") + ASM("adcl $0,%[w2]") + + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + } + +/* +* Multiply-Add Accumulator +*/ +inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) + { + asm( + ASM("mull %[y]") + + ASM("addl %[x],%[w0]") + ASM("adcl %[y],%[w1]") + ASM("adcl $0,%[w2]") + + ASM("addl %[x],%[w0]") + ASM("adcl %[y],%[w1]") + ASM("adcl $0,%[w2]") + + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + } + +} + +} + +#endif diff --git a/src/math/mp/mp_ia32_msvc/info.txt b/src/math/mp/mp_ia32_msvc/info.txt new file mode 100644 index 000000000..55a42c310 --- /dev/null +++ b/src/math/mp/mp_ia32_msvc/info.txt @@ -0,0 +1,16 @@ +mp_bits 32 + +load_on dep + + +mp_generic:mp_asm.h +mp_asmi.h + + + +ia32 + + + +msvc + diff --git a/src/math/mp/mp_ia32_msvc/mp_asmi.h b/src/math/mp/mp_ia32_msvc/mp_asmi.h new file mode 100644 index 000000000..aee457d65 --- /dev/null +++ b/src/math/mp/mp_ia32_msvc/mp_asmi.h @@ -0,0 +1,542 @@ +/* +* Lowest Level MPI Algorithms +* (C) 1999-2010 Jack Lloyd +* 2006 Luca Piccarreta +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_MP_ASM_INTERNAL_H__ +#define BOTAN_MP_ASM_INTERNAL_H__ + +#include + +namespace Botan { + +extern "C" { + +/* +* Word Addition +*/ +inline word word_add(word x, word y, word* carry) + { + word z = x + y; + word c1 = (z < x); + z += *carry; + *carry = c1 | (z < *carry); + return z; + } + +/* +* Eight Word Block Addition, Two Argument +*/ +inline word word8_add2(word x[8], const word y[8], word carry) + { + __asm { + mov edx,[x] + mov esi,[y] + xor eax,eax + sub eax,[carry] //force CF=1 iff *carry==1 + mov eax,[esi] + adc [edx],eax + mov eax,[esi+4] + adc [edx+4],eax + mov eax,[esi+8] + adc [edx+8],eax + mov eax,[esi+12] + adc [edx+12],eax + mov eax,[esi+16] + adc [edx+16],eax + mov eax,[esi+20] + adc [edx+20],eax + mov eax,[esi+24] + adc [edx+24],eax + mov eax,[esi+28] + adc [edx+28],eax + sbb eax,eax + neg eax + } + } + +/* +* Eight Word Block Addition, Three Argument +*/ +inline word word8_add3(word z[8], const word x[8], const word y[8], word carry) + { + __asm { + mov edi,[x] + mov esi,[y] + mov ebx,[z] + xor eax,eax + sub eax,[carry] //force CF=1 iff *carry==1 + mov eax,[edi] + adc eax,[esi] + mov [ebx],eax + + mov eax,[edi+4] + adc eax,[esi+4] + mov [ebx+4],eax + + mov eax,[edi+8] + adc eax,[esi+8] + mov [ebx+8],eax + + mov eax,[edi+12] + adc eax,[esi+12] + mov [ebx+12],eax + + mov eax,[edi+16] + adc eax,[esi+16] + mov [ebx+16],eax + + mov eax,[edi+20] + adc eax,[esi+20] + mov [ebx+20],eax + + mov eax,[edi+24] + adc eax,[esi+24] + mov [ebx+24],eax + + mov eax,[edi+28] + adc eax,[esi+28] + mov [ebx+28],eax + + sbb eax,eax + neg eax + } + } + +/* +* Word Subtraction +*/ +inline word word_sub(word x, word y, word* carry) + { + word t0 = x - y; + word c1 = (t0 > x); + word z = t0 - *carry; + *carry = c1 | (z > t0); + return z; + } + +/* +* Eight Word Block Subtraction, Two Argument +*/ +inline word word8_sub2(word x[8], const word y[8], word carry) + { + __asm { + mov edi,[x] + mov esi,[y] + xor eax,eax + sub eax,[carry] //force CF=1 iff *carry==1 + mov eax,[edi] + sbb eax,[esi] + mov [edi],eax + mov eax,[edi+4] + sbb eax,[esi+4] + mov [edi+4],eax + mov eax,[edi+8] + sbb eax,[esi+8] + mov [edi+8],eax + mov eax,[edi+12] + sbb eax,[esi+12] + mov [edi+12],eax + mov eax,[edi+16] + sbb eax,[esi+16] + mov [edi+16],eax + mov eax,[edi+20] + sbb eax,[esi+20] + mov [edi+20],eax + mov eax,[edi+24] + sbb eax,[esi+24] + mov [edi+24],eax + mov eax,[edi+28] + sbb eax,[esi+28] + mov [edi+28],eax + sbb eax,eax + neg eax + } + } + +/* +* Eight Word Block Subtraction, Two Argument +*/ +inline word word8_sub2_rev(word x[8], const word y[8], word carry) + { + x[0] = word_sub(y[0], x[0], &carry); + x[1] = word_sub(y[1], x[1], &carry); + x[2] = word_sub(y[2], x[2], &carry); + x[3] = word_sub(y[3], x[3], &carry); + x[4] = word_sub(y[4], x[4], &carry); + x[5] = word_sub(y[5], x[5], &carry); + x[6] = word_sub(y[6], x[6], &carry); + x[7] = word_sub(y[7], x[7], &carry); + return carry; + } + + +/* +* Eight Word Block Subtraction, Three Argument +*/ +inline word word8_sub3(word z[8], const word x[8], + const word y[8], word carry) + { + __asm { + mov edi,[x] + mov esi,[y] + xor eax,eax + sub eax,[carry] //force CF=1 iff *carry==1 + mov ebx,[z] + mov eax,[edi] + sbb eax,[esi] + mov [ebx],eax + mov eax,[edi+4] + sbb eax,[esi+4] + mov [ebx+4],eax + mov eax,[edi+8] + sbb eax,[esi+8] + mov [ebx+8],eax + mov eax,[edi+12] + sbb eax,[esi+12] + mov [ebx+12],eax + mov eax,[edi+16] + sbb eax,[esi+16] + mov [ebx+16],eax + mov eax,[edi+20] + sbb eax,[esi+20] + mov [ebx+20],eax + mov eax,[edi+24] + sbb eax,[esi+24] + mov [ebx+24],eax + mov eax,[edi+28] + sbb eax,[esi+28] + mov [ebx+28],eax + sbb eax,eax + neg eax + } + } + +/* +* Eight Word Block Linear Multiplication +*/ +inline word word8_linmul2(word x[8], word y, word carry) + { + __asm { + mov esi,[x] + mov eax,[esi] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,[carry] //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi],eax //load a + + mov eax,[esi+4] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi+4],eax //load a + + mov eax,[esi+8] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi+8],eax //load a + + mov eax,[esi+12] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi+12],eax //load a + + mov eax,[esi+16] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi+16],eax //load a + + mov eax,[esi+20] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi+20],eax //load a + + mov eax,[esi+24] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [esi+24],eax //load a + + mov eax,[esi+28] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov [esi+28],eax //load a + + mov eax,edx //store carry + } + } + +/* +* Eight Word Block Linear Multiplication +*/ +inline word word8_muladd(word z[8], const word x[8], + word y, word carry) + { + __asm { + mov esi,[x] + mov ebx,[y] + mov edi,[z] + mov eax,[esi] //load a + mul ebx //edx(hi):eax(lo)=a*b + add eax,[carry] //sum lo carry + adc edx,0 //sum hi carry + add eax,[edi] //sum lo z + adc edx,0 //sum hi z + mov ecx,edx //carry for next block = hi z + mov [edi],eax //save lo z + + mov eax,[esi+4] + mul ebx + add eax,ecx + adc edx,0 + add eax,[edi+4] + adc edx,0 + mov ecx,edx + mov [edi+4],eax + + mov eax,[esi+8] + mul ebx + add eax,ecx + adc edx,0 + add eax,[edi+8] + adc edx,0 + mov ecx,edx + mov [edi+8],eax + + mov eax,[esi+12] + mul ebx + add eax,ecx + adc edx,0 + add eax,[edi+12] + adc edx,0 + mov ecx,edx + mov [edi+12],eax + + mov eax,[esi+16] + mul ebx + add eax,ecx + adc edx,0 + add eax,[edi+16] + adc edx,0 + mov ecx,edx + mov [edi+16],eax + + mov eax,[esi+20] + mul ebx + add eax,ecx + adc edx,0 + add eax,[edi+20] + adc edx,0 + mov ecx,edx + mov [edi+20],eax + + mov eax,[esi+24] + mul ebx + add eax,ecx + adc edx,0 + add eax,[edi+24] + adc edx,0 + mov ecx,edx + mov [edi+24],eax + + mov eax,[esi+28] + mul ebx + add eax,ecx + adc edx,0 + add eax,[edi+28] + adc edx,0 + mov [edi+28],eax + mov eax,edx + } + } + +inline word word8_linmul3(word z[4], const word x[4], word y, word carry) + { + __asm { +#if 0 + //it's slower!!! + mov edx,[z] + mov eax,[x] + movd mm7,[y] + + movd mm0,[eax] + movd mm1,[eax+4] + movd mm2,[eax+8] + pmuludq mm0,mm7 + pmuludq mm1,mm7 + pmuludq mm2,mm7 + + movd mm6,[carry] + paddq mm0,mm6 + movd [edx],mm0 + + psrlq mm0,32 + paddq mm1,mm0 + movd [edx+4],mm1 + + movd mm3,[eax+12] + psrlq mm1,32 + paddq mm2,mm1 + movd [edx+8],mm2 + + pmuludq mm3,mm7 + movd mm4,[eax+16] + psrlq mm2,32 + paddq mm3,mm2 + movd [edx+12],mm3 + + pmuludq mm4,mm7 + movd mm5,[eax+20] + psrlq mm3,32 + paddq mm4,mm3 + movd [edx+16],mm4 + + pmuludq mm5,mm7 + movd mm0,[eax+24] + psrlq mm4,32 + paddq mm5,mm4 + movd [edx+20],mm5 + + pmuludq mm0,mm7 + movd mm1,[eax+28] + psrlq mm5,32 + paddq mm0,mm5 + movd [edx+24],mm0 + + pmuludq mm1,mm7 + psrlq mm0,32 + paddq mm1,mm0 + movd [edx+28],mm1 + psrlq mm1,32 + + movd eax,mm1 + emms +#else + mov edi,[z] + mov esi,[x] + mov eax,[esi] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,[carry] //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi],eax //load a + + mov eax,[esi+4] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi+4],eax //load a + + mov eax,[esi+8] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi+8],eax //load a + + mov eax,[esi+12] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi+12],eax //load a + + mov eax,[esi+16] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi+16],eax //load a + + mov eax,[esi+20] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi+20],eax //load a + + mov eax,[esi+24] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov ecx,edx //store carry + mov [edi+24],eax //load a + + mov eax,[esi+28] //load a + mul [y] //edx(hi):eax(lo)=a*b + add eax,ecx //sum lo carry + adc edx,0 //sum hi carry + mov [edi+28],eax //load a + mov eax,edx //store carry +#endif + } + } + +/* +* Eight Word Block Multiply/Add +*/ +inline word word8_madd3(word z[8], const word x[8], word y, word carry) + { + z[0] = word_madd3(x[0], y, z[0], &carry); + z[1] = word_madd3(x[1], y, z[1], &carry); + z[2] = word_madd3(x[2], y, z[2], &carry); + z[3] = word_madd3(x[3], y, z[3], &carry); + z[4] = word_madd3(x[4], y, z[4], &carry); + z[5] = word_madd3(x[5], y, z[5], &carry); + z[6] = word_madd3(x[6], y, z[6], &carry); + z[7] = word_madd3(x[7], y, z[7], &carry); + return carry; + } + +/* +* Multiply-Add Accumulator +*/ +inline void word3_muladd(word* w2, word* w1, word* w0, word a, word b) + { + word carry = *w0; + *w0 = word_madd2(a, b, &carry); + *w1 += carry; + *w2 += (*w1 < carry) ? 1 : 0; + } + +/* +* Multiply-Add Accumulator +*/ +inline void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b) + { + word carry = 0; + a = word_madd2(a, b, &carry); + b = carry; + + word top = (b >> (BOTAN_MP_WORD_BITS-1)); + b <<= 1; + b |= (a >> (BOTAN_MP_WORD_BITS-1)); + a <<= 1; + + carry = 0; + *w0 = word_add(*w0, a, &carry); + *w1 = word_add(*w1, b, &carry); + *w2 = word_add(*w2, top, &carry); + } + +} + +} + +#endif diff --git a/src/math/mp/mp_karat.cpp b/src/math/mp/mp_karat.cpp new file mode 100644 index 000000000..8ae346f1e --- /dev/null +++ b/src/math/mp/mp_karat.cpp @@ -0,0 +1,340 @@ +/* +* Karatsuba Multiplication/Squaring +* (C) 1999-2008 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#include +#include +#include + +namespace Botan { + +namespace { + +/* +* Karatsuba Multiplication Operation +*/ +void karatsuba_mul(word z[], const word x[], const word y[], u32bit N, + word workspace[]) + { + if(N == 6) + bigint_comba_mul6(z, x, y); + else if(N == 8) + bigint_comba_mul8(z, x, y); + else if(N == 16) + bigint_comba_mul16(z, x, y); + else if(N < BOTAN_KARAT_MUL_THRESHOLD || N % 2) + bigint_simple_mul(z, x, N, y, N); + else + { + const u32bit N2 = N / 2; + + const word* x0 = x; + const word* x1 = x + N2; + const word* y0 = y; + const word* y1 = y + N2; + word* z0 = z; + word* z1 = z + N; + + const s32bit cmp0 = bigint_cmp(x0, N2, x1, N2); + const s32bit cmp1 = bigint_cmp(y1, N2, y0, N2); + + clear_mem(workspace, 2*N); + + if(cmp0 && cmp1) + { + if(cmp0 > 0) + bigint_sub3(z0, x0, N2, x1, N2); + else + bigint_sub3(z0, x1, N2, x0, N2); + + if(cmp1 > 0) + bigint_sub3(z1, y1, N2, y0, N2); + else + bigint_sub3(z1, y0, N2, y1, N2); + + karatsuba_mul(workspace, z0, z1, N2, workspace+N); + } + + karatsuba_mul(z0, x0, y0, N2, workspace+N); + karatsuba_mul(z1, x1, y1, N2, workspace+N); + + const u32bit blocks_of_8 = N - (N % 8); + + word carry = 0; + + for(u32bit j = 0; j != blocks_of_8; j += 8) + carry = word8_add3(workspace + N + j, z0 + j, z1 + j, carry); + + for(u32bit j = blocks_of_8; j != N; ++j) + workspace[N + j] = word_add(z0[j], z1[j], &carry); + + word carry2 = 0; + + for(u32bit j = 0; j != blocks_of_8; j += 8) + carry2 = word8_add2(z + N2 + j, workspace + N + j, carry2); + + for(u32bit j = blocks_of_8; j != N; ++j) + z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &carry2); + + z[N + N2] = word_add(z[N + N2], carry2, &carry); + + if(carry) + for(u32bit j = 1; j != N2; ++j) + if(++z[N + N2 + j]) + break; + + if((cmp0 == cmp1) || (cmp0 == 0) || (cmp1 == 0)) + bigint_add2(z + N2, 2*N-N2, workspace, N); + else + bigint_sub2(z + N2, 2*N-N2, workspace, N); + } + } + +/* +* Karatsuba Squaring Operation +*/ +void karatsuba_sqr(word z[], const word x[], u32bit N, word workspace[]) + { + if(N == 6) + bigint_comba_sqr6(z, x); + else if(N == 8) + bigint_comba_sqr8(z, x); + else if(N == 16) + bigint_comba_sqr16(z, x); + else if(N < BOTAN_KARAT_SQR_THRESHOLD || N % 2) + bigint_simple_sqr(z, x, N); + else + { + const u32bit N2 = N / 2; + + const word* x0 = x; + const word* x1 = x + N2; + word* z0 = z; + word* z1 = z + N; + + const s32bit cmp = bigint_cmp(x0, N2, x1, N2); + + clear_mem(workspace, 2*N); + + if(cmp) + { + if(cmp > 0) + bigint_sub3(z0, x0, N2, x1, N2); + else + bigint_sub3(z0, x1, N2, x0, N2); + + karatsuba_sqr(workspace, z0, N2, workspace+N); + } + + karatsuba_sqr(z0, x0, N2, workspace+N); + karatsuba_sqr(z1, x1, N2, workspace+N); + + const u32bit blocks_of_8 = N - (N % 8); + + word carry = 0; + + for(u32bit j = 0; j != blocks_of_8; j += 8) + carry = word8_add3(workspace + N + j, z0 + j, z1 + j, carry); + + for(u32bit j = blocks_of_8; j != N; ++j) + workspace[N + j] = word_add(z0[j], z1[j], &carry); + + word carry2 = 0; + + for(u32bit j = 0; j != blocks_of_8; j += 8) + carry2 = word8_add2(z + N2 + j, workspace + N + j, carry2); + + for(u32bit j = blocks_of_8; j != N; ++j) + z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &carry2); + + z[N + N2] = word_add(z[N + N2], carry2, &carry); + + if(carry) + for(u32bit j = 1; j != N2; ++j) + if(++z[N + N2 + j]) + break; + + if(cmp == 0) + bigint_add2(z + N2, 2*N-N2, workspace, N); + else + bigint_sub2(z + N2, 2*N-N2, workspace, N); + } + } + +/* +* Pick a good size for the Karatsuba multiply +*/ +u32bit karatsuba_size(u32bit z_size, + u32bit x_size, u32bit x_sw, + u32bit y_size, u32bit y_sw) + { + if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size) + return 0; + + if(((x_size == x_sw) && (x_size % 2)) || + ((y_size == y_sw) && (y_size % 2))) + return 0; + + const u32bit start = (x_sw > y_sw) ? x_sw : y_sw; + const u32bit end = (x_size < y_size) ? x_size : y_size; + + if(start == end) + { + if(start % 2) + return 0; + return start; + } + + for(u32bit j = start; j <= end; ++j) + { + if(j % 2) + continue; + + if(2*j > z_size) + return 0; + + if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size) + { + if(j % 4 == 2 && + (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size) + return j+2; + return j; + } + } + + return 0; + } + +/* +* Pick a good size for the Karatsuba squaring +*/ +u32bit karatsuba_size(u32bit z_size, u32bit x_size, u32bit x_sw) + { + if(x_sw == x_size) + { + if(x_sw % 2) + return 0; + return x_sw; + } + + for(u32bit j = x_sw; j <= x_size; ++j) + { + if(j % 2) + continue; + + if(2*j > z_size) + return 0; + + if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size) + return j+2; + return j; + } + + return 0; + } + +} + +/* +* Multiplication Algorithm Dispatcher +*/ +void bigint_mul(word z[], u32bit z_size, word workspace[], + const word x[], u32bit x_size, u32bit x_sw, + const word y[], u32bit y_size, u32bit y_sw) + { + if(x_sw == 1) + { + bigint_linmul3(z, y, y_sw, x[0]); + } + else if(y_sw == 1) + { + bigint_linmul3(z, x, x_sw, y[0]); + } + else if(x_sw <= 4 && x_size >= 4 && + y_sw <= 4 && y_size >= 4 && z_size >= 8) + { + bigint_comba_mul4(z, x, y); + } + else if(x_sw <= 6 && x_size >= 6 && + y_sw <= 6 && y_size >= 6 && z_size >= 12) + { + bigint_comba_mul6(z, x, y); + } + else if(x_sw <= 8 && x_size >= 8 && + y_sw <= 8 && y_size >= 8 && z_size >= 16) + { + bigint_comba_mul8(z, x, y); + } + else if(x_sw <= 16 && x_size >= 16 && + y_sw <= 16 && y_size >= 16 && z_size >= 32) + { + bigint_comba_mul16(z, x, y); + } + else if(x_sw < BOTAN_KARAT_MUL_THRESHOLD || + y_sw < BOTAN_KARAT_MUL_THRESHOLD || + !workspace) + { + bigint_simple_mul(z, x, x_sw, y, y_sw); + } + else + { + const u32bit N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw); + + if(N) + { + clear_mem(workspace, 2*N); + karatsuba_mul(z, x, y, N, workspace); + } + else + bigint_simple_mul(z, x, x_sw, y, y_sw); + } + } + +/* +* Squaring Algorithm Dispatcher +*/ +void bigint_sqr(word z[], u32bit z_size, word workspace[], + const word x[], u32bit x_size, u32bit x_sw) + { + if(x_sw == 1) + { + bigint_linmul3(z, x, x_sw, x[0]); + } + else if(x_sw <= 4 && x_size >= 4 && z_size >= 8) + { + bigint_comba_sqr4(z, x); + } + else if(x_sw <= 6 && x_size >= 6 && z_size >= 12) + { + bigint_comba_sqr6(z, x); + } + else if(x_sw <= 8 && x_size >= 8 && z_size >= 16) + { + bigint_comba_sqr8(z, x); + } + else if(x_sw <= 16 && x_size >= 16 && z_size >= 32) + { + bigint_comba_sqr16(z, x); + } + else if(x_size < BOTAN_KARAT_SQR_THRESHOLD || !workspace) + { + bigint_simple_sqr(z, x, x_sw); + } + else + { + const u32bit N = karatsuba_size(z_size, x_size, x_sw); + + if(N) + { + clear_mem(workspace, 2*N); + karatsuba_sqr(z, x, N, workspace); + } + else + bigint_simple_sqr(z, x, x_sw); + } + } + +} diff --git a/src/math/mp/mp_misc.cpp b/src/math/mp/mp_misc.cpp new file mode 100644 index 000000000..77b8e6f51 --- /dev/null +++ b/src/math/mp/mp_misc.cpp @@ -0,0 +1,102 @@ +/* +* MP Misc Functions +* (C) 1999-2008 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#include +#include + +namespace Botan { + +extern "C" { + +/* +* Core Division Operation +*/ +u32bit bigint_divcore(word q, word y2, word y1, + word x3, word x2, word x1) + { + // Compute (y2,y1) * q + + word y3 = 0; + y1 = word_madd2(q, y1, &y3); + y2 = word_madd2(q, y2, &y3); + + // Return (y3,y2,y1) >? (x3,x2,x1) + + if(y3 > x3) return 1; + if(y3 < x3) return 0; + if(y2 > x2) return 1; + if(y2 < x2) return 0; + if(y1 > x1) return 1; + if(y1 < x1) return 0; + return 0; + } + +/* +* Compare two MP integers +*/ +s32bit bigint_cmp(const word x[], u32bit x_size, + const word y[], u32bit y_size) + { + if(x_size < y_size) { return (-bigint_cmp(y, y_size, x, x_size)); } + + while(x_size > y_size) + { + if(x[x_size-1]) + return 1; + x_size--; + } + + for(u32bit j = x_size; j > 0; --j) + { + if(x[j-1] > y[j-1]) + return 1; + if(x[j-1] < y[j-1]) + return -1; + } + + return 0; + } + +/* +* Do a 2-word/1-word Division +*/ +word bigint_divop(word n1, word n0, word d) + { + word high = n1 % d, quotient = 0; + + for(u32bit j = 0; j != MP_WORD_BITS; ++j) + { + word high_top_bit = (high & MP_WORD_TOP_BIT); + + high <<= 1; + high |= (n0 >> (MP_WORD_BITS-1-j)) & 1; + quotient <<= 1; + + if(high_top_bit || high >= d) + { + high -= d; + quotient |= 1; + } + } + + return quotient; + } + +/* +* Do a 2-word/1-word Modulo +*/ +word bigint_modop(word n1, word n0, word d) + { + word z = bigint_divop(n1, n0, d); + word dummy = 0; + z = word_madd2(z, d, &dummy); + return (n0-z); + } + +} + +} diff --git a/src/math/mp/mp_msvc64/info.txt b/src/math/mp/mp_msvc64/info.txt new file mode 100644 index 000000000..56ae05927 --- /dev/null +++ b/src/math/mp/mp_msvc64/info.txt @@ -0,0 +1,17 @@ +load_on dep + +mp_bits 64 + + +mp_asm.h +mp_generic:mp_asmi.h + + + +amd64 +ia64 + + + +msvc + diff --git a/src/math/mp/mp_msvc64/mp_asm.h b/src/math/mp/mp_msvc64/mp_asm.h new file mode 100644 index 000000000..8e4535c35 --- /dev/null +++ b/src/math/mp/mp_msvc64/mp_asm.h @@ -0,0 +1,61 @@ +/* +* Multiply-Add for 64-bit MSVC +* (C) 2010 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_MP_ASM_H__ +#define BOTAN_MP_ASM_H__ + +#include +#include + +#if (BOTAN_MP_WORD_BITS != 64) + #error The mp_msvc64 module requires that BOTAN_MP_WORD_BITS == 64 +#endif + +#pragma intrinsic(_umul128) + +namespace Botan { + +extern "C" { + +/* +* Word Multiply +*/ +inline word word_madd2(word a, word b, word* c) + { + word hi, lo; + lo = _umul128(a, b, &hi); + + lo += *c; + hi += (lo < *c); // carry? + + *c = hi; + return lo; + } + +/* +* Word Multiply/Add +*/ +inline word word_madd3(word a, word b, word c, word* d) + { + word hi, lo; + lo = _umul128(a, b, &hi); + + lo += c; + hi += (lo < c); // carry? + + lo += *d; + hi += (lo < *d); // carry? + + *d = hi; + return lo; + } + +} + +} + +#endif diff --git a/src/math/mp/mp_shift.cpp b/src/math/mp/mp_shift.cpp new file mode 100644 index 000000000..f1d609bfb --- /dev/null +++ b/src/math/mp/mp_shift.cpp @@ -0,0 +1,138 @@ +/* +* MP Shift Algorithms +* (C) 1999-2007 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#include +#include + +namespace Botan { + +extern "C" { + +/* +* Single Operand Left Shift +*/ +void bigint_shl1(word x[], u32bit x_size, u32bit word_shift, u32bit bit_shift) + { + if(word_shift) + { + for(u32bit j = 1; j != x_size + 1; ++j) + x[(x_size - j) + word_shift] = x[x_size - j]; + clear_mem(x, word_shift); + } + + if(bit_shift) + { + word carry = 0; + for(u32bit j = word_shift; j != x_size + word_shift + 1; ++j) + { + word temp = x[j]; + x[j] = (temp << bit_shift) | carry; + carry = (temp >> (MP_WORD_BITS - bit_shift)); + } + } + } + +/* +* Single Operand Right Shift +*/ +void bigint_shr1(word x[], u32bit x_size, u32bit word_shift, u32bit bit_shift) + { + if(x_size < word_shift) + { + clear_mem(x, x_size); + return; + } + + if(word_shift) + { + copy_mem(x, x + word_shift, x_size - word_shift); + clear_mem(x + x_size - word_shift, word_shift); + } + + if(bit_shift) + { + word carry = 0; + + u32bit top = x_size - word_shift; + + while(top >= 4) + { + word w = x[top-1]; + x[top-1] = (w >> bit_shift) | carry; + carry = (w << (MP_WORD_BITS - bit_shift)); + + w = x[top-2]; + x[top-2] = (w >> bit_shift) | carry; + carry = (w << (MP_WORD_BITS - bit_shift)); + + w = x[top-3]; + x[top-3] = (w >> bit_shift) | carry; + carry = (w << (MP_WORD_BITS - bit_shift)); + + w = x[top-4]; + x[top-4] = (w >> bit_shift) | carry; + carry = (w << (MP_WORD_BITS - bit_shift)); + + top -= 4; + } + + while(top) + { + word w = x[top-1]; + x[top-1] = (w >> bit_shift) | carry; + carry = (w << (MP_WORD_BITS - bit_shift)); + + top--; + } + } + } + +/* +* Two Operand Left Shift +*/ +void bigint_shl2(word y[], const word x[], u32bit x_size, + u32bit word_shift, u32bit bit_shift) + { + for(u32bit j = 0; j != x_size; ++j) + y[j + word_shift] = x[j]; + if(bit_shift) + { + word carry = 0; + for(u32bit j = word_shift; j != x_size + word_shift + 1; ++j) + { + word w = y[j]; + y[j] = (w << bit_shift) | carry; + carry = (w >> (MP_WORD_BITS - bit_shift)); + } + } + } + +/* +* Two Operand Right Shift +*/ +void bigint_shr2(word y[], const word x[], u32bit x_size, + u32bit word_shift, u32bit bit_shift) + { + if(x_size < word_shift) return; + + for(u32bit j = 0; j != x_size - word_shift; ++j) + y[j] = x[j + word_shift]; + if(bit_shift) + { + word carry = 0; + for(u32bit j = x_size - word_shift; j > 0; --j) + { + word w = y[j-1]; + y[j-1] = (w >> bit_shift) | carry; + carry = (w << (MP_WORD_BITS - bit_shift)); + } + } + } + +} + +} diff --git a/src/math/mp/mp_types.h b/src/math/mp/mp_types.h new file mode 100644 index 000000000..1648713ed --- /dev/null +++ b/src/math/mp/mp_types.h @@ -0,0 +1,33 @@ +/* +* Low Level MPI Types +* (C) 1999-2007 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_MPI_TYPES_H__ +#define BOTAN_MPI_TYPES_H__ + +#include + +namespace Botan { + +#if (BOTAN_MP_WORD_BITS == 8) + typedef byte word; +#elif (BOTAN_MP_WORD_BITS == 16) + typedef u16bit word; +#elif (BOTAN_MP_WORD_BITS == 32) + typedef u32bit word; +#elif (BOTAN_MP_WORD_BITS == 64) + typedef u64bit word; +#else + #error BOTAN_MP_WORD_BITS must be 8, 16, 32, or 64 +#endif + +const word MP_WORD_MASK = ~static_cast(0); +const word MP_WORD_TOP_BIT = static_cast(1) << (8*sizeof(word) - 1); +const word MP_WORD_MAX = MP_WORD_MASK; + +} + +#endif diff --git a/src/math/mp/mulop_generic/info.txt b/src/math/mp/mulop_generic/info.txt new file mode 100644 index 000000000..548d0f44b --- /dev/null +++ b/src/math/mp/mulop_generic/info.txt @@ -0,0 +1,5 @@ +load_on dep + + +mp_mulop.cpp + diff --git a/src/math/mp/mulop_generic/mp_mulop.cpp b/src/math/mp/mulop_generic/mp_mulop.cpp new file mode 100644 index 000000000..33ee2af32 --- /dev/null +++ b/src/math/mp/mulop_generic/mp_mulop.cpp @@ -0,0 +1,77 @@ +/* +* Simple O(N^2) Multiplication and Squaring +* (C) 1999-2008 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#include +#include +#include +#include + +namespace Botan { + +extern "C" { + +/* +* Simple O(N^2) Multiplication +*/ +void bigint_simple_mul(word z[], const word x[], u32bit x_size, + const word y[], u32bit y_size) + { + const u32bit x_size_8 = x_size - (x_size % 8); + + clear_mem(z, x_size + y_size); + + for(u32bit i = 0; i != y_size; ++i) + { + const word y_i = y[i]; + + word carry = 0; + + for(u32bit j = 0; j != x_size_8; j += 8) + carry = word8_madd3(z + i + j, x + j, y_i, carry); + + for(u32bit j = x_size_8; j != x_size; ++j) + z[i+j] = word_madd3(x[j], y_i, z[i+j], &carry); + + z[x_size+i] = carry; + } + } + +/* +* Simple O(N^2) Squaring + +This is exactly the same algorithm as bigint_simple_mul, +however because C/C++ compilers suck at alias analysis it +is good to have the version where the compiler knows +that x == y + +There is an O(n^1.5) squaring algorithm specified in Handbook of +Applied Cryptography, chapter 14 +*/ +void bigint_simple_sqr(word z[], const word x[], u32bit x_size) + { + const u32bit x_size_8 = x_size - (x_size % 8); + + clear_mem(z, 2*x_size); + + for(u32bit i = 0; i != x_size; ++i) + { + const word x_i = x[i]; + word carry = 0; + + for(u32bit j = 0; j != x_size_8; j += 8) + carry = word8_madd3(z + i + j, x + j, x_i, carry); + + for(u32bit j = x_size_8; j != x_size; ++j) + z[i+j] = word_madd3(x[j], x_i, z[i+j], &carry); + + z[x_size+i] = carry; + } + } + +} + +} -- cgit v1.2.3