diff options
Diffstat (limited to 'src/asm/mp_amd64')
-rw-r--r-- | src/asm/mp_amd64/bswap.h | 36 | ||||
-rw-r--r-- | src/asm/mp_amd64/modinfo.txt | 21 | ||||
-rw-r--r-- | src/asm/mp_amd64/mp_asm.h | 67 | ||||
-rw-r--r-- | src/asm/mp_amd64/mp_asmi.h | 233 | ||||
-rw-r--r-- | src/asm/mp_amd64/mp_mulop.cpp | 94 |
5 files changed, 451 insertions, 0 deletions
diff --git a/src/asm/mp_amd64/bswap.h b/src/asm/mp_amd64/bswap.h new file mode 100644 index 000000000..3c77b460c --- /dev/null +++ b/src/asm/mp_amd64/bswap.h @@ -0,0 +1,36 @@ +/************************************************* +* Byte Swapping Operations Header File * +* (C) 1999-2008 Jack Lloyd * +*************************************************/ + +#ifndef BOTAN_BSWAP_H__ +#define BOTAN_BSWAP_H__ + +#include <botan/types.h> +#include <botan/rotate.h> + +namespace Botan { + +/************************************************* +* Byte Swapping Functions * +*************************************************/ +inline u16bit reverse_bytes(u16bit input) + { + return rotate_left(input, 8); + } + +inline u32bit reverse_bytes(u32bit input) + { + asm("bswapl %0" : "=r" (input) : "0" (input)); + return input; + } + +inline u64bit reverse_bytes(u64bit input) + { + asm("bswapq %0" : "=r" (input) : "0" (input)); + return input; + } + +} + +#endif diff --git a/src/asm/mp_amd64/modinfo.txt b/src/asm/mp_amd64/modinfo.txt new file mode 100644 index 000000000..a042a3976 --- /dev/null +++ b/src/asm/mp_amd64/modinfo.txt @@ -0,0 +1,21 @@ +realname "x86-64 MPI Assembler Core" + +mp_bits 64 + +load_on asm_ok + +<replace> +bswap.h +mp_asm.h +mp_asmi.h +#mp_mulop.cpp +</replace> + +<arch> +amd64 +</arch> + +<cc> +gcc +icc +</cc> diff --git a/src/asm/mp_amd64/mp_asm.h b/src/asm/mp_amd64/mp_asm.h new file mode 100644 index 000000000..eca7bae6c --- /dev/null +++ b/src/asm/mp_amd64/mp_asm.h @@ -0,0 +1,67 @@ +/************************************************* +* Lowest Level MPI Algorithms Header File * +* (C) 1999-2008 Jack Lloyd * +* 2006 Luca Piccarreta * +*************************************************/ + +#ifndef BOTAN_MP_ASM_H__ +#define BOTAN_MP_ASM_H__ + +#include <botan/mp_types.h> + +#if (BOTAN_MP_WORD_BITS != 64) + #error The mp_amd64 module requires that BOTAN_MP_WORD_BITS == 64 +#endif + +namespace Botan { + +extern "C" { + +/************************************************* +* Helper Macros for amd64 Assembly * +*************************************************/ +#define ASM(x) x "\n\t" + +/************************************************* +* Word Multiply * +*************************************************/ +inline word word_madd2(word a, word b, word* c) + { + asm( + ASM("mulq %[b]") + ASM("addq %[c],%[a]") + ASM("adcq $0,%[carry]") + + : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) + : "0"(a), "1"(b), [c]"g"(*c) : "cc"); + + return a; + } + +/************************************************* +* Word Multiply/Add * +*************************************************/ +inline word word_madd3(word a, word b, word c, word* d) + { + asm( + ASM("mulq %[b]") + + ASM("addq %[c],%[a]") + ASM("adcq $0,%[carry]") + + ASM("addq %[d],%[a]") + ASM("adcq $0,%[carry]") + + : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) + : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); + + return a; + } + +#undef ASM + +} + +} + +#endif diff --git a/src/asm/mp_amd64/mp_asmi.h b/src/asm/mp_amd64/mp_asmi.h new file mode 100644 index 000000000..16632a38d --- /dev/null +++ b/src/asm/mp_amd64/mp_asmi.h @@ -0,0 +1,233 @@ +/************************************************* +* Lowest Level MPI Algorithms Header File * +* (C) 1999-2007 Jack Lloyd * +* 2006 Luca Piccarreta * +*************************************************/ + +#ifndef BOTAN_MP_ASM_INTERNAL_H__ +#define BOTAN_MP_ASM_INTERNAL_H__ + +#include <botan/mp_asm.h> + +namespace Botan { + +extern "C" { + +/************************************************* +* Helper Macros for amd64 Assembly * +*************************************************/ +#ifndef ASM + #define ASM(x) x "\n\t" +#endif + +#define ADDSUB2_OP(OPERATION, INDEX) \ + ASM("movq 8*" #INDEX "(%[y]), %[carry]") \ + ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])") \ + +#define ADDSUB3_OP(OPERATION, INDEX) \ + ASM("movq 8*" #INDEX "(%[x]), %[carry]") \ + ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \ + ASM("movq %[carry], 8*" #INDEX "(%[z])") \ + +#define LINMUL_OP(WRITE_TO, INDEX) \ + ASM("movq 8*" #INDEX "(%[x]),%%rax") \ + ASM("mulq %[y]") \ + ASM("addq %[carry],%%rax") \ + ASM("adcq $0,%%rdx") \ + ASM("movq %%rdx,%[carry]") \ + ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])") + +#define MULADD_OP(IGNORED, INDEX) \ + ASM("movq 8*" #INDEX "(%[x]),%%rax") \ + ASM("mulq %[y]") \ + ASM("addq %[carry],%%rax") \ + ASM("adcq $0,%%rdx") \ + ASM("addq 8*" #INDEX "(%[z]),%%rax") \ + ASM("adcq $0,%%rdx") \ + ASM("movq %%rdx,%[carry]") \ + ASM("movq %%rax, 8*" #INDEX " (%[z])") + +#define DO_8_TIMES(MACRO, ARG) \ + MACRO(ARG, 0) \ + MACRO(ARG, 1) \ + MACRO(ARG, 2) \ + MACRO(ARG, 3) \ + MACRO(ARG, 4) \ + MACRO(ARG, 5) \ + MACRO(ARG, 6) \ + MACRO(ARG, 7) + +#define ADD_OR_SUBTRACT(CORE_CODE) \ + ASM("rorq %[carry]") \ + CORE_CODE \ + ASM("sbbq %[carry],%[carry]") \ + ASM("negq %[carry]") + +/************************************************* +* Word Addition * +*************************************************/ +inline word word_add(word x, word y, word* carry) + { + asm( + ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + } + +/************************************************* +* Eight Word Block Addition, Two Argument * +*************************************************/ +inline word word8_add2(word x[8], const word y[8], word carry) + { + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + } + +/************************************************* +* Eight Word Block Addition, Three Argument * +*************************************************/ +inline word word8_add3(word z[8], const word x[8], const word y[8], word carry) + { + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + } + +/************************************************* +* Word Subtraction * +*************************************************/ +inline word word_sub(word x, word y, word* carry) + { + asm( + ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + } + +/************************************************* +* Eight Word Block Subtraction, Two Argument * +*************************************************/ +inline word word8_sub2(word x[8], const word y[8], word carry) + { + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + } + +/************************************************* +* Eight Word Block Subtraction, Three Argument * +*************************************************/ +inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry) + { + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + } + +/************************************************* +* Eight Word Block Linear Multiplication * +*************************************************/ +inline word word8_linmul2(word x[8], word y, word carry) + { + asm( + DO_8_TIMES(LINMUL_OP, "x") + : [carry]"=r"(carry) + : [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%rax", "%rdx"); + return carry; + } + +/************************************************* +* Eight Word Block Linear Multiplication * +*************************************************/ +inline word word8_linmul3(word z[8], const word x[8], word y, word carry) + { + asm( + DO_8_TIMES(LINMUL_OP, "z") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%rax", "%rdx"); + return carry; + } + +/************************************************* +* Eight Word Block Multiply/Add * +*************************************************/ +inline word word8_madd3(word z[8], const word x[8], word y, word carry) + { + asm( + DO_8_TIMES(MULADD_OP, "") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%rax", "%rdx"); + return carry; + } + +/************************************************* +* Multiply-Add Accumulator * +*************************************************/ +inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) + { + asm( + ASM("mulq %[y]") + + ASM("addq %[x],%[w0]") + ASM("adcq %[y],%[w1]") + ASM("adcq $0,%[w2]") + + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + } + +/************************************************* +* Multiply-Add Accumulator * +*************************************************/ +inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) + { + asm( + ASM("mulq %[y]") + + ASM("addq %[x],%[w0]") + ASM("adcq %[y],%[w1]") + ASM("adcq $0,%[w2]") + + ASM("addq %[x],%[w0]") + ASM("adcq %[y],%[w1]") + ASM("adcq $0,%[w2]") + + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + } + + +#undef ASM +#undef DO_8_TIMES +#undef ADD_OR_SUBTRACT +#undef ADDSUB2_OP +#undef ADDSUB3_OP +#undef LINMUL_OP +#undef MULADD_OP + +} + +} +#endif diff --git a/src/asm/mp_amd64/mp_mulop.cpp b/src/asm/mp_amd64/mp_mulop.cpp new file mode 100644 index 000000000..d1aa51489 --- /dev/null +++ b/src/asm/mp_amd64/mp_mulop.cpp @@ -0,0 +1,94 @@ +/************************************************* +* Simple O(N^2) Multiplication and Squaring * +* (C) 1999-2008 Jack Lloyd * +*************************************************/ + +#include <botan/mp_asm.h> +#include <botan/mp_asmi.h> +#include <botan/mp_core.h> +#include <botan/mem_ops.h> + +namespace Botan { + +extern "C" { + +/************************************************* +* Simple O(N^2) Multiplication * +*************************************************/ +void bigint_simple_mul(word z[], const word x[], u32bit x_size, + const word y[], u32bit y_size) + { + const u32bit blocks = x_size - (x_size % 8); + + clear_mem(z, x_size + y_size); + + for(u32bit i = 0; i != y_size; ++i) + { + word carry = 0; + + for(u32bit j = 0; j != blocks; j += 8) + carry = word8_madd3(z + i + j, x + j, y[i], carry); + + for(u32bit j = blocks; j != x_size; ++j) + z[i+j] = word_madd3(x[j], y[i], z[i+j], &carry); + + z[x_size+i] = carry; + } + } + +inline word word_sqr(word x, + +/************************************************* +* Simple O(N^2) Squaring + +This is exactly the same algorithm as bigint_simple_mul, +however because C/C++ compilers suck at alias analysis it +is good to have the version where the compiler knows +that x == y +*************************************************/ +void bigint_simple_sqr(word z[], const word x[], u32bit x_size) + { + clear_mem(z, 2*x_size); + + for(u32bit i = 0; i != x_size; ++i) + { + const word x_i = x[i]; + + word carry = z[2*i]; + z[2*i] = word_madd2(x_i, x_i, z[2*i], &carry); + + for(u32bit j = i; j != x_size; ++j) + { + // z[i+j] = z[i+j] + 2 * x[j] * x_i + carry; + + /* + load z[i+j] into register + load x[j] into %hi + mulq %[x_i] -> x[i] * x[j] -> %lo:%hi + shlq %lo, $1 + + // put carry bit (cf) from %lo into %temp + xorl %temp + adcq $0, %temp + + // high bit of lo now in cf + shl %hi, $1 + // add in lowest bid from %lo + orl %temp, %hi + + addq %[c], %[lo] + adcq $0, %[hi] + addq %[z_ij], %[lo] + adcq $0, %[hi] + + */ + + } + + z[x_size+i] = carry; + } + } + +} + +} |