aboutsummaryrefslogtreecommitdiffstats
path: root/src/asm/mp_amd64
diff options
context:
space:
mode:
Diffstat (limited to 'src/asm/mp_amd64')
-rw-r--r--src/asm/mp_amd64/bswap.h36
-rw-r--r--src/asm/mp_amd64/modinfo.txt21
-rw-r--r--src/asm/mp_amd64/mp_asm.h67
-rw-r--r--src/asm/mp_amd64/mp_asmi.h233
-rw-r--r--src/asm/mp_amd64/mp_mulop.cpp94
5 files changed, 451 insertions, 0 deletions
diff --git a/src/asm/mp_amd64/bswap.h b/src/asm/mp_amd64/bswap.h
new file mode 100644
index 000000000..3c77b460c
--- /dev/null
+++ b/src/asm/mp_amd64/bswap.h
@@ -0,0 +1,36 @@
+/*************************************************
+* Byte Swapping Operations Header File *
+* (C) 1999-2008 Jack Lloyd *
+*************************************************/
+
+#ifndef BOTAN_BSWAP_H__
+#define BOTAN_BSWAP_H__
+
+#include <botan/types.h>
+#include <botan/rotate.h>
+
+namespace Botan {
+
+/*************************************************
+* Byte Swapping Functions *
+*************************************************/
+inline u16bit reverse_bytes(u16bit input)
+ {
+ return rotate_left(input, 8);
+ }
+
+inline u32bit reverse_bytes(u32bit input)
+ {
+ asm("bswapl %0" : "=r" (input) : "0" (input));
+ return input;
+ }
+
+inline u64bit reverse_bytes(u64bit input)
+ {
+ asm("bswapq %0" : "=r" (input) : "0" (input));
+ return input;
+ }
+
+}
+
+#endif
diff --git a/src/asm/mp_amd64/modinfo.txt b/src/asm/mp_amd64/modinfo.txt
new file mode 100644
index 000000000..a042a3976
--- /dev/null
+++ b/src/asm/mp_amd64/modinfo.txt
@@ -0,0 +1,21 @@
+realname "x86-64 MPI Assembler Core"
+
+mp_bits 64
+
+load_on asm_ok
+
+<replace>
+bswap.h
+mp_asm.h
+mp_asmi.h
+#mp_mulop.cpp
+</replace>
+
+<arch>
+amd64
+</arch>
+
+<cc>
+gcc
+icc
+</cc>
diff --git a/src/asm/mp_amd64/mp_asm.h b/src/asm/mp_amd64/mp_asm.h
new file mode 100644
index 000000000..eca7bae6c
--- /dev/null
+++ b/src/asm/mp_amd64/mp_asm.h
@@ -0,0 +1,67 @@
+/*************************************************
+* Lowest Level MPI Algorithms Header File *
+* (C) 1999-2008 Jack Lloyd *
+* 2006 Luca Piccarreta *
+*************************************************/
+
+#ifndef BOTAN_MP_ASM_H__
+#define BOTAN_MP_ASM_H__
+
+#include <botan/mp_types.h>
+
+#if (BOTAN_MP_WORD_BITS != 64)
+ #error The mp_amd64 module requires that BOTAN_MP_WORD_BITS == 64
+#endif
+
+namespace Botan {
+
+extern "C" {
+
+/*************************************************
+* Helper Macros for amd64 Assembly *
+*************************************************/
+#define ASM(x) x "\n\t"
+
+/*************************************************
+* Word Multiply *
+*************************************************/
+inline word word_madd2(word a, word b, word* c)
+ {
+ asm(
+ ASM("mulq %[b]")
+ ASM("addq %[c],%[a]")
+ ASM("adcq $0,%[carry]")
+
+ : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c)
+ : "0"(a), "1"(b), [c]"g"(*c) : "cc");
+
+ return a;
+ }
+
+/*************************************************
+* Word Multiply/Add *
+*************************************************/
+inline word word_madd3(word a, word b, word c, word* d)
+ {
+ asm(
+ ASM("mulq %[b]")
+
+ ASM("addq %[c],%[a]")
+ ASM("adcq $0,%[carry]")
+
+ ASM("addq %[d],%[a]")
+ ASM("adcq $0,%[carry]")
+
+ : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d)
+ : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc");
+
+ return a;
+ }
+
+#undef ASM
+
+}
+
+}
+
+#endif
diff --git a/src/asm/mp_amd64/mp_asmi.h b/src/asm/mp_amd64/mp_asmi.h
new file mode 100644
index 000000000..16632a38d
--- /dev/null
+++ b/src/asm/mp_amd64/mp_asmi.h
@@ -0,0 +1,233 @@
+/*************************************************
+* Lowest Level MPI Algorithms Header File *
+* (C) 1999-2007 Jack Lloyd *
+* 2006 Luca Piccarreta *
+*************************************************/
+
+#ifndef BOTAN_MP_ASM_INTERNAL_H__
+#define BOTAN_MP_ASM_INTERNAL_H__
+
+#include <botan/mp_asm.h>
+
+namespace Botan {
+
+extern "C" {
+
+/*************************************************
+* Helper Macros for amd64 Assembly *
+*************************************************/
+#ifndef ASM
+ #define ASM(x) x "\n\t"
+#endif
+
+#define ADDSUB2_OP(OPERATION, INDEX) \
+ ASM("movq 8*" #INDEX "(%[y]), %[carry]") \
+ ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])") \
+
+#define ADDSUB3_OP(OPERATION, INDEX) \
+ ASM("movq 8*" #INDEX "(%[x]), %[carry]") \
+ ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \
+ ASM("movq %[carry], 8*" #INDEX "(%[z])") \
+
+#define LINMUL_OP(WRITE_TO, INDEX) \
+ ASM("movq 8*" #INDEX "(%[x]),%%rax") \
+ ASM("mulq %[y]") \
+ ASM("addq %[carry],%%rax") \
+ ASM("adcq $0,%%rdx") \
+ ASM("movq %%rdx,%[carry]") \
+ ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")
+
+#define MULADD_OP(IGNORED, INDEX) \
+ ASM("movq 8*" #INDEX "(%[x]),%%rax") \
+ ASM("mulq %[y]") \
+ ASM("addq %[carry],%%rax") \
+ ASM("adcq $0,%%rdx") \
+ ASM("addq 8*" #INDEX "(%[z]),%%rax") \
+ ASM("adcq $0,%%rdx") \
+ ASM("movq %%rdx,%[carry]") \
+ ASM("movq %%rax, 8*" #INDEX " (%[z])")
+
+#define DO_8_TIMES(MACRO, ARG) \
+ MACRO(ARG, 0) \
+ MACRO(ARG, 1) \
+ MACRO(ARG, 2) \
+ MACRO(ARG, 3) \
+ MACRO(ARG, 4) \
+ MACRO(ARG, 5) \
+ MACRO(ARG, 6) \
+ MACRO(ARG, 7)
+
+#define ADD_OR_SUBTRACT(CORE_CODE) \
+ ASM("rorq %[carry]") \
+ CORE_CODE \
+ ASM("sbbq %[carry],%[carry]") \
+ ASM("negq %[carry]")
+
+/*************************************************
+* Word Addition *
+*************************************************/
+inline word word_add(word x, word y, word* carry)
+ {
+ asm(
+ ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]"))
+ : [x]"=r"(x), [carry]"=r"(*carry)
+ : "0"(x), [y]"rm"(y), "1"(*carry)
+ : "cc");
+ return x;
+ }
+
+/*************************************************
+* Eight Word Block Addition, Two Argument *
+*************************************************/
+inline word word8_add2(word x[8], const word y[8], word carry)
+ {
+ asm(
+ ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq"))
+ : [carry]"=r"(carry)
+ : [x]"r"(x), [y]"r"(y), "0"(carry)
+ : "cc", "memory");
+ return carry;
+ }
+
+/*************************************************
+* Eight Word Block Addition, Three Argument *
+*************************************************/
+inline word word8_add3(word z[8], const word x[8], const word y[8], word carry)
+ {
+ asm(
+ ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq"))
+ : [carry]"=r"(carry)
+ : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
+ : "cc", "memory");
+ return carry;
+ }
+
+/*************************************************
+* Word Subtraction *
+*************************************************/
+inline word word_sub(word x, word y, word* carry)
+ {
+ asm(
+ ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]"))
+ : [x]"=r"(x), [carry]"=r"(*carry)
+ : "0"(x), [y]"rm"(y), "1"(*carry)
+ : "cc");
+ return x;
+ }
+
+/*************************************************
+* Eight Word Block Subtraction, Two Argument *
+*************************************************/
+inline word word8_sub2(word x[8], const word y[8], word carry)
+ {
+ asm(
+ ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq"))
+ : [carry]"=r"(carry)
+ : [x]"r"(x), [y]"r"(y), "0"(carry)
+ : "cc", "memory");
+ return carry;
+ }
+
+/*************************************************
+* Eight Word Block Subtraction, Three Argument *
+*************************************************/
+inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry)
+ {
+ asm(
+ ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
+ : [carry]"=r"(carry)
+ : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
+ : "cc", "memory");
+ return carry;
+ }
+
+/*************************************************
+* Eight Word Block Linear Multiplication *
+*************************************************/
+inline word word8_linmul2(word x[8], word y, word carry)
+ {
+ asm(
+ DO_8_TIMES(LINMUL_OP, "x")
+ : [carry]"=r"(carry)
+ : [x]"r"(x), [y]"rm"(y), "0"(carry)
+ : "cc", "%rax", "%rdx");
+ return carry;
+ }
+
+/*************************************************
+* Eight Word Block Linear Multiplication *
+*************************************************/
+inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
+ {
+ asm(
+ DO_8_TIMES(LINMUL_OP, "z")
+ : [carry]"=r"(carry)
+ : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
+ : "cc", "%rax", "%rdx");
+ return carry;
+ }
+
+/*************************************************
+* Eight Word Block Multiply/Add *
+*************************************************/
+inline word word8_madd3(word z[8], const word x[8], word y, word carry)
+ {
+ asm(
+ DO_8_TIMES(MULADD_OP, "")
+ : [carry]"=r"(carry)
+ : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
+ : "cc", "%rax", "%rdx");
+ return carry;
+ }
+
+/*************************************************
+* Multiply-Add Accumulator *
+*************************************************/
+inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y)
+ {
+ asm(
+ ASM("mulq %[y]")
+
+ ASM("addq %[x],%[w0]")
+ ASM("adcq %[y],%[w1]")
+ ASM("adcq $0,%[w2]")
+
+ : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
+ : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2)
+ : "cc");
+ }
+
+/*************************************************
+* Multiply-Add Accumulator *
+*************************************************/
+inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y)
+ {
+ asm(
+ ASM("mulq %[y]")
+
+ ASM("addq %[x],%[w0]")
+ ASM("adcq %[y],%[w1]")
+ ASM("adcq $0,%[w2]")
+
+ ASM("addq %[x],%[w0]")
+ ASM("adcq %[y],%[w1]")
+ ASM("adcq $0,%[w2]")
+
+ : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
+ : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2)
+ : "cc");
+ }
+
+
+#undef ASM
+#undef DO_8_TIMES
+#undef ADD_OR_SUBTRACT
+#undef ADDSUB2_OP
+#undef ADDSUB3_OP
+#undef LINMUL_OP
+#undef MULADD_OP
+
+}
+
+}
+#endif
diff --git a/src/asm/mp_amd64/mp_mulop.cpp b/src/asm/mp_amd64/mp_mulop.cpp
new file mode 100644
index 000000000..d1aa51489
--- /dev/null
+++ b/src/asm/mp_amd64/mp_mulop.cpp
@@ -0,0 +1,94 @@
+/*************************************************
+* Simple O(N^2) Multiplication and Squaring *
+* (C) 1999-2008 Jack Lloyd *
+*************************************************/
+
+#include <botan/mp_asm.h>
+#include <botan/mp_asmi.h>
+#include <botan/mp_core.h>
+#include <botan/mem_ops.h>
+
+namespace Botan {
+
+extern "C" {
+
+/*************************************************
+* Simple O(N^2) Multiplication *
+*************************************************/
+void bigint_simple_mul(word z[], const word x[], u32bit x_size,
+ const word y[], u32bit y_size)
+ {
+ const u32bit blocks = x_size - (x_size % 8);
+
+ clear_mem(z, x_size + y_size);
+
+ for(u32bit i = 0; i != y_size; ++i)
+ {
+ word carry = 0;
+
+ for(u32bit j = 0; j != blocks; j += 8)
+ carry = word8_madd3(z + i + j, x + j, y[i], carry);
+
+ for(u32bit j = blocks; j != x_size; ++j)
+ z[i+j] = word_madd3(x[j], y[i], z[i+j], &carry);
+
+ z[x_size+i] = carry;
+ }
+ }
+
+inline word word_sqr(word x,
+
+/*************************************************
+* Simple O(N^2) Squaring
+
+This is exactly the same algorithm as bigint_simple_mul,
+however because C/C++ compilers suck at alias analysis it
+is good to have the version where the compiler knows
+that x == y
+*************************************************/
+void bigint_simple_sqr(word z[], const word x[], u32bit x_size)
+ {
+ clear_mem(z, 2*x_size);
+
+ for(u32bit i = 0; i != x_size; ++i)
+ {
+ const word x_i = x[i];
+
+ word carry = z[2*i];
+ z[2*i] = word_madd2(x_i, x_i, z[2*i], &carry);
+
+ for(u32bit j = i; j != x_size; ++j)
+ {
+ // z[i+j] = z[i+j] + 2 * x[j] * x_i + carry;
+
+ /*
+ load z[i+j] into register
+ load x[j] into %hi
+ mulq %[x_i] -> x[i] * x[j] -> %lo:%hi
+ shlq %lo, $1
+
+ // put carry bit (cf) from %lo into %temp
+ xorl %temp
+ adcq $0, %temp
+
+ // high bit of lo now in cf
+ shl %hi, $1
+ // add in lowest bid from %lo
+ orl %temp, %hi
+
+ addq %[c], %[lo]
+ adcq $0, %[hi]
+ addq %[z_ij], %[lo]
+ adcq $0, %[hi]
+
+ */
+
+ }
+
+ z[x_size+i] = carry;
+ }
+ }
+
+}
+
+}