aboutsummaryrefslogtreecommitdiffstats
path: root/src/bigint/mulop_amd64/mp_mulop_amd64.S
diff options
context:
space:
mode:
Diffstat (limited to 'src/bigint/mulop_amd64/mp_mulop_amd64.S')
-rw-r--r--src/bigint/mulop_amd64/mp_mulop_amd64.S128
1 files changed, 128 insertions, 0 deletions
diff --git a/src/bigint/mulop_amd64/mp_mulop_amd64.S b/src/bigint/mulop_amd64/mp_mulop_amd64.S
new file mode 100644
index 000000000..e5bba23fb
--- /dev/null
+++ b/src/bigint/mulop_amd64/mp_mulop_amd64.S
@@ -0,0 +1,128 @@
+/*************************************************
+* Simple O(N^2) Multiplication and Squaring *
+* (C) 1999-2008 Jack Lloyd *
+*************************************************/
+
+#include <botan/asm_macr.h>
+
+START_LISTING(mp_mulop.S)
+
+#if 0 /* Disabled C sketch, apparently the reference for the assembly routine of the same name; never compiled. */
+void bigint_simple_sqr(word z[], const word x[], u32bit x_size)
+ {
+ const u32bit blocks = x_size - (x_size % 8); /* x_size rounded down to a multiple of 8; only the commented-out loops use it */
+
+ clear_mem(z, 2*x_size); /* zero the first 2*x_size entries of z (assuming clear_mem counts elements) */
+
+ for(u32bit i = 0; i != x_size; ++i)
+ {
+ word carry = 0; /* carry restarts at zero for every row i */
+
+ /*
+ for(u32bit j = 0; j != blocks; j += 8)
+ carry = word8_madd3(z + i + j, x + j, x[i], carry);
+
+ for(u32bit j = blocks; j != x_size; ++j)
+ z[i+j] = word_madd3(x[j], x[i], z[i+j], &carry);
+ */
+
+/* Plain one-word-at-a-time inner loop: z[i+j] is updated from x[j], x[i], z[i+j] and the running carry. */
+ for(u32bit j = 0; j != x_size; ++j)
+ z[i+j] = word_madd3(x[j], x[i], z[i+j], &carry);
+/* NOTE(review): the loop below looks like a pasted helper body: a, b, c, d are undeclared and it returns from a void function. */
+ for(u32bit j = 0; j != x_size; ++j)
+ {
+ dword z = (dword)a * b + c + *d; /* double-width product plus two addends; this local z shadows the z[] parameter */
+ *d = (word)(z >> BOTAN_MP_WORD_BITS); /* upper word is written back through d */
+ return (word)z; /* lower word is the returned value */
+ }
+/* NOTE(review): the statement below sits outside both inner loops, so j is not in scope there; */
+/* it repeats the body of the first inner loop and looks like leftover editing residue -- */
+/* confirm before ever enabling this block. */
+ z[i+j] = word_madd3(x[j], x[i], z[i+j], &carry);
+
+ }
+
+
+
+ z[x_size+i] = carry; /* NOTE(review): after the closing brace above, i and carry appear to be out of scope here -- confirm brace nesting */
+ }
+ }
+
+#endif
+
+START_FUNCTION(bigint_simple_sqr)
+/* Argument and scratch-register aliases; ARG_n / TEMP_n are presumably supplied by <botan/asm_macr.h>. */
+#define Z_ARR ARG_1 /* first argument: z[] per the C reference signature */
+#define X_ARR ARG_2 /* second argument: x[] per the C reference signature */
+//#define X_SIZE ARG_3_32
+/* NOTE(review): LOOP_CTR and Y are used further down but are not defined anywhere in this file as shown. */
+#define CARRY TEMP_1 /* carry word threaded through MULADD_OP and returned at the end */
+#define Z_WORD TEMP_2 /* holds the z[] word being updated inside MULADD_OP */
+#define LOOP_I TEMP_3 /* zeroed and compared with X_SIZE once below; otherwise unused */
+#define LOOP_J TEMP_4 /* NOTE(review): never referenced in the code below */
+#define X_SIZE TEMP_5 /* private copy of the third argument */
+#define MUL_LO %rax /* loaded with an x[] word before MUL, added into Z_WORD afterwards */
+// arg 3, xsize
+#define MUL_HI %rdx /* copied into CARRY right after MUL */
+
+// need arg3 == rdx for multiply
+ ASSIGN(X_SIZE, ARG3_32) /* NOTE(review): spelled ARG_3_32 in the commented-out define above -- confirm the macro name */
+
+ ZEROIZE(CARRY)
+
+ ZEROIZE(LOOP_I)
+
+.LOOP_ZEROIZE_Z:
+/* NOTE(review): despite the label name, no store to Z_ARR appears in this loop stub. */
+ cmp LOOP_I, X_SIZE
+/* NOTE(review): the compare above is followed by neither a branch nor a store, and nothing */
+/* visible jumps back to .LOOP_ZEROIZE_Z, so the zeroing loop looks unfinished. */
+/* From here on the code tests LOOP_CTR, which is never defined or initialised in view; */
+/* X_SIZE and LOOP_I are not used again. Confirm the intended counter before relying on this. */
+ JUMP_IF_ZERO(LOOP_CTR, .L_MULADD_DONE) /* presumably: nothing to do when the count is zero */
+ JUMP_IF_LT(LOOP_CTR, 8, .LOOP_MULADD1) /* presumably: fewer than 8 words, skip the unrolled loop */
+/* MULADD_OP(N): one multiply-accumulate step on element N; appears to mirror word_madd3 in the disabled C reference. */
+#define MULADD_OP(N) /* order matters: flag-consuming steps directly follow the ADDs (see notes) */ \
+ ASSIGN(MUL_LO, ARRAY8(X_ARR, N)) ; /* fetch x[N] as one multiply operand */ \
+ ASSIGN(Z_WORD, ARRAY8(Z_ARR, N)) ; /* fetch the current z[N] */ \
+ MUL(Y) ; /* NOTE(review): Y is undefined in view; presumably the other operand, product in MUL_HI:MUL_LO */ \
+ ADD(Z_WORD, CARRY) ; /* add the incoming carry word to z[N] */ \
+ ASSIGN(CARRY, MUL_HI) ; /* new carry starts as the high product word; assumes ASSIGN leaves flags intact */ \
+ ADD_LAST_CARRY(CARRY) ; /* presumably folds the carry flag of the ADD above into CARRY */ \
+ ADD(Z_WORD, MUL_LO) ; /* add the low product word to z[N] */ \
+ ADD_LAST_CARRY(CARRY) ; /* presumably folds the carry flag of that second ADD into CARRY */ \
+ ASSIGN(ARRAY8(Z_ARR, N), Z_WORD) /* store the updated word back to z[N] */
+/* Unrolled path: eight MULADD_OP steps per iteration. */
+.LOOP_MULADD8:
+ MULADD_OP(0)
+ MULADD_OP(1)
+ MULADD_OP(2)
+ MULADD_OP(3)
+ MULADD_OP(4)
+ MULADD_OP(5)
+ MULADD_OP(6)
+ MULADD_OP(7)
+/* Step past the eight entries just processed: 64 bytes, assuming ARRAY8 indexes 8-byte words. */
+ SUB_IMM(LOOP_CTR, 8)
+ ADD_IMM(Z_ARR, 64)
+ ADD_IMM(X_ARR, 64)
+ cmp IMM(8), LOOP_CTR
+ jge .LOOP_MULADD8 /* AT&T operand order: loops while LOOP_CTR >= 8 (signed), assuming IMM(8) is the immediate 8 */
+
+ JUMP_IF_ZERO(LOOP_CTR, .L_MULADD_DONE) /* presumably: no leftover words, skip the tail loop */
+/* Tail path: one MULADD_OP step per iteration for the remaining words. */
+ALIGN
+.LOOP_MULADD1:
+ MULADD_OP(0)
+
+ SUB_IMM(LOOP_CTR, 1)
+ ADD_IMM(Z_ARR, 8) /* advance both pointers by one 8-byte entry */
+ ADD_IMM(X_ARR, 8)
+
+ cmp IMM(0), LOOP_CTR
+ jne .LOOP_MULADD1 /* keep going until the counter reaches zero */
+/* NOTE(review): the C reference is void and loops over i and j; this body is a single pass that returns a carry -- confirm intent. */
+.L_MULADD_DONE:
+ RETURN_VALUE_IS(CARRY)
+END_FUNCTION(bigint_simple_square) /* NOTE(review): name differs from START_FUNCTION(bigint_simple_sqr) -- confirm */