diff options
Diffstat (limited to 'src/bigint/mulop_amd64/mp_mulop_amd64.S')
-rw-r--r-- | src/bigint/mulop_amd64/mp_mulop_amd64.S | 128 |
1 files changed, 128 insertions, 0 deletions
diff --git a/src/bigint/mulop_amd64/mp_mulop_amd64.S b/src/bigint/mulop_amd64/mp_mulop_amd64.S new file mode 100644 index 000000000..e5bba23fb --- /dev/null +++ b/src/bigint/mulop_amd64/mp_mulop_amd64.S @@ -0,0 +1,128 @@ +/************************************************* +* Simple O(N^2) Multiplication and Squaring * +* (C) 1999-2008 Jack Lloyd * +*************************************************/ + +#include <botan/asm_macr.h> + +START_LISTING(mp_mulop.S) + +#if 0 +void bigint_simple_sqr(word z[], const word x[], u32bit x_size) + { + const u32bit blocks = x_size - (x_size % 8); + + clear_mem(z, 2*x_size); + + for(u32bit i = 0; i != x_size; ++i) + { + word carry = 0; + + /* + for(u32bit j = 0; j != blocks; j += 8) + carry = word8_madd3(z + i + j, x + j, x[i], carry); + + for(u32bit j = blocks; j != x_size; ++j) + z[i+j] = word_madd3(x[j], x[i], z[i+j], &carry); + */ + + + for(u32bit j = 0; j != x_size; ++j) + z[i+j] = word_madd3(x[j], x[i], z[i+j], &carry); + + for(u32bit j = 0; j != x_size; ++j) + { + dword z = (dword)a * b + c + *d; + *d = (word)(z >> BOTAN_MP_WORD_BITS); + return (word)z; + } + + + + z[i+j] = word_madd3(x[j], x[i], z[i+j], &carry); + + } + + + + z[x_size+i] = carry; + } + } + +#endif + +START_FUNCTION(bigint_simple_sqr) + +#define Z_ARR ARG_1 +#define X_ARR ARG_2 +//#define X_SIZE ARG_3_32 + +#define CARRY TEMP_1 +#define Z_WORD TEMP_2 +#define LOOP_I TEMP_3 +#define LOOP_J TEMP_4 +#define X_SIZE TEMP_5 +#define MUL_LO %rax +// arg 3, xsize +#define MUL_HI %rdx + +// need arg3 == rdx for multiply + ASSIGN(X_SIZE, ARG3_32) + + ZEROIZE(CARRY) + + ZEROIZE(LOOP_I) + +.LOOP_ZEROIZE_Z: + + cmp LOOP_I, X_SIZE + + + + + JUMP_IF_ZERO(LOOP_CTR, .L_MULADD_DONE) + JUMP_IF_LT(LOOP_CTR, 8, .LOOP_MULADD1) + +#define MULADD_OP(N) \ + ASSIGN(MUL_LO, ARRAY8(X_ARR, N)) ; \ + ASSIGN(Z_WORD, ARRAY8(Z_ARR, N)) ; \ + MUL(Y) ; \ + ADD(Z_WORD, CARRY) ; \ + ASSIGN(CARRY, MUL_HI) ; \ + ADD_LAST_CARRY(CARRY) ; \ + ADD(Z_WORD, MUL_LO) ; \ + ADD_LAST_CARRY(CARRY) ; \ + ASSIGN(ARRAY8(Z_ARR, N), Z_WORD) + +.LOOP_MULADD8: + MULADD_OP(0) + MULADD_OP(1) + MULADD_OP(2) + MULADD_OP(3) + MULADD_OP(4) + MULADD_OP(5) + MULADD_OP(6) + MULADD_OP(7) + + SUB_IMM(LOOP_CTR, 8) + ADD_IMM(Z_ARR, 64) + ADD_IMM(X_ARR, 64) + cmp IMM(8), LOOP_CTR + jge .LOOP_MULADD8 + + JUMP_IF_ZERO(LOOP_CTR, .L_MULADD_DONE) + +ALIGN +.LOOP_MULADD1: + MULADD_OP(0) + + SUB_IMM(LOOP_CTR, 1) + ADD_IMM(Z_ARR, 8) + ADD_IMM(X_ARR, 8) + + cmp IMM(0), LOOP_CTR + jne .LOOP_MULADD1 + +.L_MULADD_DONE: + RETURN_VALUE_IS(CARRY) +END_FUNCTION(bigint_simple_square) |