diff options
Diffstat (limited to 'modules')
-rw-r--r-- | modules/asm_amd64/asm_macr.h (renamed from modules/alg_amd64/asm_macr.h) | 4 | ||||
-rw-r--r-- | modules/asm_amd64/modinfo.txt (renamed from modules/alg_amd64/modinfo.txt) | 8 | ||||
-rw-r--r-- | modules/asm_amd64/mp_monty.S | 103 | ||||
-rw-r--r-- | modules/asm_amd64/mp_mulop.S (renamed from modules/alg_amd64/mp_mulop.S) | 0 | ||||
-rw-r--r-- | modules/asm_amd64/sha160.cpp (renamed from modules/alg_amd64/sha160.cpp) | 4 | ||||
-rw-r--r-- | modules/asm_amd64/sha1_asm.S (renamed from modules/alg_amd64/sha1core.S) | 6 |
6 files changed, 117 insertions, 8 deletions
diff --git a/modules/alg_amd64/asm_macr.h b/modules/asm_amd64/asm_macr.h index 89e5bd028..087f3f44e 100644 --- a/modules/alg_amd64/asm_macr.h +++ b/modules/asm_amd64/asm_macr.h @@ -49,10 +49,12 @@ func_name: #define R0 %rax #define R1 %rbx #define R2 %rcx +#define R2_32 %ecx #define R3 %rdx #define R4 %rsp #define R5 %rbp #define R6 %rsi +#define R6_32 %esi #define R7 %rdi #define R8 %r8 #define R9 %r9 @@ -66,8 +68,10 @@ func_name: #define ARG_1 R7 #define ARG_2 R6 +#define ARG_2_32 R6_32 #define ARG_3 R3 #define ARG_4 R2 +#define ARG_4_32 R2_32 #define ARG_5 R8 #define ARG_6 R9 diff --git a/modules/alg_amd64/modinfo.txt b/modules/asm_amd64/modinfo.txt index 929d696d0..625c9f162 100644 --- a/modules/alg_amd64/modinfo.txt +++ b/modules/asm_amd64/modinfo.txt @@ -9,13 +9,15 @@ sha160.cpp </replace> <ignore> -#mp_mulop.cpp +mp_mulop.cpp +#mp_monty.cpp </ignore> <add> asm_macr.h -#mp_mulop.S -sha1core.S +mp_mulop.S +#mp_monty.S +sha1_asm.S </add> <arch> diff --git a/modules/asm_amd64/mp_monty.S b/modules/asm_amd64/mp_monty.S new file mode 100644 index 000000000..9c94d2736 --- /dev/null +++ b/modules/asm_amd64/mp_monty.S @@ -0,0 +1,103 @@ +/************************************************* +* Montgomery Reduction Source File * +* (C) 2008 Jack Lloyd * +*************************************************/ + +#include <botan/asm_macr.h> + +START_LISTING(mp_monty.S) + +START_FUNCTION(bigint_monty_redc) + +#define Z_ARR ARG_1 // rdi +#define Z_SIZE ARG_2_32 // esi +#define X_ARR ARG_3 // rdx +#define X_SIZE ARG_4_32 // ecx +#define U ARG_5 // r8 + +/* + We need all arguments for a while (we can reuse U eventually) + So only temp registers are + TEMP_1 %r10 + TEMP_2 %r11 + TEMP_3 = ARG_6 = %r9 + void return, so also + R0 %rax (aka TEMP_9) + is free + + Can push: + %rbx (base pointer, callee saved) + %rbp (frame pointer, callee saved) + %r12-%r15 (callee saved) + + Can push base/frame pointers since this is a leaf function + and does not reference any data. 
+*/ + + push %r12 + push %r13 + push %r14 + push %r15 + +#define LOOP_CTR R0 + +#define CARRY TEMP_1 +#define Z_WORD TEMP_2 +#define MUL_LO %rax +#define MUL_HI %rdx + + /* + ZEROIZE(CARRY) + + ASSIGN(LOOP_CTR, X_SIZE) + + JUMP_IF_ZERO(LOOP_CTR, .L_MULADD_DONE) + JUMP_IF_LT(LOOP_CTR, 8, .LOOP_MULADD1) + +#define MULADD_OP(N) \ + ASSIGN(MUL_LO, ARRAY8(X_ARR, N)) ; \ + ASSIGN(Z_WORD, ARRAY8(Z_ARR, N)) ; \ + MUL(Y) ; \ + ADD(Z_WORD, CARRY) ; \ + ASSIGN(CARRY, MUL_HI) ; \ + ADD_LAST_CARRY(CARRY) ; \ + ADD(Z_WORD, MUL_LO) ; \ + ADD_LAST_CARRY(CARRY) ; \ + ASSIGN(ARRAY8(Z_ARR, N), Z_WORD) + +ALIGN +.LOOP_MULADD8: + MULADD_OP(0) + MULADD_OP(1) + MULADD_OP(2) + MULADD_OP(3) + MULADD_OP(4) + MULADD_OP(5) + MULADD_OP(6) + MULADD_OP(7) + + SUB_IMM(LOOP_CTR, 8) + ADD_IMM(Z_ARR, 64) + ADD_IMM(X_ARR, 64) + cmp IMM(8), LOOP_CTR + jge .LOOP_MULADD8 + + JUMP_IF_ZERO(LOOP_CTR, .L_MULADD_DONE) + +ALIGN +.LOOP_MULADD1: + MULADD_OP(0) + + SUB_IMM(LOOP_CTR, 1) + ADD_IMM(Z_ARR, 8) + ADD_IMM(X_ARR, 8) + + cmp IMM(0), LOOP_CTR + jne .LOOP_MULADD1 +*/ + + pop %r15 + pop %r14 + pop %r13 + pop %r12 +END_FUNCTION(bigint_monty_redc) diff --git a/modules/alg_amd64/mp_mulop.S b/modules/asm_amd64/mp_mulop.S index 983a34a82..983a34a82 100644 --- a/modules/alg_amd64/mp_mulop.S +++ b/modules/asm_amd64/mp_mulop.S diff --git a/modules/alg_amd64/sha160.cpp b/modules/asm_amd64/sha160.cpp index 97ec9d522..cfac02f45 100644 --- a/modules/alg_amd64/sha160.cpp +++ b/modules/asm_amd64/sha160.cpp @@ -8,14 +8,14 @@ namespace Botan { -extern "C" void botan_asm_sha160_core(u32bit[5], const byte[64], u32bit[80]); +extern "C" void botan_sha160_asm_amd64(u32bit[5], const byte[64], u32bit[80]); /************************************************* * SHA-160 Compression Function * *************************************************/ void SHA_160::hash(const byte input[]) { - botan_asm_sha160_core(digest, input, W); + botan_sha160_asm_amd64(digest, input, W); } /************************************************* diff --git 
a/modules/alg_amd64/sha1core.S b/modules/asm_amd64/sha1_asm.S index f145f8517..ecf4a18ce 100644 --- a/modules/alg_amd64/sha1core.S +++ b/modules/asm_amd64/sha1_asm.S @@ -5,9 +5,9 @@ #include <botan/asm_macr.h> -START_LISTING(sha1core.S) +START_LISTING(sha1_asm.S) -START_FUNCTION(botan_asm_sha160_core) +START_FUNCTION(botan_sha160_asm_amd64) #define DIGEST_ARR %rdi #define INPUT %rsi @@ -255,4 +255,4 @@ ALIGN; ADD(ARRAY4(DIGEST_ARR, 3), B) ADD(ARRAY4(DIGEST_ARR, 4), C) -END_FUNCTION(botan_asm_sha160_core) +END_FUNCTION(botan_sha160_asm_amd64) |