author     lloyd <[email protected]>    2008-09-07 18:42:32 +0000
committer  lloyd <[email protected]>    2008-09-07 18:42:32 +0000
commit     dceda75e7e5bc22e5c67c4e3d782edfe5ede55c0 (patch)
tree       10f763a89978e9b191897b3a5397dc2e505ce6fc /modules/asm_amd64
parent     67f1970cf168c4d6b0c773555039a6308694ef9f (diff)
Rename alg_amd64 to asm_amd64
Namespace exposed SHA-1 function with botan_ prefix.
Add start of mp_monty for x86-64 assembly
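
Since the assembly entry point has C linkage, its name gets no C++ namespace or mangling to keep it out of other libraries' way, hence the botan_ prefix. As a sketch, this is the renamed declaration as sha160.cpp below introduces it (parameter names added here for clarity; the original declares it inside namespace Botan with unnamed parameters):

    extern "C" void botan_sha160_asm_amd64(Botan::u32bit digest[5],
                                           const Botan::byte input[64],
                                           Botan::u32bit W[80]);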
Diffstat (limited to 'modules/asm_amd64')

-rw-r--r--   modules/asm_amd64/asm_macr.h   | 120
-rw-r--r--   modules/asm_amd64/modinfo.txt  |  39
-rw-r--r--   modules/asm_amd64/mp_monty.S   | 103
-rw-r--r--   modules/asm_amd64/mp_mulop.S   |  73
-rw-r--r--   modules/asm_amd64/sha160.cpp   |  52
-rw-r--r--   modules/asm_amd64/sha1_asm.S   | 258

6 files changed, 645 insertions, 0 deletions
diff --git a/modules/asm_amd64/asm_macr.h b/modules/asm_amd64/asm_macr.h
new file mode 100644
index 000000000..087f3f44e
--- /dev/null
+++ b/modules/asm_amd64/asm_macr.h
@@ -0,0 +1,120 @@
+/*************************************************
+* Assembly Macros Header File                    *
+* (C) 1999-2008 Jack Lloyd                       *
+*************************************************/
+
+#ifndef BOTAN_EXT_AMD64_ASM_MACROS_H__
+#define BOTAN_EXT_AMD64_ASM_MACROS_H__
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
+
+/*************************************************
+* General/Global Macros                          *
+*************************************************/
+#define ALIGN .p2align 4,,15
+
+#define START_LISTING(FILENAME) \
+   .file #FILENAME; \
+   .text; \
+   ALIGN;
+
+/*************************************************
+* Function Definitions                           *
+*************************************************/
+#define START_FUNCTION(func_name) \
+   ALIGN; \
+   .global func_name; \
+   .type func_name,@function; \
+func_name:
+
+#define END_FUNCTION(func_name) \
+   ret
+
+/*************************************************
+* Conditional Jumps                              *
+*************************************************/
+#define JUMP_IF_ZERO(REG, LABEL) \
+   cmp IMM(0), REG; \
+   jz LABEL
+
+#define JUMP_IF_LT(REG, NUM, LABEL) \
+   cmp IMM(NUM), REG; \
+   jl LABEL
+
+/*************************************************
+* Register Names                                 *
+*************************************************/
+#define R0 %rax
+#define R1 %rbx
+#define R2 %rcx
+#define R2_32 %ecx
+#define R3 %rdx
+#define R4 %rsp
+#define R5 %rbp
+#define R6 %rsi
+#define R6_32 %esi
+#define R7 %rdi
+#define R8 %r8
+#define R9 %r9
+#define R10 %r10
+#define R11 %r11
+#define R12 %r12
+#define R13 %r13
+#define R14 %r14
+#define R15 %r15
+#define R16 %r16
+
+#define ARG_1 R7
+#define ARG_2 R6
+#define ARG_2_32 R6_32
+#define ARG_3 R3
+#define ARG_4 R2
+#define ARG_4_32 R2_32
+#define ARG_5 R8
+#define ARG_6 R9
+
+#define TEMP_1 R10
+#define TEMP_2 R11
+#define TEMP_3 ARG_6
+#define TEMP_4 ARG_5
+#define TEMP_5 ARG_4
+#define TEMP_6 ARG_3
+#define TEMP_7 ARG_2
+#define TEMP_8 ARG_1
+#define TEMP_9 R0
+
+/*************************************************
+* Memory Access Operations                       *
+*************************************************/
+#define ARRAY8(REG, NUM) 8*(NUM)(REG)
+#define ARRAY4(REG, NUM) 4*(NUM)(REG)
+
+#define ASSIGN(TO, FROM) mov FROM, TO
+
+/*************************************************
+* ALU Operations                                 *
+*************************************************/
+#define IMM(VAL) $VAL
+
+#define ADD(TO, FROM) add FROM, TO
+#define ADD_LAST_CARRY(REG) adc IMM(0), REG
+#define ADD_IMM(TO, NUM) ADD(TO, IMM(NUM))
+#define ADD_W_CARRY(TO1, TO2, FROM) add FROM, TO1; adc IMM(0), TO2;
+#define SUB_IMM(TO, NUM) sub IMM(NUM), TO
+#define MUL(REG) mul REG
+
+#define XOR(TO, FROM) xor FROM, TO
+#define AND(TO, FROM) and FROM, TO
+#define OR(TO, FROM) or FROM, TO
+#define NOT(REG) not REG
+#define ZEROIZE(REG) XOR(REG, REG)
+
+#define RETURN_VALUE_IS(V) ASSIGN(%rax, V)
+
+#define ROTL_IMM(REG, NUM) rol IMM(NUM), REG
+#define ROTR_IMM(REG, NUM) ror IMM(NUM), REG
+#define ADD3_IMM(TO, FROM, NUM) lea NUM(TO,FROM,1), TO
+
+#endif
diff --git a/modules/asm_amd64/modinfo.txt b/modules/asm_amd64/modinfo.txt
new file mode 100644
index 000000000..625c9f162
--- /dev/null
+++ b/modules/asm_amd64/modinfo.txt
@@ -0,0 +1,39 @@
+realname "Algorithm x86-64 Assembler"
+
+mp_bits 64
+
+load_on asm_ok
+
+<replace>
+sha160.cpp
+</replace>
+
+<ignore>
+mp_mulop.cpp
+#mp_monty.cpp
+</ignore>
+
+<add>
+asm_macr.h
+mp_mulop.S
+#mp_monty.S
+sha1_asm.S
+</add>
+
+<arch>
+amd64
+</arch>
+
+<cc>
+gcc
+icc
+</cc>
+
+# ELF systems
+<os>
+linux
+freebsd
+netbsd
+openbsd
+solaris
+</os>
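
A note on two of the less obvious macros above: ROTL_IMM/ROTR_IMM emit rol/ror, and ADD3_IMM folds a three-operand addition into a single lea, which also leaves the flags untouched (unlike two adds). A portable C++ sketch of what they compute (the helper names are illustrative, not part of the module):

    #include <cstdint>

    // ROTL_IMM(REG, NUM): rotate left by a constant, for 0 < num < 32
    // on the 32-bit registers used in sha1_asm.S below
    inline uint32_t rotl_imm(uint32_t reg, int num)
       { return (reg << num) | (reg >> (32 - num)); }

    // ADD3_IMM(TO, FROM, NUM): lea NUM(TO,FROM,1), TO
    // i.e. to = to + from + num in one flag-preserving instruction
    inline uint64_t add3_imm(uint64_t to, uint64_t from, uint64_t num)
       { return to + from + num; }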
diff --git a/modules/asm_amd64/mp_monty.S b/modules/asm_amd64/mp_monty.S
new file mode 100644
index 000000000..9c94d2736
--- /dev/null
+++ b/modules/asm_amd64/mp_monty.S
@@ -0,0 +1,103 @@
+/*************************************************
+* Montgomery Reduction Source File               *
+* (C) 2008 Jack Lloyd                            *
+*************************************************/
+
+#include <botan/asm_macr.h>
+
+START_LISTING(mp_monty.S)
+
+START_FUNCTION(bigint_monty_redc)
+
+#define Z_ARR ARG_1       // rdi
+#define Z_SIZE ARG_2_32   // esi
+#define X_ARR ARG_3       // rdx
+#define X_SIZE ARG_4_32   // ecx
+#define U ARG_5           // r8
+
+/*
+   We need all the arguments for a while (we can reuse U eventually),
+   so the only temp registers are
+      TEMP_1 %r10
+      TEMP_2 %r11
+      TEMP_3 = ARG_6 = %r9
+   The return type is void, so
+      R0 %rax (aka TEMP_9)
+   is also free.
+
+   Can push:
+      %rbx (callee saved)
+      %rbp (frame pointer, callee saved)
+      %r12-%r15 (callee saved)
+
+   We can push the base/frame pointers since this is a leaf function
+   and does not reference any data.
+*/
+
+   push %r12
+   push %r13
+   push %r14
+   push %r15
+
+#define LOOP_CTR R0
+
+#define CARRY TEMP_1
+#define Z_WORD TEMP_2
+#define MUL_LO %rax
+#define MUL_HI %rdx
+
+   /*
+   ZEROIZE(CARRY)
+
+   ASSIGN(LOOP_CTR, X_SIZE)
+
+   JUMP_IF_ZERO(LOOP_CTR, .L_MULADD_DONE)
+   JUMP_IF_LT(LOOP_CTR, 8, .LOOP_MULADD1)
+
+#define MULADD_OP(N) \
+   ASSIGN(MUL_LO, ARRAY8(X_ARR, N)) ; \
+   ASSIGN(Z_WORD, ARRAY8(Z_ARR, N)) ; \
+   MUL(Y) ; \
+   ADD(Z_WORD, CARRY) ; \
+   ASSIGN(CARRY, MUL_HI) ; \
+   ADD_LAST_CARRY(CARRY) ; \
+   ADD(Z_WORD, MUL_LO) ; \
+   ADD_LAST_CARRY(CARRY) ; \
+   ASSIGN(ARRAY8(Z_ARR, N), Z_WORD)
+
+ALIGN
+.LOOP_MULADD8:
+   MULADD_OP(0)
+   MULADD_OP(1)
+   MULADD_OP(2)
+   MULADD_OP(3)
+   MULADD_OP(4)
+   MULADD_OP(5)
+   MULADD_OP(6)
+   MULADD_OP(7)
+
+   SUB_IMM(LOOP_CTR, 8)
+   ADD_IMM(Z_ARR, 64)
+   ADD_IMM(X_ARR, 64)
+   cmp IMM(8), LOOP_CTR
+   jge .LOOP_MULADD8
+
+   JUMP_IF_ZERO(LOOP_CTR, .L_MULADD_DONE)
+
+ALIGN
+.LOOP_MULADD1:
+   MULADD_OP(0)
+
+   SUB_IMM(LOOP_CTR, 1)
+   ADD_IMM(Z_ARR, 8)
+   ADD_IMM(X_ARR, 8)
+
+   cmp IMM(0), LOOP_CTR
+   jne .LOOP_MULADD1
+*/
+
+   pop %r15
+   pop %r14
+   pop %r13
+   pop %r12
+END_FUNCTION(bigint_monty_redc)
diff --git a/modules/asm_amd64/mp_mulop.S b/modules/asm_amd64/mp_mulop.S
new file mode 100644
index 000000000..983a34a82
--- /dev/null
+++ b/modules/asm_amd64/mp_mulop.S
@@ -0,0 +1,73 @@
+/*************************************************
+* Multiply/Add Algorithm Source File             *
+* (C) 1999-2008 Jack Lloyd                       *
+*************************************************/
+
+#include <botan/asm_macr.h>
+
+START_LISTING(mp_mulop.S)
+
+START_FUNCTION(bigint_mul_add_words)
+
+#define Z_ARR ARG_1
+#define X_ARR ARG_2
+#define X_SIZE %edx
+#define Y ARG_4
+
+#define CARRY TEMP_1
+#define Z_WORD TEMP_2
+#define LOOP_CTR %r9d
+#define MUL_LO %rax
+#define MUL_HI %rdx
+
+   ZEROIZE(CARRY)
+
+   ASSIGN(LOOP_CTR, X_SIZE)
+
+   JUMP_IF_ZERO(LOOP_CTR, .L_MULADD_DONE)
+   JUMP_IF_LT(LOOP_CTR, 8, .LOOP_MULADD1)
+
+#define MULADD_OP(N) \
+   ASSIGN(MUL_LO, ARRAY8(X_ARR, N)) ; \
+   ASSIGN(Z_WORD, ARRAY8(Z_ARR, N)) ; \
+   MUL(Y) ; \
+   ADD(Z_WORD, CARRY) ; \
+   ASSIGN(CARRY, MUL_HI) ; \
+   ADD_LAST_CARRY(CARRY) ; \
+   ADD(Z_WORD, MUL_LO) ; \
+   ADD_LAST_CARRY(CARRY) ; \
+   ASSIGN(ARRAY8(Z_ARR, N), Z_WORD)
+
+ALIGN
+.LOOP_MULADD8:
+   MULADD_OP(0)
+   MULADD_OP(1)
+   MULADD_OP(2)
+   MULADD_OP(3)
+   MULADD_OP(4)
+   MULADD_OP(5)
+   MULADD_OP(6)
+   MULADD_OP(7)
+
+   SUB_IMM(LOOP_CTR, 8)
+   ADD_IMM(Z_ARR, 64)
+   ADD_IMM(X_ARR, 64)
+   cmp IMM(8), LOOP_CTR
+   jge .LOOP_MULADD8
+
+   JUMP_IF_ZERO(LOOP_CTR, .L_MULADD_DONE)
+
+ALIGN
+.LOOP_MULADD1:
+   MULADD_OP(0)
+
+   SUB_IMM(LOOP_CTR, 1)
+   ADD_IMM(Z_ARR, 8)
+   ADD_IMM(X_ARR, 8)
+
+   cmp IMM(0), LOOP_CTR
+   jne .LOOP_MULADD1
+
+.L_MULADD_DONE:
+   RETURN_VALUE_IS(CARRY)
+END_FUNCTION(bigint_mul_add_words)
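
bigint_mul_add_words computes z[i] += x[i] * y across x_size words, carrying between words and returning the final carry; the commented-out body of bigint_monty_redc above begins from the same word-level multiply-add step. A portable C++ sketch of the semantics, assuming the compiler provides unsigned __int128 (true for gcc and icc on amd64, the targets this module declares):

    #include <cstdint>
    #include <cstddef>

    // Reference model of bigint_mul_add_words: z[] += x[] * y, returning
    // the carry out of the top word. The asm unrolls this loop 8x.
    uint64_t mul_add_words(uint64_t z[], const uint64_t x[],
                           size_t x_size, uint64_t y)
       {
       uint64_t carry = 0;
       for(size_t i = 0; i != x_size; ++i)
          {
          // 64x64 -> 128 bit multiply, as the mulq inside MULADD_OP
          unsigned __int128 t = (unsigned __int128)x[i] * y + z[i] + carry;
          z[i] = (uint64_t)t;
          carry = (uint64_t)(t >> 64);
          }
       return carry;
       }

The sum cannot overflow 128 bits: (2^64-1)^2 + 2*(2^64-1) = 2^128 - 1 exactly, which is why a single carry word suffices.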
diff --git a/modules/asm_amd64/sha160.cpp b/modules/asm_amd64/sha160.cpp
new file mode 100644
index 000000000..cfac02f45
--- /dev/null
+++ b/modules/asm_amd64/sha160.cpp
@@ -0,0 +1,52 @@
+/*************************************************
+* SHA-160 Source File                            *
+* (C) 1999-2007 Jack Lloyd                       *
+*************************************************/
+
+#include <botan/sha160.h>
+#include <botan/loadstor.h>
+
+namespace Botan {
+
+extern "C" void botan_sha160_asm_amd64(u32bit[5], const byte[64], u32bit[80]);
+
+/*************************************************
+* SHA-160 Compression Function                   *
+*************************************************/
+void SHA_160::hash(const byte input[])
+   {
+   botan_sha160_asm_amd64(digest, input, W);
+   }
+
+/*************************************************
+* Copy out the digest                            *
+*************************************************/
+void SHA_160::copy_out(byte output[])
+   {
+   for(u32bit j = 0; j != OUTPUT_LENGTH; ++j)
+      output[j] = get_byte(j % 4, digest[j/4]);
+   }
+
+/*************************************************
+* Clear memory of sensitive data                 *
+*************************************************/
+void SHA_160::clear() throw()
+   {
+   MDx_HashFunction::clear();
+   W.clear();
+   digest[0] = 0x67452301;
+   digest[1] = 0xEFCDAB89;
+   digest[2] = 0x98BADCFE;
+   digest[3] = 0x10325476;
+   digest[4] = 0xC3D2E1F0;
+   }
+
+/*************************************************
+* SHA_160 Constructor                            *
+*************************************************/
+SHA_160::SHA_160() : MDx_HashFunction(20, 64, true, true), W(80)
+   {
+   clear();
+   }
+
+}
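
copy_out serializes the five 32-bit state words big-endian: get_byte(n, x), from loadstor.h, extracts the n-th most significant byte of x. Written out without the helper, the loop above is equivalent to this sketch:

    #include <cstdint>
    #include <cstddef>

    // Equivalent of SHA_160::copy_out: OUTPUT_LENGTH is 20, and
    // get_byte(j % 4, digest[j / 4]) picks bytes MSB-first from each word.
    void sha160_copy_out(uint8_t output[20], const uint32_t digest[5])
       {
       for(size_t j = 0; j != 20; ++j)
          output[j] = (uint8_t)(digest[j / 4] >> (8 * (3 - j % 4)));
       }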
diff --git a/modules/asm_amd64/sha1_asm.S b/modules/asm_amd64/sha1_asm.S
new file mode 100644
index 000000000..ecf4a18ce
--- /dev/null
+++ b/modules/asm_amd64/sha1_asm.S
@@ -0,0 +1,258 @@
+/*************************************************
+* SHA-160 Source File                            *
+* (C) 1999-2007 Jack Lloyd                       *
+*************************************************/
+
+#include <botan/asm_macr.h>
+
+START_LISTING(sha1_asm.S)
+
+START_FUNCTION(botan_sha160_asm_amd64)
+
+#define DIGEST_ARR %rdi
+#define INPUT %rsi
+#define W %rdx
+#define LOOP_CTR %eax
+
+#define A %r8d
+#define B %r9d
+#define C %r10d
+#define D %r11d
+#define E %ecx
+
+   ZEROIZE(LOOP_CTR)
+
+ALIGN;
+.LOOP_LOAD_INPUT:
+   addl $8, %eax
+
+   movq ARRAY8(INPUT, 0), %r8
+   movq ARRAY8(INPUT, 1), %r9
+   movq ARRAY8(INPUT, 2), %r10
+   movq ARRAY8(INPUT, 3), %r11
+
+   bswap %r8
+   bswap %r9
+   bswap %r10
+   bswap %r11
+
+   rolq $32, %r8
+   rolq $32, %r9
+   rolq $32, %r10
+   rolq $32, %r11
+
+   movq %r8, ARRAY8(W, 0)
+   movq %r9, ARRAY8(W, 1)
+   movq %r10, ARRAY8(W, 2)
+   movq %r11, ARRAY8(W, 3)
+
+   addq $32, W
+   addq $32, INPUT
+
+   cmp IMM(16), LOOP_CTR
+   jne .LOOP_LOAD_INPUT
+
+/*
+#define A %r8d
+#define B %r9d
+#define C %r10d
+#define D %r11d
+#define E %ecx
+*/
+
+ALIGN;
+.LOOP_EXPANSION:
+   addl $4, LOOP_CTR
+
+   ZEROIZE(A)
+   ASSIGN(B, ARRAY4(W, -1))
+   ASSIGN(C, ARRAY4(W, -2))
+   ASSIGN(D, ARRAY4(W, -3))
+
+   XOR(A, ARRAY4(W, -5))
+   XOR(B, ARRAY4(W, -6))
+   XOR(C, ARRAY4(W, -7))
+   XOR(D, ARRAY4(W, -8))
+
+   XOR(A, ARRAY4(W, -11))
+   XOR(B, ARRAY4(W, -12))
+   XOR(C, ARRAY4(W, -13))
+   XOR(D, ARRAY4(W, -14))
+
+   XOR(A, ARRAY4(W, -13))
+   XOR(B, ARRAY4(W, -14))
+   XOR(C, ARRAY4(W, -15))
+   XOR(D, ARRAY4(W, -16))
+
+   ROTL_IMM(D, 1)
+   ROTL_IMM(C, 1)
+   ROTL_IMM(B, 1)
+   XOR(A, D)
+   ROTL_IMM(A, 1)
+
+   ASSIGN(ARRAY4(W, 0), D)
+   ASSIGN(ARRAY4(W, 1), C)
+   ASSIGN(ARRAY4(W, 2), B)
+   ASSIGN(ARRAY4(W, 3), A)
+
+   addq $16, W
+   cmp IMM(80), LOOP_CTR
+   jne .LOOP_EXPANSION
+
+   subq $320, W
+
+#define MAGIC1 0x5A827999
+#define MAGIC2 0x6ED9EBA1
+#define MAGIC3 0x8F1BBCDC
+#define MAGIC4 0xCA62C1D6
+
+#define T %esi
+#define T2 %eax
+
+#define F1(A, B, C, D, E, F, N) \
+   ASSIGN(T2, ARRAY4(W, N)) ; \
+   ASSIGN(A, F) ; \
+   ROTL_IMM(F, 5) ; \
+   ADD(F, E) ; \
+   ASSIGN(E, C) ; \
+   XOR(E, D) ; \
+   ADD3_IMM(F, T2, MAGIC1) ; \
+   AND(E, B) ; \
+   XOR(E, D) ; \
+   ROTR_IMM(B, 2) ; \
+   ADD(E, F) ;
+
+#define F2_4(A, B, C, D, E, F, N, MAGIC) \
+   ASSIGN(T2, ARRAY4(W, N)) ; \
+   ASSIGN(A, F) ; \
+   ROTL_IMM(F, 5) ; \
+   ADD(F, E) ; \
+   ASSIGN(E, B) ; \
+   XOR(E, C) ; \
+   ADD3_IMM(F, T2, MAGIC) ; \
+   XOR(E, D) ; \
+   ROTR_IMM(B, 2) ; \
+   ADD(E, F) ;
+
+#define F3(A, B, C, D, E, F, N) \
+   ASSIGN(T2, ARRAY4(W, N)) ; \
+   ASSIGN(A, F) ; \
+   ROTL_IMM(F, 5) ; \
+   ADD(F, E) ; \
+   ASSIGN(E, B) ; \
+   OR(E, C) ; \
+   AND(E, D) ; \
+   ADD3_IMM(F, T2, MAGIC3) ; \
+   ASSIGN(T2, B) ; \
+   AND(T2, C) ; \
+   OR(E, T2) ; \
+   ROTR_IMM(B, 2) ; \
+   ADD(E, F) ;
+
+#define F2(A, B, C, D, E, F, W) \
+   F2_4(A, B, C, D, E, F, W, MAGIC2)
+
+#define F4(A, B, C, D, E, F, W) \
+   F2_4(A, B, C, D, E, F, W, MAGIC4)
+
+   ASSIGN(T, ARRAY4(DIGEST_ARR, 0))
+   ASSIGN(B, ARRAY4(DIGEST_ARR, 1))
+   ASSIGN(C, ARRAY4(DIGEST_ARR, 2))
+   ASSIGN(D, ARRAY4(DIGEST_ARR, 3))
+   ASSIGN(E, ARRAY4(DIGEST_ARR, 4))
+
+   /* First Round */
+   F1(A, B, C, D, E, T, 0)
+   F1(T, A, B, C, D, E, 1)
+   F1(E, T, A, B, C, D, 2)
+   F1(D, E, T, A, B, C, 3)
+   F1(C, D, E, T, A, B, 4)
+   F1(B, C, D, E, T, A, 5)
+   F1(A, B, C, D, E, T, 6)
+   F1(T, A, B, C, D, E, 7)
+   F1(E, T, A, B, C, D, 8)
+   F1(D, E, T, A, B, C, 9)
+   F1(C, D, E, T, A, B, 10)
+   F1(B, C, D, E, T, A, 11)
+   F1(A, B, C, D, E, T, 12)
+   F1(T, A, B, C, D, E, 13)
+   F1(E, T, A, B, C, D, 14)
+   F1(D, E, T, A, B, C, 15)
+   F1(C, D, E, T, A, B, 16)
+   F1(B, C, D, E, T, A, 17)
+   F1(A, B, C, D, E, T, 18)
+   F1(T, A, B, C, D, E, 19)
+
+   /* Second Round */
+   F2(E, T, A, B, C, D, 20)
+   F2(D, E, T, A, B, C, 21)
+   F2(C, D, E, T, A, B, 22)
+   F2(B, C, D, E, T, A, 23)
+   F2(A, B, C, D, E, T, 24)
+   F2(T, A, B, C, D, E, 25)
+   F2(E, T, A, B, C, D, 26)
+   F2(D, E, T, A, B, C, 27)
+   F2(C, D, E, T, A, B, 28)
+   F2(B, C, D, E, T, A, 29)
+   F2(A, B, C, D, E, T, 30)
+   F2(T, A, B, C, D, E, 31)
+   F2(E, T, A, B, C, D, 32)
+   F2(D, E, T, A, B, C, 33)
+   F2(C, D, E, T, A, B, 34)
+   F2(B, C, D, E, T, A, 35)
+   F2(A, B, C, D, E, T, 36)
+   F2(T, A, B, C, D, E, 37)
+   F2(E, T, A, B, C, D, 38)
+   F2(D, E, T, A, B, C, 39)
+
+   /* Third Round */
+   F3(C, D, E, T, A, B, 40)
+   F3(B, C, D, E, T, A, 41)
+   F3(A, B, C, D, E, T, 42)
+   F3(T, A, B, C, D, E, 43)
+   F3(E, T, A, B, C, D, 44)
+   F3(D, E, T, A, B, C, 45)
+   F3(C, D, E, T, A, B, 46)
+   F3(B, C, D, E, T, A, 47)
+   F3(A, B, C, D, E, T, 48)
+   F3(T, A, B, C, D, E, 49)
+   F3(E, T, A, B, C, D, 50)
+   F3(D, E, T, A, B, C, 51)
+   F3(C, D, E, T, A, B, 52)
+   F3(B, C, D, E, T, A, 53)
+   F3(A, B, C, D, E, T, 54)
+   F3(T, A, B, C, D, E, 55)
+   F3(E, T, A, B, C, D, 56)
+   F3(D, E, T, A, B, C, 57)
+   F3(C, D, E, T, A, B, 58)
+   F3(B, C, D, E, T, A, 59)
+
+   /* Fourth Round */
+   F4(A, B, C, D, E, T, 60)
+   F4(T, A, B, C, D, E, 61)
+   F4(E, T, A, B, C, D, 62)
+   F4(D, E, T, A, B, C, 63)
+   F4(C, D, E, T, A, B, 64)
+   F4(B, C, D, E, T, A, 65)
+   F4(A, B, C, D, E, T, 66)
+   F4(T, A, B, C, D, E, 67)
+   F4(E, T, A, B, C, D, 68)
+   F4(D, E, T, A, B, C, 69)
+   F4(C, D, E, T, A, B, 70)
+   F4(B, C, D, E, T, A, 71)
+   F4(A, B, C, D, E, T, 72)
+   F4(T, A, B, C, D, E, 73)
+   F4(E, T, A, B, C, D, 74)
+   F4(D, E, T, A, B, C, 75)
+   F4(C, D, E, T, A, B, 76)
+   F4(B, C, D, E, T, A, 77)
+   F4(A, B, C, D, E, T, 78)
+   F4(T, A, B, C, D, E, 79)
+
+   ADD(ARRAY4(DIGEST_ARR, 0), D)
+   ADD(ARRAY4(DIGEST_ARR, 1), T)
+   ADD(ARRAY4(DIGEST_ARR, 2), A)
+   ADD(ARRAY4(DIGEST_ARR, 3), B)
+   ADD(ARRAY4(DIGEST_ARR, 4), C)
+
+END_FUNCTION(botan_sha160_asm_amd64)
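
For reference, the round macros interleave register renaming with the standard SHA-1 schedule: the expansion loop computes W[t] = rotl1(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]) four words per iteration, and F1 through F4 are the usual Boolean round functions paired with the constants MAGIC1 through MAGIC4. A C++ sketch of what the macros compute on the b, c, d state words:

    #include <cstdint>

    inline uint32_t f1(uint32_t b, uint32_t c, uint32_t d)  // rounds 0-19, 0x5A827999
       { return d ^ (b & (c ^ d)); }           // "choose", as F1 sequences it
    inline uint32_t f2(uint32_t b, uint32_t c, uint32_t d)  // rounds 20-39, 0x6ED9EBA1
       { return b ^ c ^ d; }
    inline uint32_t f3(uint32_t b, uint32_t c, uint32_t d)  // rounds 40-59, 0x8F1BBCDC
       { return (b & c) | ((b | c) & d); }     // majority, as F3 sequences it
    inline uint32_t f4(uint32_t b, uint32_t c, uint32_t d)  // rounds 60-79, 0xCA62C1D6
       { return b ^ c ^ d; }                   // same form as f2 (the F2_4 macro)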