diff options
author:    lloyd <[email protected]>  2007-03-12 02:38:26 +0000
committer: lloyd <[email protected]>  2007-03-12 02:38:26 +0000
commit:    bfda1bc3734183d2023f00a8ef4841c4ed31dc8c (patch)
tree:      9fae39096bbf1929816079bea072f850e35069d5 /modules
parent:    615e45ccc278ee8109c2badb8f666abc720f0dca (diff)
Check in an initial assembler implementation of SHA-1 for x86-64 systems.
It is not amazingly optimized, about 5% faster than what GCC 4.1.1 does
on my Core2 with the normal C++ code, but it's a start.
Diffstat (limited to 'modules')
-rw-r--r--  modules/alg_amd64/asm_macr.h  |  21
-rw-r--r--  modules/alg_amd64/modinfo.txt |   5
-rw-r--r--  modules/alg_amd64/sha160.cpp  |  52
-rw-r--r--  modules/alg_amd64/sha1core.S  | 252
4 files changed, 321 insertions, 9 deletions
diff --git a/modules/alg_amd64/asm_macr.h b/modules/alg_amd64/asm_macr.h index 0b931daaa..6b9aaba75 100644 --- a/modules/alg_amd64/asm_macr.h +++ b/modules/alg_amd64/asm_macr.h @@ -96,6 +96,7 @@ func_name: * Memory Access Operations * *************************************************/ #define ARRAY8(REG, NUM) 8*(NUM)(REG) +#define ARRAY4(REG, NUM) 4*(NUM)(REG) #define ASSIGN(TO, FROM) mov FROM, TO @@ -104,19 +105,23 @@ func_name: *************************************************/ #define IMM(VAL) $VAL -#define ADD(TO, FROM) addq FROM, TO -#define ADD_LAST_CARRY(REG) adcq IMM(0), REG +#define ADD(TO, FROM) add FROM, TO +#define ADD_LAST_CARRY(REG) adc IMM(0), REG #define ADD_IMM(TO, NUM) ADD(TO, IMM(NUM)) -#define ADD_W_CARRY(TO1, TO2, FROM) addq FROM, TO1; adcq IMM(0), TO2; +#define ADD_W_CARRY(TO1, TO2, FROM) add FROM, TO1; adc IMM(0), TO2; #define SUB_IMM(TO, NUM) sub IMM(NUM), TO -#define MUL(REG) mulq REG +#define MUL(REG) mul REG -#define XOR(TO, FROM) xorq FROM, TO -#define AND(TO, FROM) andq FROM, TO -#define OR(TO, FROM) orq FROM, TO -#define NOT(REG) notq REG +#define XOR(TO, FROM) xor FROM, TO +#define AND(TO, FROM) and FROM, TO +#define OR(TO, FROM) or FROM, TO +#define NOT(REG) not REG #define ZEROIZE(REG) XOR(REG, REG) #define RETURN_VALUE_IS(V) ASSIGN(%rax, V) +#define ROTL_IMM(REG, NUM) rol IMM(NUM), REG +#define ROTR_IMM(REG, NUM) ror IMM(NUM), REG +#define ADD3_IMM(TO, FROM, NUM) lea NUM(TO,FROM,1), TO + #endif diff --git a/modules/alg_amd64/modinfo.txt b/modules/alg_amd64/modinfo.txt index 04ee56d19..f9023b273 100644 --- a/modules/alg_amd64/modinfo.txt +++ b/modules/alg_amd64/modinfo.txt @@ -4,9 +4,12 @@ mp_bits 64 load_on: asm_ok +<replace> +sha160.cpp +</replace> + <ignore> mp_mulop.cpp -sha160.cpp </ignore> <add> diff --git a/modules/alg_amd64/sha160.cpp b/modules/alg_amd64/sha160.cpp new file mode 100644 index 000000000..754f8a01c --- /dev/null +++ b/modules/alg_amd64/sha160.cpp @@ -0,0 +1,52 @@ 
+/************************************************* +* SHA-160 Source File * +* (C) 1999-2006 The Botan Project * +*************************************************/ + +#include <botan/sha160.h> +#include <botan/bit_ops.h> + +namespace Botan { + +extern "C" void sha160_core(u32bit[5], const byte[64], u32bit[80]); + +/************************************************* +* SHA-160 Compression Function * +*************************************************/ +void SHA_160::hash(const byte input[]) + { + sha160_core(digest, input, W); + } + +/************************************************* +* Copy out the digest * +*************************************************/ +void SHA_160::copy_out(byte output[]) + { + for(u32bit j = 0; j != OUTPUT_LENGTH; ++j) + output[j] = get_byte(j % 4, digest[j/4]); + } + +/************************************************* +* Clear memory of sensitive data * +*************************************************/ +void SHA_160::clear() throw() + { + MDx_HashFunction::clear(); + W.clear(); + digest[0] = 0x67452301; + digest[1] = 0xEFCDAB89; + digest[2] = 0x98BADCFE; + digest[3] = 0x10325476; + digest[4] = 0xC3D2E1F0; + } + +/************************************************* +* SHA_160 Constructor * +*************************************************/ +SHA_160::SHA_160() : MDx_HashFunction(20, 64, true, true), W(80) + { + clear(); + } + +} diff --git a/modules/alg_amd64/sha1core.S b/modules/alg_amd64/sha1core.S new file mode 100644 index 000000000..7097ac8ec --- /dev/null +++ b/modules/alg_amd64/sha1core.S @@ -0,0 +1,252 @@ +/************************************************* +* SHA-160 Source File * +* (C) 1999-2006 The Botan Project * +*************************************************/ + +#include <botan/asm_macr.h> + +START_LISTING(sha1core.S) + +START_FUNCTION(sha160_core) + +#define DIGEST_ARR %rdi +#define INPUT %rsi +#define W %rdx +#define LOOP_CTR %eax + +#define A %r8d +#define B %r9d +#define C %r10d +#define D %r11d +#define E %ecx + + 
ZEROIZE(LOOP_CTR) + +START_LOOP(.LOAD_INPUT) + addl $8, %eax + + movq ARRAY8(INPUT, 0), %r8 + movq ARRAY8(INPUT, 1), %r9 + movq ARRAY8(INPUT, 2), %r10 + movq ARRAY8(INPUT, 3), %r11 + + bswap %r8 + bswap %r9 + bswap %r10 + bswap %r11 + + rolq $32, %r8 + rolq $32, %r9 + rolq $32, %r10 + rolq $32, %r11 + + movq %r8, ARRAY8(W, 0) + movq %r9, ARRAY8(W, 1) + movq %r10, ARRAY8(W, 2) + movq %r11, ARRAY8(W, 3) + + addq $32, W + addq $32, INPUT +LOOP_UNTIL_EQ(LOOP_CTR, 16, .LOAD_INPUT) + +/* + #define A %r8d +#define B %r9d +#define C %r10d +#define D %r11d +#define E %ecx +*/ +START_LOOP(.EXPANSION) + addl $4, LOOP_CTR + + ZEROIZE(A) + ASSIGN(B, ARRAY4(W, -1)) + ASSIGN(C, ARRAY4(W, -2)) + ASSIGN(D, ARRAY4(W, -3)) + + XOR(A, ARRAY4(W, -5)) + XOR(B, ARRAY4(W, -6)) + XOR(C, ARRAY4(W, -7)) + XOR(D, ARRAY4(W, -8)) + + XOR(A, ARRAY4(W, -11)) + XOR(B, ARRAY4(W, -12)) + XOR(C, ARRAY4(W, -13)) + XOR(D, ARRAY4(W, -14)) + + XOR(A, ARRAY4(W, -13)) + XOR(B, ARRAY4(W, -14)) + XOR(C, ARRAY4(W, -15)) + XOR(D, ARRAY4(W, -16)) + + ROTL_IMM(D, 1) + ROTL_IMM(C, 1) + ROTL_IMM(B, 1) + XOR(A, D) + ROTL_IMM(A, 1) + + ASSIGN(ARRAY4(W, 0), D) + ASSIGN(ARRAY4(W, 1), C) + ASSIGN(ARRAY4(W, 2), B) + ASSIGN(ARRAY4(W, 3), A) + + addq $16, W +LOOP_UNTIL_EQ(LOOP_CTR, 80, .EXPANSION) + + subq $320, W + +#define MAGIC1 0x5A827999 +#define MAGIC2 0x6ED9EBA1 +#define MAGIC3 0x8F1BBCDC +#define MAGIC4 0xCA62C1D6 + +#define T %esi +#define T2 %eax + +#define F1(A, B, C, D, E, F, N) \ + ASSIGN(T2, ARRAY4(W, N)) ; \ + ASSIGN(A, F) ; \ + ROTL_IMM(F, 5) ; \ + ADD(F, E) ; \ + ASSIGN(E, C) ; \ + XOR(E, D) ; \ + ADD3_IMM(F, T2, MAGIC1) ; \ + AND(E, B) ; \ + XOR(E, D) ; \ + ROTR_IMM(B, 2) ; \ + ADD(E, F) ; + +#define F2_4(A, B, C, D, E, F, N, MAGIC) \ + ASSIGN(T2, ARRAY4(W, N)) ; \ + ASSIGN(A, F) ; \ + ROTL_IMM(F, 5) ; \ + ADD(F, E) ; \ + ASSIGN(E, B) ; \ + XOR(E, C) ; \ + ADD3_IMM(F, T2, MAGIC) ; \ + XOR(E, D) ; \ + ROTR_IMM(B, 2) ; \ + ADD(E, F) ; + +#define F3(A, B, C, D, E, F, N) \ + ASSIGN(T2, ARRAY4(W, N)) ; \ + 
ASSIGN(A, F) ; \ + ROTL_IMM(F, 5) ; \ + ADD(F, E) ; \ + ASSIGN(E, B) ; \ + OR(E, C) ; \ + AND(E, D) ; \ + ADD3_IMM(F, T2, MAGIC3) ; \ + ASSIGN(T2, B) ; \ + AND(T2, C) ; \ + OR(E, T2) ; \ + ROTR_IMM(B, 2) ; \ + ADD(E, F) ; + +#define F2(A, B, C, D, E, F, W) \ + F2_4(A, B, C, D, E, F, W, MAGIC2) + +#define F4(A, B, C, D, E, F, W) \ + F2_4(A, B, C, D, E, F, W, MAGIC4) + + ASSIGN(T, ARRAY4(DIGEST_ARR, 0)) + ASSIGN(B, ARRAY4(DIGEST_ARR, 1)) + ASSIGN(C, ARRAY4(DIGEST_ARR, 2)) + ASSIGN(D, ARRAY4(DIGEST_ARR, 3)) + ASSIGN(E, ARRAY4(DIGEST_ARR, 4)) + + /* First Round */ + F1(A, B, C, D, E, T, 0) + F1(T, A, B, C, D, E, 1) + F1(E, T, A, B, C, D, 2) + F1(D, E, T, A, B, C, 3) + F1(C, D, E, T, A, B, 4) + F1(B, C, D, E, T, A, 5) + F1(A, B, C, D, E, T, 6) + F1(T, A, B, C, D, E, 7) + F1(E, T, A, B, C, D, 8) + F1(D, E, T, A, B, C, 9) + F1(C, D, E, T, A, B, 10) + F1(B, C, D, E, T, A, 11) + F1(A, B, C, D, E, T, 12) + F1(T, A, B, C, D, E, 13) + F1(E, T, A, B, C, D, 14) + F1(D, E, T, A, B, C, 15) + F1(C, D, E, T, A, B, 16) + F1(B, C, D, E, T, A, 17) + F1(A, B, C, D, E, T, 18) + F1(T, A, B, C, D, E, 19) + + /* Second Round */ + F2(E, T, A, B, C, D, 20) + F2(D, E, T, A, B, C, 21) + F2(C, D, E, T, A, B, 22) + F2(B, C, D, E, T, A, 23) + F2(A, B, C, D, E, T, 24) + F2(T, A, B, C, D, E, 25) + F2(E, T, A, B, C, D, 26) + F2(D, E, T, A, B, C, 27) + F2(C, D, E, T, A, B, 28) + F2(B, C, D, E, T, A, 29) + F2(A, B, C, D, E, T, 30) + F2(T, A, B, C, D, E, 31) + F2(E, T, A, B, C, D, 32) + F2(D, E, T, A, B, C, 33) + F2(C, D, E, T, A, B, 34) + F2(B, C, D, E, T, A, 35) + F2(A, B, C, D, E, T, 36) + F2(T, A, B, C, D, E, 37) + F2(E, T, A, B, C, D, 38) + F2(D, E, T, A, B, C, 39) + + /* Third Round */ + F3(C, D, E, T, A, B, 40) + F3(B, C, D, E, T, A, 41) + F3(A, B, C, D, E, T, 42) + F3(T, A, B, C, D, E, 43) + F3(E, T, A, B, C, D, 44) + F3(D, E, T, A, B, C, 45) + F3(C, D, E, T, A, B, 46) + F3(B, C, D, E, T, A, 47) + F3(A, B, C, D, E, T, 48) + F3(T, A, B, C, D, E, 49) + F3(E, T, A, B, C, D, 50) + F3(D, E, T, A, B, 
C, 51) + F3(C, D, E, T, A, B, 52) + F3(B, C, D, E, T, A, 53) + F3(A, B, C, D, E, T, 54) + F3(T, A, B, C, D, E, 55) + F3(E, T, A, B, C, D, 56) + F3(D, E, T, A, B, C, 57) + F3(C, D, E, T, A, B, 58) + F3(B, C, D, E, T, A, 59) + + /* Fourth Round */ + F4(A, B, C, D, E, T, 60) + F4(T, A, B, C, D, E, 61) + F4(E, T, A, B, C, D, 62) + F4(D, E, T, A, B, C, 63) + F4(C, D, E, T, A, B, 64) + F4(B, C, D, E, T, A, 65) + F4(A, B, C, D, E, T, 66) + F4(T, A, B, C, D, E, 67) + F4(E, T, A, B, C, D, 68) + F4(D, E, T, A, B, C, 69) + F4(C, D, E, T, A, B, 70) + F4(B, C, D, E, T, A, 71) + F4(A, B, C, D, E, T, 72) + F4(T, A, B, C, D, E, 73) + F4(E, T, A, B, C, D, 74) + F4(D, E, T, A, B, C, 75) + F4(C, D, E, T, A, B, 76) + F4(B, C, D, E, T, A, 77) + F4(A, B, C, D, E, T, 78) + F4(T, A, B, C, D, E, 79) + + ADD(ARRAY4(DIGEST_ARR, 0), D) + ADD(ARRAY4(DIGEST_ARR, 1), T) + ADD(ARRAY4(DIGEST_ARR, 2), A) + ADD(ARRAY4(DIGEST_ARR, 3), B) + ADD(ARRAY4(DIGEST_ARR, 4), C) + +END_FUNCTION(sha160_core) |