aboutsummaryrefslogtreecommitdiffstats
path: root/modules
diff options
context:
space:
mode:
authorlloyd <[email protected]>2007-03-12 02:38:26 +0000
committerlloyd <[email protected]>2007-03-12 02:38:26 +0000
commitbfda1bc3734183d2023f00a8ef4841c4ed31dc8c (patch)
tree9fae39096bbf1929816079bea072f850e35069d5 /modules
parent615e45ccc278ee8109c2badb8f666abc720f0dca (diff)
Check in an initial assembler implementation of SHA-1 for x86-64 systems.
It is not amazingly optimized, about 5% faster than what GCC 4.1.1 does on my Core2 with the normal C++ code, but it's a start.
Diffstat (limited to 'modules')
-rw-r--r--modules/alg_amd64/asm_macr.h21
-rw-r--r--modules/alg_amd64/modinfo.txt5
-rw-r--r--modules/alg_amd64/sha160.cpp52
-rw-r--r--modules/alg_amd64/sha1core.S252
4 files changed, 321 insertions, 9 deletions
diff --git a/modules/alg_amd64/asm_macr.h b/modules/alg_amd64/asm_macr.h
index 0b931daaa..6b9aaba75 100644
--- a/modules/alg_amd64/asm_macr.h
+++ b/modules/alg_amd64/asm_macr.h
@@ -96,6 +96,7 @@ func_name:
* Memory Access Operations *
*************************************************/
#define ARRAY8(REG, NUM) 8*(NUM)(REG)
+#define ARRAY4(REG, NUM) 4*(NUM)(REG)
#define ASSIGN(TO, FROM) mov FROM, TO
@@ -104,19 +105,23 @@ func_name:
*************************************************/
#define IMM(VAL) $VAL
-#define ADD(TO, FROM) addq FROM, TO
-#define ADD_LAST_CARRY(REG) adcq IMM(0), REG
+#define ADD(TO, FROM) add FROM, TO
+#define ADD_LAST_CARRY(REG) adc IMM(0), REG
#define ADD_IMM(TO, NUM) ADD(TO, IMM(NUM))
-#define ADD_W_CARRY(TO1, TO2, FROM) addq FROM, TO1; adcq IMM(0), TO2;
+#define ADD_W_CARRY(TO1, TO2, FROM) add FROM, TO1; adc IMM(0), TO2;
#define SUB_IMM(TO, NUM) sub IMM(NUM), TO
-#define MUL(REG) mulq REG
+#define MUL(REG) mul REG
-#define XOR(TO, FROM) xorq FROM, TO
-#define AND(TO, FROM) andq FROM, TO
-#define OR(TO, FROM) orq FROM, TO
-#define NOT(REG) notq REG
+#define XOR(TO, FROM) xor FROM, TO
+#define AND(TO, FROM) and FROM, TO
+#define OR(TO, FROM) or FROM, TO
+#define NOT(REG) not REG
#define ZEROIZE(REG) XOR(REG, REG)
#define RETURN_VALUE_IS(V) ASSIGN(%rax, V)
+#define ROTL_IMM(REG, NUM) rol IMM(NUM), REG
+#define ROTR_IMM(REG, NUM) ror IMM(NUM), REG
+#define ADD3_IMM(TO, FROM, NUM) lea NUM(TO,FROM,1), TO
+
#endif
diff --git a/modules/alg_amd64/modinfo.txt b/modules/alg_amd64/modinfo.txt
index 04ee56d19..f9023b273 100644
--- a/modules/alg_amd64/modinfo.txt
+++ b/modules/alg_amd64/modinfo.txt
@@ -4,9 +4,12 @@ mp_bits 64
load_on: asm_ok
+<replace>
+sha160.cpp
+</replace>
+
<ignore>
mp_mulop.cpp
-sha160.cpp
</ignore>
<add>
diff --git a/modules/alg_amd64/sha160.cpp b/modules/alg_amd64/sha160.cpp
new file mode 100644
index 000000000..754f8a01c
--- /dev/null
+++ b/modules/alg_amd64/sha160.cpp
@@ -0,0 +1,52 @@
+/*************************************************
+* SHA-160 Source File *
+* (C) 1999-2006 The Botan Project *
+*************************************************/
+
+#include <botan/sha160.h>
+#include <botan/bit_ops.h>
+
+namespace Botan {
+
+extern "C" void sha160_core(u32bit[5], const byte[64], u32bit[80]);
+
+/*************************************************
+* SHA-160 Compression Function *
+*************************************************/
+void SHA_160::hash(const byte input[])
+ {
+ sha160_core(digest, input, W);
+ }
+
+/*************************************************
+* Copy out the digest *
+*************************************************/
+void SHA_160::copy_out(byte output[])
+ {
+ for(u32bit j = 0; j != OUTPUT_LENGTH; ++j)
+ output[j] = get_byte(j % 4, digest[j/4]);
+ }
+
+/*************************************************
+* Clear memory of sensitive data *
+*************************************************/
+void SHA_160::clear() throw()
+ {
+ MDx_HashFunction::clear();
+ W.clear();
+ digest[0] = 0x67452301;
+ digest[1] = 0xEFCDAB89;
+ digest[2] = 0x98BADCFE;
+ digest[3] = 0x10325476;
+ digest[4] = 0xC3D2E1F0;
+ }
+
+/*************************************************
+* SHA_160 Constructor *
+*************************************************/
+SHA_160::SHA_160() : MDx_HashFunction(20, 64, true, true), W(80)
+ {
+ clear();
+ }
+
+}
diff --git a/modules/alg_amd64/sha1core.S b/modules/alg_amd64/sha1core.S
new file mode 100644
index 000000000..7097ac8ec
--- /dev/null
+++ b/modules/alg_amd64/sha1core.S
@@ -0,0 +1,252 @@
+/*************************************************
+* SHA-160 Source File *
+* (C) 1999-2006 The Botan Project *
+*************************************************/
+
+#include <botan/asm_macr.h>
+
+START_LISTING(sha1core.S)
+
+START_FUNCTION(sha160_core)
+
+#define DIGEST_ARR %rdi
+#define INPUT %rsi
+#define W %rdx
+#define LOOP_CTR %eax
+
+#define A %r8d
+#define B %r9d
+#define C %r10d
+#define D %r11d
+#define E %ecx
+
+ ZEROIZE(LOOP_CTR)
+
+START_LOOP(.LOAD_INPUT)
+ addl $8, %eax
+
+ movq ARRAY8(INPUT, 0), %r8
+ movq ARRAY8(INPUT, 1), %r9
+ movq ARRAY8(INPUT, 2), %r10
+ movq ARRAY8(INPUT, 3), %r11
+
+ bswap %r8
+ bswap %r9
+ bswap %r10
+ bswap %r11
+
+ rolq $32, %r8
+ rolq $32, %r9
+ rolq $32, %r10
+ rolq $32, %r11
+
+ movq %r8, ARRAY8(W, 0)
+ movq %r9, ARRAY8(W, 1)
+ movq %r10, ARRAY8(W, 2)
+ movq %r11, ARRAY8(W, 3)
+
+ addq $32, W
+ addq $32, INPUT
+LOOP_UNTIL_EQ(LOOP_CTR, 16, .LOAD_INPUT)
+
+/*
+ #define A %r8d
+#define B %r9d
+#define C %r10d
+#define D %r11d
+#define E %ecx
+*/
+START_LOOP(.EXPANSION)
+ addl $4, LOOP_CTR
+
+ ZEROIZE(A)
+ ASSIGN(B, ARRAY4(W, -1))
+ ASSIGN(C, ARRAY4(W, -2))
+ ASSIGN(D, ARRAY4(W, -3))
+
+ XOR(A, ARRAY4(W, -5))
+ XOR(B, ARRAY4(W, -6))
+ XOR(C, ARRAY4(W, -7))
+ XOR(D, ARRAY4(W, -8))
+
+ XOR(A, ARRAY4(W, -11))
+ XOR(B, ARRAY4(W, -12))
+ XOR(C, ARRAY4(W, -13))
+ XOR(D, ARRAY4(W, -14))
+
+ XOR(A, ARRAY4(W, -13))
+ XOR(B, ARRAY4(W, -14))
+ XOR(C, ARRAY4(W, -15))
+ XOR(D, ARRAY4(W, -16))
+
+ ROTL_IMM(D, 1)
+ ROTL_IMM(C, 1)
+ ROTL_IMM(B, 1)
+ XOR(A, D)
+ ROTL_IMM(A, 1)
+
+ ASSIGN(ARRAY4(W, 0), D)
+ ASSIGN(ARRAY4(W, 1), C)
+ ASSIGN(ARRAY4(W, 2), B)
+ ASSIGN(ARRAY4(W, 3), A)
+
+ addq $16, W
+LOOP_UNTIL_EQ(LOOP_CTR, 80, .EXPANSION)
+
+ subq $320, W
+
+#define MAGIC1 0x5A827999
+#define MAGIC2 0x6ED9EBA1
+#define MAGIC3 0x8F1BBCDC
+#define MAGIC4 0xCA62C1D6
+
+#define T %esi
+#define T2 %eax
+
+#define F1(A, B, C, D, E, F, N) \
+ ASSIGN(T2, ARRAY4(W, N)) ; \
+ ASSIGN(A, F) ; \
+ ROTL_IMM(F, 5) ; \
+ ADD(F, E) ; \
+ ASSIGN(E, C) ; \
+ XOR(E, D) ; \
+ ADD3_IMM(F, T2, MAGIC1) ; \
+ AND(E, B) ; \
+ XOR(E, D) ; \
+ ROTR_IMM(B, 2) ; \
+ ADD(E, F) ;
+
+#define F2_4(A, B, C, D, E, F, N, MAGIC) \
+ ASSIGN(T2, ARRAY4(W, N)) ; \
+ ASSIGN(A, F) ; \
+ ROTL_IMM(F, 5) ; \
+ ADD(F, E) ; \
+ ASSIGN(E, B) ; \
+ XOR(E, C) ; \
+ ADD3_IMM(F, T2, MAGIC) ; \
+ XOR(E, D) ; \
+ ROTR_IMM(B, 2) ; \
+ ADD(E, F) ;
+
+#define F3(A, B, C, D, E, F, N) \
+ ASSIGN(T2, ARRAY4(W, N)) ; \
+ ASSIGN(A, F) ; \
+ ROTL_IMM(F, 5) ; \
+ ADD(F, E) ; \
+ ASSIGN(E, B) ; \
+ OR(E, C) ; \
+ AND(E, D) ; \
+ ADD3_IMM(F, T2, MAGIC3) ; \
+ ASSIGN(T2, B) ; \
+ AND(T2, C) ; \
+ OR(E, T2) ; \
+ ROTR_IMM(B, 2) ; \
+ ADD(E, F) ;
+
+#define F2(A, B, C, D, E, F, W) \
+ F2_4(A, B, C, D, E, F, W, MAGIC2)
+
+#define F4(A, B, C, D, E, F, W) \
+ F2_4(A, B, C, D, E, F, W, MAGIC4)
+
+ ASSIGN(T, ARRAY4(DIGEST_ARR, 0))
+ ASSIGN(B, ARRAY4(DIGEST_ARR, 1))
+ ASSIGN(C, ARRAY4(DIGEST_ARR, 2))
+ ASSIGN(D, ARRAY4(DIGEST_ARR, 3))
+ ASSIGN(E, ARRAY4(DIGEST_ARR, 4))
+
+ /* First Round */
+ F1(A, B, C, D, E, T, 0)
+ F1(T, A, B, C, D, E, 1)
+ F1(E, T, A, B, C, D, 2)
+ F1(D, E, T, A, B, C, 3)
+ F1(C, D, E, T, A, B, 4)
+ F1(B, C, D, E, T, A, 5)
+ F1(A, B, C, D, E, T, 6)
+ F1(T, A, B, C, D, E, 7)
+ F1(E, T, A, B, C, D, 8)
+ F1(D, E, T, A, B, C, 9)
+ F1(C, D, E, T, A, B, 10)
+ F1(B, C, D, E, T, A, 11)
+ F1(A, B, C, D, E, T, 12)
+ F1(T, A, B, C, D, E, 13)
+ F1(E, T, A, B, C, D, 14)
+ F1(D, E, T, A, B, C, 15)
+ F1(C, D, E, T, A, B, 16)
+ F1(B, C, D, E, T, A, 17)
+ F1(A, B, C, D, E, T, 18)
+ F1(T, A, B, C, D, E, 19)
+
+ /* Second Round */
+ F2(E, T, A, B, C, D, 20)
+ F2(D, E, T, A, B, C, 21)
+ F2(C, D, E, T, A, B, 22)
+ F2(B, C, D, E, T, A, 23)
+ F2(A, B, C, D, E, T, 24)
+ F2(T, A, B, C, D, E, 25)
+ F2(E, T, A, B, C, D, 26)
+ F2(D, E, T, A, B, C, 27)
+ F2(C, D, E, T, A, B, 28)
+ F2(B, C, D, E, T, A, 29)
+ F2(A, B, C, D, E, T, 30)
+ F2(T, A, B, C, D, E, 31)
+ F2(E, T, A, B, C, D, 32)
+ F2(D, E, T, A, B, C, 33)
+ F2(C, D, E, T, A, B, 34)
+ F2(B, C, D, E, T, A, 35)
+ F2(A, B, C, D, E, T, 36)
+ F2(T, A, B, C, D, E, 37)
+ F2(E, T, A, B, C, D, 38)
+ F2(D, E, T, A, B, C, 39)
+
+ /* Third Round */
+ F3(C, D, E, T, A, B, 40)
+ F3(B, C, D, E, T, A, 41)
+ F3(A, B, C, D, E, T, 42)
+ F3(T, A, B, C, D, E, 43)
+ F3(E, T, A, B, C, D, 44)
+ F3(D, E, T, A, B, C, 45)
+ F3(C, D, E, T, A, B, 46)
+ F3(B, C, D, E, T, A, 47)
+ F3(A, B, C, D, E, T, 48)
+ F3(T, A, B, C, D, E, 49)
+ F3(E, T, A, B, C, D, 50)
+ F3(D, E, T, A, B, C, 51)
+ F3(C, D, E, T, A, B, 52)
+ F3(B, C, D, E, T, A, 53)
+ F3(A, B, C, D, E, T, 54)
+ F3(T, A, B, C, D, E, 55)
+ F3(E, T, A, B, C, D, 56)
+ F3(D, E, T, A, B, C, 57)
+ F3(C, D, E, T, A, B, 58)
+ F3(B, C, D, E, T, A, 59)
+
+ /* Fourth Round */
+ F4(A, B, C, D, E, T, 60)
+ F4(T, A, B, C, D, E, 61)
+ F4(E, T, A, B, C, D, 62)
+ F4(D, E, T, A, B, C, 63)
+ F4(C, D, E, T, A, B, 64)
+ F4(B, C, D, E, T, A, 65)
+ F4(A, B, C, D, E, T, 66)
+ F4(T, A, B, C, D, E, 67)
+ F4(E, T, A, B, C, D, 68)
+ F4(D, E, T, A, B, C, 69)
+ F4(C, D, E, T, A, B, 70)
+ F4(B, C, D, E, T, A, 71)
+ F4(A, B, C, D, E, T, 72)
+ F4(T, A, B, C, D, E, 73)
+ F4(E, T, A, B, C, D, 74)
+ F4(D, E, T, A, B, C, 75)
+ F4(C, D, E, T, A, B, 76)
+ F4(B, C, D, E, T, A, 77)
+ F4(A, B, C, D, E, T, 78)
+ F4(T, A, B, C, D, E, 79)
+
+ ADD(ARRAY4(DIGEST_ARR, 0), D)
+ ADD(ARRAY4(DIGEST_ARR, 1), T)
+ ADD(ARRAY4(DIGEST_ARR, 2), A)
+ ADD(ARRAY4(DIGEST_ARR, 3), B)
+ ADD(ARRAY4(DIGEST_ARR, 4), C)
+
+END_FUNCTION(sha160_core)