diff options
Diffstat (limited to 'modules/asm_ia32')
-rw-r--r-- | modules/asm_ia32/asm_macr.h | 131 | ||||
-rw-r--r-- | modules/asm_ia32/md4.cpp | 43 | ||||
-rw-r--r-- | modules/asm_ia32/md4core.S | 135 | ||||
-rw-r--r-- | modules/asm_ia32/md5.cpp | 43 | ||||
-rw-r--r-- | modules/asm_ia32/md5core.S | 164 | ||||
-rw-r--r-- | modules/asm_ia32/modinfo.txt | 43 | ||||
-rw-r--r-- | modules/asm_ia32/mp_mulop.S | 62 | ||||
-rw-r--r-- | modules/asm_ia32/serp_asm.S | 667 | ||||
-rw-r--r-- | modules/asm_ia32/serpent.cpp | 49 | ||||
-rw-r--r-- | modules/asm_ia32/sha160.cpp | 52 | ||||
-rw-r--r-- | modules/asm_ia32/sha1_asm.S | 242 |
11 files changed, 1631 insertions, 0 deletions
diff --git a/modules/asm_ia32/asm_macr.h b/modules/asm_ia32/asm_macr.h new file mode 100644 index 000000000..392b05d5b --- /dev/null +++ b/modules/asm_ia32/asm_macr.h @@ -0,0 +1,131 @@ +/************************************************* +* Assembly Macros Header File * +* (C) 1999-2008 Jack Lloyd * +*************************************************/ + +#ifndef BOTAN_EXT_IA32_ASM_MACROS_H__ +#define BOTAN_EXT_IA32_ASM_MACROS_H__ + +#ifdef __ELF__ +.section .note.GNU-stack,"",%progbits +#endif + +/************************************************* +* General/Global Macros * +*************************************************/ +#define ALIGN .p2align 4,,15 + +#define START_LISTING(FILENAME) \ + .file #FILENAME; \ + .text; \ + .p2align 4,,15; + +#ifdef __ELF__ +.section .note.GNU-stack,"",%progbits +#endif + +/************************************************* +* Function Definitions * +*************************************************/ +#define START_FUNCTION(func_name) \ + .align 8; \ + ALIGN; \ + .global func_name; \ + .type func_name,@function; \ +func_name: + +#define END_FUNCTION(func_name) \ + ret + +/************************************************* +* Loop Control * +*************************************************/ +#define START_LOOP(LABEL) \ + ALIGN; \ + LABEL##_LOOP: + +#define LOOP_UNTIL_EQ(REG, NUM, LABEL) \ + cmpl IMM(NUM), REG; \ + jne LABEL##_LOOP + +#define LOOP_UNTIL_LT(REG, NUM, LABEL) \ + cmpl IMM(NUM), REG; \ + jge LABEL##_LOOP + +/************************************************* + Conditional Jumps * +*************************************************/ +#define JUMP_IF_ZERO(REG, LABEL) \ + cmpl IMM(0), REG; \ + jz LABEL + +#define JUMP_IF_LT(REG, NUM, LABEL) \ + cmpl IMM(NUM), REG; \ + jl LABEL + +/************************************************* +* Register Names * +*************************************************/ +#define EAX %eax +#define EBX %ebx +#define ECX %ecx +#define EDX %edx +#define EBP %ebp +#define EDI %edi +#define ESI %esi 
+#define ESP %esp + +/************************************************* +* Memory Access Operations * +*************************************************/ +#define ARRAY1(REG, NUM) (NUM)(REG) +#define ARRAY4(REG, NUM) 4*(NUM)(REG) +#define ARRAY4_INDIRECT(BASE, OFFSET, NUM) 4*(NUM)(BASE,OFFSET,4) +#define ARG(NUM) 4*(PUSHED) + ARRAY4(ESP, NUM) + +#define ASSIGN(TO, FROM) movl FROM, TO +#define ASSIGN_BYTE(TO, FROM) movzbl FROM, TO + +#define PUSH(REG) pushl REG +#define POP(REG) popl REG + +#define SPILL_REGS() \ + PUSH(EBP) ; \ + PUSH(EDI) ; \ + PUSH(ESI) ; \ + PUSH(EBX) + +#define RESTORE_REGS() \ + POP(EBX) ; \ + POP(ESI) ; \ + POP(EDI) ; \ + POP(EBP) + +/************************************************* +* ALU Operations * +*************************************************/ +#define IMM(VAL) $VAL + +#define ADD(TO, FROM) addl FROM, TO +#define ADD_IMM(TO, NUM) ADD(TO, IMM(NUM)) +#define ADD_W_CARRY(TO1, TO2, FROM) addl FROM, TO1; adcl IMM(0), TO2; +#define SUB_IMM(TO, NUM) subl IMM(NUM), TO +#define ADD2_IMM(TO, FROM, NUM) leal NUM(FROM), TO +#define ADD3_IMM(TO, FROM, NUM) leal NUM(TO,FROM,1), TO +#define MUL(REG) mull REG + +#define SHL_IMM(REG, SHIFT) shll IMM(SHIFT), REG +#define SHR_IMM(REG, SHIFT) shrl IMM(SHIFT), REG +#define SHL2_3(TO, FROM) leal 0(,FROM,8), TO + +#define XOR(TO, FROM) xorl FROM, TO +#define AND(TO, FROM) andl FROM, TO +#define OR(TO, FROM) orl FROM, TO +#define NOT(REG) notl REG +#define ZEROIZE(REG) XOR(REG, REG) + +#define ROTL_IMM(REG, NUM) roll IMM(NUM), REG +#define ROTR_IMM(REG, NUM) rorl IMM(NUM), REG +#define BSWAP(REG) bswapl REG + +#endif diff --git a/modules/asm_ia32/md4.cpp b/modules/asm_ia32/md4.cpp new file mode 100644 index 000000000..e3dc79012 --- /dev/null +++ b/modules/asm_ia32/md4.cpp @@ -0,0 +1,43 @@ +/************************************************* +* MD4 Source File * +* (C) 1999-2007 Jack Lloyd * +*************************************************/ + +#include <botan/md4.h> +#include <botan/loadstor.h> + 
+namespace Botan { + +extern "C" void md4_core(u32bit[4], const byte[64], u32bit[16]); + +/************************************************* +* MD4 Compression Function * +*************************************************/ +void MD4::hash(const byte input[]) + { + md4_core(digest, input, M); + } + +/************************************************* +* Copy out the digest * +*************************************************/ +void MD4::copy_out(byte output[]) + { + for(u32bit j = 0; j != OUTPUT_LENGTH; ++j) + output[j] = get_byte(3 - (j % 4), digest[j/4]); + } + +/************************************************* +* Clear memory of sensitive data * +*************************************************/ +void MD4::clear() throw() + { + MDx_HashFunction::clear(); + M.clear(); + digest[0] = 0x67452301; + digest[1] = 0xEFCDAB89; + digest[2] = 0x98BADCFE; + digest[3] = 0x10325476; + } + +} diff --git a/modules/asm_ia32/md4core.S b/modules/asm_ia32/md4core.S new file mode 100644 index 000000000..662e9924a --- /dev/null +++ b/modules/asm_ia32/md4core.S @@ -0,0 +1,135 @@ +/************************************************* +* MD4 Source File * +* (C) 1999-2007 Jack Lloyd * +*************************************************/ + +#include <botan/asm_macr.h> + +START_LISTING(md4core.S) + +START_FUNCTION(md4_core) + SPILL_REGS() + +#define PUSHED 4 + + ASSIGN(EBP, ARG(2)) /* input block */ + ASSIGN(EDI, ARG(3)) /* expanded words */ + + ZEROIZE(ESI) + +START_LOOP(.LOAD_INPUT) + ADD_IMM(ESI, 4) + + ASSIGN(EAX, ARRAY4(EBP, 0)) + ASSIGN(EBX, ARRAY4(EBP, 1)) + ASSIGN(ECX, ARRAY4(EBP, 2)) + ASSIGN(EDX, ARRAY4(EBP, 3)) + + ADD_IMM(EBP, 16) + + ASSIGN(ARRAY4_INDIRECT(EDI,ESI,-4), EAX) + ASSIGN(ARRAY4_INDIRECT(EDI,ESI,-3), EBX) + ASSIGN(ARRAY4_INDIRECT(EDI,ESI,-2), ECX) + ASSIGN(ARRAY4_INDIRECT(EDI,ESI,-1), EDX) +LOOP_UNTIL_EQ(ESI, 16, .LOAD_INPUT) + + ASSIGN(EBP, ARG(1)) + ASSIGN(EAX, ARRAY4(EBP, 0)) + ASSIGN(EBX, ARRAY4(EBP, 1)) + ASSIGN(ECX, ARRAY4(EBP, 2)) + ASSIGN(EDX, ARRAY4(EBP, 3)) + 
+#define MSG EDI +#define T1 ESI +#define T2 EBP + +#define FF(A, B, C, D, N, S) \ + ASSIGN(T1, ARRAY4(MSG, N)) ; \ + ASSIGN(T2, C) ; \ + XOR(T2, D) ; \ + AND(T2, B) ; \ + XOR(T2, D) ; \ + ADD(A, T1) ; \ + ADD(A, T2) ; \ + ROTL_IMM(A, S) ; + +#define GG(A, B, C, D, N, S) \ + ASSIGN(T1, ARRAY4(MSG, N)) ; \ + ASSIGN(T2, B) ; \ + OR(T2, C) ; \ + AND(T2, D) ; \ + ADD3_IMM(A, T1, 0x5A827999) ; \ + ASSIGN(T1, B) ; \ + AND(T1, C) ; \ + OR(T2, T1) ; \ + ADD(A, T2) ; \ + ROTL_IMM(A, S) ; + +#define HH(A, B, C, D, N, S) \ + ASSIGN(T1, ARRAY4(MSG, N)) ; \ + ASSIGN(T2, B) ; \ + XOR(T2, C) ; \ + XOR(T2, D) ; \ + ADD3_IMM(A, T1, 0x6ED9EBA1) ; \ + ADD(A, T2) ; \ + ROTL_IMM(A, S) ; + + FF(EAX,EBX,ECX,EDX, 0, 3); + FF(EDX,EAX,EBX,ECX, 1, 7); + FF(ECX,EDX,EAX,EBX, 2,11); + FF(EBX,ECX,EDX,EAX, 3,19); + FF(EAX,EBX,ECX,EDX, 4, 3); + FF(EDX,EAX,EBX,ECX, 5, 7); + FF(ECX,EDX,EAX,EBX, 6,11); + FF(EBX,ECX,EDX,EAX, 7,19); + FF(EAX,EBX,ECX,EDX, 8, 3); + FF(EDX,EAX,EBX,ECX, 9, 7); + FF(ECX,EDX,EAX,EBX,10,11); + FF(EBX,ECX,EDX,EAX,11,19); + FF(EAX,EBX,ECX,EDX,12, 3); + FF(EDX,EAX,EBX,ECX,13, 7); + FF(ECX,EDX,EAX,EBX,14,11); + FF(EBX,ECX,EDX,EAX,15,19); + + GG(EAX,EBX,ECX,EDX, 0, 3); + GG(EDX,EAX,EBX,ECX, 4, 5); + GG(ECX,EDX,EAX,EBX, 8, 9); + GG(EBX,ECX,EDX,EAX,12,13); + GG(EAX,EBX,ECX,EDX, 1, 3); + GG(EDX,EAX,EBX,ECX, 5, 5); + GG(ECX,EDX,EAX,EBX, 9, 9); + GG(EBX,ECX,EDX,EAX,13,13); + GG(EAX,EBX,ECX,EDX, 2, 3); + GG(EDX,EAX,EBX,ECX, 6, 5); + GG(ECX,EDX,EAX,EBX,10, 9); + GG(EBX,ECX,EDX,EAX,14,13); + GG(EAX,EBX,ECX,EDX, 3, 3); + GG(EDX,EAX,EBX,ECX, 7, 5); + GG(ECX,EDX,EAX,EBX,11, 9); + GG(EBX,ECX,EDX,EAX,15,13); + + HH(EAX,EBX,ECX,EDX, 0, 3); + HH(EDX,EAX,EBX,ECX, 8, 9); + HH(ECX,EDX,EAX,EBX, 4,11); + HH(EBX,ECX,EDX,EAX,12,15); + HH(EAX,EBX,ECX,EDX, 2, 3); + HH(EDX,EAX,EBX,ECX,10, 9); + HH(ECX,EDX,EAX,EBX, 6,11); + HH(EBX,ECX,EDX,EAX,14,15); + HH(EAX,EBX,ECX,EDX, 1, 3); + HH(EDX,EAX,EBX,ECX, 9, 9); + HH(ECX,EDX,EAX,EBX, 5,11); + HH(EBX,ECX,EDX,EAX,13,15); + HH(EAX,EBX,ECX,EDX, 3, 3); + 
HH(EDX,EAX,EBX,ECX,11, 9); + HH(ECX,EDX,EAX,EBX, 7,11); + HH(EBX,ECX,EDX,EAX,15,15); + + ASSIGN(EBP, ARG(1)) + ADD(ARRAY4(EBP, 0), EAX) + ADD(ARRAY4(EBP, 1), EBX) + ADD(ARRAY4(EBP, 2), ECX) + ADD(ARRAY4(EBP, 3), EDX) + + RESTORE_REGS() +END_FUNCTION(md4_core) diff --git a/modules/asm_ia32/md5.cpp b/modules/asm_ia32/md5.cpp new file mode 100644 index 000000000..cfe48e7e9 --- /dev/null +++ b/modules/asm_ia32/md5.cpp @@ -0,0 +1,43 @@ +/************************************************* +* MD5 Source File * +* (C) 1999-2007 Jack Lloyd * +*************************************************/ + +#include <botan/md5.h> +#include <botan/loadstor.h> + +namespace Botan { + +extern "C" void md5_core(u32bit[4], const byte[64], u32bit[16]); + +/************************************************* +* MD5 Compression Function * +*************************************************/ +void MD5::hash(const byte input[]) + { + md5_core(digest, input, M); + } + +/************************************************* +* Copy out the digest * +*************************************************/ +void MD5::copy_out(byte output[]) + { + for(u32bit j = 0; j != OUTPUT_LENGTH; ++j) + output[j] = get_byte(3 - (j % 4), digest[j/4]); + } + +/************************************************* +* Clear memory of sensitive data * +*************************************************/ +void MD5::clear() throw() + { + MDx_HashFunction::clear(); + M.clear(); + digest[0] = 0x67452301; + digest[1] = 0xEFCDAB89; + digest[2] = 0x98BADCFE; + digest[3] = 0x10325476; + } + +} diff --git a/modules/asm_ia32/md5core.S b/modules/asm_ia32/md5core.S new file mode 100644 index 000000000..8ebe469f3 --- /dev/null +++ b/modules/asm_ia32/md5core.S @@ -0,0 +1,164 @@ +/************************************************* +* MD5 Source File * +* (C) 1999-2007 Jack Lloyd * +*************************************************/ + +#include <botan/asm_macr.h> + +START_LISTING(md5core.S) + +START_FUNCTION(md5_core) + SPILL_REGS() + +#define PUSHED 4 
+ + ASSIGN(EBP, ARG(2)) /* input block */ + ASSIGN(EDI, ARG(3)) /* expanded words */ + + ZEROIZE(ESI) + +START_LOOP(.LOAD_INPUT) + ADD_IMM(ESI, 4) + + ASSIGN(EAX, ARRAY4(EBP, 0)) + ASSIGN(EBX, ARRAY4(EBP, 1)) + ASSIGN(ECX, ARRAY4(EBP, 2)) + ASSIGN(EDX, ARRAY4(EBP, 3)) + + ADD_IMM(EBP, 16) + + ASSIGN(ARRAY4_INDIRECT(EDI,ESI,-4), EAX) + ASSIGN(ARRAY4_INDIRECT(EDI,ESI,-3), EBX) + ASSIGN(ARRAY4_INDIRECT(EDI,ESI,-2), ECX) + ASSIGN(ARRAY4_INDIRECT(EDI,ESI,-1), EDX) +LOOP_UNTIL_EQ(ESI, 16, .LOAD_INPUT) + + ASSIGN(EBP, ARG(1)) + ASSIGN(EAX, ARRAY4(EBP, 0)) + ASSIGN(EBX, ARRAY4(EBP, 1)) + ASSIGN(ECX, ARRAY4(EBP, 2)) + ASSIGN(EDX, ARRAY4(EBP, 3)) + +#define MSG EDI +#define T1 ESI +#define T2 EBP + +#define FF(A, B, C, D, N, S, MAGIC) \ + ASSIGN(T1, ARRAY4(MSG, N)) ; \ + ASSIGN(T2, C) ; \ + XOR(T2, D) ; \ + AND(T2, B) ; \ + XOR(T2, D) ; \ + ADD3_IMM(A, T1, MAGIC) ; \ + ADD(A, T2) ; \ + ROTL_IMM(A, S) ; \ + ADD(A, B) ; + +#define GG(A, B, C, D, N, S, MAGIC) \ + ASSIGN(T1, ARRAY4(MSG, N)) ; \ + ASSIGN(T2, B) ; \ + XOR(T2, C) ; \ + AND(T2, D) ; \ + XOR(T2, C) ; \ + ADD3_IMM(A, T1, MAGIC) ; \ + ADD(A, T2) ; \ + ROTL_IMM(A, S) ; \ + ADD(A, B) ; + +#define HH(A, B, C, D, N, S, MAGIC) \ + ASSIGN(T1, ARRAY4(MSG, N)) ; \ + ASSIGN(T2, B) ; \ + XOR(T2, C) ; \ + XOR(T2, D) ; \ + ADD3_IMM(A, T1, MAGIC) ; \ + ADD(A, T2) ; \ + ROTL_IMM(A, S) ; \ + ADD(A, B) ; + +#define II(A, B, C, D, N, S, MAGIC) \ + ASSIGN(T1, ARRAY4(MSG, N)) ; \ + ASSIGN(T2, D) ; \ + NOT(T2) ; \ + OR(T2, B) ; \ + XOR(T2, C) ; \ + ADD3_IMM(A, T1, MAGIC) ; \ + ADD(A, T2) ; \ + ROTL_IMM(A, S) ; \ + ADD(A, B) ; + + FF(EAX,EBX,ECX,EDX, 0, 7,0xD76AA478); + FF(EDX,EAX,EBX,ECX, 1,12,0xE8C7B756); + FF(ECX,EDX,EAX,EBX, 2,17,0x242070DB); + FF(EBX,ECX,EDX,EAX, 3,22,0xC1BDCEEE); + FF(EAX,EBX,ECX,EDX, 4, 7,0xF57C0FAF); + FF(EDX,EAX,EBX,ECX, 5,12,0x4787C62A); + FF(ECX,EDX,EAX,EBX, 6,17,0xA8304613); + FF(EBX,ECX,EDX,EAX, 7,22,0xFD469501); + FF(EAX,EBX,ECX,EDX, 8, 7,0x698098D8); + FF(EDX,EAX,EBX,ECX, 9,12,0x8B44F7AF); + 
FF(ECX,EDX,EAX,EBX,10,17,0xFFFF5BB1); + FF(EBX,ECX,EDX,EAX,11,22,0x895CD7BE); + FF(EAX,EBX,ECX,EDX,12, 7,0x6B901122); + FF(EDX,EAX,EBX,ECX,13,12,0xFD987193); + FF(ECX,EDX,EAX,EBX,14,17,0xA679438E); + FF(EBX,ECX,EDX,EAX,15,22,0x49B40821); + + GG(EAX,EBX,ECX,EDX, 1, 5,0xF61E2562); + GG(EDX,EAX,EBX,ECX, 6, 9,0xC040B340); + GG(ECX,EDX,EAX,EBX,11,14,0x265E5A51); + GG(EBX,ECX,EDX,EAX, 0,20,0xE9B6C7AA); + GG(EAX,EBX,ECX,EDX, 5, 5,0xD62F105D); + GG(EDX,EAX,EBX,ECX,10, 9,0x02441453); + GG(ECX,EDX,EAX,EBX,15,14,0xD8A1E681); + GG(EBX,ECX,EDX,EAX, 4,20,0xE7D3FBC8); + GG(EAX,EBX,ECX,EDX, 9, 5,0x21E1CDE6); + GG(EDX,EAX,EBX,ECX,14, 9,0xC33707D6); + GG(ECX,EDX,EAX,EBX, 3,14,0xF4D50D87); + GG(EBX,ECX,EDX,EAX, 8,20,0x455A14ED); + GG(EAX,EBX,ECX,EDX,13, 5,0xA9E3E905); + GG(EDX,EAX,EBX,ECX, 2, 9,0xFCEFA3F8); + GG(ECX,EDX,EAX,EBX, 7,14,0x676F02D9); + GG(EBX,ECX,EDX,EAX,12,20,0x8D2A4C8A); + + HH(EAX,EBX,ECX,EDX, 5, 4,0xFFFA3942); + HH(EDX,EAX,EBX,ECX, 8,11,0x8771F681); + HH(ECX,EDX,EAX,EBX,11,16,0x6D9D6122); + HH(EBX,ECX,EDX,EAX,14,23,0xFDE5380C); + HH(EAX,EBX,ECX,EDX, 1, 4,0xA4BEEA44); + HH(EDX,EAX,EBX,ECX, 4,11,0x4BDECFA9); + HH(ECX,EDX,EAX,EBX, 7,16,0xF6BB4B60); + HH(EBX,ECX,EDX,EAX,10,23,0xBEBFBC70); + HH(EAX,EBX,ECX,EDX,13, 4,0x289B7EC6); + HH(EDX,EAX,EBX,ECX, 0,11,0xEAA127FA); + HH(ECX,EDX,EAX,EBX, 3,16,0xD4EF3085); + HH(EBX,ECX,EDX,EAX, 6,23,0x04881D05); + HH(EAX,EBX,ECX,EDX, 9, 4,0xD9D4D039); + HH(EDX,EAX,EBX,ECX,12,11,0xE6DB99E5); + HH(ECX,EDX,EAX,EBX,15,16,0x1FA27CF8); + HH(EBX,ECX,EDX,EAX, 2,23,0xC4AC5665); + + II(EAX,EBX,ECX,EDX, 0, 6,0xF4292244); + II(EDX,EAX,EBX,ECX, 7,10,0x432AFF97); + II(ECX,EDX,EAX,EBX,14,15,0xAB9423A7); + II(EBX,ECX,EDX,EAX, 5,21,0xFC93A039); + II(EAX,EBX,ECX,EDX,12, 6,0x655B59C3); + II(EDX,EAX,EBX,ECX, 3,10,0x8F0CCC92); + II(ECX,EDX,EAX,EBX,10,15,0xFFEFF47D); + II(EBX,ECX,EDX,EAX, 1,21,0x85845DD1); + II(EAX,EBX,ECX,EDX, 8, 6,0x6FA87E4F); + II(EDX,EAX,EBX,ECX,15,10,0xFE2CE6E0); + II(ECX,EDX,EAX,EBX, 6,15,0xA3014314); + 
II(EBX,ECX,EDX,EAX,13,21,0x4E0811A1); + II(EAX,EBX,ECX,EDX, 4, 6,0xF7537E82); + II(EDX,EAX,EBX,ECX,11,10,0xBD3AF235); + II(ECX,EDX,EAX,EBX, 2,15,0x2AD7D2BB); + II(EBX,ECX,EDX,EAX, 9,21,0xEB86D391); + + ASSIGN(EBP, ARG(1)) + ADD(ARRAY4(EBP, 0), EAX) + ADD(ARRAY4(EBP, 1), EBX) + ADD(ARRAY4(EBP, 2), ECX) + ADD(ARRAY4(EBP, 3), EDX) + + RESTORE_REGS() +END_FUNCTION(md5_core) diff --git a/modules/asm_ia32/modinfo.txt b/modules/asm_ia32/modinfo.txt new file mode 100644 index 000000000..cca46635b --- /dev/null +++ b/modules/asm_ia32/modinfo.txt @@ -0,0 +1,43 @@ +realname "Algorithm x86 Assembler" + +mp_bits 32 + +load_on asm_ok + +<replace> +md4.cpp +md5.cpp +sha160.cpp +serpent.cpp +</replace> + +<ignore> +mp_mulop.cpp +</ignore> + +<add> +asm_macr.h +md4core.S +md5core.S +sha1_asm.S +serp_asm.S +mp_mulop.S +</add> + +<arch> +ia32 +</arch> + +<cc> +gcc +icc +</cc> + +# ELF systems +<os> +linux +freebsd +netbsd +openbsd +solaris +</os> diff --git a/modules/asm_ia32/mp_mulop.S b/modules/asm_ia32/mp_mulop.S new file mode 100644 index 000000000..a5f0d3b27 --- /dev/null +++ b/modules/asm_ia32/mp_mulop.S @@ -0,0 +1,62 @@ +/************************************************* +* Multiply/Add Algorithm Source File * +* (C) 1999-2007 Jack Lloyd * +*************************************************/ + +#include <botan/asm_macr.h> + +START_LISTING(mp_muladd.S) + +START_FUNCTION(bigint_mul_add_words) + SPILL_REGS() +#define PUSHED 4 + +#define LOOP_CTR ESI + ASSIGN(LOOP_CTR, ARG(3)) /* x_size */ + ZEROIZE(EDI) + + ASSIGN(ECX, ARG(1)) /* z[] */ + ASSIGN(EBX, ARG(2)) /* x[] */ + ASSIGN(EBP, ARG(4)) /* y */ + +#define MULADD_OP(N) \ + ASSIGN(EAX, ARRAY4(EBX, N)) ; \ + MUL(EBP) ; \ + ADD_W_CARRY(EAX, EDX, EDI) ; \ + ASSIGN(EDI, EDX) ; \ + ADD_W_CARRY(ARRAY4(ECX, N), EDI, EAX) ; + + JUMP_IF_ZERO(LOOP_CTR, .MUL_ADD_DONE) + JUMP_IF_LT(LOOP_CTR, 8, .MULADD1_LOOP) + +START_LOOP(.MULADD8) + MULADD_OP(0) + MULADD_OP(1) + MULADD_OP(2) + MULADD_OP(3) + MULADD_OP(4) + MULADD_OP(5) + MULADD_OP(6) + 
MULADD_OP(7) + + SUB_IMM(LOOP_CTR, 8) + ADD_IMM(EBX, 32) + ADD_IMM(ECX, 32) +LOOP_UNTIL_LT(LOOP_CTR, 8, .MULADD8) + + JUMP_IF_ZERO(LOOP_CTR, .MUL_ADD_DONE) + +START_LOOP(.MULADD1) + MULADD_OP(0) + + SUB_IMM(LOOP_CTR, 1) + ADD_IMM(EBX, 4) + ADD_IMM(ECX, 4) +LOOP_UNTIL_EQ(LOOP_CTR, 0, .MULADD1) + +.MUL_ADD_DONE: + + ASSIGN(EAX, EDI) +#undef PUSHED + RESTORE_REGS() +END_FUNCTION(bigint_mul_add_words) diff --git a/modules/asm_ia32/serp_asm.S b/modules/asm_ia32/serp_asm.S new file mode 100644 index 000000000..c8915382d --- /dev/null +++ b/modules/asm_ia32/serp_asm.S @@ -0,0 +1,667 @@ +/************************************************* +* Serpent Source File * +* (C) 1999-2007 Jack Lloyd * +*************************************************/ + +#include <botan/asm_macr.h> + +START_LISTING(serp_asm.S) + +#define SBOX_E1(A, B, C, D, T) \ + XOR(D, A) ; \ + ASSIGN(T, B) ; \ + AND(B, D) ; \ + XOR(T, C) ; \ + XOR(B, A) ; \ + OR(A, D) ; \ + XOR(A, T) ; \ + XOR(T, D) ; \ + XOR(D, C) ; \ + OR(C, B) ; \ + XOR(C, T) ; \ + NOT(T) ; \ + OR(T, B) ; \ + XOR(B, D) ; \ + XOR(B, T) ; \ + OR(D, A) ; \ + XOR(B, D) ; \ + XOR(T, D) ; \ + ASSIGN(D, A) ; \ + ASSIGN(A, B) ; \ + ASSIGN(B, T) ; + +#define SBOX_E2(A, B, C, D, T) \ + NOT(A) ; \ + NOT(C) ; \ + ASSIGN(T, A) ; \ + AND(A, B) ; \ + XOR(C, A) ; \ + OR(A, D) ; \ + XOR(D, C) ; \ + XOR(B, A) ; \ + XOR(A, T) ; \ + OR(T, B) ; \ + XOR(B, D) ; \ + OR(C, A) ; \ + AND(C, T) ; \ + XOR(A, B) ; \ + AND(B, C) ; \ + XOR(B, A) ; \ + AND(A, C) ; \ + XOR(T, A) ; \ + ASSIGN(A, C) ; \ + ASSIGN(C, D) ; \ + ASSIGN(D, B) ; \ + ASSIGN(B, T) ; + +#define SBOX_E3(A, B, C, D, T) \ + ASSIGN(T, A) ; \ + AND(A, C) ; \ + XOR(A, D) ; \ + XOR(C, B) ; \ + XOR(C, A) ; \ + OR(D, T) ; \ + XOR(D, B) ; \ + XOR(T, C) ; \ + ASSIGN(B, D) ; \ + OR(D, T) ; \ + XOR(D, A) ; \ + AND(A, B) ; \ + XOR(T, A) ; \ + XOR(B, D) ; \ + XOR(B, T) ; \ + NOT(T) ; \ + ASSIGN(A, C) ; \ + ASSIGN(C, B) ; \ + ASSIGN(B, D) ; \ + ASSIGN(D, T) ; + +#define SBOX_E4(A, B, C, D, T) \ + ASSIGN(T, A) ; \ + 
OR(A, D) ; \ + XOR(D, B) ; \ + AND(B, T) ; \ + XOR(T, C) ; \ + XOR(C, D) ; \ + AND(D, A) ; \ + OR(T, B) ; \ + XOR(D, T) ; \ + XOR(A, B) ; \ + AND(T, A) ; \ + XOR(B, D) ; \ + XOR(T, C) ; \ + OR(B, A) ; \ + XOR(B, C) ; \ + XOR(A, D) ; \ + ASSIGN(C, B) ; \ + OR(B, D) ; \ + XOR(B, A) ; \ + ASSIGN(A, B) ; \ + ASSIGN(B, C) ; \ + ASSIGN(C, D) ; \ + ASSIGN(D, T) ; + +#define SBOX_E5(A, B, C, D, T) \ + XOR(B, D) ; \ + NOT(D) ; \ + XOR(C, D) ; \ + XOR(D, A) ; \ + ASSIGN(T, B) ; \ + AND(B, D) ; \ + XOR(B, C) ; \ + XOR(T, D) ; \ + XOR(A, T) ; \ + AND(C, T) ; \ + XOR(C, A) ; \ + AND(A, B) ; \ + XOR(D, A) ; \ + OR(T, B) ; \ + XOR(T, A) ; \ + OR(A, D) ; \ + XOR(A, C) ; \ + AND(C, D) ; \ + NOT(A) ; \ + XOR(T, C) ; \ + ASSIGN(C, A) ; \ + ASSIGN(A, B) ; \ + ASSIGN(B, T) ; + +#define SBOX_E6(A, B, C, D, T) \ + XOR(A, B) ; \ + XOR(B, D) ; \ + NOT(D) ; \ + ASSIGN(T, B) ; \ + AND(B, A) ; \ + XOR(C, D) ; \ + XOR(B, C) ; \ + OR(C, T) ; \ + XOR(T, D) ; \ + AND(D, B) ; \ + XOR(D, A) ; \ + XOR(T, B) ; \ + XOR(T, C) ; \ + XOR(C, A) ; \ + AND(A, D) ; \ + NOT(C) ; \ + XOR(A, T) ; \ + OR(T, D) ; \ + XOR(T, C) ; \ + ASSIGN(C, A) ; \ + ASSIGN(A, B) ; \ + ASSIGN(B, D) ; \ + ASSIGN(D, T) ; + +#define SBOX_E7(A, B, C, D, T) \ + NOT(C) ; \ + ASSIGN(T, D) ; \ + AND(D, A) ; \ + XOR(A, T) ; \ + XOR(D, C) ; \ + OR(C, T) ; \ + XOR(B, D) ; \ + XOR(C, A) ; \ + OR(A, B) ; \ + XOR(C, B) ; \ + XOR(T, A) ; \ + OR(A, D) ; \ + XOR(A, C) ; \ + XOR(T, D) ; \ + XOR(T, A) ; \ + NOT(D) ; \ + AND(C, T) ; \ + XOR(C, D) ; \ + ASSIGN(D, C) ; \ + ASSIGN(C, T) ; + +#define SBOX_E8(A, B, C, D, T) \ + ASSIGN(T, B) ; \ + OR(B, C) ; \ + XOR(B, D) ; \ + XOR(T, C) ; \ + XOR(C, B) ; \ + OR(D, T) ; \ + AND(D, A) ; \ + XOR(T, C) ; \ + XOR(D, B) ; \ + OR(B, T) ; \ + XOR(B, A) ; \ + OR(A, T) ; \ + XOR(A, C) ; \ + XOR(B, T) ; \ + XOR(C, B) ; \ + AND(B, A) ; \ + XOR(B, T) ; \ + NOT(C) ; \ + OR(C, A) ; \ + XOR(T, C) ; \ + ASSIGN(C, B) ; \ + ASSIGN(B, D) ; \ + ASSIGN(D, A) ; \ + ASSIGN(A, T) ; + +#define SBOX_D1(A, B, C, D, T) \ + NOT(C) ; 
\ + ASSIGN(T, B) ; \ + OR(B, A) ; \ + NOT(T) ; \ + XOR(B, C) ; \ + OR(C, T) ; \ + XOR(B, D) ; \ + XOR(A, T) ; \ + XOR(C, A) ; \ + AND(A, D) ; \ + XOR(T, A) ; \ + OR(A, B) ; \ + XOR(A, C) ; \ + XOR(D, T) ; \ + XOR(C, B) ; \ + XOR(D, A) ; \ + XOR(D, B) ; \ + AND(C, D) ; \ + XOR(T, C) ; \ + ASSIGN(C, B) ; \ + ASSIGN(B, T) ; + +#define SBOX_D2(A, B, C, D, T) \ + ASSIGN(T, B) ; \ + XOR(B, D) ; \ + AND(D, B) ; \ + XOR(T, C) ; \ + XOR(D, A) ; \ + OR(A, B) ; \ + XOR(C, D) ; \ + XOR(A, T) ; \ + OR(A, C) ; \ + XOR(B, D) ; \ + XOR(A, B) ; \ + OR(B, D) ; \ + XOR(B, A) ; \ + NOT(T) ; \ + XOR(T, B) ; \ + OR(B, A) ; \ + XOR(B, A) ; \ + OR(B, T) ; \ + XOR(D, B) ; \ + ASSIGN(B, A) ; \ + ASSIGN(A, T) ; \ + ASSIGN(T, D) ; \ + ASSIGN(D, C) ; \ + ASSIGN(C, T) ; + +#define SBOX_D3(A, B, C, D, T) \ + XOR(C, D) ; \ + XOR(D, A) ; \ + ASSIGN(T, D) ; \ + AND(D, C) ; \ + XOR(D, B) ; \ + OR(B, C) ; \ + XOR(B, T) ; \ + AND(T, D) ; \ + XOR(C, D) ; \ + AND(T, A) ; \ + XOR(T, C) ; \ + AND(C, B) ; \ + OR(C, A) ; \ + NOT(D) ; \ + XOR(C, D) ; \ + XOR(A, D) ; \ + AND(A, B) ; \ + XOR(D, T) ; \ + XOR(D, A) ; \ + ASSIGN(A, B) ; \ + ASSIGN(B, T) ; + +#define SBOX_D4(A, B, C, D, T) \ + ASSIGN(T, C) ; \ + XOR(C, B) ; \ + XOR(A, C) ; \ + AND(T, C) ; \ + XOR(T, A) ; \ + AND(A, B) ; \ + XOR(B, D) ; \ + OR(D, T) ; \ + XOR(C, D) ; \ + XOR(A, D) ; \ + XOR(B, T) ; \ + AND(D, C) ; \ + XOR(D, B) ; \ + XOR(B, A) ; \ + OR(B, C) ; \ + XOR(A, D) ; \ + XOR(B, T) ; \ + XOR(A, B) ; \ + ASSIGN(T, A) ; \ + ASSIGN(A, C) ; \ + ASSIGN(C, D) ; \ + ASSIGN(D, T) ; + +#define SBOX_D5(A, B, C, D, T) \ + ASSIGN(T, C) ; \ + AND(C, D) ; \ + XOR(C, B) ; \ + OR(B, D) ; \ + AND(B, A) ; \ + XOR(T, C) ; \ + XOR(T, B) ; \ + AND(B, C) ; \ + NOT(A) ; \ + XOR(D, T) ; \ + XOR(B, D) ; \ + AND(D, A) ; \ + XOR(D, C) ; \ + XOR(A, B) ; \ + AND(C, A) ; \ + XOR(D, A) ; \ + XOR(C, T) ; \ + OR(C, D) ; \ + XOR(D, A) ; \ + XOR(C, B) ; \ + ASSIGN(B, D) ; \ + ASSIGN(D, T) ; + +#define SBOX_D6(A, B, C, D, T) \ + NOT(B) ; \ + ASSIGN(T, D) ; \ + XOR(C, B) ; \ + 
OR(D, A) ; \ + XOR(D, C) ; \ + OR(C, B) ; \ + AND(C, A) ; \ + XOR(T, D) ; \ + XOR(C, T) ; \ + OR(T, A) ; \ + XOR(T, B) ; \ + AND(B, C) ; \ + XOR(B, D) ; \ + XOR(T, C) ; \ + AND(D, T) ; \ + XOR(T, B) ; \ + XOR(D, T) ; \ + NOT(T) ; \ + XOR(D, A) ; \ + ASSIGN(A, B) ; \ + ASSIGN(B, T) ; \ + ASSIGN(T, D) ; \ + ASSIGN(D, C) ; \ + ASSIGN(C, T) ; + +#define SBOX_D7(A, B, C, D, T) \ + XOR(A, C) ; \ + ASSIGN(T, C) ; \ + AND(C, A) ; \ + XOR(T, D) ; \ + NOT(C) ; \ + XOR(D, B) ; \ + XOR(C, D) ; \ + OR(T, A) ; \ + XOR(A, C) ; \ + XOR(D, T) ; \ + XOR(T, B) ; \ + AND(B, D) ; \ + XOR(B, A) ; \ + XOR(A, D) ; \ + OR(A, C) ; \ + XOR(D, B) ; \ + XOR(T, A) ; \ + ASSIGN(A, B) ; \ + ASSIGN(B, C) ; \ + ASSIGN(C, T) ; + +#define SBOX_D8(A, B, C, D, T) \ + ASSIGN(T, C) ; \ + XOR(C, A) ; \ + AND(A, D) ; \ + OR(T, D) ; \ + NOT(C) ; \ + XOR(D, B) ; \ + OR(B, A) ; \ + XOR(A, C) ; \ + AND(C, T) ; \ + AND(D, T) ; \ + XOR(B, C) ; \ + XOR(C, A) ; \ + OR(A, C) ; \ + XOR(T, B) ; \ + XOR(A, D) ; \ + XOR(D, T) ; \ + OR(T, A) ; \ + XOR(D, C) ; \ + XOR(T, C) ; \ + ASSIGN(C, B) ; \ + ASSIGN(B, A) ; \ + ASSIGN(A, D) ; \ + ASSIGN(D, T) ; + +#define TRANSFORM(A, B, C, D, T) \ + ROTL_IMM(A, 13) ; \ + ROTL_IMM(C, 3) ; \ + SHL2_3(T, A) ; \ + XOR(B, A) ; \ + XOR(D, C) ; \ + XOR(B, C) ; \ + XOR(D, T) ; \ + ROTL_IMM(B, 1) ; \ + ROTL_IMM(D, 7) ; \ + ASSIGN(T, B) ; \ + SHL_IMM(T, 7) ; \ + XOR(A, B) ; \ + XOR(C, D) ; \ + XOR(A, D) ; \ + XOR(C, T) ; \ + ROTL_IMM(A, 5) ; \ + ROTL_IMM(C, 22) ; + +#define I_TRANSFORM(A, B, C, D, T) \ + ROTR_IMM(C, 22) ; \ + ROTR_IMM(A, 5) ; \ + ASSIGN(T, B) ; \ + SHL_IMM(T, 7) ; \ + XOR(A, B) ; \ + XOR(C, D) ; \ + XOR(A, D) ; \ + XOR(C, T) ; \ + ROTR_IMM(D, 7) ; \ + ROTR_IMM(B, 1) ; \ + SHL2_3(T, A) ; \ + XOR(B, C) ; \ + XOR(D, C) ; \ + XOR(B, A) ; \ + XOR(D, T) ; \ + ROTR_IMM(C, 3) ; \ + ROTR_IMM(A, 13) ; + +#define KEY_XOR(A, B, C, D, N) \ + XOR(A, ARRAY4(EDI, (4*N ))) ; \ + XOR(B, ARRAY4(EDI, (4*N+1))) ; \ + XOR(C, ARRAY4(EDI, (4*N+2))) ; \ + XOR(D, ARRAY4(EDI, (4*N+3))) ; + 
+/************************************************* +* Serpent Encryption * +*************************************************/ +START_FUNCTION(serpent_encrypt) + SPILL_REGS() +#define PUSHED 4 + + ASSIGN(EBP, ARG(1)) /* input block */ + ASSIGN(EAX, ARRAY4(EBP, 0)) + ASSIGN(EBX, ARRAY4(EBP, 1)) + ASSIGN(ECX, ARRAY4(EBP, 2)) + ASSIGN(EDX, ARRAY4(EBP, 3)) + + ASSIGN(EDI, ARG(3)) /* round keys */ + ZEROIZE(EBP) + +#define E_ROUND(A, B, C, D, T, N, SBOX) \ + KEY_XOR(A, B, C, D, N) \ + SBOX(A, B, C, D, T) \ + TRANSFORM(A, B, C, D, T) + + + E_ROUND(EAX, EBX, ECX, EDX, EBP, 0, SBOX_E1) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 1, SBOX_E2) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 2, SBOX_E3) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 3, SBOX_E4) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 4, SBOX_E5) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 5, SBOX_E6) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 6, SBOX_E7) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 7, SBOX_E8) + + E_ROUND(EAX, EBX, ECX, EDX, EBP, 8, SBOX_E1) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 9, SBOX_E2) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 10, SBOX_E3) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 11, SBOX_E4) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 12, SBOX_E5) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 13, SBOX_E6) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 14, SBOX_E7) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 15, SBOX_E8) + + E_ROUND(EAX, EBX, ECX, EDX, EBP, 16, SBOX_E1) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 17, SBOX_E2) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 18, SBOX_E3) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 19, SBOX_E4) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 20, SBOX_E5) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 21, SBOX_E6) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 22, SBOX_E7) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 23, SBOX_E8) + + E_ROUND(EAX, EBX, ECX, EDX, EBP, 24, SBOX_E1) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 25, SBOX_E2) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 26, SBOX_E3) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 27, SBOX_E4) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 28, SBOX_E5) + E_ROUND(EAX, EBX, ECX, EDX, EBP, 29, SBOX_E6) + E_ROUND(EAX, EBX, ECX, 
EDX, EBP, 30, SBOX_E7) + + KEY_XOR(EAX, EBX, ECX, EDX, 31) + SBOX_E8(EAX, EBX, ECX, EDX, EBP) + KEY_XOR(EAX, EBX, ECX, EDX, 32) + + ASSIGN(EBP, ARG(2)) /* output block */ + ASSIGN(ARRAY4(EBP, 0), EAX) + ASSIGN(ARRAY4(EBP, 1), EBX) + ASSIGN(ARRAY4(EBP, 2), ECX) + ASSIGN(ARRAY4(EBP, 3), EDX) + + RESTORE_REGS() +#undef PUSHED +END_FUNCTION(serpent_encrypt) + +/************************************************* +* Serpent Decryption * +*************************************************/ +START_FUNCTION(serpent_decrypt) + SPILL_REGS() +#define PUSHED 4 + + ASSIGN(EBP, ARG(1)) /* input block */ + ASSIGN(EAX, ARRAY4(EBP, 0)) + ASSIGN(EBX, ARRAY4(EBP, 1)) + ASSIGN(ECX, ARRAY4(EBP, 2)) + ASSIGN(EDX, ARRAY4(EBP, 3)) + + ASSIGN(EDI, ARG(3)) /* round keys */ + + ZEROIZE(EBP) + +#define D_ROUND(A, B, C, D, T, N, SBOX) \ + I_TRANSFORM(A, B, C, D, T) \ + SBOX(A, B, C, D, T) \ + KEY_XOR(A, B, C, D, N) \ + + KEY_XOR(EAX, EBX, ECX, EDX, 32) + SBOX_D8(EAX, EBX, ECX, EDX, EBP) + KEY_XOR(EAX, EBX, ECX, EDX, 31) + + D_ROUND(EAX, EBX, ECX, EDX, EBP, 30, SBOX_D7) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 29, SBOX_D6) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 28, SBOX_D5) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 27, SBOX_D4) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 26, SBOX_D3) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 25, SBOX_D2) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 24, SBOX_D1) + + D_ROUND(EAX, EBX, ECX, EDX, EBP, 23, SBOX_D8) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 22, SBOX_D7) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 21, SBOX_D6) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 20, SBOX_D5) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 19, SBOX_D4) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 18, SBOX_D3) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 17, SBOX_D2) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 16, SBOX_D1) + + D_ROUND(EAX, EBX, ECX, EDX, EBP, 15, SBOX_D8) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 14, SBOX_D7) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 13, SBOX_D6) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 12, SBOX_D5) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 11, SBOX_D4) + D_ROUND(EAX, EBX, ECX, EDX, 
EBP, 10, SBOX_D3) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 9, SBOX_D2) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 8, SBOX_D1) + + D_ROUND(EAX, EBX, ECX, EDX, EBP, 7, SBOX_D8) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 6, SBOX_D7) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 5, SBOX_D6) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 4, SBOX_D5) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 3, SBOX_D4) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 2, SBOX_D3) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 1, SBOX_D2) + D_ROUND(EAX, EBX, ECX, EDX, EBP, 0, SBOX_D1) + + ASSIGN(EBP, ARG(2)) /* output block */ + ASSIGN(ARRAY4(EBP, 0), EAX) + ASSIGN(ARRAY4(EBP, 1), EBX) + ASSIGN(ARRAY4(EBP, 2), ECX) + ASSIGN(ARRAY4(EBP, 3), EDX) + + RESTORE_REGS() +#undef PUSHED +END_FUNCTION(serpent_decrypt) + +/************************************************* +* Serpent Key Schedule * +*************************************************/ +START_FUNCTION(serpent_key_schedule) + SPILL_REGS() +#define PUSHED 4 + + ASSIGN(EDI, ARG(1)) /* round keys */ + ASSIGN(ESI, IMM(8)) + ADD_IMM(EDI, 32) + +START_LOOP(.EXPANSION) + ASSIGN(EAX, ARRAY4(EDI, -1)) + ASSIGN(EBX, ARRAY4(EDI, -3)) + ASSIGN(ECX, ARRAY4(EDI, -5)) + ASSIGN(EDX, ARRAY4(EDI, -8)) + + ASSIGN(EBP, ESI) + SUB_IMM(EBP, 8) + XOR(EBP, IMM(0x9E3779B9)) + XOR(EAX, EBX) + XOR(ECX, EDX) + XOR(EAX, EBP) + XOR(EAX, ECX) + + ROTL_IMM(EAX, 11) + + ASSIGN(ARRAY4(EDI, 0), EAX) + + ADD_IMM(ESI, 1) + ADD_IMM(EDI, 4) +LOOP_UNTIL_EQ(ESI, 140, .EXPANSION) + + ASSIGN(EDI, ARG(1)) /* round keys */ + +#define LOAD_AND_SBOX(MSG, SBOX) \ + ASSIGN(EAX, ARRAY4(EDI, (4*MSG+ 8))) ; \ + ASSIGN(EBX, ARRAY4(EDI, (4*MSG+ 9))) ; \ + ASSIGN(ECX, ARRAY4(EDI, (4*MSG+10))) ; \ + ASSIGN(EDX, ARRAY4(EDI, (4*MSG+11))) ; \ + SBOX(EAX, EBX, ECX, EDX, EBP) ; \ + ASSIGN(ARRAY4(EDI, (4*MSG+ 8)), EAX) ; \ + ASSIGN(ARRAY4(EDI, (4*MSG+ 9)), EBX) ; \ + ASSIGN(ARRAY4(EDI, (4*MSG+10)), ECX) ; \ + ASSIGN(ARRAY4(EDI, (4*MSG+11)), EDX) + + LOAD_AND_SBOX( 0, SBOX_E4) + LOAD_AND_SBOX( 1, SBOX_E3) + LOAD_AND_SBOX( 2, SBOX_E2) + LOAD_AND_SBOX( 3, SBOX_E1) + + 
LOAD_AND_SBOX( 4, SBOX_E8) + LOAD_AND_SBOX( 5, SBOX_E7) + LOAD_AND_SBOX( 6, SBOX_E6) + LOAD_AND_SBOX( 7, SBOX_E5) + LOAD_AND_SBOX( 8, SBOX_E4) + LOAD_AND_SBOX( 9, SBOX_E3) + LOAD_AND_SBOX(10, SBOX_E2) + LOAD_AND_SBOX(11, SBOX_E1) + + LOAD_AND_SBOX(12, SBOX_E8) + LOAD_AND_SBOX(13, SBOX_E7) + LOAD_AND_SBOX(14, SBOX_E6) + LOAD_AND_SBOX(15, SBOX_E5) + LOAD_AND_SBOX(16, SBOX_E4) + LOAD_AND_SBOX(17, SBOX_E3) + LOAD_AND_SBOX(18, SBOX_E2) + LOAD_AND_SBOX(19, SBOX_E1) + + LOAD_AND_SBOX(20, SBOX_E8) + LOAD_AND_SBOX(21, SBOX_E7) + LOAD_AND_SBOX(22, SBOX_E6) + LOAD_AND_SBOX(23, SBOX_E5) + LOAD_AND_SBOX(24, SBOX_E4) + LOAD_AND_SBOX(25, SBOX_E3) + LOAD_AND_SBOX(26, SBOX_E2) + LOAD_AND_SBOX(27, SBOX_E1) + + LOAD_AND_SBOX(28, SBOX_E8) + LOAD_AND_SBOX(29, SBOX_E7) + LOAD_AND_SBOX(30, SBOX_E6) + LOAD_AND_SBOX(31, SBOX_E5) + LOAD_AND_SBOX(32, SBOX_E4) + + RESTORE_REGS() +#undef PUSHED +END_FUNCTION(serpent_key_schedule) diff --git a/modules/asm_ia32/serpent.cpp b/modules/asm_ia32/serpent.cpp new file mode 100644 index 000000000..aacb72b0f --- /dev/null +++ b/modules/asm_ia32/serpent.cpp @@ -0,0 +1,49 @@ +/************************************************* +* Serpent Source File * +* (C) 1999-2007 Jack Lloyd * +*************************************************/ + +#include <botan/serpent.h> +#include <botan/loadstor.h> + +namespace Botan { + +extern "C" { + +void serpent_encrypt(const byte[16], byte[16], const u32bit[132]); +void serpent_decrypt(const byte[16], byte[16], const u32bit[132]); +void serpent_key_schedule(u32bit[140]); + +} + +/************************************************* +* Serpent Encryption * +*************************************************/ +void Serpent::enc(const byte in[], byte out[]) const + { + serpent_encrypt(in, out, round_key); + } + +/************************************************* +* Serpent Decryption * +*************************************************/ +void Serpent::dec(const byte in[], byte out[]) const + { + serpent_decrypt(in, out, round_key); 
+ } + + /************************************************* +* Serpent Key Schedule * +*************************************************/ +void Serpent::key(const byte key[], u32bit length) + { + SecureBuffer<u32bit, 140> W; + for(u32bit j = 0; j != length / 4; ++j) + W[j] = make_u32bit(key[4*j+3], key[4*j+2], key[4*j+1], key[4*j]); + W[length / 4] |= u32bit(1) << ((length%4)*8); + + serpent_key_schedule(W); + round_key.copy(W + 8, 132); + } + +} diff --git a/modules/asm_ia32/sha160.cpp b/modules/asm_ia32/sha160.cpp new file mode 100644 index 000000000..54e85fd26 --- /dev/null +++ b/modules/asm_ia32/sha160.cpp @@ -0,0 +1,52 @@ +/************************************************* +* SHA-160 Source File * +* (C) 1999-2007 Jack Lloyd * +*************************************************/ + +#include <botan/sha160.h> +#include <botan/loadstor.h> + +namespace Botan { + +extern "C" void botan_sha160_asm_ia32(u32bit[5], const byte[64], u32bit[81]); + +/************************************************* +* SHA-160 Compression Function * +*************************************************/ +void SHA_160::hash(const byte input[]) + { + botan_sha160_asm_ia32(digest, input, W); + } + +/************************************************* +* Copy out the digest * +*************************************************/ +void SHA_160::copy_out(byte output[]) + { + for(u32bit j = 0; j != OUTPUT_LENGTH; ++j) + output[j] = get_byte(j % 4, digest[j/4]); + } + +/************************************************* +* Clear memory of sensitive data * +*************************************************/ +void SHA_160::clear() throw() + { + MDx_HashFunction::clear(); + W.clear(); + digest[0] = 0x67452301; + digest[1] = 0xEFCDAB89; + digest[2] = 0x98BADCFE; + digest[3] = 0x10325476; + digest[4] = 0xC3D2E1F0; + } + +/************************************************* +* SHA_160 Constructor * +*************************************************/ +SHA_160::SHA_160() : MDx_HashFunction(20, 64, true, true), W(81) + { + 
clear(); + } + +} diff --git a/modules/asm_ia32/sha1_asm.S b/modules/asm_ia32/sha1_asm.S new file mode 100644 index 000000000..85bc9dc2c --- /dev/null +++ b/modules/asm_ia32/sha1_asm.S @@ -0,0 +1,242 @@ +/************************************************* +* SHA-160 Source File * +* (C) 1999-2007 Jack Lloyd * +*************************************************/ + +#include <botan/asm_macr.h> + +START_LISTING(sha1_asm.S) + +START_FUNCTION(botan_sha160_asm_ia32) + SPILL_REGS() + +#define PUSHED 4 + + ASSIGN(EDI, ARG(2)) + ASSIGN(EBP, ARG(3)) + + ZEROIZE(ESI) + +START_LOOP(.LOAD_INPUT) + ADD_IMM(ESI, 4) + + ASSIGN(EAX, ARRAY4(EDI, 0)) + ASSIGN(EBX, ARRAY4(EDI, 1)) + ASSIGN(ECX, ARRAY4(EDI, 2)) + ASSIGN(EDX, ARRAY4(EDI, 3)) + + ADD_IMM(EDI, 16) + + BSWAP(EAX) + BSWAP(EBX) + BSWAP(ECX) + BSWAP(EDX) + + ASSIGN(ARRAY4_INDIRECT(EBP,ESI,-4), EAX) + ASSIGN(ARRAY4_INDIRECT(EBP,ESI,-3), EBX) + ASSIGN(ARRAY4_INDIRECT(EBP,ESI,-2), ECX) + ASSIGN(ARRAY4_INDIRECT(EBP,ESI,-1), EDX) +LOOP_UNTIL_EQ(ESI, 16, .LOAD_INPUT) + + ADD2_IMM(EDI, EBP, 64) + +START_LOOP(.EXPANSION) + ADD_IMM(ESI, 4) + + ZEROIZE(EAX) + ASSIGN(EBX, ARRAY4(EDI, -1)) + ASSIGN(ECX, ARRAY4(EDI, -2)) + ASSIGN(EDX, ARRAY4(EDI, -3)) + + XOR(EAX, ARRAY4(EDI, -5)) + XOR(EBX, ARRAY4(EDI, -6)) + XOR(ECX, ARRAY4(EDI, -7)) + XOR(EDX, ARRAY4(EDI, -8)) + + XOR(EAX, ARRAY4(EDI, -11)) + XOR(EBX, ARRAY4(EDI, -12)) + XOR(ECX, ARRAY4(EDI, -13)) + XOR(EDX, ARRAY4(EDI, -14)) + + XOR(EAX, ARRAY4(EDI, -13)) + XOR(EBX, ARRAY4(EDI, -14)) + XOR(ECX, ARRAY4(EDI, -15)) + XOR(EDX, ARRAY4(EDI, -16)) + + ROTL_IMM(EDX, 1) + ROTL_IMM(ECX, 1) + ROTL_IMM(EBX, 1) + XOR(EAX, EDX) + ROTL_IMM(EAX, 1) + + ASSIGN(ARRAY4(EDI, 0), EDX) + ASSIGN(ARRAY4(EDI, 1), ECX) + ASSIGN(ARRAY4(EDI, 2), EBX) + ASSIGN(ARRAY4(EDI, 3), EAX) + + ADD_IMM(EDI, 16) +LOOP_UNTIL_EQ(ESI, 80, .EXPANSION) + +#define MAGIC1 0x5A827999 +#define MAGIC2 0x6ED9EBA1 +#define MAGIC3 0x8F1BBCDC +#define MAGIC4 0xCA62C1D6 + +#define MSG ESP +#define T2 EBP + +#define F1(A, B, C, D, E, F, N) \ 
+ ASSIGN(T2, ARRAY4(MSG, N)) ; \ + ASSIGN(A, F) ; \ + ROTL_IMM(F, 5) ; \ + ADD(F, E) ; \ + ASSIGN(E, C) ; \ + XOR(E, D) ; \ + ADD3_IMM(F, T2, MAGIC1) ; \ + AND(E, B) ; \ + XOR(E, D) ; \ + ROTR_IMM(B, 2) ; \ + ADD(E, F) ; + +#define F2_4(A, B, C, D, E, F, N, MAGIC) \ + ASSIGN(T2, ARRAY4(MSG, N)) ; \ + ASSIGN(A, F) ; \ + ROTL_IMM(F, 5) ; \ + ADD(F, E) ; \ + ASSIGN(E, B) ; \ + XOR(E, C) ; \ + ADD3_IMM(F, T2, MAGIC) ; \ + XOR(E, D) ; \ + ROTR_IMM(B, 2) ; \ + ADD(E, F) ; + +#define F3(A, B, C, D, E, F, N) \ + ASSIGN(T2, ARRAY4(MSG, N)) ; \ + ASSIGN(A, F) ; \ + ROTL_IMM(F, 5) ; \ + ADD(F, E) ; \ + ASSIGN(E, B) ; \ + OR(E, C) ; \ + AND(E, D) ; \ + ADD3_IMM(F, T2, MAGIC3) ; \ + ASSIGN(T2, B) ; \ + AND(T2, C) ; \ + OR(E, T2) ; \ + ROTR_IMM(B, 2) ; \ + ADD(E, F) ; + +#define F2(A, B, C, D, E, F, MSG) \ + F2_4(A, B, C, D, E, F, MSG, MAGIC2) + +#define F4(A, B, C, D, E, F, MSG) \ + F2_4(A, B, C, D, E, F, MSG, MAGIC4) + + ASSIGN(EAX, ARG(1)) + ASSIGN(EDI, ARRAY4(EAX, 0)) + ASSIGN(EBX, ARRAY4(EAX, 1)) + ASSIGN(ECX, ARRAY4(EAX, 2)) + ASSIGN(EDX, ARRAY4(EAX, 3)) + ASSIGN(ESI, ARRAY4(EAX, 4)) + + ASSIGN(ARRAY4(EBP, 80), ESP) + ASSIGN(ESP, EBP) + + /* First Round */ + F1(EAX, EBX, ECX, EDX, ESI, EDI, 0) + F1(EDI, EAX, EBX, ECX, EDX, ESI, 1) + F1(ESI, EDI, EAX, EBX, ECX, EDX, 2) + F1(EDX, ESI, EDI, EAX, EBX, ECX, 3) + F1(ECX, EDX, ESI, EDI, EAX, EBX, 4) + F1(EBX, ECX, EDX, ESI, EDI, EAX, 5) + F1(EAX, EBX, ECX, EDX, ESI, EDI, 6) + F1(EDI, EAX, EBX, ECX, EDX, ESI, 7) + F1(ESI, EDI, EAX, EBX, ECX, EDX, 8) + F1(EDX, ESI, EDI, EAX, EBX, ECX, 9) + F1(ECX, EDX, ESI, EDI, EAX, EBX, 10) + F1(EBX, ECX, EDX, ESI, EDI, EAX, 11) + F1(EAX, EBX, ECX, EDX, ESI, EDI, 12) + F1(EDI, EAX, EBX, ECX, EDX, ESI, 13) + F1(ESI, EDI, EAX, EBX, ECX, EDX, 14) + F1(EDX, ESI, EDI, EAX, EBX, ECX, 15) + F1(ECX, EDX, ESI, EDI, EAX, EBX, 16) + F1(EBX, ECX, EDX, ESI, EDI, EAX, 17) + F1(EAX, EBX, ECX, EDX, ESI, EDI, 18) + F1(EDI, EAX, EBX, ECX, EDX, ESI, 19) + + /* Second Round */ + F2(ESI, EDI, EAX, EBX, ECX, EDX, 20) 
+ F2(EDX, ESI, EDI, EAX, EBX, ECX, 21) + F2(ECX, EDX, ESI, EDI, EAX, EBX, 22) + F2(EBX, ECX, EDX, ESI, EDI, EAX, 23) + F2(EAX, EBX, ECX, EDX, ESI, EDI, 24) + F2(EDI, EAX, EBX, ECX, EDX, ESI, 25) + F2(ESI, EDI, EAX, EBX, ECX, EDX, 26) + F2(EDX, ESI, EDI, EAX, EBX, ECX, 27) + F2(ECX, EDX, ESI, EDI, EAX, EBX, 28) + F2(EBX, ECX, EDX, ESI, EDI, EAX, 29) + F2(EAX, EBX, ECX, EDX, ESI, EDI, 30) + F2(EDI, EAX, EBX, ECX, EDX, ESI, 31) + F2(ESI, EDI, EAX, EBX, ECX, EDX, 32) + F2(EDX, ESI, EDI, EAX, EBX, ECX, 33) + F2(ECX, EDX, ESI, EDI, EAX, EBX, 34) + F2(EBX, ECX, EDX, ESI, EDI, EAX, 35) + F2(EAX, EBX, ECX, EDX, ESI, EDI, 36) + F2(EDI, EAX, EBX, ECX, EDX, ESI, 37) + F2(ESI, EDI, EAX, EBX, ECX, EDX, 38) + F2(EDX, ESI, EDI, EAX, EBX, ECX, 39) + + /* Third Round */ + F3(ECX, EDX, ESI, EDI, EAX, EBX, 40) + F3(EBX, ECX, EDX, ESI, EDI, EAX, 41) + F3(EAX, EBX, ECX, EDX, ESI, EDI, 42) + F3(EDI, EAX, EBX, ECX, EDX, ESI, 43) + F3(ESI, EDI, EAX, EBX, ECX, EDX, 44) + F3(EDX, ESI, EDI, EAX, EBX, ECX, 45) + F3(ECX, EDX, ESI, EDI, EAX, EBX, 46) + F3(EBX, ECX, EDX, ESI, EDI, EAX, 47) + F3(EAX, EBX, ECX, EDX, ESI, EDI, 48) + F3(EDI, EAX, EBX, ECX, EDX, ESI, 49) + F3(ESI, EDI, EAX, EBX, ECX, EDX, 50) + F3(EDX, ESI, EDI, EAX, EBX, ECX, 51) + F3(ECX, EDX, ESI, EDI, EAX, EBX, 52) + F3(EBX, ECX, EDX, ESI, EDI, EAX, 53) + F3(EAX, EBX, ECX, EDX, ESI, EDI, 54) + F3(EDI, EAX, EBX, ECX, EDX, ESI, 55) + F3(ESI, EDI, EAX, EBX, ECX, EDX, 56) + F3(EDX, ESI, EDI, EAX, EBX, ECX, 57) + F3(ECX, EDX, ESI, EDI, EAX, EBX, 58) + F3(EBX, ECX, EDX, ESI, EDI, EAX, 59) + + /* Fourth Round */ + F4(EAX, EBX, ECX, EDX, ESI, EDI, 60) + F4(EDI, EAX, EBX, ECX, EDX, ESI, 61) + F4(ESI, EDI, EAX, EBX, ECX, EDX, 62) + F4(EDX, ESI, EDI, EAX, EBX, ECX, 63) + F4(ECX, EDX, ESI, EDI, EAX, EBX, 64) + F4(EBX, ECX, EDX, ESI, EDI, EAX, 65) + F4(EAX, EBX, ECX, EDX, ESI, EDI, 66) + F4(EDI, EAX, EBX, ECX, EDX, ESI, 67) + F4(ESI, EDI, EAX, EBX, ECX, EDX, 68) + F4(EDX, ESI, EDI, EAX, EBX, ECX, 69) + F4(ECX, EDX, ESI, EDI, EAX, EBX, 70) + 
F4(EBX, ECX, EDX, ESI, EDI, EAX, 71) + F4(EAX, EBX, ECX, EDX, ESI, EDI, 72) + F4(EDI, EAX, EBX, ECX, EDX, ESI, 73) + F4(ESI, EDI, EAX, EBX, ECX, EDX, 74) + F4(EDX, ESI, EDI, EAX, EBX, ECX, 75) + F4(ECX, EDX, ESI, EDI, EAX, EBX, 76) + F4(EBX, ECX, EDX, ESI, EDI, EAX, 77) + F4(EAX, EBX, ECX, EDX, ESI, EDI, 78) + F4(EDI, EAX, EBX, ECX, EDX, ESI, 79) + + ASSIGN(ESP, ARRAY4(ESP, 80)) + + ASSIGN(EBP, ARG(1)) + ADD(ARRAY4(EBP, 0), EDX) + ADD(ARRAY4(EBP, 1), EDI) + ADD(ARRAY4(EBP, 2), EAX) + ADD(ARRAY4(EBP, 3), EBX) + ADD(ARRAY4(EBP, 4), ECX) + + RESTORE_REGS() +END_FUNCTION(botan_sha160_asm_ia32) |