diff options
Diffstat (limited to 'module/icp')
-rw-r--r-- | module/icp/Makefile.in | 3 | ||||
-rw-r--r-- | module/icp/algs/aes/aes_impl.c | 21 | ||||
-rw-r--r-- | module/icp/algs/modes/gcm.c | 21 | ||||
-rw-r--r-- | module/icp/asm-x86_64/aes/aes_amd64.S | 10 | ||||
-rw-r--r-- | module/icp/asm-x86_64/aes/aes_intel.S | 226 | ||||
-rw-r--r-- | module/icp/asm-x86_64/modes/gcm_intel.S | 88 | ||||
-rw-r--r-- | module/icp/asm-x86_64/sha1/sha1-x86_64.S | 59 | ||||
-rw-r--r-- | module/icp/asm-x86_64/sha2/sha256_impl.S | 3 | ||||
-rw-r--r-- | module/icp/asm-x86_64/sha2/sha512_impl.S | 1 | ||||
-rw-r--r-- | module/icp/include/sys/asm_linkage.h | 10 |
10 files changed, 135 insertions, 307 deletions
diff --git a/module/icp/Makefile.in b/module/icp/Makefile.in index b822635b7..77b2ec1b5 100644 --- a/module/icp/Makefile.in +++ b/module/icp/Makefile.in @@ -18,12 +18,13 @@ endif ifeq ($(TARGET_ASM_DIR), asm-i386) ASM_SOURCES := endif - + ifeq ($(TARGET_ASM_DIR), asm-generic) ASM_SOURCES := endif EXTRA_CFLAGS = $(ZFS_MODULE_CFLAGS) @KERNELCPPFLAGS@ +EXTRA_AFLAGS = $(ZFS_MODULE_CFLAGS) @KERNELCPPFLAGS@ obj-$(CONFIG_ZFS) := $(MODULE).o diff --git a/module/icp/algs/aes/aes_impl.c b/module/icp/algs/aes/aes_impl.c index 8592386dd..4c17e2a30 100644 --- a/module/icp/algs/aes/aes_impl.c +++ b/module/icp/algs/aes/aes_impl.c @@ -22,23 +22,22 @@ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ -#include <sys/zfs_context.h> -#include <sys/crypto/spi.h> -#include <modes/modes.h> -#include <aes/aes_impl.h> - -#ifdef __amd64 +#if defined(_KERNEL) && defined(__amd64) +#include <linux/simd_x86.h> -#ifdef _KERNEL -/* Workaround for no XMM kernel thread save/restore */ -#define KPREEMPT_DISABLE kpreempt_disable() -#define KPREEMPT_ENABLE kpreempt_enable() +#define KPREEMPT_DISABLE kfpu_begin() +#define KPREEMPT_ENABLE kfpu_end() #else #define KPREEMPT_DISABLE #define KPREEMPT_ENABLE #endif /* _KERNEL */ -#endif /* __amd64 */ + +#include <sys/zfs_context.h> +#include <sys/crypto/spi.h> +#include <modes/modes.h> +#include <aes/aes_impl.h> + /* diff --git a/module/icp/algs/modes/gcm.c b/module/icp/algs/modes/gcm.c index 80acb6636..c0a26f524 100644 --- a/module/icp/algs/modes/gcm.c +++ b/module/icp/algs/modes/gcm.c @@ -22,6 +22,17 @@ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ +#if defined(_KERNEL) && defined(__amd64) +#include <linux/simd_x86.h> + +#define KPREEMPT_DISABLE kfpu_begin() +#define KPREEMPT_ENABLE kfpu_end() + +#else +#define KPREEMPT_DISABLE +#define KPREEMPT_ENABLE +#endif /* _KERNEL */ + #include <sys/zfs_context.h> #include <modes/modes.h> #include <sys/crypto/common.h> @@ -30,16 +41,6 @@ #ifdef __amd64 -#ifdef _KERNEL -/* Workaround for no XMM kernel thread save/restore */ -#define KPREEMPT_DISABLE kpreempt_disable() -#define KPREEMPT_ENABLE kpreempt_enable() - -#else -#define KPREEMPT_DISABLE -#define KPREEMPT_ENABLE -#endif /* _KERNEL */ - extern void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res); static int intel_pclmulqdq_instruction_present(void); #endif /* __amd64 */ diff --git a/module/icp/asm-x86_64/aes/aes_amd64.S b/module/icp/asm-x86_64/aes/aes_amd64.S index fa66dc321..9db3a3179 100644 --- a/module/icp/asm-x86_64/aes/aes_amd64.S +++ b/module/icp/asm-x86_64/aes/aes_amd64.S @@ -693,7 +693,8 @@ aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4], * int aes_encrypt(const unsigned char *in, * unsigned char *out, const aes_encrypt_ctx cx[1])/ */ - .align 64 +.data +.align 64 enc_tab: enc_vals(u8) #ifdef LAST_ROUND_TABLES @@ -702,7 +703,7 @@ enc_tab: #endif - ENTRY_NP(aes_encrypt_amd64) +ENTRY_NP(aes_encrypt_amd64) #ifdef GLADMAN_INTERFACE // Original interface sub $[4*8], %rsp // gnu/linux/opensolaris binary interface @@ -797,7 +798,8 @@ enc_tab: * int aes_decrypt(const unsigned char *in, * unsigned char *out, const aes_encrypt_ctx cx[1])/ */ - .align 64 +.data +.align 64 dec_tab: dec_vals(v8) #ifdef LAST_ROUND_TABLES @@ -806,7 +808,7 @@ dec_tab: #endif - ENTRY_NP(aes_decrypt_amd64) +ENTRY_NP(aes_decrypt_amd64) #ifdef GLADMAN_INTERFACE // Original interface sub $[4*8], %rsp // gnu/linux/opensolaris binary interface diff --git a/module/icp/asm-x86_64/aes/aes_intel.S b/module/icp/asm-x86_64/aes/aes_intel.S index 6c5c0f919..ed0df75c5 100644 --- a/module/icp/asm-x86_64/aes/aes_intel.S +++ b/module/icp/asm-x86_64/aes/aes_intel.S @@ -149,6 +149,7 @@ * ==================================================================== */ + #if defined(lint) || defined(__lint) #include <sys/types.h> @@ -182,111 +183,6 @@ rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[], #define _ASM #include <sys/asm_linkage.h> -#ifdef _KERNEL - /* - * Note: the CLTS macro clobbers P2 (%rsi) under i86xpv. That is, - * it calls HYPERVISOR_fpu_taskswitch() which modifies %rsi when it - * uses it to pass P2 to syscall. - * This also occurs with the STTS macro, but we dont care if - * P2 (%rsi) is modified just before function exit. - * The CLTS and STTS macros push and pop P1 (%rdi) already. - */ -#ifdef __xpv -#define PROTECTED_CLTS \ - push %rsi; \ - CLTS; \ - pop %rsi -#else -#define PROTECTED_CLTS \ - CLTS -#endif /* __xpv */ - -#define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) \ - push %rbp; \ - mov %rsp, %rbp; \ - movq %cr0, tmpreg; \ - testq $CR0_TS, tmpreg; \ - jnz 1f; \ - and $-XMM_ALIGN, %rsp; \ - sub $[XMM_SIZE * 2], %rsp; \ - movaps %xmm0, 16(%rsp); \ - movaps %xmm1, (%rsp); \ - jmp 2f; \ -1: \ - PROTECTED_CLTS; \ -2: - - /* - * If CR0_TS was not set above, pop %xmm0 and %xmm1 off stack, - * otherwise set CR0_TS. - */ -#define SET_TS_OR_POP_XMM0_XMM1(tmpreg) \ - testq $CR0_TS, tmpreg; \ - jnz 1f; \ - movaps (%rsp), %xmm1; \ - movaps 16(%rsp), %xmm0; \ - jmp 2f; \ -1: \ - STTS(tmpreg); \ -2: \ - mov %rbp, %rsp; \ - pop %rbp - - /* - * If CR0_TS is not set, align stack (with push %rbp) and push - * %xmm0 - %xmm6 on stack, otherwise clear CR0_TS - */ -#define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg) \ - push %rbp; \ - mov %rsp, %rbp; \ - movq %cr0, tmpreg; \ - testq $CR0_TS, tmpreg; \ - jnz 1f; \ - and $-XMM_ALIGN, %rsp; \ - sub $[XMM_SIZE * 7], %rsp; \ - movaps %xmm0, 96(%rsp); \ - movaps %xmm1, 80(%rsp); \ - movaps %xmm2, 64(%rsp); \ - movaps %xmm3, 48(%rsp); \ - movaps %xmm4, 32(%rsp); \ - movaps %xmm5, 16(%rsp); \ - movaps %xmm6, (%rsp); \ - jmp 2f; \ -1: \ - PROTECTED_CLTS; \ -2: - - - /* - * If CR0_TS was not set above, pop %xmm0 - %xmm6 off stack, - * otherwise set CR0_TS. - */ -#define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) \ - testq $CR0_TS, tmpreg; \ - jnz 1f; \ - movaps (%rsp), %xmm6; \ - movaps 16(%rsp), %xmm5; \ - movaps 32(%rsp), %xmm4; \ - movaps 48(%rsp), %xmm3; \ - movaps 64(%rsp), %xmm2; \ - movaps 80(%rsp), %xmm1; \ - movaps 96(%rsp), %xmm0; \ - jmp 2f; \ -1: \ - STTS(tmpreg); \ -2: \ - mov %rbp, %rsp; \ - pop %rbp - - -#else -#define PROTECTED_CLTS -#define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) -#define SET_TS_OR_POP_XMM0_XMM1(tmpreg) -#define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg) -#define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) -#endif /* _KERNEL */ - /* * _key_expansion_128(), * _key_expansion_192a(), _key_expansion_192b(), @@ -302,9 +198,9 @@ rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[], * (%rcx) AES key */ -.align 16 -_key_expansion_128: -_key_expansion_256a: +ENTRY_NP2(_key_expansion_128, _key_expansion_256a) +_key_expansion_128_local: +_key_expansion_256a_local: pshufd $0b11111111, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 pxor %xmm4, %xmm0 @@ -314,11 +210,13 @@ _key_expansion_256a: movaps %xmm0, (%rcx) add $0x10, %rcx ret - SET_SIZE(_key_expansion_128) - SET_SIZE(_key_expansion_256a) + nop +SET_SIZE(_key_expansion_128) +SET_SIZE(_key_expansion_256a) -.align 16 -_key_expansion_192a: + +ENTRY_NP(_key_expansion_192a) +_key_expansion_192a_local: pshufd $0b01010101, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 pxor %xmm4, %xmm0 @@ -340,10 +238,11 @@ _key_expansion_192a: movaps %xmm1, 0x10(%rcx) add $0x20, %rcx ret - SET_SIZE(_key_expansion_192a) +SET_SIZE(_key_expansion_192a) + -.align 16 -_key_expansion_192b: +ENTRY_NP(_key_expansion_192b) +_key_expansion_192b_local: pshufd $0b01010101, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 pxor %xmm4, %xmm0 @@ -360,10 +259,11 @@ _key_expansion_192b: movaps %xmm0, (%rcx) add $0x10, %rcx ret - SET_SIZE(_key_expansion_192b) +SET_SIZE(_key_expansion_192b) + -.align 16 -_key_expansion_256b: +ENTRY_NP(_key_expansion_256b) +_key_expansion_256b_local: pshufd $0b10101010, %xmm1, %xmm1 shufps $0b00010000, %xmm2, %xmm4 pxor %xmm4, %xmm2 @@ -373,7 +273,7 @@ _key_expansion_256b: movaps %xmm2, (%rcx) add $0x10, %rcx ret - SET_SIZE(_key_expansion_256b) +SET_SIZE(_key_expansion_256b) /* @@ -419,8 +319,7 @@ _key_expansion_256b: ENTRY_NP(rijndael_key_setup_enc_intel) rijndael_key_setup_enc_intel_local: - CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(%r10) - + FRAME_BEGIN // NULL pointer sanity check test %USERCIPHERKEY, %USERCIPHERKEY jz .Lenc_key_invalid_param @@ -446,38 +345,38 @@ rijndael_key_setup_enc_intel_local: add $0x10, %rcx aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key - call _key_expansion_256a + call _key_expansion_256a_local aeskeygenassist $0x1, %xmm0, %xmm1 - call _key_expansion_256b + call _key_expansion_256b_local aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key - call _key_expansion_256a + call _key_expansion_256a_local aeskeygenassist $0x2, %xmm0, %xmm1 - call _key_expansion_256b + call _key_expansion_256b_local aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key - call _key_expansion_256a + call _key_expansion_256a_local aeskeygenassist $0x4, %xmm0, %xmm1 - call _key_expansion_256b + call _key_expansion_256b_local aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key - call _key_expansion_256a + call _key_expansion_256a_local aeskeygenassist $0x8, %xmm0, %xmm1 - call _key_expansion_256b + call _key_expansion_256b_local aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key - call _key_expansion_256a + call _key_expansion_256a_local aeskeygenassist $0x10, %xmm0, %xmm1 - call _key_expansion_256b + call _key_expansion_256b_local aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key - call _key_expansion_256a + call _key_expansion_256a_local aeskeygenassist $0x20, %xmm0, %xmm1 - call _key_expansion_256b + call _key_expansion_256b_local aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key - call _key_expansion_256a + call _key_expansion_256a_local - SET_TS_OR_POP_XMM0_TO_XMM6(%r10) #ifdef OPENSSL_INTERFACE xor %rax, %rax // return 0 (OK) #else /* Open Solaris Interface */ mov $14, %rax // return # rounds = 14 #endif + FRAME_END ret .align 4 @@ -493,28 +392,28 @@ rijndael_key_setup_enc_intel_local: movq 0x10(%USERCIPHERKEY), %xmm2 // other user key aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key - call _key_expansion_192a + call _key_expansion_192a_local aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key - call _key_expansion_192b + call _key_expansion_192b_local aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key - call _key_expansion_192a + call _key_expansion_192a_local aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key - call _key_expansion_192b + call _key_expansion_192b_local aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key - call _key_expansion_192a + call _key_expansion_192a_local aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key - call _key_expansion_192b + call _key_expansion_192b_local aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key - call _key_expansion_192a + call _key_expansion_192a_local aeskeygenassist $0x80, %xmm2, %xmm1 // expand the key - call _key_expansion_192b + call _key_expansion_192b_local - SET_TS_OR_POP_XMM0_TO_XMM6(%r10) #ifdef OPENSSL_INTERFACE xor %rax, %rax // return 0 (OK) #else /* OpenSolaris Interface */ mov $12, %rax // return # rounds = 12 #endif + FRAME_END ret .align 4 @@ -529,51 +428,50 @@ rijndael_key_setup_enc_intel_local: #endif /* OPENSSL_INTERFACE */ aeskeygenassist $0x1, %xmm0, %xmm1 // expand the key - call _key_expansion_128 + call _key_expansion_128_local aeskeygenassist $0x2, %xmm0, %xmm1 // expand the key - call _key_expansion_128 + call _key_expansion_128_local aeskeygenassist $0x4, %xmm0, %xmm1 // expand the key - call _key_expansion_128 + call _key_expansion_128_local aeskeygenassist $0x8, %xmm0, %xmm1 // expand the key - call _key_expansion_128 + call _key_expansion_128_local aeskeygenassist $0x10, %xmm0, %xmm1 // expand the key - call _key_expansion_128 + call _key_expansion_128_local aeskeygenassist $0x20, %xmm0, %xmm1 // expand the key - call _key_expansion_128 + call _key_expansion_128_local aeskeygenassist $0x40, %xmm0, %xmm1 // expand the key - call _key_expansion_128 + call _key_expansion_128_local aeskeygenassist $0x80, %xmm0, %xmm1 // expand the key - call _key_expansion_128 + call _key_expansion_128_local aeskeygenassist $0x1b, %xmm0, %xmm1 // expand the key - call _key_expansion_128 + call _key_expansion_128_local aeskeygenassist $0x36, %xmm0, %xmm1 // expand the key - call _key_expansion_128 + call _key_expansion_128_local - SET_TS_OR_POP_XMM0_TO_XMM6(%r10) #ifdef OPENSSL_INTERFACE xor %rax, %rax // return 0 (OK) #else /* OpenSolaris Interface */ mov $10, %rax // return # rounds = 10 #endif + FRAME_END ret .Lenc_key_invalid_param: #ifdef OPENSSL_INTERFACE - SET_TS_OR_POP_XMM0_TO_XMM6(%r10) mov $-1, %rax // user key or AES key pointer is NULL + FRAME_END ret #else /* FALLTHROUGH */ #endif /* OPENSSL_INTERFACE */ .Lenc_key_invalid_key_bits: - SET_TS_OR_POP_XMM0_TO_XMM6(%r10) #ifdef OPENSSL_INTERFACE mov $-2, %rax // keysize is invalid #else /* Open Solaris Interface */ xor %rax, %rax // a key pointer is NULL or invalid keysize #endif /* OPENSSL_INTERFACE */ - + FRAME_END ret SET_SIZE(rijndael_key_setup_enc_intel) @@ -599,7 +497,9 @@ rijndael_key_setup_enc_intel_local: * const int bits, AES_KEY *key); * Return value is non-zero on error, 0 on success. */ + ENTRY_NP(rijndael_key_setup_dec_intel) +FRAME_BEGIN // Generate round keys used for encryption call rijndael_key_setup_enc_intel_local test %rax, %rax @@ -609,8 +509,6 @@ ENTRY_NP(rijndael_key_setup_dec_intel) jz .Ldec_key_exit // Failed if returned 0 #endif /* OPENSSL_INTERFACE */ - CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10) - /* * Convert round keys used for encryption * to a form usable for decryption @@ -647,11 +545,10 @@ ENTRY_NP(rijndael_key_setup_dec_intel) cmp %ENDAESKEY, %rcx jnz .Ldec_key_inv_loop - SET_TS_OR_POP_XMM0_XMM1(%r10) - .Ldec_key_exit: // OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error // OpenSSL: rax = 0 for OK, or non-zero for error + FRAME_END ret SET_SIZE(rijndael_key_setup_dec_intel) @@ -701,8 +598,8 @@ ENTRY_NP(rijndael_key_setup_dec_intel) #define STATE xmm0 /* temporary, 128 bits */ #define KEY xmm1 /* temporary, 128 bits */ + ENTRY_NP(aes_encrypt_intel) - CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10) movups (%INP), %STATE // input movaps (%KEYP), %KEY // key @@ -759,7 +656,6 @@ ENTRY_NP(aes_encrypt_intel) aesenclast %KEY, %STATE // last round movups %STATE, (%OUTP) // output - SET_TS_OR_POP_XMM0_XMM1(%r10) ret SET_SIZE(aes_encrypt_intel) @@ -787,7 +683,6 @@ ENTRY_NP(aes_encrypt_intel) * const AES_KEY *key); */ ENTRY_NP(aes_decrypt_intel) - CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10) movups (%INP), %STATE // input movaps (%KEYP), %KEY // key @@ -844,7 +739,6 @@ ENTRY_NP(aes_decrypt_intel) aesdeclast %KEY, %STATE // last round movups %STATE, (%OUTP) // output - SET_TS_OR_POP_XMM0_XMM1(%r10) ret SET_SIZE(aes_decrypt_intel) diff --git a/module/icp/asm-x86_64/modes/gcm_intel.S b/module/icp/asm-x86_64/modes/gcm_intel.S index 109f9b47b..a43b5ebcb 100644 --- a/module/icp/asm-x86_64/modes/gcm_intel.S +++ b/module/icp/asm-x86_64/modes/gcm_intel.S @@ -95,99 +95,18 @@ gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res) { #define _ASM #include <sys/asm_linkage.h> -#ifdef _KERNEL - /* - * Note: the CLTS macro clobbers P2 (%rsi) under i86xpv. That is, - * it calls HYPERVISOR_fpu_taskswitch() which modifies %rsi when it - * uses it to pass P2 to syscall. - * This also occurs with the STTS macro, but we dont care if - * P2 (%rsi) is modified just before function exit. - * The CLTS and STTS macros push and pop P1 (%rdi) already. - */ -#ifdef __xpv -#define PROTECTED_CLTS \ - push %rsi; \ - CLTS; \ - pop %rsi -#else -#define PROTECTED_CLTS \ - CLTS -#endif /* __xpv */ - - /* - * If CR0_TS is not set, align stack (with push %rbp) and push - * %xmm0 - %xmm10 on stack, otherwise clear CR0_TS - */ -#define CLEAR_TS_OR_PUSH_XMM_REGISTERS(tmpreg) \ - push %rbp; \ - mov %rsp, %rbp; \ - movq %cr0, tmpreg; \ - testq $CR0_TS, tmpreg; \ - jnz 1f; \ - and $-XMM_ALIGN, %rsp; \ - sub $[XMM_SIZE * 11], %rsp; \ - movaps %xmm0, 160(%rsp); \ - movaps %xmm1, 144(%rsp); \ - movaps %xmm2, 128(%rsp); \ - movaps %xmm3, 112(%rsp); \ - movaps %xmm4, 96(%rsp); \ - movaps %xmm5, 80(%rsp); \ - movaps %xmm6, 64(%rsp); \ - movaps %xmm7, 48(%rsp); \ - movaps %xmm8, 32(%rsp); \ - movaps %xmm9, 16(%rsp); \ - movaps %xmm10, (%rsp); \ - jmp 2f; \ -1: \ - PROTECTED_CLTS; \ -2: - - - /* - * If CR0_TS was not set above, pop %xmm0 - %xmm10 off stack, - * otherwise set CR0_TS. - */ -#define SET_TS_OR_POP_XMM_REGISTERS(tmpreg) \ - testq $CR0_TS, tmpreg; \ - jnz 1f; \ - movaps (%rsp), %xmm10; \ - movaps 16(%rsp), %xmm9; \ - movaps 32(%rsp), %xmm8; \ - movaps 48(%rsp), %xmm7; \ - movaps 64(%rsp), %xmm6; \ - movaps 80(%rsp), %xmm5; \ - movaps 96(%rsp), %xmm4; \ - movaps 112(%rsp), %xmm3; \ - movaps 128(%rsp), %xmm2; \ - movaps 144(%rsp), %xmm1; \ - movaps 160(%rsp), %xmm0; \ - jmp 2f; \ -1: \ - STTS(tmpreg); \ -2: \ - mov %rbp, %rsp; \ - pop %rbp - - -#else -#define PROTECTED_CLTS -#define CLEAR_TS_OR_PUSH_XMM_REGISTERS(tmpreg) -#define SET_TS_OR_POP_XMM_REGISTERS(tmpreg) -#endif /* _KERNEL */ - /* * Use this mask to byte-swap a 16-byte integer with the pshufb instruction */ // static uint8_t byte_swap16_mask[] = { // 15, 14, 13, 12, 11, 10, 9, 8, 7, 6 ,5, 4, 3, 2, 1, 0 }; -.text +.data .align XMM_ALIGN .Lbyte_swap16_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 - /* * void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res); * @@ -221,8 +140,6 @@ gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res) { */ ENTRY_NP(gcm_mul_pclmulqdq) - CLEAR_TS_OR_PUSH_XMM_REGISTERS(%r10) - // // Copy Parameters // @@ -325,9 +242,8 @@ ENTRY_NP(gcm_mul_pclmulqdq) // - // Cleanup and Return + // Return // - SET_TS_OR_POP_XMM_REGISTERS(%r10) ret SET_SIZE(gcm_mul_pclmulqdq) diff --git a/module/icp/asm-x86_64/sha1/sha1-x86_64.S b/module/icp/asm-x86_64/sha1/sha1-x86_64.S index 6fb4ac5da..cb923784a 100644 --- a/module/icp/asm-x86_64/sha1/sha1-x86_64.S +++ b/module/icp/asm-x86_64/sha1/sha1-x86_64.S @@ -1,15 +1,15 @@ /* * !/usr/bin/env perl - * + * * ==================================================================== * Written by Andy Polyakov <[email protected]> for the OpenSSL * project. The module is, however, dual licensed under OpenSSL and * CRYPTOGAMS licenses depending on where you obtain it. For further * details see http://www.openssl.org/~appro/cryptogams/. * ==================================================================== - * + * * sha1_block procedure for x86_64. - * + * * It was brought to my attention that on EM64T compiler-generated code * was far behind 32-bit assembler implementation. This is unlike on * Opteron where compiler-generated code was only 15% behind 32-bit @@ -24,29 +24,29 @@ * core must have managed to run-time optimize even 32-bit code just as * good as 64-bit one. Performance improvement is summarized in the * following table: - * + * * gcc 3.4 32-bit asm cycles/byte * Opteron +45% +20% 6.8 * Xeon P4 +65% +0% 9.9 * Core2 +60% +10% 7.0 - * - * + * + * * OpenSolaris OS modifications - * + * * Sun elects to use this software under the BSD license. - * + * * This source originates from OpenSSL file sha1-x86_64.pl at * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz * (presumably for future OpenSSL release 0.9.8h), with these changes: - * + * * 1. Added perl "use strict" and declared variables. - * + * * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards. - * + * * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1) * assemblers). - * + * */ /* @@ -58,6 +58,7 @@ #include <sys/stdint.h> #include <sys/sha1.h> + /* ARGSUSED */ void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t blocks) @@ -86,7 +87,7 @@ ENTRY_NP(sha1_block_data_order) mov 16(%r8),%r11d .align 4 .Lloop: - mov 0(%r9),%eax + mov 0(%r9),%eax bswap %eax mov %eax,0(%rsp) lea 0x5a827999(%eax,%r11d),%r12d @@ -94,7 +95,7 @@ ENTRY_NP(sha1_block_data_order) mov 4(%r9),%eax mov %edx,%r11d xor %ebp,%ebx - bswap %eax + bswap %eax rol $5,%r11d and %esi,%ebx mov %eax,4(%rsp) @@ -107,7 +108,7 @@ ENTRY_NP(sha1_block_data_order) mov 8(%r9),%eax mov %r12d,%ebp xor %edi,%ebx - bswap %eax + bswap %eax rol $5,%ebp and %edx,%ebx mov %eax,8(%rsp) @@ -120,7 +121,7 @@ ENTRY_NP(sha1_block_data_order) mov 12(%r9),%eax mov %r11d,%edi xor %esi,%ebx - bswap %eax + bswap %eax rol $5,%edi and %r12d,%ebx mov %eax,12(%rsp) @@ -133,7 +134,7 @@ ENTRY_NP(sha1_block_data_order) mov 16(%r9),%eax mov %ebp,%esi xor %edx,%ebx - bswap %eax + bswap %eax rol $5,%esi and %r11d,%ebx mov %eax,16(%rsp) @@ -146,7 +147,7 @@ ENTRY_NP(sha1_block_data_order) mov 20(%r9),%eax mov %edi,%edx xor %r12d,%ebx - bswap %eax + bswap %eax rol $5,%edx and %ebp,%ebx mov %eax,20(%rsp) @@ -159,7 +160,7 @@ ENTRY_NP(sha1_block_data_order) mov 24(%r9),%eax mov %esi,%r12d xor %r11d,%ebx - bswap %eax + bswap %eax rol $5,%r12d and %edi,%ebx mov %eax,24(%rsp) @@ -172,7 +173,7 @@ ENTRY_NP(sha1_block_data_order) mov 28(%r9),%eax mov %edx,%r11d xor %ebp,%ebx - bswap %eax + bswap %eax rol $5,%r11d and %esi,%ebx mov %eax,28(%rsp) @@ -185,7 +186,7 @@ ENTRY_NP(sha1_block_data_order) mov 32(%r9),%eax mov %r12d,%ebp xor %edi,%ebx - bswap %eax + bswap %eax rol $5,%ebp and %edx,%ebx mov %eax,32(%rsp) @@ -198,7 +199,7 @@ ENTRY_NP(sha1_block_data_order) mov 36(%r9),%eax mov %r11d,%edi xor %esi,%ebx - bswap %eax + bswap %eax rol $5,%edi and %r12d,%ebx mov %eax,36(%rsp) @@ -211,7 +212,7 @@ ENTRY_NP(sha1_block_data_order) mov 40(%r9),%eax mov %ebp,%esi xor %edx,%ebx - bswap %eax + bswap %eax rol $5,%esi and %r11d,%ebx mov %eax,40(%rsp) @@ -224,7 +225,7 @@ ENTRY_NP(sha1_block_data_order) mov 44(%r9),%eax mov %edi,%edx xor %r12d,%ebx - bswap %eax + bswap %eax rol $5,%edx and %ebp,%ebx mov %eax,44(%rsp) @@ -237,7 +238,7 @@ ENTRY_NP(sha1_block_data_order) mov 48(%r9),%eax mov %esi,%r12d xor %r11d,%ebx - bswap %eax + bswap %eax rol $5,%r12d and %edi,%ebx mov %eax,48(%rsp) @@ -250,7 +251,7 @@ ENTRY_NP(sha1_block_data_order) mov 52(%r9),%eax mov %edx,%r11d xor %ebp,%ebx - bswap %eax + bswap %eax rol $5,%r11d and %esi,%ebx mov %eax,52(%rsp) @@ -263,7 +264,7 @@ ENTRY_NP(sha1_block_data_order) mov 56(%r9),%eax mov %r12d,%ebp xor %edi,%ebx - bswap %eax + bswap %eax rol $5,%ebp and %edx,%ebx mov %eax,56(%rsp) @@ -276,7 +277,7 @@ ENTRY_NP(sha1_block_data_order) mov 60(%r9),%eax mov %r11d,%edi xor %esi,%ebx - bswap %eax + bswap %eax rol $5,%edi and %r12d,%ebx mov %eax,60(%rsp) @@ -1341,6 +1342,8 @@ ENTRY_NP(sha1_block_data_order) pop %rbx ret SET_SIZE(sha1_block_data_order) + +.data .asciz "SHA1 block transform for x86_64, CRYPTOGAMS by <[email protected]>" #endif /* lint || __lint */ diff --git a/module/icp/asm-x86_64/sha2/sha256_impl.S b/module/icp/asm-x86_64/sha2/sha256_impl.S index 367795668..867dbb0f2 100644 --- a/module/icp/asm-x86_64/sha2/sha256_impl.S +++ b/module/icp/asm-x86_64/sha2/sha256_impl.S @@ -62,7 +62,7 @@ */ /* - * This file was generated by a perl script (sha512-x86_64.pl) that were + * This file was generated by a perl script (sha512-x86_64.pl) that were * used to generate sha256 and sha512 variants from the same code base. * The comments from the original file have been pasted above. */ @@ -2036,6 +2036,7 @@ ENTRY_NP(SHA256TransformBlocks) ret SET_SIZE(SHA256TransformBlocks) +.data .align 64 .type K256,@object K256: diff --git a/module/icp/asm-x86_64/sha2/sha512_impl.S b/module/icp/asm-x86_64/sha2/sha512_impl.S index 5a49cff4b..4acc72cbc 100644 --- a/module/icp/asm-x86_64/sha2/sha512_impl.S +++ b/module/icp/asm-x86_64/sha2/sha512_impl.S @@ -2037,6 +2037,7 @@ ENTRY_NP(SHA512TransformBlocks) ret SET_SIZE(SHA512TransformBlocks) +.data .align 64 .type K512,@object K512: diff --git a/module/icp/include/sys/asm_linkage.h b/module/icp/include/sys/asm_linkage.h index 380597857..49a494b46 100644 --- a/module/icp/include/sys/asm_linkage.h +++ b/module/icp/include/sys/asm_linkage.h @@ -33,4 +33,14 @@ #endif +#if defined(_KERNEL) && defined(HAVE_KERNEL_OBJTOOL) + +#include <asm/frame.h> + +#else /* userspace */ +#define FRAME_BEGIN +#define FRAME_END +#endif + + #endif /* _SYS_ASM_LINKAGE_H */ |