diff options
author | Tom Caputi <[email protected]> | 2018-01-31 18:17:56 -0500 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2018-01-31 15:17:56 -0800 |
commit | a73c94934f6176f63c3ec4c216a84066e9b65465 (patch) | |
tree | aebd8460b1b41f82984302e929385a779ea2c7e9 /module | |
parent | f90a30ad1b32a971f62a540f8944e42f99b254ce (diff) |
Change movaps to movups in AES-NI code
Currently, the ICP contains accelerated assembly code to be
used specifically on CPUs with AES-NI enabled. This code
makes heavy use of the movaps instruction which assumes that
it will be provided aes keys that are 16 byte aligned. This
assumption seems to hold on Illumos, but on Linux some kernel
options such as 'slub_debug=P' will violate it. This patch
changes all instances of this instruction to movups which is
the same except that it can handle unaligned memory.
This patch also adds a few flags which were accidentally never
given to the assembly compiler, resulting in objtool warnings.
Reviewed by: Gvozden Neskovic <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Nathaniel R. Lewis <[email protected]>
Signed-off-by: Tom Caputi <[email protected]>
Closes #7065
Closes #7108
Diffstat (limited to 'module')
-rw-r--r-- | module/icp/Makefile.in | 1 | ||||
-rw-r--r-- | module/icp/asm-x86_64/aes/aes_intel.S | 94 | ||||
-rw-r--r-- | module/icp/asm-x86_64/modes/gcm_intel.S | 2 |
3 files changed, 49 insertions, 48 deletions
diff --git a/module/icp/Makefile.in b/module/icp/Makefile.in index 2eb9e6f1f..cfe913e1d 100644 --- a/module/icp/Makefile.in +++ b/module/icp/Makefile.in @@ -26,6 +26,7 @@ endif obj-$(CONFIG_ZFS) := $(MODULE).o asflags-y := -I$(src)/include +asflags-y += $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS) ccflags-y := -I$(src)/include ccflags-y += $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS) diff --git a/module/icp/asm-x86_64/aes/aes_intel.S b/module/icp/asm-x86_64/aes/aes_intel.S index ed0df75c5..a40e30fbe 100644 --- a/module/icp/asm-x86_64/aes/aes_intel.S +++ b/module/icp/asm-x86_64/aes/aes_intel.S @@ -207,7 +207,7 @@ _key_expansion_256a_local: shufps $0b10001100, %xmm0, %xmm4 pxor %xmm4, %xmm0 pxor %xmm1, %xmm0 - movaps %xmm0, (%rcx) + movups %xmm0, (%rcx) add $0x10, %rcx ret nop @@ -224,18 +224,18 @@ _key_expansion_192a_local: pxor %xmm4, %xmm0 pxor %xmm1, %xmm0 - movaps %xmm2, %xmm5 - movaps %xmm2, %xmm6 + movups %xmm2, %xmm5 + movups %xmm2, %xmm6 pslldq $4, %xmm5 pshufd $0b11111111, %xmm0, %xmm3 pxor %xmm3, %xmm2 pxor %xmm5, %xmm2 - movaps %xmm0, %xmm1 + movups %xmm0, %xmm1 shufps $0b01000100, %xmm0, %xmm6 - movaps %xmm6, (%rcx) + movups %xmm6, (%rcx) shufps $0b01001110, %xmm2, %xmm1 - movaps %xmm1, 0x10(%rcx) + movups %xmm1, 0x10(%rcx) add $0x20, %rcx ret SET_SIZE(_key_expansion_192a) @@ -250,13 +250,13 @@ _key_expansion_192b_local: pxor %xmm4, %xmm0 pxor %xmm1, %xmm0 - movaps %xmm2, %xmm5 + movups %xmm2, %xmm5 pslldq $4, %xmm5 pshufd $0b11111111, %xmm0, %xmm3 pxor %xmm3, %xmm2 pxor %xmm5, %xmm2 - movaps %xmm0, (%rcx) + movups %xmm0, (%rcx) add $0x10, %rcx ret SET_SIZE(_key_expansion_192b) @@ -270,7 +270,7 @@ _key_expansion_256b_local: shufps $0b10001100, %xmm2, %xmm4 pxor %xmm4, %xmm2 pxor %xmm1, %xmm2 - movaps %xmm2, (%rcx) + movups %xmm2, (%rcx) add $0x10, %rcx ret SET_SIZE(_key_expansion_256b) @@ -327,7 +327,7 @@ rijndael_key_setup_enc_intel_local: jz .Lenc_key_invalid_param movups (%USERCIPHERKEY), %xmm0 // user key (first 16 bytes) - movaps %xmm0, (%AESKEY) + movups %xmm0, (%AESKEY) lea 0x10(%AESKEY), %rcx // key addr pxor %xmm4, %xmm4 // xmm4 is assumed 0 in _key_expansion_x @@ -341,7 +341,7 @@ rijndael_key_setup_enc_intel_local: #endif /* OPENSSL_INTERFACE */ movups 0x10(%USERCIPHERKEY), %xmm2 // other user key (2nd 16 bytes) - movaps %xmm2, (%rcx) + movups %xmm2, (%rcx) add $0x10, %rcx aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key @@ -525,10 +525,10 @@ FRAME_BEGIN .align 4 .Ldec_key_reorder_loop: - movaps (%AESKEY), %xmm0 - movaps (%ROUNDS64), %xmm1 - movaps %xmm0, (%ROUNDS64) - movaps %xmm1, (%AESKEY) + movups (%AESKEY), %xmm0 + movups (%ROUNDS64), %xmm1 + movups %xmm0, (%ROUNDS64) + movups %xmm1, (%AESKEY) lea 0x10(%AESKEY), %AESKEY lea -0x10(%ROUNDS64), %ROUNDS64 cmp %AESKEY, %ROUNDS64 @@ -536,11 +536,11 @@ FRAME_BEGIN .align 4 .Ldec_key_inv_loop: - movaps (%rcx), %xmm0 + movups (%rcx), %xmm0 // Convert an encryption round key to a form usable for decryption // with the "AES Inverse Mix Columns" instruction aesimc %xmm0, %xmm1 - movaps %xmm1, (%rcx) + movups %xmm1, (%rcx) lea 0x10(%rcx), %rcx cmp %ENDAESKEY, %rcx jnz .Ldec_key_inv_loop @@ -602,7 +602,7 @@ FRAME_BEGIN ENTRY_NP(aes_encrypt_intel) movups (%INP), %STATE // input - movaps (%KEYP), %KEY // key + movups (%KEYP), %KEY // key #ifdef OPENSSL_INTERFACE mov 240(%KEYP), %NROUNDS32 // round count #else /* OpenSolaris Interface */ @@ -618,41 +618,41 @@ ENTRY_NP(aes_encrypt_intel) // AES 256 lea 0x20(%KEYP), %KEYP - movaps -0x60(%KEYP), %KEY + movups -0x60(%KEYP), %KEY aesenc %KEY, %STATE - movaps -0x50(%KEYP), %KEY + movups -0x50(%KEYP), %KEY aesenc %KEY, %STATE .align 4 .Lenc192: // AES 192 and 256 - movaps -0x40(%KEYP), %KEY + movups -0x40(%KEYP), %KEY aesenc %KEY, %STATE - movaps -0x30(%KEYP), %KEY + movups -0x30(%KEYP), %KEY aesenc %KEY, %STATE .align 4 .Lenc128: // AES 128, 192, and 256 - movaps -0x20(%KEYP), %KEY + movups -0x20(%KEYP), %KEY aesenc %KEY, %STATE - movaps -0x10(%KEYP), %KEY + movups -0x10(%KEYP), %KEY aesenc %KEY, %STATE - movaps (%KEYP), %KEY + movups (%KEYP), %KEY aesenc %KEY, %STATE - movaps 0x10(%KEYP), %KEY + movups 0x10(%KEYP), %KEY aesenc %KEY, %STATE - movaps 0x20(%KEYP), %KEY + movups 0x20(%KEYP), %KEY aesenc %KEY, %STATE - movaps 0x30(%KEYP), %KEY + movups 0x30(%KEYP), %KEY aesenc %KEY, %STATE - movaps 0x40(%KEYP), %KEY + movups 0x40(%KEYP), %KEY aesenc %KEY, %STATE - movaps 0x50(%KEYP), %KEY + movups 0x50(%KEYP), %KEY aesenc %KEY, %STATE - movaps 0x60(%KEYP), %KEY + movups 0x60(%KEYP), %KEY aesenc %KEY, %STATE - movaps 0x70(%KEYP), %KEY + movups 0x70(%KEYP), %KEY aesenclast %KEY, %STATE // last round movups %STATE, (%OUTP) // output @@ -685,7 +685,7 @@ ENTRY_NP(aes_encrypt_intel) ENTRY_NP(aes_decrypt_intel) movups (%INP), %STATE // input - movaps (%KEYP), %KEY // key + movups (%KEYP), %KEY // key #ifdef OPENSSL_INTERFACE mov 240(%KEYP), %NROUNDS32 // round count #else /* OpenSolaris Interface */ @@ -701,41 +701,41 @@ ENTRY_NP(aes_decrypt_intel) // AES 256 lea 0x20(%KEYP), %KEYP - movaps -0x60(%KEYP), %KEY + movups -0x60(%KEYP), %KEY aesdec %KEY, %STATE - movaps -0x50(%KEYP), %KEY + movups -0x50(%KEYP), %KEY aesdec %KEY, %STATE .align 4 .Ldec192: // AES 192 and 256 - movaps -0x40(%KEYP), %KEY + movups -0x40(%KEYP), %KEY aesdec %KEY, %STATE - movaps -0x30(%KEYP), %KEY + movups -0x30(%KEYP), %KEY aesdec %KEY, %STATE .align 4 .Ldec128: // AES 128, 192, and 256 - movaps -0x20(%KEYP), %KEY + movups -0x20(%KEYP), %KEY aesdec %KEY, %STATE - movaps -0x10(%KEYP), %KEY + movups -0x10(%KEYP), %KEY aesdec %KEY, %STATE - movaps (%KEYP), %KEY + movups (%KEYP), %KEY aesdec %KEY, %STATE - movaps 0x10(%KEYP), %KEY + movups 0x10(%KEYP), %KEY aesdec %KEY, %STATE - movaps 0x20(%KEYP), %KEY + movups 0x20(%KEYP), %KEY aesdec %KEY, %STATE - movaps 0x30(%KEYP), %KEY + movups 0x30(%KEYP), %KEY aesdec %KEY, %STATE - movaps 0x40(%KEYP), %KEY + movups 0x40(%KEYP), %KEY aesdec %KEY, %STATE - movaps 0x50(%KEYP), %KEY + movups 0x50(%KEYP), %KEY aesdec %KEY, %STATE - movaps 0x60(%KEYP), %KEY + movups 0x60(%KEYP), %KEY aesdec %KEY, %STATE - movaps 0x70(%KEYP), %KEY + movups 0x70(%KEYP), %KEY aesdeclast %KEY, %STATE // last round movups %STATE, (%OUTP) // output diff --git a/module/icp/asm-x86_64/modes/gcm_intel.S b/module/icp/asm-x86_64/modes/gcm_intel.S index a43b5ebcb..3aec0ee15 100644 --- a/module/icp/asm-x86_64/modes/gcm_intel.S +++ b/module/icp/asm-x86_64/modes/gcm_intel.S @@ -150,7 +150,7 @@ ENTRY_NP(gcm_mul_pclmulqdq) // Byte swap 16-byte input // lea .Lbyte_swap16_mask(%rip), %rax - movaps (%rax), %xmm10 + movups (%rax), %xmm10 pshufb %xmm10, %xmm0 pshufb %xmm10, %xmm1 |