diff options
author | Jack Lloyd <[email protected]> | 2017-09-03 17:20:12 -0400 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2017-09-03 17:20:12 -0400 |
commit | 23d4634462cae60e76a8f7bc7686c7d2db7f5e5f (patch) | |
tree | 27d84d34f2968055a9984ccfdf4210513fcfd03c | |
parent | 25dc61dda078e9f0d998ca2f5e571f0be6367090 (diff) | |
parent | b8b8a8482008b63ebf17fe77a665a69dbe4dcfb1 (diff) |
Merge GH #1182 Add support for ARMv8 AES instructions
-rw-r--r-- | src/lib/block/aes/aes.cpp | 62 | ||||
-rw-r--r-- | src/lib/block/aes/aes.h | 15 | ||||
-rw-r--r-- | src/lib/block/aes/aes_armv8/aes_armv8.cpp | 278 | ||||
-rw-r--r-- | src/lib/block/aes/aes_armv8/info.txt | 12 |
4 files changed, 367 insertions, 0 deletions
diff --git a/src/lib/block/aes/aes.cpp b/src/lib/block/aes/aes.cpp index 144c7bbfd..0878d84ae 100644 --- a/src/lib/block/aes/aes.cpp +++ b/src/lib/block/aes/aes.cpp @@ -412,6 +412,19 @@ void aes_key_schedule(const uint8_t key[], size_t length, DK.resize(length + 24); copy_mem(EK.data(), XEK.data(), EK.size()); copy_mem(DK.data(), XDK.data(), DK.size()); + +#if defined(BOTAN_HAS_AES_ARMV8) + if(CPUID::has_arm_aes()) + { + // ARM needs the subkeys to be byte reversed + + for(size_t i = 0; i != EK.size(); ++i) + EK[i] = reverse_bytes(EK[i]); + for(size_t i = 0; i != DK.size(); ++i) + DK[i] = reverse_bytes(DK[i]); + } +#endif + } size_t aes_parallelism() @@ -442,6 +455,13 @@ const char* aes_provider() } #endif +#if defined(BOTAN_HAS_AES_ARMV8) + if(CPUID::has_arm_aes()) + { + return "armv8"; + } +#endif + return "base"; } @@ -471,6 +491,13 @@ void AES_128::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const } #endif +#if defined(BOTAN_HAS_AES_ARMV8) + if(CPUID::has_arm_aes()) + { + return armv8_encrypt_n(in, out, blocks); + } +#endif + aes_encrypt_n(in, out, blocks, m_EK, m_ME); } @@ -490,6 +517,13 @@ void AES_128::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const } #endif +#if defined(BOTAN_HAS_AES_ARMV8) + if(CPUID::has_arm_aes()) + { + return armv8_decrypt_n(in, out, blocks); + } +#endif + aes_decrypt_n(in, out, blocks, m_DK, m_MD); } @@ -536,6 +570,13 @@ void AES_192::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const } #endif +#if defined(BOTAN_HAS_AES_ARMV8) + if(CPUID::has_arm_aes()) + { + return armv8_encrypt_n(in, out, blocks); + } +#endif + aes_encrypt_n(in, out, blocks, m_EK, m_ME); } @@ -555,6 +596,13 @@ void AES_192::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const } #endif +#if defined(BOTAN_HAS_AES_ARMV8) + if(CPUID::has_arm_aes()) + { + return armv8_decrypt_n(in, out, blocks); + } +#endif + aes_decrypt_n(in, out, blocks, m_DK, m_MD); } @@ -601,6 +649,13 @@ void AES_256::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const } #endif +#if defined(BOTAN_HAS_AES_ARMV8) + if(CPUID::has_arm_aes()) + { + return armv8_encrypt_n(in, out, blocks); + } +#endif + aes_encrypt_n(in, out, blocks, m_EK, m_ME); } @@ -620,6 +675,13 @@ void AES_256::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const } #endif +#if defined(BOTAN_HAS_AES_ARMV8) + if(CPUID::has_arm_aes()) + { + return armv8_decrypt_n(in, out, blocks); + } +#endif + aes_decrypt_n(in, out, blocks, m_DK, m_MD); } diff --git a/src/lib/block/aes/aes.h b/src/lib/block/aes/aes.h index a74280947..6adac514e 100644 --- a/src/lib/block/aes/aes.h +++ b/src/lib/block/aes/aes.h @@ -43,6 +43,11 @@ class BOTAN_DLL AES_128 final : public Block_Cipher_Fixed_Params<16, 16> void aesni_key_schedule(const uint8_t key[], size_t length); #endif +#if defined(BOTAN_HAS_AES_ARMV8) + void armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; + void armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; +#endif + secure_vector<uint32_t> m_EK, m_DK; secure_vector<uint8_t> m_ME, m_MD; }; @@ -76,6 +81,11 @@ class BOTAN_DLL AES_192 final : public Block_Cipher_Fixed_Params<16, 24> void aesni_key_schedule(const uint8_t key[], size_t length); #endif +#if defined(BOTAN_HAS_AES_ARMV8) + void armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; + void armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; +#endif + void key_schedule(const uint8_t key[], size_t length) override; secure_vector<uint32_t> m_EK, m_DK; @@ -112,6 +122,11 @@ class BOTAN_DLL AES_256 final : public Block_Cipher_Fixed_Params<16, 32> void aesni_key_schedule(const uint8_t key[], size_t length); #endif +#if defined(BOTAN_HAS_AES_ARMV8) + void armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; + void armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; +#endif + void key_schedule(const uint8_t key[], size_t length) override; secure_vector<uint32_t> m_EK, m_DK; diff --git a/src/lib/block/aes/aes_armv8/aes_armv8.cpp b/src/lib/block/aes/aes_armv8/aes_armv8.cpp new file mode 100644 index 000000000..417854bfb --- /dev/null +++ b/src/lib/block/aes/aes_armv8/aes_armv8.cpp @@ -0,0 +1,278 @@ +/* +* AES using ARMv8 +* Contributed by Jeffrey Walton +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/aes.h> +#include <botan/loadstor.h> +#include <arm_neon.h> + +namespace Botan { + +/* +* AES-128 Encryption +*/ +BOTAN_FUNC_ISA("+crypto") +void AES_128::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + BOTAN_ASSERT(m_EK.empty() == false, "Key was set"); + + const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data()); + const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data()); + + const uint8x16_t K0 = vld1q_u8(skey + 0); + const uint8x16_t K1 = vld1q_u8(skey + 16); + const uint8x16_t K2 = vld1q_u8(skey + 32); + const uint8x16_t K3 = vld1q_u8(skey + 48); + const uint8x16_t K4 = vld1q_u8(skey + 64); + const uint8x16_t K5 = vld1q_u8(skey + 80); + const uint8x16_t K6 = vld1q_u8(skey + 96); + const uint8x16_t K7 = vld1q_u8(skey + 112); + const uint8x16_t K8 = vld1q_u8(skey + 128); + const uint8x16_t K9 = vld1q_u8(skey + 144); + const uint8x16_t K10 = vld1q_u8(mkey); + + for(size_t i = 0; i != blocks; ++i) + { + uint8x16_t data = vld1q_u8(in+16*i); + data = vaesmcq_u8(vaeseq_u8(data, K0)); + data = vaesmcq_u8(vaeseq_u8(data, K1)); + data = vaesmcq_u8(vaeseq_u8(data, K2)); + data = vaesmcq_u8(vaeseq_u8(data, K3)); + data = vaesmcq_u8(vaeseq_u8(data, K4)); + data = vaesmcq_u8(vaeseq_u8(data, K5)); + data = vaesmcq_u8(vaeseq_u8(data, K6)); + data = vaesmcq_u8(vaeseq_u8(data, K7)); + data = vaesmcq_u8(vaeseq_u8(data, K8)); + data = veorq_u8(vaeseq_u8(data, K9), K10); + vst1q_u8(out+16*i, data); + } + } + +/* +* AES-128 Decryption +*/ +BOTAN_FUNC_ISA("+crypto") +void AES_128::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + BOTAN_ASSERT(m_DK.empty() == false, "Key was set"); + + const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data()); + const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_MD.data()); + + const uint8x16_t K0 = vld1q_u8(skey + 0); + const uint8x16_t K1 = vld1q_u8(skey + 16); + const uint8x16_t K2 = vld1q_u8(skey + 32); + const uint8x16_t K3 = vld1q_u8(skey + 48); + const uint8x16_t K4 = vld1q_u8(skey + 64); + const uint8x16_t K5 = vld1q_u8(skey + 80); + const uint8x16_t K6 = vld1q_u8(skey + 96); + const uint8x16_t K7 = vld1q_u8(skey + 112); + const uint8x16_t K8 = vld1q_u8(skey + 128); + const uint8x16_t K9 = vld1q_u8(skey + 144); + const uint8x16_t K10 = vld1q_u8(mkey); + + for(size_t i = 0; i != blocks; ++i) + { + uint8x16_t data = vld1q_u8(in+16*i); + data = vaesimcq_u8(vaesdq_u8(data, K0)); + data = vaesimcq_u8(vaesdq_u8(data, K1)); + data = vaesimcq_u8(vaesdq_u8(data, K2)); + data = vaesimcq_u8(vaesdq_u8(data, K3)); + data = vaesimcq_u8(vaesdq_u8(data, K4)); + data = vaesimcq_u8(vaesdq_u8(data, K5)); + data = vaesimcq_u8(vaesdq_u8(data, K6)); + data = vaesimcq_u8(vaesdq_u8(data, K7)); + data = vaesimcq_u8(vaesdq_u8(data, K8)); + data = veorq_u8(vaesdq_u8(data, K9), K10); + vst1q_u8(out+16*i, data); + } + } + +/* +* AES-192 Encryption +*/ +BOTAN_FUNC_ISA("+crypto") +void AES_192::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + BOTAN_ASSERT(m_EK.empty() == false, "Key was set"); + + const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data()); + const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data()); + + const uint8x16_t K0 = vld1q_u8(skey + 0); + const uint8x16_t K1 = vld1q_u8(skey + 16); + const uint8x16_t K2 = vld1q_u8(skey + 32); + const uint8x16_t K3 = vld1q_u8(skey + 48); + const uint8x16_t K4 = vld1q_u8(skey + 64); + const uint8x16_t K5 = vld1q_u8(skey + 80); + const uint8x16_t K6 = vld1q_u8(skey + 96); + const uint8x16_t K7 = vld1q_u8(skey + 112); + const uint8x16_t K8 = vld1q_u8(skey + 128); + const uint8x16_t K9 = vld1q_u8(skey + 144); + const uint8x16_t K10 = vld1q_u8(skey + 160); + const uint8x16_t K11 = vld1q_u8(skey + 176); + const uint8x16_t K12 = vld1q_u8(mkey); + + for(size_t i = 0; i != blocks; ++i) + { + uint8x16_t data = vld1q_u8(in+16*i); + data = vaesmcq_u8(vaeseq_u8(data, K0)); + data = vaesmcq_u8(vaeseq_u8(data, K1)); + data = vaesmcq_u8(vaeseq_u8(data, K2)); + data = vaesmcq_u8(vaeseq_u8(data, K3)); + data = vaesmcq_u8(vaeseq_u8(data, K4)); + data = vaesmcq_u8(vaeseq_u8(data, K5)); + data = vaesmcq_u8(vaeseq_u8(data, K6)); + data = vaesmcq_u8(vaeseq_u8(data, K7)); + data = vaesmcq_u8(vaeseq_u8(data, K8)); + data = vaesmcq_u8(vaeseq_u8(data, K9)); + data = vaesmcq_u8(vaeseq_u8(data, K10)); + data = veorq_u8(vaeseq_u8(data, K11), K12); + vst1q_u8(out+16*i, data); + } + } + +/* +* AES-192 Decryption +*/ +BOTAN_FUNC_ISA("+crypto") +void AES_192::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + BOTAN_ASSERT(m_DK.empty() == false, "Key was set"); + const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data()); + const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_MD.data()); + + const uint8x16_t K0 = vld1q_u8(skey + 0); + const uint8x16_t K1 = vld1q_u8(skey + 16); + const uint8x16_t K2 = vld1q_u8(skey + 32); + const uint8x16_t K3 = vld1q_u8(skey + 48); + const uint8x16_t K4 = vld1q_u8(skey + 64); + const uint8x16_t K5 = vld1q_u8(skey + 80); + const uint8x16_t K6 = vld1q_u8(skey + 96); + const uint8x16_t K7 = vld1q_u8(skey + 112); + const uint8x16_t K8 = vld1q_u8(skey + 128); + const uint8x16_t K9 = vld1q_u8(skey + 144); + const uint8x16_t K10 = vld1q_u8(skey + 160); + const uint8x16_t K11 = vld1q_u8(skey + 176); + const uint8x16_t K12 = vld1q_u8(mkey); + + for(size_t i = 0; i != blocks; ++i) + { + uint8x16_t data = vld1q_u8(in+16*i); + data = vaesimcq_u8(vaesdq_u8(data, K0)); + data = vaesimcq_u8(vaesdq_u8(data, K1)); + data = vaesimcq_u8(vaesdq_u8(data, K2)); + data = vaesimcq_u8(vaesdq_u8(data, K3)); + data = vaesimcq_u8(vaesdq_u8(data, K4)); + data = vaesimcq_u8(vaesdq_u8(data, K5)); + data = vaesimcq_u8(vaesdq_u8(data, K6)); + data = vaesimcq_u8(vaesdq_u8(data, K7)); + data = vaesimcq_u8(vaesdq_u8(data, K8)); + data = vaesimcq_u8(vaesdq_u8(data, K9)); + data = vaesimcq_u8(vaesdq_u8(data, K10)); + data = veorq_u8(vaesdq_u8(data, K11), K12); + vst1q_u8(out+16*i, data); + } + } + +/* +* AES-256 Encryption +*/ +BOTAN_FUNC_ISA("+crypto") +void AES_256::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + BOTAN_ASSERT(m_EK.empty() == false, "Key was set"); + + const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data()); + const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data()); + + const uint8x16_t K0 = vld1q_u8(skey + 0); + const uint8x16_t K1 = vld1q_u8(skey + 16); + const uint8x16_t K2 = vld1q_u8(skey + 32); + const uint8x16_t K3 = vld1q_u8(skey + 48); + const uint8x16_t K4 = vld1q_u8(skey + 64); + const uint8x16_t K5 = vld1q_u8(skey + 80); + const uint8x16_t K6 = vld1q_u8(skey + 96); + const uint8x16_t K7 = vld1q_u8(skey + 112); + const uint8x16_t K8 = vld1q_u8(skey + 128); + const uint8x16_t K9 = vld1q_u8(skey + 144); + const uint8x16_t K10 = vld1q_u8(skey + 160); + const uint8x16_t K11 = vld1q_u8(skey + 176); + const uint8x16_t K12 = vld1q_u8(skey + 192); + const uint8x16_t K13 = vld1q_u8(skey + 208); + const uint8x16_t K14 = vld1q_u8(mkey); + + for(size_t i = 0; i != blocks; ++i) + { + uint8x16_t data = vld1q_u8(in+16*i); + data = vaesmcq_u8(vaeseq_u8(data, K0)); + data = vaesmcq_u8(vaeseq_u8(data, K1)); + data = vaesmcq_u8(vaeseq_u8(data, K2)); + data = vaesmcq_u8(vaeseq_u8(data, K3)); + data = vaesmcq_u8(vaeseq_u8(data, K4)); + data = vaesmcq_u8(vaeseq_u8(data, K5)); + data = vaesmcq_u8(vaeseq_u8(data, K6)); + data = vaesmcq_u8(vaeseq_u8(data, K7)); + data = vaesmcq_u8(vaeseq_u8(data, K8)); + data = vaesmcq_u8(vaeseq_u8(data, K9)); + data = vaesmcq_u8(vaeseq_u8(data, K10)); + data = vaesmcq_u8(vaeseq_u8(data, K11)); + data = vaesmcq_u8(vaeseq_u8(data, K12)); + data = veorq_u8(vaeseq_u8(data, K13), K14); + vst1q_u8(out+16*i, data); + } + } + +/* +* AES-256 Decryption +*/ +BOTAN_FUNC_ISA("+crypto") +void AES_256::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + BOTAN_ASSERT(m_DK.empty() == false, "Key was set"); + + const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data()); + const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_MD.data()); + + const uint8x16_t K0 = vld1q_u8(skey + 0); + const uint8x16_t K1 = vld1q_u8(skey + 16); + const uint8x16_t K2 = vld1q_u8(skey + 32); + const uint8x16_t K3 = vld1q_u8(skey + 48); + const uint8x16_t K4 = vld1q_u8(skey + 64); + const uint8x16_t K5 = vld1q_u8(skey + 80); + const uint8x16_t K6 = vld1q_u8(skey + 96); + const uint8x16_t K7 = vld1q_u8(skey + 112); + const uint8x16_t K8 = vld1q_u8(skey + 128); + const uint8x16_t K9 = vld1q_u8(skey + 144); + const uint8x16_t K10 = vld1q_u8(skey + 160); + const uint8x16_t K11 = vld1q_u8(skey + 176); + const uint8x16_t K12 = vld1q_u8(skey + 192); + const uint8x16_t K13 = vld1q_u8(skey + 208); + const uint8x16_t K14 = vld1q_u8(mkey); + + for(size_t i = 0; i != blocks; ++i) + { + uint8x16_t data = vld1q_u8(in+16*i); + data = vaesimcq_u8(vaesdq_u8(data, K0)); + data = vaesimcq_u8(vaesdq_u8(data, K1)); + data = vaesimcq_u8(vaesdq_u8(data, K2)); + data = vaesimcq_u8(vaesdq_u8(data, K3)); + data = vaesimcq_u8(vaesdq_u8(data, K4)); + data = vaesimcq_u8(vaesdq_u8(data, K5)); + data = vaesimcq_u8(vaesdq_u8(data, K6)); + data = vaesimcq_u8(vaesdq_u8(data, K7)); + data = vaesimcq_u8(vaesdq_u8(data, K8)); + data = vaesimcq_u8(vaesdq_u8(data, K9)); + data = vaesimcq_u8(vaesdq_u8(data, K10)); + data = vaesimcq_u8(vaesdq_u8(data, K11)); + data = vaesimcq_u8(vaesdq_u8(data, K12)); + data = veorq_u8(vaesdq_u8(data, K13), K14); + vst1q_u8(out+16*i, data); + } + } + + +} diff --git a/src/lib/block/aes/aes_armv8/info.txt b/src/lib/block/aes/aes_armv8/info.txt new file mode 100644 index 000000000..1b432556c --- /dev/null +++ b/src/lib/block/aes/aes_armv8/info.txt @@ -0,0 +1,12 @@ +<defines> +AES_ARMV8 -> 20170903 +</defines> + +<arch> +arm64 +</arch> + +<cc> +gcc:5 +clang:3.8 +</cc> |