diff options
author | Jack Lloyd <[email protected]> | 2017-09-03 16:26:56 -0400 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2017-09-03 16:34:46 -0400 |
commit | b8b8a8482008b63ebf17fe77a665a69dbe4dcfb1 (patch) | |
tree | ce9db24f1e19b6db53a314e6dee687b5132fdfa1 /src/lib/block/aes/aes_armv8 | |
parent | 102948ada37eb278ac3ea248f1421f9b751c8906 (diff) |
Add support for AES extensions on ARMv8
Based on the patch in GH #1146
Diffstat (limited to 'src/lib/block/aes/aes_armv8')
-rw-r--r-- | src/lib/block/aes/aes_armv8/aes_armv8.cpp | 278 | ||||
-rw-r--r-- | src/lib/block/aes/aes_armv8/info.txt | 12 |
2 files changed, 290 insertions, 0 deletions
diff --git a/src/lib/block/aes/aes_armv8/aes_armv8.cpp b/src/lib/block/aes/aes_armv8/aes_armv8.cpp new file mode 100644 index 000000000..417854bfb --- /dev/null +++ b/src/lib/block/aes/aes_armv8/aes_armv8.cpp @@ -0,0 +1,278 @@ +/* +* AES using ARMv8 +* Contributed by Jeffrey Walton +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/aes.h> +#include <botan/loadstor.h> +#include <arm_neon.h> + +namespace Botan { + +/* +* AES-128 Encryption +*/ +BOTAN_FUNC_ISA("+crypto") +void AES_128::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + BOTAN_ASSERT(m_EK.empty() == false, "Key was set"); + + const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data()); + const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data()); + + const uint8x16_t K0 = vld1q_u8(skey + 0); + const uint8x16_t K1 = vld1q_u8(skey + 16); + const uint8x16_t K2 = vld1q_u8(skey + 32); + const uint8x16_t K3 = vld1q_u8(skey + 48); + const uint8x16_t K4 = vld1q_u8(skey + 64); + const uint8x16_t K5 = vld1q_u8(skey + 80); + const uint8x16_t K6 = vld1q_u8(skey + 96); + const uint8x16_t K7 = vld1q_u8(skey + 112); + const uint8x16_t K8 = vld1q_u8(skey + 128); + const uint8x16_t K9 = vld1q_u8(skey + 144); + const uint8x16_t K10 = vld1q_u8(mkey); + + for(size_t i = 0; i != blocks; ++i) + { + uint8x16_t data = vld1q_u8(in+16*i); + data = vaesmcq_u8(vaeseq_u8(data, K0)); + data = vaesmcq_u8(vaeseq_u8(data, K1)); + data = vaesmcq_u8(vaeseq_u8(data, K2)); + data = vaesmcq_u8(vaeseq_u8(data, K3)); + data = vaesmcq_u8(vaeseq_u8(data, K4)); + data = vaesmcq_u8(vaeseq_u8(data, K5)); + data = vaesmcq_u8(vaeseq_u8(data, K6)); + data = vaesmcq_u8(vaeseq_u8(data, K7)); + data = vaesmcq_u8(vaeseq_u8(data, K8)); + data = veorq_u8(vaeseq_u8(data, K9), K10); + vst1q_u8(out+16*i, data); + } + } + +/* +* AES-128 Decryption +*/ +BOTAN_FUNC_ISA("+crypto") +void AES_128::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + BOTAN_ASSERT(m_DK.empty() == false, "Key was set"); + + const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data()); + const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_MD.data()); + + const uint8x16_t K0 = vld1q_u8(skey + 0); + const uint8x16_t K1 = vld1q_u8(skey + 16); + const uint8x16_t K2 = vld1q_u8(skey + 32); + const uint8x16_t K3 = vld1q_u8(skey + 48); + const uint8x16_t K4 = vld1q_u8(skey + 64); + const uint8x16_t K5 = vld1q_u8(skey + 80); + const uint8x16_t K6 = vld1q_u8(skey + 96); + const uint8x16_t K7 = vld1q_u8(skey + 112); + const uint8x16_t K8 = vld1q_u8(skey + 128); + const uint8x16_t K9 = vld1q_u8(skey + 144); + const uint8x16_t K10 = vld1q_u8(mkey); + + for(size_t i = 0; i != blocks; ++i) + { + uint8x16_t data = vld1q_u8(in+16*i); + data = vaesimcq_u8(vaesdq_u8(data, K0)); + data = vaesimcq_u8(vaesdq_u8(data, K1)); + data = vaesimcq_u8(vaesdq_u8(data, K2)); + data = vaesimcq_u8(vaesdq_u8(data, K3)); + data = vaesimcq_u8(vaesdq_u8(data, K4)); + data = vaesimcq_u8(vaesdq_u8(data, K5)); + data = vaesimcq_u8(vaesdq_u8(data, K6)); + data = vaesimcq_u8(vaesdq_u8(data, K7)); + data = vaesimcq_u8(vaesdq_u8(data, K8)); + data = veorq_u8(vaesdq_u8(data, K9), K10); + vst1q_u8(out+16*i, data); + } + } + +/* +* AES-192 Encryption +*/ +BOTAN_FUNC_ISA("+crypto") +void AES_192::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + BOTAN_ASSERT(m_EK.empty() == false, "Key was set"); + + const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data()); + const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data()); + + const uint8x16_t K0 = vld1q_u8(skey + 0); + const uint8x16_t K1 = vld1q_u8(skey + 16); + const uint8x16_t K2 = vld1q_u8(skey + 32); + const uint8x16_t K3 = vld1q_u8(skey + 48); + const uint8x16_t K4 = vld1q_u8(skey + 64); + const uint8x16_t K5 = vld1q_u8(skey + 80); + const uint8x16_t K6 = vld1q_u8(skey + 96); + const uint8x16_t K7 = vld1q_u8(skey + 112); + const uint8x16_t K8 = vld1q_u8(skey + 128); + const uint8x16_t K9 = vld1q_u8(skey + 144); + const uint8x16_t K10 = vld1q_u8(skey + 160); + const uint8x16_t K11 = vld1q_u8(skey + 176); + const uint8x16_t K12 = vld1q_u8(mkey); + + for(size_t i = 0; i != blocks; ++i) + { + uint8x16_t data = vld1q_u8(in+16*i); + data = vaesmcq_u8(vaeseq_u8(data, K0)); + data = vaesmcq_u8(vaeseq_u8(data, K1)); + data = vaesmcq_u8(vaeseq_u8(data, K2)); + data = vaesmcq_u8(vaeseq_u8(data, K3)); + data = vaesmcq_u8(vaeseq_u8(data, K4)); + data = vaesmcq_u8(vaeseq_u8(data, K5)); + data = vaesmcq_u8(vaeseq_u8(data, K6)); + data = vaesmcq_u8(vaeseq_u8(data, K7)); + data = vaesmcq_u8(vaeseq_u8(data, K8)); + data = vaesmcq_u8(vaeseq_u8(data, K9)); + data = vaesmcq_u8(vaeseq_u8(data, K10)); + data = veorq_u8(vaeseq_u8(data, K11), K12); + vst1q_u8(out+16*i, data); + } + } + +/* +* AES-192 Decryption +*/ +BOTAN_FUNC_ISA("+crypto") +void AES_192::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + BOTAN_ASSERT(m_DK.empty() == false, "Key was set"); + const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data()); + const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_MD.data()); + + const uint8x16_t K0 = vld1q_u8(skey + 0); + const uint8x16_t K1 = vld1q_u8(skey + 16); + const uint8x16_t K2 = vld1q_u8(skey + 32); + const uint8x16_t K3 = vld1q_u8(skey + 48); + const uint8x16_t K4 = vld1q_u8(skey + 64); + const uint8x16_t K5 = vld1q_u8(skey + 80); + const uint8x16_t K6 = vld1q_u8(skey + 96); + const uint8x16_t K7 = vld1q_u8(skey + 112); + const uint8x16_t K8 = vld1q_u8(skey + 128); + const uint8x16_t K9 = vld1q_u8(skey + 144); + const uint8x16_t K10 = vld1q_u8(skey + 160); + const uint8x16_t K11 = vld1q_u8(skey + 176); + const uint8x16_t K12 = vld1q_u8(mkey); + + for(size_t i = 0; i != blocks; ++i) + { + uint8x16_t data = vld1q_u8(in+16*i); + data = vaesimcq_u8(vaesdq_u8(data, K0)); + data = vaesimcq_u8(vaesdq_u8(data, K1)); + data = vaesimcq_u8(vaesdq_u8(data, K2)); + data = vaesimcq_u8(vaesdq_u8(data, K3)); + data = vaesimcq_u8(vaesdq_u8(data, K4)); + data = vaesimcq_u8(vaesdq_u8(data, K5)); + data = vaesimcq_u8(vaesdq_u8(data, K6)); + data = vaesimcq_u8(vaesdq_u8(data, K7)); + data = vaesimcq_u8(vaesdq_u8(data, K8)); + data = vaesimcq_u8(vaesdq_u8(data, K9)); + data = vaesimcq_u8(vaesdq_u8(data, K10)); + data = veorq_u8(vaesdq_u8(data, K11), K12); + vst1q_u8(out+16*i, data); + } + } + +/* +* AES-256 Encryption +*/ +BOTAN_FUNC_ISA("+crypto") +void AES_256::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + BOTAN_ASSERT(m_EK.empty() == false, "Key was set"); + + const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data()); + const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data()); + + const uint8x16_t K0 = vld1q_u8(skey + 0); + const uint8x16_t K1 = vld1q_u8(skey + 16); + const uint8x16_t K2 = vld1q_u8(skey + 32); + const uint8x16_t K3 = vld1q_u8(skey + 48); + const uint8x16_t K4 = vld1q_u8(skey + 64); + const uint8x16_t K5 = vld1q_u8(skey + 80); + const uint8x16_t K6 = vld1q_u8(skey + 96); + const uint8x16_t K7 = vld1q_u8(skey + 112); + const uint8x16_t K8 = vld1q_u8(skey + 128); + const uint8x16_t K9 = vld1q_u8(skey + 144); + const uint8x16_t K10 = vld1q_u8(skey + 160); + const uint8x16_t K11 = vld1q_u8(skey + 176); + const uint8x16_t K12 = vld1q_u8(skey + 192); + const uint8x16_t K13 = vld1q_u8(skey + 208); + const uint8x16_t K14 = vld1q_u8(mkey); + + for(size_t i = 0; i != blocks; ++i) + { + uint8x16_t data = vld1q_u8(in+16*i); + data = vaesmcq_u8(vaeseq_u8(data, K0)); + data = vaesmcq_u8(vaeseq_u8(data, K1)); + data = vaesmcq_u8(vaeseq_u8(data, K2)); + data = vaesmcq_u8(vaeseq_u8(data, K3)); + data = vaesmcq_u8(vaeseq_u8(data, K4)); + data = vaesmcq_u8(vaeseq_u8(data, K5)); + data = vaesmcq_u8(vaeseq_u8(data, K6)); + data = vaesmcq_u8(vaeseq_u8(data, K7)); + data = vaesmcq_u8(vaeseq_u8(data, K8)); + data = vaesmcq_u8(vaeseq_u8(data, K9)); + data = vaesmcq_u8(vaeseq_u8(data, K10)); + data = vaesmcq_u8(vaeseq_u8(data, K11)); + data = vaesmcq_u8(vaeseq_u8(data, K12)); + data = veorq_u8(vaeseq_u8(data, K13), K14); + vst1q_u8(out+16*i, data); + } + } + +/* +* AES-256 Decryption +*/ +BOTAN_FUNC_ISA("+crypto") +void AES_256::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + BOTAN_ASSERT(m_DK.empty() == false, "Key was set"); + + const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data()); + const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_MD.data()); + + const uint8x16_t K0 = vld1q_u8(skey + 0); + const uint8x16_t K1 = vld1q_u8(skey + 16); + const uint8x16_t K2 = vld1q_u8(skey + 32); + const uint8x16_t K3 = vld1q_u8(skey + 48); + const uint8x16_t K4 = vld1q_u8(skey + 64); + const uint8x16_t K5 = vld1q_u8(skey + 80); + const uint8x16_t K6 = vld1q_u8(skey + 96); + const uint8x16_t K7 = vld1q_u8(skey + 112); + const uint8x16_t K8 = vld1q_u8(skey + 128); + const uint8x16_t K9 = vld1q_u8(skey + 144); + const uint8x16_t K10 = vld1q_u8(skey + 160); + const uint8x16_t K11 = vld1q_u8(skey + 176); + const uint8x16_t K12 = vld1q_u8(skey + 192); + const uint8x16_t K13 = vld1q_u8(skey + 208); + const uint8x16_t K14 = vld1q_u8(mkey); + + for(size_t i = 0; i != blocks; ++i) + { + uint8x16_t data = vld1q_u8(in+16*i); + data = vaesimcq_u8(vaesdq_u8(data, K0)); + data = vaesimcq_u8(vaesdq_u8(data, K1)); + data = vaesimcq_u8(vaesdq_u8(data, K2)); + data = vaesimcq_u8(vaesdq_u8(data, K3)); + data = vaesimcq_u8(vaesdq_u8(data, K4)); + data = vaesimcq_u8(vaesdq_u8(data, K5)); + data = vaesimcq_u8(vaesdq_u8(data, K6)); + data = vaesimcq_u8(vaesdq_u8(data, K7)); + data = vaesimcq_u8(vaesdq_u8(data, K8)); + data = vaesimcq_u8(vaesdq_u8(data, K9)); + data = vaesimcq_u8(vaesdq_u8(data, K10)); + data = vaesimcq_u8(vaesdq_u8(data, K11)); + data = vaesimcq_u8(vaesdq_u8(data, K12)); + data = veorq_u8(vaesdq_u8(data, K13), K14); + vst1q_u8(out+16*i, data); + } + } + + +} diff --git a/src/lib/block/aes/aes_armv8/info.txt b/src/lib/block/aes/aes_armv8/info.txt new file mode 100644 index 000000000..1b432556c --- /dev/null +++ b/src/lib/block/aes/aes_armv8/info.txt @@ -0,0 +1,12 @@ +<defines> +AES_ARMV8 -> 20170903 +</defines> + +<arch> +arm64 +</arch> + +<cc> +gcc:5 +clang:3.8 +</cc> |