aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2017-09-03 17:20:12 -0400
committerJack Lloyd <[email protected]>2017-09-03 17:20:12 -0400
commit23d4634462cae60e76a8f7bc7686c7d2db7f5e5f (patch)
tree27d84d34f2968055a9984ccfdf4210513fcfd03c
parent25dc61dda078e9f0d998ca2f5e571f0be6367090 (diff)
parentb8b8a8482008b63ebf17fe77a665a69dbe4dcfb1 (diff)
Merge GH #1182 Add support for ARMv8 AES instructions
-rw-r--r--src/lib/block/aes/aes.cpp62
-rw-r--r--src/lib/block/aes/aes.h15
-rw-r--r--src/lib/block/aes/aes_armv8/aes_armv8.cpp278
-rw-r--r--src/lib/block/aes/aes_armv8/info.txt12
4 files changed, 367 insertions, 0 deletions
diff --git a/src/lib/block/aes/aes.cpp b/src/lib/block/aes/aes.cpp
index 144c7bbfd..0878d84ae 100644
--- a/src/lib/block/aes/aes.cpp
+++ b/src/lib/block/aes/aes.cpp
@@ -412,6 +412,19 @@ void aes_key_schedule(const uint8_t key[], size_t length,
DK.resize(length + 24);
copy_mem(EK.data(), XEK.data(), EK.size());
copy_mem(DK.data(), XDK.data(), DK.size());
+
+#if defined(BOTAN_HAS_AES_ARMV8)
+ if(CPUID::has_arm_aes())
+ {
+ // ARM needs the subkeys to be byte reversed
+
+ for(size_t i = 0; i != EK.size(); ++i)
+ EK[i] = reverse_bytes(EK[i]);
+ for(size_t i = 0; i != DK.size(); ++i)
+ DK[i] = reverse_bytes(DK[i]);
+ }
+#endif
+
}
size_t aes_parallelism()
@@ -442,6 +455,13 @@ const char* aes_provider()
}
#endif
+#if defined(BOTAN_HAS_AES_ARMV8)
+ if(CPUID::has_arm_aes())
+ {
+ return "armv8";
+ }
+#endif
+
return "base";
}
@@ -471,6 +491,13 @@ void AES_128::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
+#if defined(BOTAN_HAS_AES_ARMV8)
+ if(CPUID::has_arm_aes())
+ {
+ return armv8_encrypt_n(in, out, blocks);
+ }
+#endif
+
aes_encrypt_n(in, out, blocks, m_EK, m_ME);
}
@@ -490,6 +517,13 @@ void AES_128::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
+#if defined(BOTAN_HAS_AES_ARMV8)
+ if(CPUID::has_arm_aes())
+ {
+ return armv8_decrypt_n(in, out, blocks);
+ }
+#endif
+
aes_decrypt_n(in, out, blocks, m_DK, m_MD);
}
@@ -536,6 +570,13 @@ void AES_192::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
+#if defined(BOTAN_HAS_AES_ARMV8)
+ if(CPUID::has_arm_aes())
+ {
+ return armv8_encrypt_n(in, out, blocks);
+ }
+#endif
+
aes_encrypt_n(in, out, blocks, m_EK, m_ME);
}
@@ -555,6 +596,13 @@ void AES_192::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
+#if defined(BOTAN_HAS_AES_ARMV8)
+ if(CPUID::has_arm_aes())
+ {
+ return armv8_decrypt_n(in, out, blocks);
+ }
+#endif
+
aes_decrypt_n(in, out, blocks, m_DK, m_MD);
}
@@ -601,6 +649,13 @@ void AES_256::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
+#if defined(BOTAN_HAS_AES_ARMV8)
+ if(CPUID::has_arm_aes())
+ {
+ return armv8_encrypt_n(in, out, blocks);
+ }
+#endif
+
aes_encrypt_n(in, out, blocks, m_EK, m_ME);
}
@@ -620,6 +675,13 @@ void AES_256::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
}
#endif
+#if defined(BOTAN_HAS_AES_ARMV8)
+ if(CPUID::has_arm_aes())
+ {
+ return armv8_decrypt_n(in, out, blocks);
+ }
+#endif
+
aes_decrypt_n(in, out, blocks, m_DK, m_MD);
}
diff --git a/src/lib/block/aes/aes.h b/src/lib/block/aes/aes.h
index a74280947..6adac514e 100644
--- a/src/lib/block/aes/aes.h
+++ b/src/lib/block/aes/aes.h
@@ -43,6 +43,11 @@ class BOTAN_DLL AES_128 final : public Block_Cipher_Fixed_Params<16, 16>
void aesni_key_schedule(const uint8_t key[], size_t length);
#endif
+#if defined(BOTAN_HAS_AES_ARMV8)
+ void armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+#endif
+
secure_vector<uint32_t> m_EK, m_DK;
secure_vector<uint8_t> m_ME, m_MD;
};
@@ -76,6 +81,11 @@ class BOTAN_DLL AES_192 final : public Block_Cipher_Fixed_Params<16, 24>
void aesni_key_schedule(const uint8_t key[], size_t length);
#endif
+#if defined(BOTAN_HAS_AES_ARMV8)
+ void armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+#endif
+
void key_schedule(const uint8_t key[], size_t length) override;
secure_vector<uint32_t> m_EK, m_DK;
@@ -112,6 +122,11 @@ class BOTAN_DLL AES_256 final : public Block_Cipher_Fixed_Params<16, 32>
void aesni_key_schedule(const uint8_t key[], size_t length);
#endif
+#if defined(BOTAN_HAS_AES_ARMV8)
+ void armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const;
+#endif
+
void key_schedule(const uint8_t key[], size_t length) override;
secure_vector<uint32_t> m_EK, m_DK;
diff --git a/src/lib/block/aes/aes_armv8/aes_armv8.cpp b/src/lib/block/aes/aes_armv8/aes_armv8.cpp
new file mode 100644
index 000000000..417854bfb
--- /dev/null
+++ b/src/lib/block/aes/aes_armv8/aes_armv8.cpp
@@ -0,0 +1,278 @@
+/*
+* AES using ARMv8
+* Contributed by Jeffrey Walton
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/aes.h>
+#include <botan/loadstor.h>
+#include <arm_neon.h>
+
+namespace Botan {
+
+/*
+* AES-128 Encryption
+*/
+BOTAN_FUNC_ISA("+crypto")
+void AES_128::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_EK.empty() == false, "Key was set");
+
+ const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data());
+ const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data());
+
+ const uint8x16_t K0 = vld1q_u8(skey + 0);
+ const uint8x16_t K1 = vld1q_u8(skey + 16);
+ const uint8x16_t K2 = vld1q_u8(skey + 32);
+ const uint8x16_t K3 = vld1q_u8(skey + 48);
+ const uint8x16_t K4 = vld1q_u8(skey + 64);
+ const uint8x16_t K5 = vld1q_u8(skey + 80);
+ const uint8x16_t K6 = vld1q_u8(skey + 96);
+ const uint8x16_t K7 = vld1q_u8(skey + 112);
+ const uint8x16_t K8 = vld1q_u8(skey + 128);
+ const uint8x16_t K9 = vld1q_u8(skey + 144);
+ const uint8x16_t K10 = vld1q_u8(mkey);
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ uint8x16_t data = vld1q_u8(in+16*i);
+ data = vaesmcq_u8(vaeseq_u8(data, K0));
+ data = vaesmcq_u8(vaeseq_u8(data, K1));
+ data = vaesmcq_u8(vaeseq_u8(data, K2));
+ data = vaesmcq_u8(vaeseq_u8(data, K3));
+ data = vaesmcq_u8(vaeseq_u8(data, K4));
+ data = vaesmcq_u8(vaeseq_u8(data, K5));
+ data = vaesmcq_u8(vaeseq_u8(data, K6));
+ data = vaesmcq_u8(vaeseq_u8(data, K7));
+ data = vaesmcq_u8(vaeseq_u8(data, K8));
+ data = veorq_u8(vaeseq_u8(data, K9), K10);
+ vst1q_u8(out+16*i, data);
+ }
+ }
+
+/*
+* AES-128 Decryption
+*/
+BOTAN_FUNC_ISA("+crypto")
+void AES_128::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_DK.empty() == false, "Key was set");
+
+ const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data());
+ const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_MD.data());
+
+ const uint8x16_t K0 = vld1q_u8(skey + 0);
+ const uint8x16_t K1 = vld1q_u8(skey + 16);
+ const uint8x16_t K2 = vld1q_u8(skey + 32);
+ const uint8x16_t K3 = vld1q_u8(skey + 48);
+ const uint8x16_t K4 = vld1q_u8(skey + 64);
+ const uint8x16_t K5 = vld1q_u8(skey + 80);
+ const uint8x16_t K6 = vld1q_u8(skey + 96);
+ const uint8x16_t K7 = vld1q_u8(skey + 112);
+ const uint8x16_t K8 = vld1q_u8(skey + 128);
+ const uint8x16_t K9 = vld1q_u8(skey + 144);
+ const uint8x16_t K10 = vld1q_u8(mkey);
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ uint8x16_t data = vld1q_u8(in+16*i);
+ data = vaesimcq_u8(vaesdq_u8(data, K0));
+ data = vaesimcq_u8(vaesdq_u8(data, K1));
+ data = vaesimcq_u8(vaesdq_u8(data, K2));
+ data = vaesimcq_u8(vaesdq_u8(data, K3));
+ data = vaesimcq_u8(vaesdq_u8(data, K4));
+ data = vaesimcq_u8(vaesdq_u8(data, K5));
+ data = vaesimcq_u8(vaesdq_u8(data, K6));
+ data = vaesimcq_u8(vaesdq_u8(data, K7));
+ data = vaesimcq_u8(vaesdq_u8(data, K8));
+ data = veorq_u8(vaesdq_u8(data, K9), K10);
+ vst1q_u8(out+16*i, data);
+ }
+ }
+
+/*
+* AES-192 Encryption
+*/
+BOTAN_FUNC_ISA("+crypto")
+void AES_192::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_EK.empty() == false, "Key was set");
+
+ const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data());
+ const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data());
+
+ const uint8x16_t K0 = vld1q_u8(skey + 0);
+ const uint8x16_t K1 = vld1q_u8(skey + 16);
+ const uint8x16_t K2 = vld1q_u8(skey + 32);
+ const uint8x16_t K3 = vld1q_u8(skey + 48);
+ const uint8x16_t K4 = vld1q_u8(skey + 64);
+ const uint8x16_t K5 = vld1q_u8(skey + 80);
+ const uint8x16_t K6 = vld1q_u8(skey + 96);
+ const uint8x16_t K7 = vld1q_u8(skey + 112);
+ const uint8x16_t K8 = vld1q_u8(skey + 128);
+ const uint8x16_t K9 = vld1q_u8(skey + 144);
+ const uint8x16_t K10 = vld1q_u8(skey + 160);
+ const uint8x16_t K11 = vld1q_u8(skey + 176);
+ const uint8x16_t K12 = vld1q_u8(mkey);
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ uint8x16_t data = vld1q_u8(in+16*i);
+ data = vaesmcq_u8(vaeseq_u8(data, K0));
+ data = vaesmcq_u8(vaeseq_u8(data, K1));
+ data = vaesmcq_u8(vaeseq_u8(data, K2));
+ data = vaesmcq_u8(vaeseq_u8(data, K3));
+ data = vaesmcq_u8(vaeseq_u8(data, K4));
+ data = vaesmcq_u8(vaeseq_u8(data, K5));
+ data = vaesmcq_u8(vaeseq_u8(data, K6));
+ data = vaesmcq_u8(vaeseq_u8(data, K7));
+ data = vaesmcq_u8(vaeseq_u8(data, K8));
+ data = vaesmcq_u8(vaeseq_u8(data, K9));
+ data = vaesmcq_u8(vaeseq_u8(data, K10));
+ data = veorq_u8(vaeseq_u8(data, K11), K12);
+ vst1q_u8(out+16*i, data);
+ }
+ }
+
+/*
+* AES-192 Decryption
+*/
+BOTAN_FUNC_ISA("+crypto")
+void AES_192::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_DK.empty() == false, "Key was set");
+ const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data());
+ const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_MD.data());
+
+ const uint8x16_t K0 = vld1q_u8(skey + 0);
+ const uint8x16_t K1 = vld1q_u8(skey + 16);
+ const uint8x16_t K2 = vld1q_u8(skey + 32);
+ const uint8x16_t K3 = vld1q_u8(skey + 48);
+ const uint8x16_t K4 = vld1q_u8(skey + 64);
+ const uint8x16_t K5 = vld1q_u8(skey + 80);
+ const uint8x16_t K6 = vld1q_u8(skey + 96);
+ const uint8x16_t K7 = vld1q_u8(skey + 112);
+ const uint8x16_t K8 = vld1q_u8(skey + 128);
+ const uint8x16_t K9 = vld1q_u8(skey + 144);
+ const uint8x16_t K10 = vld1q_u8(skey + 160);
+ const uint8x16_t K11 = vld1q_u8(skey + 176);
+ const uint8x16_t K12 = vld1q_u8(mkey);
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ uint8x16_t data = vld1q_u8(in+16*i);
+ data = vaesimcq_u8(vaesdq_u8(data, K0));
+ data = vaesimcq_u8(vaesdq_u8(data, K1));
+ data = vaesimcq_u8(vaesdq_u8(data, K2));
+ data = vaesimcq_u8(vaesdq_u8(data, K3));
+ data = vaesimcq_u8(vaesdq_u8(data, K4));
+ data = vaesimcq_u8(vaesdq_u8(data, K5));
+ data = vaesimcq_u8(vaesdq_u8(data, K6));
+ data = vaesimcq_u8(vaesdq_u8(data, K7));
+ data = vaesimcq_u8(vaesdq_u8(data, K8));
+ data = vaesimcq_u8(vaesdq_u8(data, K9));
+ data = vaesimcq_u8(vaesdq_u8(data, K10));
+ data = veorq_u8(vaesdq_u8(data, K11), K12);
+ vst1q_u8(out+16*i, data);
+ }
+ }
+
+/*
+* AES-256 Encryption
+*/
+BOTAN_FUNC_ISA("+crypto")
+void AES_256::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_EK.empty() == false, "Key was set");
+
+ const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data());
+ const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data());
+
+ const uint8x16_t K0 = vld1q_u8(skey + 0);
+ const uint8x16_t K1 = vld1q_u8(skey + 16);
+ const uint8x16_t K2 = vld1q_u8(skey + 32);
+ const uint8x16_t K3 = vld1q_u8(skey + 48);
+ const uint8x16_t K4 = vld1q_u8(skey + 64);
+ const uint8x16_t K5 = vld1q_u8(skey + 80);
+ const uint8x16_t K6 = vld1q_u8(skey + 96);
+ const uint8x16_t K7 = vld1q_u8(skey + 112);
+ const uint8x16_t K8 = vld1q_u8(skey + 128);
+ const uint8x16_t K9 = vld1q_u8(skey + 144);
+ const uint8x16_t K10 = vld1q_u8(skey + 160);
+ const uint8x16_t K11 = vld1q_u8(skey + 176);
+ const uint8x16_t K12 = vld1q_u8(skey + 192);
+ const uint8x16_t K13 = vld1q_u8(skey + 208);
+ const uint8x16_t K14 = vld1q_u8(mkey);
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ uint8x16_t data = vld1q_u8(in+16*i);
+ data = vaesmcq_u8(vaeseq_u8(data, K0));
+ data = vaesmcq_u8(vaeseq_u8(data, K1));
+ data = vaesmcq_u8(vaeseq_u8(data, K2));
+ data = vaesmcq_u8(vaeseq_u8(data, K3));
+ data = vaesmcq_u8(vaeseq_u8(data, K4));
+ data = vaesmcq_u8(vaeseq_u8(data, K5));
+ data = vaesmcq_u8(vaeseq_u8(data, K6));
+ data = vaesmcq_u8(vaeseq_u8(data, K7));
+ data = vaesmcq_u8(vaeseq_u8(data, K8));
+ data = vaesmcq_u8(vaeseq_u8(data, K9));
+ data = vaesmcq_u8(vaeseq_u8(data, K10));
+ data = vaesmcq_u8(vaeseq_u8(data, K11));
+ data = vaesmcq_u8(vaeseq_u8(data, K12));
+ data = veorq_u8(vaeseq_u8(data, K13), K14);
+ vst1q_u8(out+16*i, data);
+ }
+ }
+
+/*
+* AES-256 Decryption
+*/
+BOTAN_FUNC_ISA("+crypto")
+void AES_256::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ BOTAN_ASSERT(m_DK.empty() == false, "Key was set");
+
+ const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data());
+ const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_MD.data());
+
+ const uint8x16_t K0 = vld1q_u8(skey + 0);
+ const uint8x16_t K1 = vld1q_u8(skey + 16);
+ const uint8x16_t K2 = vld1q_u8(skey + 32);
+ const uint8x16_t K3 = vld1q_u8(skey + 48);
+ const uint8x16_t K4 = vld1q_u8(skey + 64);
+ const uint8x16_t K5 = vld1q_u8(skey + 80);
+ const uint8x16_t K6 = vld1q_u8(skey + 96);
+ const uint8x16_t K7 = vld1q_u8(skey + 112);
+ const uint8x16_t K8 = vld1q_u8(skey + 128);
+ const uint8x16_t K9 = vld1q_u8(skey + 144);
+ const uint8x16_t K10 = vld1q_u8(skey + 160);
+ const uint8x16_t K11 = vld1q_u8(skey + 176);
+ const uint8x16_t K12 = vld1q_u8(skey + 192);
+ const uint8x16_t K13 = vld1q_u8(skey + 208);
+ const uint8x16_t K14 = vld1q_u8(mkey);
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ uint8x16_t data = vld1q_u8(in+16*i);
+ data = vaesimcq_u8(vaesdq_u8(data, K0));
+ data = vaesimcq_u8(vaesdq_u8(data, K1));
+ data = vaesimcq_u8(vaesdq_u8(data, K2));
+ data = vaesimcq_u8(vaesdq_u8(data, K3));
+ data = vaesimcq_u8(vaesdq_u8(data, K4));
+ data = vaesimcq_u8(vaesdq_u8(data, K5));
+ data = vaesimcq_u8(vaesdq_u8(data, K6));
+ data = vaesimcq_u8(vaesdq_u8(data, K7));
+ data = vaesimcq_u8(vaesdq_u8(data, K8));
+ data = vaesimcq_u8(vaesdq_u8(data, K9));
+ data = vaesimcq_u8(vaesdq_u8(data, K10));
+ data = vaesimcq_u8(vaesdq_u8(data, K11));
+ data = vaesimcq_u8(vaesdq_u8(data, K12));
+ data = veorq_u8(vaesdq_u8(data, K13), K14);
+ vst1q_u8(out+16*i, data);
+ }
+ }
+
+
+}
diff --git a/src/lib/block/aes/aes_armv8/info.txt b/src/lib/block/aes/aes_armv8/info.txt
new file mode 100644
index 000000000..1b432556c
--- /dev/null
+++ b/src/lib/block/aes/aes_armv8/info.txt
@@ -0,0 +1,12 @@
+<defines>
+AES_ARMV8 -> 20170903
+</defines>
+
+<arch>
+arm64
+</arch>
+
+<cc>
+gcc:5
+clang:3.8
+</cc>