aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2018-07-10 09:53:51 -0400
committerJack Lloyd <[email protected]>2018-07-10 09:53:51 -0400
commit65127cd2fc15abeaef5db2301d4cff89b16e615a (patch)
tree1644cbd4cf3479f092122a44eaef8d568efdd5b6
parentf197e17a70a4b0c2b601a6c4cf28784592b596b8 (diff)
parent19b1dd0bb0dc12576659dc5a7a48212a2993e3b2 (diff)
Merge GH #1622 Add support for ARMv8 SM4 instructions
-rw-r--r--src/build-data/arch/arm64.txt4
-rw-r--r--src/build-data/cc/gcc.txt4
-rw-r--r--src/lib/block/sm4/sm4.cpp36
-rw-r--r--src/lib/block/sm4/sm4.h8
-rw-r--r--src/lib/block/sm4/sm4_armv8/info.txt9
-rw-r--r--src/lib/block/sm4/sm4_armv8/sm4_armv8.cpp168
-rw-r--r--src/lib/utils/cpuid/cpuid.cpp14
-rw-r--r--src/lib/utils/cpuid/cpuid.h46
-rw-r--r--src/lib/utils/cpuid/cpuid_arm.cpp18
-rw-r--r--src/tests/data/block/sm4.vec12
10 files changed, 314 insertions, 5 deletions
diff --git a/src/build-data/arch/arm64.txt b/src/build-data/arch/arm64.txt
index d4781c874..056c5318f 100644
--- a/src/build-data/arch/arm64.txt
+++ b/src/build-data/arch/arm64.txt
@@ -12,4 +12,8 @@ armv8-a
<isa_extensions>
neon
armv8crypto
+armv8sm3
+armv8sm4
+armv8sha3
+armv8sha512
</isa_extensions>
diff --git a/src/build-data/cc/gcc.txt b/src/build-data/cc/gcc.txt
index 98f1a2167..01baeafcd 100644
--- a/src/build-data/cc/gcc.txt
+++ b/src/build-data/cc/gcc.txt
@@ -65,6 +65,10 @@ altivec -> "-maltivec"
ppccrypto -> "-mcrypto"
arm64:armv8crypto -> ""
+arm64:armv8sm3 -> "-march=armv8.2-a+sm4"
+arm64:armv8sm4 -> "-march=armv8.2-a+sm4"
+arm64:armv8sha512 -> "-march=armv8.2-a+sha3"
+arm64:armv8sha3 -> "-march=armv8.2-a+sha3"
# For Aarch32 -mfpu=neon is required
# For Aarch64 NEON is enabled by default
diff --git a/src/lib/block/sm4/sm4.cpp b/src/lib/block/sm4/sm4.cpp
index 7c409d40f..7a370a67b 100644
--- a/src/lib/block/sm4/sm4.cpp
+++ b/src/lib/block/sm4/sm4.cpp
@@ -1,12 +1,14 @@
/*
* SM4
* (C) 2017 Ribose Inc
+* (C) 2018 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/
#include <botan/sm4.h>
#include <botan/loadstor.h>
+#include <botan/cpuid.h>
namespace Botan {
@@ -126,6 +128,11 @@ void SM4::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
verify_key_set(m_RK.empty() == false);
+#if defined(BOTAN_HAS_SM4_ARMV8)
+ if(CPUID::has_arm_sm4())
+ return sm4_armv8_encrypt(in, out, blocks);
+#endif
+
for(size_t i = 0; i != blocks; ++i)
{
uint32_t B0 = load_be<uint32_t>(in, 0);
@@ -156,6 +163,11 @@ void SM4::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
verify_key_set(m_RK.empty() == false);
+#if defined(BOTAN_HAS_SM4_ARMV8)
+ if(CPUID::has_arm_sm4())
+ return sm4_armv8_decrypt(in, out, blocks);
+#endif
+
for(size_t i = 0; i != blocks; ++i)
{
uint32_t B0 = load_be<uint32_t>(in, 0);
@@ -219,4 +231,28 @@ void SM4::clear()
zap(m_RK);
}
+size_t SM4::parallelism() const
+ {
+#if defined(BOTAN_HAS_SM4_ARMV8)
+ if(CPUID::has_arm_sm4())
+ {
+ return 4;
+ }
+#endif
+
+ return 1;
+ }
+
+std::string SM4::provider() const
+ {
+#if defined(BOTAN_HAS_SM4_ARMV8)
+ if(CPUID::has_arm_sm4())
+ {
+ return "armv8";
+ }
+#endif
+
+ return "base";
+ }
+
}
diff --git a/src/lib/block/sm4/sm4.h b/src/lib/block/sm4/sm4.h
index bf3fa59dc..7d1d8ba98 100644
--- a/src/lib/block/sm4/sm4.h
+++ b/src/lib/block/sm4/sm4.h
@@ -24,9 +24,17 @@ class BOTAN_PUBLIC_API(2,2) SM4 final : public Block_Cipher_Fixed_Params<16, 16>
void clear() override;
std::string name() const override { return "SM4"; }
BlockCipher* clone() const override { return new SM4; }
+
+ std::string provider() const override;
+ size_t parallelism() const override;
private:
void key_schedule(const uint8_t[], size_t) override;
+#if defined(BOTAN_HAS_SM4_ARMV8)
+ void sm4_armv8_encrypt(const uint8_t in[], uint8_t out[], size_t blocks) const;
+ void sm4_armv8_decrypt(const uint8_t in[], uint8_t out[], size_t blocks) const;
+#endif
+
secure_vector<uint32_t> m_RK;
};
diff --git a/src/lib/block/sm4/sm4_armv8/info.txt b/src/lib/block/sm4/sm4_armv8/info.txt
new file mode 100644
index 000000000..7f503d361
--- /dev/null
+++ b/src/lib/block/sm4/sm4_armv8/info.txt
@@ -0,0 +1,9 @@
+<defines>
+SM4_ARMV8 -> 20180709
+</defines>
+
+need_isa armv8sm4
+
+<cc>
+gcc:8
+</cc>
diff --git a/src/lib/block/sm4/sm4_armv8/sm4_armv8.cpp b/src/lib/block/sm4/sm4_armv8/sm4_armv8.cpp
new file mode 100644
index 000000000..b91004aea
--- /dev/null
+++ b/src/lib/block/sm4/sm4_armv8/sm4_armv8.cpp
@@ -0,0 +1,168 @@
+/*
+* (C) 2018 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/sm4.h>
+#include <arm_neon.h>
+
+namespace Botan {
+
+namespace {
+
+inline uint32x4_t qswap_32(uint32x4_t B)
+ {
+ static const uint8x16_t tbl = (uint8x16_t){12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 };
+ return vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(B), tbl));
+ }
+
+inline uint32x4_t bswap_32(uint32x4_t B)
+ {
+ return vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(B)));
+ }
+
+/*
+ Swap both the quad-words and bytes within each word
+ equivalent to return bswap_32(qswap_32(B))
+*/
+inline uint32x4_t bqswap_32(uint32x4_t B)
+ {
+ static const uint8x16_t tbl = (uint8x16_t){15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+ return vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(B), tbl));
+ }
+
+#define SM4_E(B0, B1, B2, B3, K) do { \
+ B0 = vsm4eq_u32(B0, K); \
+ B1 = vsm4eq_u32(B1, K); \
+ B2 = vsm4eq_u32(B2, K); \
+ B3 = vsm4eq_u32(B3, K); \
+ } while(0)
+
+}
+
+BOTAN_FUNC_ISA("+sm4")
+void SM4::sm4_armv8_encrypt(const uint8_t input8[], uint8_t output8[], size_t blocks) const
+ {
+ const uint32x4_t K0 = vld1q_u32(&m_RK[ 0]);
+ const uint32x4_t K1 = vld1q_u32(&m_RK[ 4]);
+ const uint32x4_t K2 = vld1q_u32(&m_RK[ 8]);
+ const uint32x4_t K3 = vld1q_u32(&m_RK[12]);
+ const uint32x4_t K4 = vld1q_u32(&m_RK[16]);
+ const uint32x4_t K5 = vld1q_u32(&m_RK[20]);
+ const uint32x4_t K6 = vld1q_u32(&m_RK[24]);
+ const uint32x4_t K7 = vld1q_u32(&m_RK[28]);
+
+ const uint32_t* input32 = reinterpret_cast<const uint32_t*>(reinterpret_cast<const void*>(input8));
+ uint32_t* output32 = reinterpret_cast<uint32_t*>(reinterpret_cast<void*>(output8));
+
+ while(blocks >= 4)
+ {
+ uint32x4_t B0 = bswap_32(vld1q_u32(input32));
+ uint32x4_t B1 = bswap_32(vld1q_u32(input32+4));
+ uint32x4_t B2 = bswap_32(vld1q_u32(input32+8));
+ uint32x4_t B3 = bswap_32(vld1q_u32(input32+12));
+
+ SM4_E(B0, B1, B2, B3, K0);
+ SM4_E(B0, B1, B2, B3, K1);
+ SM4_E(B0, B1, B2, B3, K2);
+ SM4_E(B0, B1, B2, B3, K3);
+ SM4_E(B0, B1, B2, B3, K4);
+ SM4_E(B0, B1, B2, B3, K5);
+ SM4_E(B0, B1, B2, B3, K6);
+ SM4_E(B0, B1, B2, B3, K7);
+
+ vst1q_u32(output32 , bqswap_32(B0));
+ vst1q_u32(output32+ 4, bqswap_32(B1));
+ vst1q_u32(output32+ 8, bqswap_32(B2));
+ vst1q_u32(output32+12, bqswap_32(B3));
+
+ input32 += 4*4;
+ output32 += 4*4;
+ blocks -= 4;
+ }
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ uint32x4_t B = bswap_32(vld1q_u32(input32));
+
+ B = vsm4eq_u32(B, K0);
+ B = vsm4eq_u32(B, K1);
+ B = vsm4eq_u32(B, K2);
+ B = vsm4eq_u32(B, K3);
+ B = vsm4eq_u32(B, K4);
+ B = vsm4eq_u32(B, K5);
+ B = vsm4eq_u32(B, K6);
+ B = vsm4eq_u32(B, K7);
+
+ vst1q_u32(output32, bqswap_32(B));
+
+ input32 += 4;
+ output32 += 4;
+ }
+ }
+
+BOTAN_FUNC_ISA("+sm4")
+void SM4::sm4_armv8_decrypt(const uint8_t input8[], uint8_t output8[], size_t blocks) const
+ {
+ const uint32x4_t K0 = qswap_32(vld1q_u32(&m_RK[ 0]));
+ const uint32x4_t K1 = qswap_32(vld1q_u32(&m_RK[ 4]));
+ const uint32x4_t K2 = qswap_32(vld1q_u32(&m_RK[ 8]));
+ const uint32x4_t K3 = qswap_32(vld1q_u32(&m_RK[12]));
+ const uint32x4_t K4 = qswap_32(vld1q_u32(&m_RK[16]));
+ const uint32x4_t K5 = qswap_32(vld1q_u32(&m_RK[20]));
+ const uint32x4_t K6 = qswap_32(vld1q_u32(&m_RK[24]));
+ const uint32x4_t K7 = qswap_32(vld1q_u32(&m_RK[28]));
+
+ const uint32_t* input32 = reinterpret_cast<const uint32_t*>(reinterpret_cast<const void*>(input8));
+ uint32_t* output32 = reinterpret_cast<uint32_t*>(reinterpret_cast<void*>(output8));
+
+ while(blocks >= 4)
+ {
+ uint32x4_t B0 = bswap_32(vld1q_u32(input32));
+ uint32x4_t B1 = bswap_32(vld1q_u32(input32+4));
+ uint32x4_t B2 = bswap_32(vld1q_u32(input32+8));
+ uint32x4_t B3 = bswap_32(vld1q_u32(input32+12));
+
+ SM4_E(B0, B1, B2, B3, K7);
+ SM4_E(B0, B1, B2, B3, K6);
+ SM4_E(B0, B1, B2, B3, K5);
+ SM4_E(B0, B1, B2, B3, K4);
+ SM4_E(B0, B1, B2, B3, K3);
+ SM4_E(B0, B1, B2, B3, K2);
+ SM4_E(B0, B1, B2, B3, K1);
+ SM4_E(B0, B1, B2, B3, K0);
+
+ vst1q_u32(output32 , bqswap_32(B0));
+ vst1q_u32(output32+ 4, bqswap_32(B1));
+ vst1q_u32(output32+ 8, bqswap_32(B2));
+ vst1q_u32(output32+12, bqswap_32(B3));
+
+ input32 += 4*4;
+ output32 += 4*4;
+ blocks -= 4;
+ }
+
+ for(size_t i = 0; i != blocks; ++i)
+ {
+ uint32x4_t B = bswap_32(vld1q_u32(input32));
+
+ B = vsm4eq_u32(B, K7);
+ B = vsm4eq_u32(B, K6);
+ B = vsm4eq_u32(B, K5);
+ B = vsm4eq_u32(B, K4);
+ B = vsm4eq_u32(B, K3);
+ B = vsm4eq_u32(B, K2);
+ B = vsm4eq_u32(B, K1);
+ B = vsm4eq_u32(B, K0);
+
+ vst1q_u32(output32, bqswap_32(B));
+
+ input32 += 4;
+ output32 += 4;
+ }
+ }
+
+#undef SM4_E
+
+}
diff --git a/src/lib/utils/cpuid/cpuid.cpp b/src/lib/utils/cpuid/cpuid.cpp
index 3938c7242..c5d39b68e 100644
--- a/src/lib/utils/cpuid/cpuid.cpp
+++ b/src/lib/utils/cpuid/cpuid.cpp
@@ -64,10 +64,16 @@ std::string CPUID::to_string()
#if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
CPUID_PRINT(neon);
+ CPUID_PRINT(arm_sve);
+
CPUID_PRINT(arm_sha1);
CPUID_PRINT(arm_sha2);
CPUID_PRINT(arm_aes);
CPUID_PRINT(arm_pmull);
+ CPUID_PRINT(arm_sha2_512);
+ CPUID_PRINT(arm_sha3);
+ CPUID_PRINT(arm_sm3);
+ CPUID_PRINT(arm_sm4);
#endif
#undef CPUID_PRINT
@@ -168,6 +174,14 @@ CPUID::bit_from_string(const std::string& tok)
return {Botan::CPUID::CPUID_ARM_AES_BIT};
if(tok == "armv8pmull")
return {Botan::CPUID::CPUID_ARM_PMULL_BIT};
+ if(tok == "armv8sha3")
+ return {Botan::CPUID::CPUID_ARM_SHA3_BIT};
+ if(tok == "armv8sha2_512")
+ return {Botan::CPUID::CPUID_ARM_SHA2_512_BIT};
+ if(tok == "armv8sm3")
+ return {Botan::CPUID::CPUID_ARM_SM3_BIT};
+ if(tok == "armv8sm4")
+ return {Botan::CPUID::CPUID_ARM_SM4_BIT};
#else
BOTAN_UNUSED(tok);
diff --git a/src/lib/utils/cpuid/cpuid.h b/src/lib/utils/cpuid/cpuid.h
index 633824a6c..95f6d687b 100644
--- a/src/lib/utils/cpuid/cpuid.h
+++ b/src/lib/utils/cpuid/cpuid.h
@@ -114,11 +114,16 @@ class BOTAN_PUBLIC_API(2,1) CPUID final
#endif
#if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
- CPUID_ARM_NEON_BIT = (1ULL << 0),
- CPUID_ARM_AES_BIT = (1ULL << 16),
- CPUID_ARM_PMULL_BIT = (1ULL << 17),
- CPUID_ARM_SHA1_BIT = (1ULL << 18),
- CPUID_ARM_SHA2_BIT = (1ULL << 19),
+ CPUID_ARM_NEON_BIT = (1ULL << 0),
+ CPUID_ARM_SVE_BIT = (1ULL << 1),
+ CPUID_ARM_AES_BIT = (1ULL << 16),
+ CPUID_ARM_PMULL_BIT = (1ULL << 17),
+ CPUID_ARM_SHA1_BIT = (1ULL << 18),
+ CPUID_ARM_SHA2_BIT = (1ULL << 19),
+ CPUID_ARM_SHA3_BIT = (1ULL << 20),
+ CPUID_ARM_SHA2_512_BIT = (1ULL << 21),
+ CPUID_ARM_SM3_BIT = (1ULL << 22),
+ CPUID_ARM_SM4_BIT = (1ULL << 23),
#endif
CPUID_INITIALIZED_BIT = (1ULL << 63)
@@ -147,6 +152,12 @@ class BOTAN_PUBLIC_API(2,1) CPUID final
{ return has_cpuid_bit(CPUID_ARM_NEON_BIT); }
/**
+ * Check if the processor supports ARMv8 SVE
+ */
+ static bool has_arm_sve()
+ { return has_cpuid_bit(CPUID_ARM_SVE_BIT); }
+
+ /**
* Check if the processor supports ARMv8 SHA1
*/
static bool has_arm_sha1()
@@ -169,6 +180,31 @@ class BOTAN_PUBLIC_API(2,1) CPUID final
*/
static bool has_arm_pmull()
{ return has_cpuid_bit(CPUID_ARM_PMULL_BIT); }
+
+ /**
+ * Check if the processor supports ARMv8 SHA-512
+ */
+ static bool has_arm_sha2_512()
+ { return has_cpuid_bit(CPUID_ARM_SHA2_512_BIT); }
+
+ /**
+ * Check if the processor supports ARMv8 SHA-3
+ */
+ static bool has_arm_sha3()
+ { return has_cpuid_bit(CPUID_ARM_SHA3_BIT); }
+
+ /**
+ * Check if the processor supports ARMv8 SM3
+ */
+ static bool has_arm_sm3()
+ { return has_cpuid_bit(CPUID_ARM_SM3_BIT); }
+
+ /**
+ * Check if the processor supports ARMv8 SM4
+ */
+ static bool has_arm_sm4()
+ { return has_cpuid_bit(CPUID_ARM_SM4_BIT); }
+
#endif
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
diff --git a/src/lib/utils/cpuid/cpuid_arm.cpp b/src/lib/utils/cpuid/cpuid_arm.cpp
index 39b6db652..beb80f7d0 100644
--- a/src/lib/utils/cpuid/cpuid_arm.cpp
+++ b/src/lib/utils/cpuid/cpuid_arm.cpp
@@ -130,6 +130,11 @@ uint64_t CPUID::detect_cpu_features(size_t* cache_line_size)
PMULL_bit = (1 << 4),
SHA1_bit = (1 << 5),
SHA2_bit = (1 << 6),
+ SHA3_bit = (1 << 17),
+ SM3_bit = (1 << 18),
+ SM4_bit = (1 << 19),
+ SHA2_512_bit = (1 << 21),
+ SVE_bit = (1 << 22),
ARCH_hwcap_neon = 16, // AT_HWCAP
ARCH_hwcap_crypto = 16, // AT_HWCAP
@@ -163,6 +168,19 @@ uint64_t CPUID::detect_cpu_features(size_t* cache_line_size)
if(hwcap_crypto & ARM_hwcap_bit::SHA2_bit)
detected_features |= CPUID::CPUID_ARM_SHA2_BIT;
+#if defined(BOTAN_TARGET_ARCH_IS_ARM64)
+ if(hwcap_crypto & ARM_hwcap_bit::SHA3_bit)
+ detected_features |= CPUID::CPUID_ARM_SHA3_BIT;
+ if(hwcap_crypto & ARM_hwcap_bit::SM3_bit)
+ detected_features |= CPUID::CPUID_ARM_SM3_BIT;
+ if(hwcap_crypto & ARM_hwcap_bit::SM4_bit)
+ detected_features |= CPUID::CPUID_ARM_SM4_BIT;
+ if(hwcap_crypto & ARM_hwcap_bit::SHA2_512_bit)
+ detected_features |= CPUID::CPUID_ARM_SHA2_512_BIT;
+ if(hwcap_crypto & ARM_hwcap_bit::SVE_bit)
+ detected_features |= CPUID::CPUID_ARM_SVE_BIT;
+#endif
+
#elif defined(BOTAN_TARGET_OS_IS_IOS)
char machine[64] = { 0 };
diff --git a/src/tests/data/block/sm4.vec b/src/tests/data/block/sm4.vec
index fbdba610f..a745cdbcf 100644
--- a/src/tests/data/block/sm4.vec
+++ b/src/tests/data/block/sm4.vec
@@ -1,3 +1,5 @@
+#test cpuid armv8sm4
+
[SM4]
Key = 0123456789abcdeffedcba9876543210
In = 0123456789abcdeffedcba9876543210
@@ -7,3 +9,13 @@ Iterations = 1000000
Key = 0123456789abcdeffedcba9876543210
In = 0123456789abcdeffedcba9876543210
Out = 595298c7c6fd271f0402f804c33d3f66
+
+
+# Random tests generated by GmSSL
+Key = 681EDF34D206965E86B3E94F536E4246
+In = F42131B002425B6F5CF52A810682A09D07BCAE6A8388E14651FED84B3749D386F4762615B32C000A165E1D722D708052BA3C19D8926356ED1491C6E4E528782F
+Out = EC4B7B1757FEE9CE455197E5BF9C3A9089F2C41ED97DBB1B74A2AD93B903BBC9F45A41052F9BF3D5B65DF8CC1C75B4CF3E1F30D57DF4B60694F566DE44484FAF
+
+Key = 781EDF34D206965E86B3E94F536E4247
+In = 9108957FF917E3D61C4EA33E53DB6EF3CBA0F0567535D66148B35A9258729C23FA598011F7C2100799451E62F3B5CF09BA1F8555B2DDAB0E4E4D8026B05AF38950C63CE25582571AA5D8EE22089C1B59229AD7A8A83C5E2384B4082E50D06EBF
+Out = 6A529AC093A5F3045AED787F70CCB7F56346F0E4C59532D418CE315B9F22A0F46255459100958F4D953A9D5667692D6D376FEB0978B52AB9C984A14D7E66F67131FFAF2CAD6549F3D9FCD7F02DF5812476F29E93DDF57932A41E83BB7B61A406