diff options
author | Jack Lloyd <[email protected]> | 2018-07-10 09:53:51 -0400 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2018-07-10 09:53:51 -0400 |
commit | 65127cd2fc15abeaef5db2301d4cff89b16e615a (patch) | |
tree | 1644cbd4cf3479f092122a44eaef8d568efdd5b6 | |
parent | f197e17a70a4b0c2b601a6c4cf28784592b596b8 (diff) | |
parent | 19b1dd0bb0dc12576659dc5a7a48212a2993e3b2 (diff) |
Merge GH #1622 Add support for ARMv8 SM4 instructions
-rw-r--r-- | src/build-data/arch/arm64.txt | 4 | ||||
-rw-r--r-- | src/build-data/cc/gcc.txt | 4 | ||||
-rw-r--r-- | src/lib/block/sm4/sm4.cpp | 36 | ||||
-rw-r--r-- | src/lib/block/sm4/sm4.h | 8 | ||||
-rw-r--r-- | src/lib/block/sm4/sm4_armv8/info.txt | 9 | ||||
-rw-r--r-- | src/lib/block/sm4/sm4_armv8/sm4_armv8.cpp | 168 | ||||
-rw-r--r-- | src/lib/utils/cpuid/cpuid.cpp | 14 | ||||
-rw-r--r-- | src/lib/utils/cpuid/cpuid.h | 46 | ||||
-rw-r--r-- | src/lib/utils/cpuid/cpuid_arm.cpp | 18 | ||||
-rw-r--r-- | src/tests/data/block/sm4.vec | 12 |
10 files changed, 314 insertions, 5 deletions
diff --git a/src/build-data/arch/arm64.txt b/src/build-data/arch/arm64.txt index d4781c874..056c5318f 100644 --- a/src/build-data/arch/arm64.txt +++ b/src/build-data/arch/arm64.txt @@ -12,4 +12,8 @@ armv8-a <isa_extensions> neon armv8crypto +armv8sm3 +armv8sm4 +armv8sha3 +armv8sha512 </isa_extensions> diff --git a/src/build-data/cc/gcc.txt b/src/build-data/cc/gcc.txt index 98f1a2167..01baeafcd 100644 --- a/src/build-data/cc/gcc.txt +++ b/src/build-data/cc/gcc.txt @@ -65,6 +65,10 @@ altivec -> "-maltivec" ppccrypto -> "-mcrypto" arm64:armv8crypto -> "" +arm64:armv8sm3 -> "-march=armv8.2-a+sm4" +arm64:armv8sm4 -> "-march=armv8.2-a+sm4" +arm64:armv8sha512 -> "-march=armv8.2-a+sha3" +arm64:armv8sha3 -> "-march=armv8.2-a+sha3" # For Aarch32 -mfpu=neon is required # For Aarch64 NEON is enabled by default diff --git a/src/lib/block/sm4/sm4.cpp b/src/lib/block/sm4/sm4.cpp index 7c409d40f..7a370a67b 100644 --- a/src/lib/block/sm4/sm4.cpp +++ b/src/lib/block/sm4/sm4.cpp @@ -1,12 +1,14 @@ /* * SM4 * (C) 2017 Ribose Inc +* (C) 2018 Jack Lloyd * * Botan is released under the Simplified BSD License (see license.txt) */ #include <botan/sm4.h> #include <botan/loadstor.h> +#include <botan/cpuid.h> namespace Botan { @@ -126,6 +128,11 @@ void SM4::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const { verify_key_set(m_RK.empty() == false); +#if defined(BOTAN_HAS_SM4_ARMV8) + if(CPUID::has_arm_sm4()) + return sm4_armv8_encrypt(in, out, blocks); +#endif + for(size_t i = 0; i != blocks; ++i) { uint32_t B0 = load_be<uint32_t>(in, 0); @@ -156,6 +163,11 @@ void SM4::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const { verify_key_set(m_RK.empty() == false); +#if defined(BOTAN_HAS_SM4_ARMV8) + if(CPUID::has_arm_sm4()) + return sm4_armv8_decrypt(in, out, blocks); +#endif + for(size_t i = 0; i != blocks; ++i) { uint32_t B0 = load_be<uint32_t>(in, 0); @@ -219,4 +231,28 @@ void SM4::clear() zap(m_RK); } +size_t SM4::parallelism() const + { +#if defined(BOTAN_HAS_SM4_ARMV8) + if(CPUID::has_arm_sm4()) + { + return 4; + } +#endif + + return 1; + } + +std::string SM4::provider() const + { +#if defined(BOTAN_HAS_SM4_ARMV8) + if(CPUID::has_arm_sm4()) + { + return "armv8"; + } +#endif + + return "base"; + } + } diff --git a/src/lib/block/sm4/sm4.h b/src/lib/block/sm4/sm4.h index bf3fa59dc..7d1d8ba98 100644 --- a/src/lib/block/sm4/sm4.h +++ b/src/lib/block/sm4/sm4.h @@ -24,9 +24,17 @@ class BOTAN_PUBLIC_API(2,2) SM4 final : public Block_Cipher_Fixed_Params<16, 16> void clear() override; std::string name() const override { return "SM4"; } BlockCipher* clone() const override { return new SM4; } + + std::string provider() const override; + size_t parallelism() const override; private: void key_schedule(const uint8_t[], size_t) override; +#if defined(BOTAN_HAS_SM4_ARMV8) + void sm4_armv8_encrypt(const uint8_t in[], uint8_t out[], size_t blocks) const; + void sm4_armv8_decrypt(const uint8_t in[], uint8_t out[], size_t blocks) const; +#endif + secure_vector<uint32_t> m_RK; }; diff --git a/src/lib/block/sm4/sm4_armv8/info.txt b/src/lib/block/sm4/sm4_armv8/info.txt new file mode 100644 index 000000000..7f503d361 --- /dev/null +++ b/src/lib/block/sm4/sm4_armv8/info.txt @@ -0,0 +1,9 @@ +<defines> +SM4_ARMV8 -> 20180709 +</defines> + +need_isa armv8sm4 + +<cc> +gcc:8 +</cc> diff --git a/src/lib/block/sm4/sm4_armv8/sm4_armv8.cpp b/src/lib/block/sm4/sm4_armv8/sm4_armv8.cpp new file mode 100644 index 000000000..b91004aea --- /dev/null +++ b/src/lib/block/sm4/sm4_armv8/sm4_armv8.cpp @@ -0,0 +1,168 @@ +/* +* (C) 2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/sm4.h> +#include <arm_neon.h> + +namespace Botan { + +namespace { + +inline uint32x4_t qswap_32(uint32x4_t B) + { + static const uint8x16_t tbl = (uint8x16_t){12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 }; + return vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(B), tbl)); + } + +inline uint32x4_t bswap_32(uint32x4_t B) + { + return vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(B))); + } + +/* + Swap both the quad-words and bytes within each word + equivalent to return bswap_32(qswap_32(B)) +*/ +inline uint32x4_t bqswap_32(uint32x4_t B) + { + static const uint8x16_t tbl = (uint8x16_t){15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; + return vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(B), tbl)); + } + +#define SM4_E(B0, B1, B2, B3, K) do { \ + B0 = vsm4eq_u32(B0, K); \ + B1 = vsm4eq_u32(B1, K); \ + B2 = vsm4eq_u32(B2, K); \ + B3 = vsm4eq_u32(B3, K); \ + } while(0) + +} + +BOTAN_FUNC_ISA("+sm4") +void SM4::sm4_armv8_encrypt(const uint8_t input8[], uint8_t output8[], size_t blocks) const + { + const uint32x4_t K0 = vld1q_u32(&m_RK[ 0]); + const uint32x4_t K1 = vld1q_u32(&m_RK[ 4]); + const uint32x4_t K2 = vld1q_u32(&m_RK[ 8]); + const uint32x4_t K3 = vld1q_u32(&m_RK[12]); + const uint32x4_t K4 = vld1q_u32(&m_RK[16]); + const uint32x4_t K5 = vld1q_u32(&m_RK[20]); + const uint32x4_t K6 = vld1q_u32(&m_RK[24]); + const uint32x4_t K7 = vld1q_u32(&m_RK[28]); + + const uint32_t* input32 = reinterpret_cast<const uint32_t*>(reinterpret_cast<const void*>(input8)); + uint32_t* output32 = reinterpret_cast<uint32_t*>(reinterpret_cast<void*>(output8)); + + while(blocks >= 4) + { + uint32x4_t B0 = bswap_32(vld1q_u32(input32)); + uint32x4_t B1 = bswap_32(vld1q_u32(input32+4)); + uint32x4_t B2 = bswap_32(vld1q_u32(input32+8)); + uint32x4_t B3 = bswap_32(vld1q_u32(input32+12)); + + SM4_E(B0, B1, B2, B3, K0); + SM4_E(B0, B1, B2, B3, K1); + SM4_E(B0, B1, B2, B3, K2); + SM4_E(B0, B1, B2, B3, K3); + SM4_E(B0, B1, B2, B3, K4); + SM4_E(B0, B1, B2, B3, K5); + SM4_E(B0, B1, B2, B3, K6); + SM4_E(B0, B1, B2, B3, K7); + + vst1q_u32(output32 , bqswap_32(B0)); + vst1q_u32(output32+ 4, bqswap_32(B1)); + vst1q_u32(output32+ 8, bqswap_32(B2)); + vst1q_u32(output32+12, bqswap_32(B3)); + + input32 += 4*4; + output32 += 4*4; + blocks -= 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + uint32x4_t B = bswap_32(vld1q_u32(input32)); + + B = vsm4eq_u32(B, K0); + B = vsm4eq_u32(B, K1); + B = vsm4eq_u32(B, K2); + B = vsm4eq_u32(B, K3); + B = vsm4eq_u32(B, K4); + B = vsm4eq_u32(B, K5); + B = vsm4eq_u32(B, K6); + B = vsm4eq_u32(B, K7); + + vst1q_u32(output32, bqswap_32(B)); + + input32 += 4; + output32 += 4; + } + } + +BOTAN_FUNC_ISA("+sm4") +void SM4::sm4_armv8_decrypt(const uint8_t input8[], uint8_t output8[], size_t blocks) const + { + const uint32x4_t K0 = qswap_32(vld1q_u32(&m_RK[ 0])); + const uint32x4_t K1 = qswap_32(vld1q_u32(&m_RK[ 4])); + const uint32x4_t K2 = qswap_32(vld1q_u32(&m_RK[ 8])); + const uint32x4_t K3 = qswap_32(vld1q_u32(&m_RK[12])); + const uint32x4_t K4 = qswap_32(vld1q_u32(&m_RK[16])); + const uint32x4_t K5 = qswap_32(vld1q_u32(&m_RK[20])); + const uint32x4_t K6 = qswap_32(vld1q_u32(&m_RK[24])); + const uint32x4_t K7 = qswap_32(vld1q_u32(&m_RK[28])); + + const uint32_t* input32 = reinterpret_cast<const uint32_t*>(reinterpret_cast<const void*>(input8)); + uint32_t* output32 = reinterpret_cast<uint32_t*>(reinterpret_cast<void*>(output8)); + + while(blocks >= 4) + { + uint32x4_t B0 = bswap_32(vld1q_u32(input32)); + uint32x4_t B1 = bswap_32(vld1q_u32(input32+4)); + uint32x4_t B2 = bswap_32(vld1q_u32(input32+8)); + uint32x4_t B3 = bswap_32(vld1q_u32(input32+12)); + + SM4_E(B0, B1, B2, B3, K7); + SM4_E(B0, B1, B2, B3, K6); + SM4_E(B0, B1, B2, B3, K5); + SM4_E(B0, B1, B2, B3, K4); + SM4_E(B0, B1, B2, B3, K3); + SM4_E(B0, B1, B2, B3, K2); + SM4_E(B0, B1, B2, B3, K1); + SM4_E(B0, B1, B2, B3, K0); + + vst1q_u32(output32 , bqswap_32(B0)); + vst1q_u32(output32+ 4, bqswap_32(B1)); + vst1q_u32(output32+ 8, bqswap_32(B2)); + vst1q_u32(output32+12, bqswap_32(B3)); + + input32 += 4*4; + output32 += 4*4; + blocks -= 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + uint32x4_t B = bswap_32(vld1q_u32(input32)); + + B = vsm4eq_u32(B, K7); + B = vsm4eq_u32(B, K6); + B = vsm4eq_u32(B, K5); + B = vsm4eq_u32(B, K4); + B = vsm4eq_u32(B, K3); + B = vsm4eq_u32(B, K2); + B = vsm4eq_u32(B, K1); + B = vsm4eq_u32(B, K0); + + vst1q_u32(output32, bqswap_32(B)); + + input32 += 4; + output32 += 4; + } + } + +#undef SM4_E + +} diff --git a/src/lib/utils/cpuid/cpuid.cpp b/src/lib/utils/cpuid/cpuid.cpp index 3938c7242..c5d39b68e 100644 --- a/src/lib/utils/cpuid/cpuid.cpp +++ b/src/lib/utils/cpuid/cpuid.cpp @@ -64,10 +64,16 @@ std::string CPUID::to_string() #if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) CPUID_PRINT(neon); + CPUID_PRINT(arm_sve); + CPUID_PRINT(arm_sha1); CPUID_PRINT(arm_sha2); CPUID_PRINT(arm_aes); CPUID_PRINT(arm_pmull); + CPUID_PRINT(arm_sha2_512); + CPUID_PRINT(arm_sha3); + CPUID_PRINT(arm_sm3); + CPUID_PRINT(arm_sm4); #endif #undef CPUID_PRINT @@ -168,6 +174,14 @@ CPUID::bit_from_string(const std::string& tok) return {Botan::CPUID::CPUID_ARM_AES_BIT}; if(tok == "armv8pmull") return {Botan::CPUID::CPUID_ARM_PMULL_BIT}; + if(tok == "armv8sha3") + return {Botan::CPUID::CPUID_ARM_SHA3_BIT}; + if(tok == "armv8sha2_512") + return {Botan::CPUID::CPUID_ARM_SHA2_512_BIT}; + if(tok == "armv8sm3") + return {Botan::CPUID::CPUID_ARM_SM3_BIT}; + if(tok == "armv8sm4") + return {Botan::CPUID::CPUID_ARM_SM4_BIT}; #else BOTAN_UNUSED(tok); diff --git a/src/lib/utils/cpuid/cpuid.h b/src/lib/utils/cpuid/cpuid.h index 633824a6c..95f6d687b 100644 --- a/src/lib/utils/cpuid/cpuid.h +++ b/src/lib/utils/cpuid/cpuid.h @@ -114,11 +114,16 @@ class BOTAN_PUBLIC_API(2,1) CPUID final #endif #if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY) - CPUID_ARM_NEON_BIT = (1ULL << 0), - CPUID_ARM_AES_BIT = (1ULL << 16), - CPUID_ARM_PMULL_BIT = (1ULL << 17), - CPUID_ARM_SHA1_BIT = (1ULL << 18), - CPUID_ARM_SHA2_BIT = (1ULL << 19), + CPUID_ARM_NEON_BIT = (1ULL << 0), + CPUID_ARM_SVE_BIT = (1ULL << 1), + CPUID_ARM_AES_BIT = (1ULL << 16), + CPUID_ARM_PMULL_BIT = (1ULL << 17), + CPUID_ARM_SHA1_BIT = (1ULL << 18), + CPUID_ARM_SHA2_BIT = (1ULL << 19), + CPUID_ARM_SHA3_BIT = (1ULL << 20), + CPUID_ARM_SHA2_512_BIT = (1ULL << 21), + CPUID_ARM_SM3_BIT = (1ULL << 22), + CPUID_ARM_SM4_BIT = (1ULL << 23), #endif CPUID_INITIALIZED_BIT = (1ULL << 63) @@ -147,6 +152,12 @@ class BOTAN_PUBLIC_API(2,1) CPUID final { return has_cpuid_bit(CPUID_ARM_NEON_BIT); } /** + * Check if the processor supports ARMv8 SVE + */ + static bool has_arm_sve() + { return has_cpuid_bit(CPUID_ARM_SVE_BIT); } + + /** * Check if the processor supports ARMv8 SHA1 */ static bool has_arm_sha1() @@ -169,6 +180,31 @@ class BOTAN_PUBLIC_API(2,1) CPUID final */ static bool has_arm_pmull() { return has_cpuid_bit(CPUID_ARM_PMULL_BIT); } + + /** + * Check if the processor supports ARMv8 SHA-512 + */ + static bool has_arm_sha2_512() + { return has_cpuid_bit(CPUID_ARM_SHA2_512_BIT); } + + /** + * Check if the processor supports ARMv8 SHA-3 + */ + static bool has_arm_sha3() + { return has_cpuid_bit(CPUID_ARM_SHA3_BIT); } + + /** + * Check if the processor supports ARMv8 SM3 + */ + static bool has_arm_sm3() + { return has_cpuid_bit(CPUID_ARM_SM3_BIT); } + + /** + * Check if the processor supports ARMv8 SM4 + */ + static bool has_arm_sm4() + { return has_cpuid_bit(CPUID_ARM_SM4_BIT); } + #endif #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) diff --git a/src/lib/utils/cpuid/cpuid_arm.cpp b/src/lib/utils/cpuid/cpuid_arm.cpp index 39b6db652..beb80f7d0 100644 --- a/src/lib/utils/cpuid/cpuid_arm.cpp +++ b/src/lib/utils/cpuid/cpuid_arm.cpp @@ -130,6 +130,11 @@ uint64_t CPUID::detect_cpu_features(size_t* cache_line_size) PMULL_bit = (1 << 4), SHA1_bit = (1 << 5), SHA2_bit = (1 << 6), + SHA3_bit = (1 << 17), + SM3_bit = (1 << 18), + SM4_bit = (1 << 19), + SHA2_512_bit = (1 << 21), + SVE_bit = (1 << 22), ARCH_hwcap_neon = 16, // AT_HWCAP ARCH_hwcap_crypto = 16, // AT_HWCAP @@ -163,6 +168,19 @@ uint64_t CPUID::detect_cpu_features(size_t* cache_line_size) if(hwcap_crypto & ARM_hwcap_bit::SHA2_bit) detected_features |= CPUID::CPUID_ARM_SHA2_BIT; +#if defined(BOTAN_TARGET_ARCH_IS_ARM64) + if(hwcap_crypto & ARM_hwcap_bit::SHA3_bit) + detected_features |= CPUID::CPUID_ARM_SHA3_BIT; + if(hwcap_crypto & ARM_hwcap_bit::SM3_bit) + detected_features |= CPUID::CPUID_ARM_SM3_BIT; + if(hwcap_crypto & ARM_hwcap_bit::SM4_bit) + detected_features |= CPUID::CPUID_ARM_SM4_BIT; + if(hwcap_crypto & ARM_hwcap_bit::SHA2_512_bit) + detected_features |= CPUID::CPUID_ARM_SHA2_512_BIT; + if(hwcap_crypto & ARM_hwcap_bit::SVE_bit) + detected_features |= CPUID::CPUID_ARM_SVE_BIT; +#endif + #elif defined(BOTAN_TARGET_OS_IS_IOS) char machine[64] = { 0 }; diff --git a/src/tests/data/block/sm4.vec b/src/tests/data/block/sm4.vec index fbdba610f..a745cdbcf 100644 --- a/src/tests/data/block/sm4.vec +++ b/src/tests/data/block/sm4.vec @@ -1,3 +1,5 @@ +#test cpuid armv8sm4 + [SM4] Key = 0123456789abcdeffedcba9876543210 In = 0123456789abcdeffedcba9876543210 @@ -7,3 +9,13 @@ Iterations = 1000000 Key = 0123456789abcdeffedcba9876543210 In = 0123456789abcdeffedcba9876543210 Out = 595298c7c6fd271f0402f804c33d3f66 + + +# Random tests generated by GmSSL +Key = 681EDF34D206965E86B3E94F536E4246 +In = F42131B002425B6F5CF52A810682A09D07BCAE6A8388E14651FED84B3749D386F4762615B32C000A165E1D722D708052BA3C19D8926356ED1491C6E4E528782F +Out = EC4B7B1757FEE9CE455197E5BF9C3A9089F2C41ED97DBB1B74A2AD93B903BBC9F45A41052F9BF3D5B65DF8CC1C75B4CF3E1F30D57DF4B60694F566DE44484FAF + +Key = 781EDF34D206965E86B3E94F536E4247 +In = 9108957FF917E3D61C4EA33E53DB6EF3CBA0F0567535D66148B35A9258729C23FA598011F7C2100799451E62F3B5CF09BA1F8555B2DDAB0E4E4D8026B05AF38950C63CE25582571AA5D8EE22089C1B59229AD7A8A83C5E2384B4082E50D06EBF +Out = 6A529AC093A5F3045AED787F70CCB7F56346F0E4C59532D418CE315B9F22A0F46255459100958F4D953A9D5667692D6D376FEB0978B52AB9C984A14D7E66F67131FFAF2CAD6549F3D9FCD7F02DF5812476F29E93DDF57932A41E83BB7B61A406 |