diff options
author | Jack Lloyd <[email protected]> | 2017-10-12 13:23:21 -0400 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2017-10-12 13:23:21 -0400 |
commit | fa8b83578dcffc394b8449207f60662d7e562728 (patch) | |
tree | bc4bb70f1fc988d592c49ee031f7529f4162c66a /src/lib/utils | |
parent | e100b238667a8fcf471b8c86f8f8c33bbe3426e7 (diff) | |
parent | d11f369b6a72f584499a883e0ee7cbb63723dc93 (diff) |
Merge GH #1247 Improve bit rotation functions
Diffstat (limited to 'src/lib/utils')
-rw-r--r-- | src/lib/utils/bswap.h | 6 |
-rw-r--r-- | src/lib/utils/rotate.h | 85 |
-rw-r--r-- | src/lib/utils/simd/simd_32.h | 87 |
3 files changed, 122 insertions, 56 deletions
diff --git a/src/lib/utils/bswap.h b/src/lib/utils/bswap.h index 23b3113ce..c1aa8b594 100644 --- a/src/lib/utils/bswap.h +++ b/src/lib/utils/bswap.h @@ -19,7 +19,7 @@ namespace Botan { */ inline uint16_t reverse_bytes(uint16_t val) { - return rotate_left(val, 8); + return rotl<8>(val); } /** @@ -64,8 +64,8 @@ inline uint32_t reverse_bytes(uint32_t val) #else // Generic implementation - return (rotate_right(val, 8) & 0xFF00FF00) | - (rotate_left (val, 8) & 0x00FF00FF); + return (rotr<8>(val) & 0xFF00FF00) | + (rotl<8>(val) & 0x00FF00FF); #endif } diff --git a/src/lib/utils/rotate.h b/src/lib/utils/rotate.h index cb92daf96..4bb76c9ed 100644 --- a/src/lib/utils/rotate.h +++ b/src/lib/utils/rotate.h @@ -1,6 +1,6 @@ /* * Word Rotation Operations -* (C) 1999-2008 Jack Lloyd +* (C) 1999-2008,2017 Jack Lloyd * * Botan is released under the Simplified BSD License (see license.txt) */ @@ -13,27 +13,90 @@ namespace Botan { /** -* Bit rotation left +* Bit rotation left by a compile-time constant amount * @param input the input word -* @param rot the number of bits to rotate +* @return input rotated left by ROT bits +*/ +template<size_t ROT, typename T> +inline T rotl(T input) + { + static_assert(ROT > 0 && ROT < 8*sizeof(T), "Invalid rotation constant"); + return static_cast<T>((input << ROT) | (input >> (8*sizeof(T) - ROT))); + } + +/** +* Bit rotation right by a compile-time constant amount +* @param input the input word +* @return input rotated right by ROT bits +*/ +template<size_t ROT, typename T> +inline T rotr(T input) + { + static_assert(ROT > 0 && ROT < 8*sizeof(T), "Invalid rotation constant"); + return static_cast<T>((input >> ROT) | (input << (8*sizeof(T) - ROT))); + } + +/** +* Bit rotation left, variable rotation amount +* @param input the input word +* @param rot the number of bits to rotate, must be between 0 and sizeof(T)*8-1 * @return input rotated left by rot bits */ -template<typename T> inline T rotate_left(T input, size_t rot) +template<typename T> 
+inline T rotl_var(T input, size_t rot) { - rot %= 8 * sizeof(T); - return (rot == 0) ? input : static_cast<T>((input << rot) | (input >> (8*sizeof(T)-rot)));; + return rot ? static_cast<T>((input << rot) | (input >> (sizeof(T)*8 - rot))) : input; } /** -* Bit rotation right +* Bit rotation right, variable rotation amount * @param input the input word -* @param rot the number of bits to rotate +* @param rot the number of bits to rotate, must be between 0 and sizeof(T)*8-1 * @return input rotated right by rot bits */ -template<typename T> inline T rotate_right(T input, size_t rot) +template<typename T> +inline T rotr_var(T input, size_t rot) + { + return rot ? static_cast<T>((input >> rot) | (input << (sizeof(T)*8 - rot))) : input; + } + +#if BOTAN_USE_GCC_INLINE_ASM + +#if defined(BOTAN_TARGET_ARCH_IS_X86_64) || defined(BOTAN_TARGET_ARCH_IS_X86_32) + +template<> +inline uint32_t rotl_var(uint32_t input, size_t rot) + { + asm("roll %1,%0" : "+r" (input) : "c" (static_cast<uint8_t>(rot))); + return input; + } + +template<> +inline uint32_t rotr_var(uint32_t input, size_t rot) + { + asm("rorl %1,%0" : "+r" (input) : "c" (static_cast<uint8_t>(rot))); + return input; + } + +#endif + +#endif + + +template<typename T> +BOTAN_DEPRECATED("Use rotl<N> or rotl_var") +inline T rotate_left(T input, size_t rot) + { + // rotl_var does not reduce + return rotl_var(input, rot % (8 * sizeof(T))); + } + +template<typename T> +BOTAN_DEPRECATED("Use rotr<N> or rotr_var") +inline T rotate_right(T input, size_t rot) { - rot %= 8 * sizeof(T); - return (rot == 0) ? 
input : static_cast<T>((input >> rot) | (input << (8*sizeof(T)-rot))); + // rotr_var does not reduce + return rotr_var(input, rot % (8 * sizeof(T))); } } diff --git a/src/lib/utils/simd/simd_32.h b/src/lib/utils/simd/simd_32.h index 2a177b388..aca7a8e9a 100644 --- a/src/lib/utils/simd/simd_32.h +++ b/src/lib/utils/simd/simd_32.h @@ -281,56 +281,57 @@ class SIMD_4x32 final /* - Return rotate_right(x, rot1) ^ rotate_right(x, rot2) ^ rotate_right(x, rot3) + * This is used for SHA-2/SHACAL2 + * Return rotr(ROT1) ^ rotr(ROT2) ^ rotr(ROT3) */ - SIMD_4x32 rho(size_t rot1, size_t rot2, size_t rot3) const + template<size_t ROT1, size_t ROT2, size_t ROT3> + SIMD_4x32 rho() const { SIMD_4x32 res; #if defined(BOTAN_SIMD_USE_SSE2) - res.m_sse = _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot1)), - _mm_srli_epi32(m_sse, static_cast<int>(rot1))); + res.m_sse = _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-ROT1)), + _mm_srli_epi32(m_sse, static_cast<int>(ROT1))); res.m_sse = _mm_xor_si128( res.m_sse, - _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot2)), - _mm_srli_epi32(m_sse, static_cast<int>(rot2)))); + _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-ROT2)), + _mm_srli_epi32(m_sse, static_cast<int>(ROT2)))); res.m_sse = _mm_xor_si128( res.m_sse, - _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-rot3)), - _mm_srli_epi32(m_sse, static_cast<int>(rot3)))); + _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(32-ROT3)), + _mm_srli_epi32(m_sse, static_cast<int>(ROT3)))); #elif defined(BOTAN_SIMD_USE_ALTIVEC) - const unsigned int r1 = static_cast<unsigned int>(32-rot1); - const unsigned int r2 = static_cast<unsigned int>(32-rot2); - const unsigned int r3 = static_cast<unsigned int>(32-rot3); + const unsigned int r1 = static_cast<unsigned int>(32-ROT1); + const unsigned int r2 = static_cast<unsigned int>(32-ROT2); + const unsigned int r3 = static_cast<unsigned int>(32-ROT3); res.m_vmx = vec_rl(m_vmx, (__vector unsigned int){r1, r1, r1, r1}); 
res.m_vmx = vec_xor(res.m_vmx, vec_rl(m_vmx, (__vector unsigned int){r2, r2, r2, r2})); res.m_vmx = vec_xor(res.m_vmx, vec_rl(m_vmx, (__vector unsigned int){r3, r3, r3, r3})); #elif defined(BOTAN_SIMD_USE_NEON) - res.m_neon = vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-rot1)), - vshrq_n_u32(m_neon, static_cast<int>(rot1))); + res.m_neon = vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-ROT1)), + vshrq_n_u32(m_neon, static_cast<int>(ROT1))); res.m_neon = veorq_u32( res.m_neon, - vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-rot2)), - vshrq_n_u32(m_neon, static_cast<int>(rot2)))); + vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-ROT2)), + vshrq_n_u32(m_neon, static_cast<int>(ROT2)))); res.m_neon = veorq_u32( res.m_neon, - vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-rot3)), - vshrq_n_u32(m_neon, static_cast<int>(rot3)))); + vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(32-ROT3)), + vshrq_n_u32(m_neon, static_cast<int>(ROT3)))); #else for(size_t i = 0; i != 4; ++i) { - res.m_scalar[i] = - Botan::rotate_right(m_scalar[i], rot1) ^ - Botan::rotate_right(m_scalar[i], rot2) ^ - Botan::rotate_right(m_scalar[i], rot3); + res.m_scalar[i] = Botan::rotr<ROT1>(m_scalar[i]) ^ + Botan::rotr<ROT2>(m_scalar[i]) ^ + Botan::rotr<ROT3>(m_scalar[i]); } #endif @@ -338,38 +339,42 @@ class SIMD_4x32 final } /** - * Rotate each element of SIMD register n bits left + * Left rotation by a compile time constant */ - void rotate_left(size_t rot) + template<size_t ROT> + SIMD_4x32 rotl() const { + static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant"); + #if defined(BOTAN_SIMD_USE_SSE2) - m_sse = _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(rot)), - _mm_srli_epi32(m_sse, static_cast<int>(32-rot))); + return SIMD_4x32(_mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(ROT)), + _mm_srli_epi32(m_sse, static_cast<int>(32-ROT)))); #elif defined(BOTAN_SIMD_USE_ALTIVEC) - const unsigned int r = static_cast<unsigned int>(rot); - m_vmx = vec_rl(m_vmx, (__vector 
unsigned int){r, r, r, r}); + const unsigned int r = static_cast<unsigned int>(ROT); + return SIMD_4x32(vec_rl(m_vmx, (__vector unsigned int){r, r, r, r})); #elif defined(BOTAN_SIMD_USE_NEON) - m_neon = vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(rot)), - vshrq_n_u32(m_neon, static_cast<int>(32-rot))); + return SIMD_4x32(vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(ROT)), + vshrq_n_u32(m_neon, static_cast<int>(32-ROT)))); #else - m_scalar[0] = Botan::rotate_left(m_scalar[0], rot); - m_scalar[1] = Botan::rotate_left(m_scalar[1], rot); - m_scalar[2] = Botan::rotate_left(m_scalar[2], rot); - m_scalar[3] = Botan::rotate_left(m_scalar[3], rot); + return SIMD_4x32(Botan::rotl<ROT>(m_scalar[0]), + Botan::rotl<ROT>(m_scalar[1]), + Botan::rotl<ROT>(m_scalar[2]), + Botan::rotl<ROT>(m_scalar[3])); #endif } /** - * Rotate each element of SIMD register n bits right + * Right rotation by a compile time constant */ - void rotate_right(size_t rot) + template<size_t ROT> + SIMD_4x32 rotr() const { - rotate_left(32 - rot); + return this->rotl<32-ROT>(); } /** @@ -596,13 +601,11 @@ class SIMD_4x32 final //return SIMD_4x32(vrev64q_u32(m_neon)); // FIXME this is really slow - SIMD_4x32 ror8(m_neon); - ror8.rotate_right(8); - SIMD_4x32 rol8(m_neon); - rol8.rotate_left(8); + SIMD_4x32 ror8 = this->rotr<8>(); + SIMD_4x32 rol8 = this->rotl<8>(); - SIMD_4x32 mask1 = SIMD_4x32::splat(0xFF00FF00); - SIMD_4x32 mask2 = SIMD_4x32::splat(0x00FF00FF); + const SIMD_4x32 mask1 = SIMD_4x32::splat(0xFF00FF00); + const SIMD_4x32 mask2 = SIMD_4x32::splat(0x00FF00FF); return (ror8 & mask1) | (rol8 & mask2); #else // scalar |