Add compile-time rotation functions

The problem with asm rol/ror is that the compiler can't schedule it effectively. But we only need asm in the case when the rotation is variable, so distinguish the two cases. If the rotation is a compile-time constant, then static_assert that it is in the correct range and use the straightforward expression, knowing the compiler will probably do the right thing. Otherwise use a tricky expression that both GCC and Clang happen to recognize. Avoid the reduction case; instead require that the rotation be in range (this reverts 2b37c13dcf). Remove the asm rotations (making this branch ill-named), because now both Clang and GCC will create a roll without any extra help. Remove the reduction/mask by the word size for the variable case. The compiler can't optimize it out well, but it's easy to ensure the rotation is valid in the callers, especially now that the variable input cases are easy to grep for.
author: Jack Lloyd <[email protected]> 2017-10-11 17:02:20 -0400
committer: Jack Lloyd <[email protected]> 2017-10-12 11:13:11 -0400
commit: 175f09ffd806f2f19cd509017a67ae1384f29ae1 (patch)
tree: 6194884467e4720dd79797cd106a45d60211f35f /src/lib/block/shacal2
parent: 40b3f979723b2b3dfb5c44047d7f786a73fd7f6f (diff)
2 files changed, 10 insertions, 10 deletions
diff --git a/src/lib/block/shacal2/shacal2.cpp b/src/lib/block/shacal2/shacal2.cpp
index 30ad711db..12c87c426 100644
--- a/src/lib/block/shacal2/shacal2.cpp
+++ b/src/lib/block/shacal2/shacal2.cpp
@@ -17,8 +17,8 @@ inline void SHACAL2_Fwd(uint32_t A, uint32_t B, uint32_t C, uint32_t& D,
                         uint32_t E, uint32_t F, uint32_t G, uint32_t& H,
                         uint32_t RK)
    {
-   const uint32_t A_rho = rotate_right(A, 2) ^ rotate_right(A, 13) ^ rotate_right(A, 22);
-   const uint32_t E_rho = rotate_right(E, 6) ^ rotate_right(E, 11) ^ rotate_right(E, 25);
+   const uint32_t A_rho = rotr<2>(A) ^ rotr<13>(A) ^ rotr<22>(A);
+   const uint32_t E_rho = rotr<6>(E) ^ rotr<11>(E) ^ rotr<25>(E);
 
    H += E_rho + ((E & F) ^ (~E & G)) + RK;
    D += H;
@@ -29,8 +29,8 @@ inline void SHACAL2_Rev(uint32_t A, uint32_t B, uint32_t C, uint32_t& D,
                         uint32_t E, uint32_t F, uint32_t G, uint32_t& H,
                         uint32_t RK)
    {
-   const uint32_t A_rho = rotate_right(A, 2) ^ rotate_right(A, 13) ^ rotate_right(A, 22);
-   const uint32_t E_rho = rotate_right(E, 6) ^ rotate_right(E, 11) ^ rotate_right(E, 25);
+   const uint32_t A_rho = rotr<2>(A) ^ rotr<13>(A) ^ rotr<22>(A);
+   const uint32_t E_rho = rotr<6>(E) ^ rotr<11>(E) ^ rotr<25>(E);
 
    H -= A_rho + ((A & B) | ((A | B) & C));
    D -= H;
@@ -175,8 +175,8 @@ void SHACAL2::key_schedule(const uint8_t key[], size_t len)
 
    for(size_t i = 16; i != 64; ++i)
       {
-      const uint32_t sigma0_15 = rotate_right(m_RK[i-15], 7) ^ rotate_right(m_RK[i-15], 18) ^ (m_RK[i-15] >> 3);
-      const uint32_t sigma1_2  = rotate_right(m_RK[i-2], 17) ^ rotate_right(m_RK[i-2], 19)  ^ (m_RK[i-2] >> 10);
+      const uint32_t sigma0_15 = rotr<7>(m_RK[i-15]) ^ rotr<18>(m_RK[i-15]) ^ (m_RK[i-15] >> 3);
+      const uint32_t sigma1_2  = rotr<17>(m_RK[i-2]) ^ rotr<19>(m_RK[i-2])  ^ (m_RK[i-2] >> 10);
       m_RK[i] = m_RK[i-16] + sigma0_15 + m_RK[i-7] + sigma1_2;
       }
 
diff --git a/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp b/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp
index a4324c8fb..bdcac1482 100644
--- a/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp
+++ b/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp
@@ -17,9 +17,9 @@ void SHACAL2_Fwd(const SIMD_32& A, const SIMD_32& B, const SIMD_32& C, SIMD_32&
                  const SIMD_32& E, const SIMD_32& F, const SIMD_32& G, SIMD_32& H,
                  uint32_t RK)
    {
-   H += E.rho(6,11,25) + ((E & F) ^ (~E & G)) + SIMD_32::splat(RK);
+   H += E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_32::splat(RK);
    D += H;
-   H += A.rho(2,13,22) + ((A & B) | ((A | B) & C));
+   H += A.rho<2,13,22>() + ((A & B) | ((A | B) & C));
    }
 
 inline
@@ -27,9 +27,9 @@ void SHACAL2_Rev(const SIMD_32& A, const SIMD_32& B, const SIMD_32& C, SIMD_32&
                  const SIMD_32& E, const SIMD_32& F, const SIMD_32& G, SIMD_32& H,
                  uint32_t RK)
    {
-   H -= A.rho(2,13,22) + ((A & B) | ((A | B) & C));
+   H -= A.rho<2,13,22>() + ((A & B) | ((A | B) & C));
    D -= H;
-   H -= E.rho(6,11,25) + ((E & F) ^ (~E & G)) + SIMD_32::splat(RK);
+   H -= E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_32::splat(RK);
    }
 
 }
author	Jack Lloyd <[email protected]>	2017-10-11 17:02:20 -0400
committer	Jack Lloyd <[email protected]>	2017-10-12 11:13:11 -0400
commit	175f09ffd806f2f19cd509017a67ae1384f29ae1 (patch)
tree	6194884467e4720dd79797cd106a45d60211f35f /src/lib/block/shacal2
parent	40b3f979723b2b3dfb5c44047d7f786a73fd7f6f (diff)