aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib/block/shacal2/shacal2_avx2/shacal2_avx2.cpp
blob: cb83fece88dafc65a1b1bf713a5aeb424c0111f7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
/*
* (C) 2018 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/

#include <botan/internal/shacal2.h>
#include <botan/internal/simd_avx2.h>

namespace Botan {

namespace {

/*
* One forward round of the cipher, evaluated on whole SIMD_8x32 vectors.
*
* Only D and H are written; the other six state words and the round
* key RK (broadcast via splat) are read-only inputs.
*/
void BOTAN_FORCE_INLINE BOTAN_FUNC_ISA("avx2")
   SHACAL2_Fwd(const SIMD_8x32& A, const SIMD_8x32& B, const SIMD_8x32& C, SIMD_8x32& D,
               const SIMD_8x32& E, const SIMD_8x32& F, const SIMD_8x32& G, SIMD_8x32& H,
               uint32_t RK)
   {
   const SIMD_8x32 round_key = SIMD_8x32::splat(RK);

   // Contribution derived from E, F, G: rho of E plus a bitwise select
   // (bits of F where E is set, bits of G where E is clear)
   const SIMD_8x32 e_mixed = E.rho<6,11,25>();
   const SIMD_8x32 e_choice = (E & F) ^ (~E & G);

   H += e_mixed + e_choice + round_key;

   // D absorbs the partially updated H before the A/B/C term is added
   D += H;

   // Contribution derived from A, B, C: rho of A plus the bitwise majority
   const SIMD_8x32 a_mixed = A.rho<2,13,22>();
   const SIMD_8x32 a_majority = (A & B) | ((A | B) & C);

   H += a_mixed + a_majority;
   }

/*
* Inverse of SHACAL2_Fwd: undoes one round by subtracting the same
* three contributions in the opposite order.
*
* Only D and H are written; the other six state words and the round
* key RK (broadcast via splat) are read-only inputs.
*/
void BOTAN_FORCE_INLINE BOTAN_FUNC_ISA("avx2")
   SHACAL2_Rev(const SIMD_8x32& A, const SIMD_8x32& B, const SIMD_8x32& C, SIMD_8x32& D,
               const SIMD_8x32& E, const SIMD_8x32& F, const SIMD_8x32& G, SIMD_8x32& H,
               uint32_t RK)
   {
   // Remove the A/B/C term first (it was the last thing added going forward)
   const SIMD_8x32 a_mixed = A.rho<2,13,22>();
   const SIMD_8x32 a_majority = (A & B) | ((A | B) & C);

   H -= a_mixed + a_majority;

   // H now equals the value that was folded into D during the forward round
   D -= H;

   // Finally strip the E/F/G term together with the round key
   const SIMD_8x32 e_mixed = E.rho<6,11,25>();
   const SIMD_8x32 e_choice = (E & F) ^ (~E & G);
   const SIMD_8x32 round_key = SIMD_8x32::splat(RK);

   H -= e_mixed + e_choice + round_key;
   }

}

/*
* AVX2 encryption path.
*
* Reads eight vectors from in at 32-byte steps (offsets 0..224), transposes
* them, applies all 64 forward rounds with the keys in m_RK, transposes
* back and writes eight vectors to out at the same offsets.
* NOTE(review): presumably this handles 8 independent 32-byte blocks in
* parallel, as the function name suggests - confirm against the caller.
*/
void BOTAN_FUNC_ISA("avx2") SHACAL2::avx2_encrypt_8(const uint8_t in[], uint8_t out[]) const
   {
   // Byte distance between consecutive vector loads/stores
   const size_t STRIDE = 32;

   SIMD_8x32::reset_registers();

   SIMD_8x32 A = SIMD_8x32::load_be(in + 0*STRIDE);
   SIMD_8x32 B = SIMD_8x32::load_be(in + 1*STRIDE);
   SIMD_8x32 C = SIMD_8x32::load_be(in + 2*STRIDE);
   SIMD_8x32 D = SIMD_8x32::load_be(in + 3*STRIDE);
   SIMD_8x32 E = SIMD_8x32::load_be(in + 4*STRIDE);
   SIMD_8x32 F = SIMD_8x32::load_be(in + 5*STRIDE);
   SIMD_8x32 G = SIMD_8x32::load_be(in + 6*STRIDE);
   SIMD_8x32 H = SIMD_8x32::load_be(in + 7*STRIDE);

   SIMD_8x32::transpose(A, B, C, D, E, F, G, H);

   // Eight rounds per iteration. Rather than moving data between the
   // state words after every round, the argument list is rotated by one
   // position per call; after eight calls the roles line up again.
   for(size_t base = 0; base < 64; base += 8)
      {
      SHACAL2_Fwd(A, B, C, D, E, F, G, H, m_RK[base]);
      SHACAL2_Fwd(H, A, B, C, D, E, F, G, m_RK[base + 1]);
      SHACAL2_Fwd(G, H, A, B, C, D, E, F, m_RK[base + 2]);
      SHACAL2_Fwd(F, G, H, A, B, C, D, E, m_RK[base + 3]);
      SHACAL2_Fwd(E, F, G, H, A, B, C, D, m_RK[base + 4]);
      SHACAL2_Fwd(D, E, F, G, H, A, B, C, m_RK[base + 5]);
      SHACAL2_Fwd(C, D, E, F, G, H, A, B, m_RK[base + 6]);
      SHACAL2_Fwd(B, C, D, E, F, G, H, A, m_RK[base + 7]);
      }

   SIMD_8x32::transpose(A, B, C, D, E, F, G, H);

   A.store_be(out + 0*STRIDE);
   B.store_be(out + 1*STRIDE);
   C.store_be(out + 2*STRIDE);
   D.store_be(out + 3*STRIDE);
   E.store_be(out + 4*STRIDE);
   F.store_be(out + 5*STRIDE);
   G.store_be(out + 6*STRIDE);
   H.store_be(out + 7*STRIDE);

   SIMD_8x32::zero_registers();
   }

/*
* AVX2 decryption path.
*
* Mirrors avx2_encrypt_8: reads eight vectors from in at 32-byte steps,
* transposes them, applies all 64 inverse rounds consuming m_RK from
* index 63 down to 0, transposes back and writes eight vectors to out.
* NOTE(review): presumably this handles 8 independent 32-byte blocks in
* parallel, as the function name suggests - confirm against the caller.
*/
BOTAN_FUNC_ISA("avx2") void SHACAL2::avx2_decrypt_8(const uint8_t in[], uint8_t out[]) const
   {
   // Byte distance between consecutive vector loads/stores
   const size_t STRIDE = 32;

   SIMD_8x32::reset_registers();

   SIMD_8x32 A = SIMD_8x32::load_be(in + 0*STRIDE);
   SIMD_8x32 B = SIMD_8x32::load_be(in + 1*STRIDE);
   SIMD_8x32 C = SIMD_8x32::load_be(in + 2*STRIDE);
   SIMD_8x32 D = SIMD_8x32::load_be(in + 3*STRIDE);
   SIMD_8x32 E = SIMD_8x32::load_be(in + 4*STRIDE);
   SIMD_8x32 F = SIMD_8x32::load_be(in + 5*STRIDE);
   SIMD_8x32 G = SIMD_8x32::load_be(in + 6*STRIDE);
   SIMD_8x32 H = SIMD_8x32::load_be(in + 7*STRIDE);

   SIMD_8x32::transpose(A, B, C, D, E, F, G, H);

   // Walk the key schedule backwards, eight keys per iteration. top is
   // one past the highest key index used in the iteration, so the keys
   // consumed are m_RK[top-1] down to m_RK[top-8]. The argument
   // rotation is the forward sequence replayed in reverse.
   for(size_t top = 64; top != 0; top -= 8)
      {
      SHACAL2_Rev(B, C, D, E, F, G, H, A, m_RK[top - 1]);
      SHACAL2_Rev(C, D, E, F, G, H, A, B, m_RK[top - 2]);
      SHACAL2_Rev(D, E, F, G, H, A, B, C, m_RK[top - 3]);
      SHACAL2_Rev(E, F, G, H, A, B, C, D, m_RK[top - 4]);
      SHACAL2_Rev(F, G, H, A, B, C, D, E, m_RK[top - 5]);
      SHACAL2_Rev(G, H, A, B, C, D, E, F, m_RK[top - 6]);
      SHACAL2_Rev(H, A, B, C, D, E, F, G, m_RK[top - 7]);
      SHACAL2_Rev(A, B, C, D, E, F, G, H, m_RK[top - 8]);
      }

   SIMD_8x32::transpose(A, B, C, D, E, F, G, H);

   A.store_be(out + 0*STRIDE);
   B.store_be(out + 1*STRIDE);
   C.store_be(out + 2*STRIDE);
   D.store_be(out + 3*STRIDE);
   E.store_be(out + 4*STRIDE);
   F.store_be(out + 5*STRIDE);
   G.store_be(out + 6*STRIDE);
   H.store_be(out + 7*STRIDE);

   SIMD_8x32::zero_registers();
   }

}