1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
|
/*
* XTEA in SIMD
* (C) 2009 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/
#include <botan/xtea_simd.h>
#include <botan/loadstor.h>
#include <botan/internal/simd_32.h>
namespace Botan {
namespace {
void xtea_encrypt_8(const byte in[64], byte out[64], const u32bit EK[64])
{
SIMD_32 L0 = SIMD_32::load_be(in );
SIMD_32 R0 = SIMD_32::load_be(in + 16);
SIMD_32 L1 = SIMD_32::load_be(in + 32);
SIMD_32 R1 = SIMD_32::load_be(in + 48);
SIMD_32::transpose(L0, R0, L1, R1);
for(size_t i = 0; i != 32; i += 2)
{
SIMD_32 K0(EK[2*i ]);
SIMD_32 K1(EK[2*i+1]);
SIMD_32 K2(EK[2*i+2]);
SIMD_32 K3(EK[2*i+3]);
L0 += (((R0 << 4) ^ (R0 >> 5)) + R0) ^ K0;
L1 += (((R1 << 4) ^ (R1 >> 5)) + R1) ^ K0;
R0 += (((L0 << 4) ^ (L0 >> 5)) + L0) ^ K1;
R1 += (((L1 << 4) ^ (L1 >> 5)) + L1) ^ K1;
L0 += (((R0 << 4) ^ (R0 >> 5)) + R0) ^ K2;
L1 += (((R1 << 4) ^ (R1 >> 5)) + R1) ^ K2;
R0 += (((L0 << 4) ^ (L0 >> 5)) + L0) ^ K3;
R1 += (((L1 << 4) ^ (L1 >> 5)) + L1) ^ K3;
}
SIMD_32::transpose(L0, R0, L1, R1);
L0.store_be(out);
R0.store_be(out + 16);
L1.store_be(out + 32);
R1.store_be(out + 48);
}
void xtea_decrypt_8(const byte in[64], byte out[64], const u32bit EK[64])
{
SIMD_32 L0 = SIMD_32::load_be(in );
SIMD_32 R0 = SIMD_32::load_be(in + 16);
SIMD_32 L1 = SIMD_32::load_be(in + 32);
SIMD_32 R1 = SIMD_32::load_be(in + 48);
SIMD_32::transpose(L0, R0, L1, R1);
for(size_t i = 0; i != 32; i += 2)
{
SIMD_32 K0(EK[63 - 2*i]);
SIMD_32 K1(EK[62 - 2*i]);
SIMD_32 K2(EK[61 - 2*i]);
SIMD_32 K3(EK[60 - 2*i]);
R0 -= (((L0 << 4) ^ (L0 >> 5)) + L0) ^ K0;
R1 -= (((L1 << 4) ^ (L1 >> 5)) + L1) ^ K0;
L0 -= (((R0 << 4) ^ (R0 >> 5)) + R0) ^ K1;
L1 -= (((R1 << 4) ^ (R1 >> 5)) + R1) ^ K1;
R0 -= (((L0 << 4) ^ (L0 >> 5)) + L0) ^ K2;
R1 -= (((L1 << 4) ^ (L1 >> 5)) + L1) ^ K2;
L0 -= (((R0 << 4) ^ (R0 >> 5)) + R0) ^ K3;
L1 -= (((R1 << 4) ^ (R1 >> 5)) + R1) ^ K3;
}
SIMD_32::transpose(L0, R0, L1, R1);
L0.store_be(out);
R0.store_be(out + 16);
L1.store_be(out + 32);
R1.store_be(out + 48);
}
}
/*
* XTEA Encryption
*/
void XTEA_SIMD::encrypt_n(const byte in[], byte out[], size_t blocks) const
{
const u32bit* KS = &(this->get_EK()[0]);
while(blocks >= 8)
{
xtea_encrypt_8(in, out, KS);
in += 8 * BLOCK_SIZE;
out += 8 * BLOCK_SIZE;
blocks -= 8;
}
if(blocks)
XTEA::encrypt_n(in, out, blocks);
}
/*
* XTEA Decryption
*/
void XTEA_SIMD::decrypt_n(const byte in[], byte out[], size_t blocks) const
{
const u32bit* KS = &(this->get_EK()[0]);
while(blocks >= 8)
{
xtea_decrypt_8(in, out, KS);
in += 8 * BLOCK_SIZE;
out += 8 * BLOCK_SIZE;
blocks -= 8;
}
if(blocks)
XTEA::decrypt_n(in, out, blocks);
}
}
|