about | summary | refs | log | tree | commit | diff | stats
path: root/src/block/aes_ssse3/aes_ssse3.cpp
diff options
context:
space:
mode:
author: lloyd <[email protected]> 2010-08-11 19:55:08 +0000
committer: lloyd <[email protected]> 2010-08-11 19:55:08 +0000
commit: b84b75dbf582a45be50d51bdcd837dcc73e9625c (patch)
tree: 37b01b3f05f3b23581f577cf16e46e187c81a4eb /src/block/aes_ssse3/aes_ssse3.cpp
parent: 99437cb58e753aed3a066f3862d400eb99b8f1e6 (diff)
Use _mm_set_epi32 instead of _mm_set_epi64x - VC++ obnoxiously only
supports epi64x in 64-bit mode.
Diffstat (limited to 'src/block/aes_ssse3/aes_ssse3.cpp')
-rw-r--r-- src/block/aes_ssse3/aes_ssse3.cpp 156
1 files changed, 78 insertions, 78 deletions
diff --git a/src/block/aes_ssse3/aes_ssse3.cpp b/src/block/aes_ssse3/aes_ssse3.cpp
index fca77e9a1..be0408897 100644
--- a/src/block/aes_ssse3/aes_ssse3.cpp
+++ b/src/block/aes_ssse3/aes_ssse3.cpp
@@ -19,33 +19,33 @@ namespace {
const __m128i low_nibs = _mm_set1_epi8(0x0F);
-const __m128i k_ipt1 = _mm_set_epi64x(
- 0xCABAE09052227808, 0xC2B2E8985A2A7000);
-const __m128i k_ipt2 = _mm_set_epi64x(
- 0xCD80B1FCB0FDCC81, 0x4C01307D317C4D00);
+const __m128i k_ipt1 = _mm_set_epi32(
+ 0xCABAE090, 0x52227808, 0xC2B2E898, 0x5A2A7000);
+const __m128i k_ipt2 = _mm_set_epi32(
+ 0xCD80B1FC, 0xB0FDCC81, 0x4C01307D, 0x317C4D00);
-const __m128i k_inv1 = _mm_set_epi64x(
- 0x040703090A0B0C02, 0x0E05060F0D080180);
-const __m128i k_inv2 = _mm_set_epi64x(
- 0x030D0E0C02050809, 0x01040A060F0B0780);
+const __m128i k_inv1 = _mm_set_epi32(
+ 0x04070309, 0x0A0B0C02, 0x0E05060F, 0x0D080180);
+const __m128i k_inv2 = _mm_set_epi32(
+ 0x030D0E0C, 0x02050809, 0x01040A06, 0x0F0B0780);
-const __m128i sb1u = _mm_set_epi64x(
- 0xA5DF7A6E142AF544, 0xB19BE18FCB503E00);
-const __m128i sb1t = _mm_set_epi64x(
- 0x3BF7CCC10D2ED9EF, 0x3618D415FAE22300);
+const __m128i sb1u = _mm_set_epi32(
+ 0xA5DF7A6E, 0x142AF544, 0xB19BE18F, 0xCB503E00);
+const __m128i sb1t = _mm_set_epi32(
+ 0x3BF7CCC1, 0x0D2ED9EF, 0x3618D415, 0xFAE22300);
const __m128i mc_forward[4] = {
- _mm_set_epi64x(0x0C0F0E0D080B0A09, 0x0407060500030201),
- _mm_set_epi64x(0x000302010C0F0E0D, 0x080B0A0904070605),
- _mm_set_epi64x(0x0407060500030201, 0x0C0F0E0D080B0A09),
- _mm_set_epi64x(0x080B0A0904070605, 0x000302010C0F0E0D)
+ _mm_set_epi32(0x0C0F0E0D, 0x080B0A09, 0x04070605, 0x00030201),
+ _mm_set_epi32(0x00030201, 0x0C0F0E0D, 0x080B0A09, 0x04070605),
+ _mm_set_epi32(0x04070605, 0x00030201, 0x0C0F0E0D, 0x080B0A09),
+ _mm_set_epi32(0x080B0A09, 0x04070605, 0x00030201, 0x0C0F0E0D)
};
const __m128i sr[4] = {
- _mm_set_epi64x(0x0F0E0D0C0B0A0908, 0x0706050403020100),
- _mm_set_epi64x(0x0B06010C07020D08, 0x030E09040F0A0500),
- _mm_set_epi64x(0x070E050C030A0108, 0x0F060D040B020900),
- _mm_set_epi64x(0x0306090C0F020508, 0x0B0E0104070A0D00),
+ _mm_set_epi32(0x0F0E0D0C, 0x0B0A0908, 0x07060504, 0x03020100),
+ _mm_set_epi32(0x0B06010C, 0x07020D08, 0x030E0904, 0x0F0A0500),
+ _mm_set_epi32(0x070E050C, 0x030A0108, 0x0F060D04, 0x0B020900),
+ _mm_set_epi32(0x0306090C, 0x0F020508, 0x0B0E0104, 0x070A0D00),
};
#define mm_xor3(x, y, z) _mm_xor_si128(x, _mm_xor_si128(y, z))
@@ -81,14 +81,14 @@ __m128i aes_schedule_mangle(__m128i k, byte round_no)
__m128i aes_schedule_mangle_dec(__m128i k, byte round_no)
{
const __m128i dsk[8] = {
- _mm_set_epi64x(0x4AED933482255BFC, 0xB6116FC87ED9A700),
- _mm_set_epi64x(0x8BB89FACE9DAFDCE, 0x4576516227143300),
- _mm_set_epi64x(0x4622EE8AADC90561, 0x27438FEBCCA86400),
- _mm_set_epi64x(0x73AEE13CBD602FF2, 0x815C13CE4F92DD00),
- _mm_set_epi64x(0xF83F3EF9FA3D3CFB, 0x03C4C50201C6C700),
- _mm_set_epi64x(0xA5526A9D7384BC4B, 0xEE1921D638CFF700),
- _mm_set_epi64x(0xA080D3F310306343, 0xE3C390B053732000),
- _mm_set_epi64x(0x2F45AEC48CE60D67, 0xA0CA214B036982E8)
+ _mm_set_epi32(0x4AED9334, 0x82255BFC, 0xB6116FC8, 0x7ED9A700),
+ _mm_set_epi32(0x8BB89FAC, 0xE9DAFDCE, 0x45765162, 0x27143300),
+ _mm_set_epi32(0x4622EE8A, 0xADC90561, 0x27438FEB, 0xCCA86400),
+ _mm_set_epi32(0x73AEE13C, 0xBD602FF2, 0x815C13CE, 0x4F92DD00),
+ _mm_set_epi32(0xF83F3EF9, 0xFA3D3CFB, 0x03C4C502, 0x01C6C700),
+ _mm_set_epi32(0xA5526A9D, 0x7384BC4B, 0xEE1921D6, 0x38CFF700),
+ _mm_set_epi32(0xA080D3F3, 0x10306343, 0xE3C390B0, 0x53732000),
+ _mm_set_epi32(0x2F45AEC4, 0x8CE60D67, 0xA0CA214B, 0x036982E8)
};
__m128i t = aes_schedule_transform(k, dsk[0], dsk[1]);
@@ -108,10 +108,10 @@ __m128i aes_schedule_mangle_dec(__m128i k, byte round_no)
__m128i aes_schedule_mangle_last(__m128i k, byte round_no)
{
- const __m128i out_tr1 = _mm_set_epi64x(
- 0xF7974121DEBE6808, 0xFF9F4929D6B66000);
- const __m128i out_tr2 = _mm_set_epi64x(
- 0xE10D5DB1B05C0CE0, 0x01EDBD5150BCEC00);
+ const __m128i out_tr1 = _mm_set_epi32(
+ 0xF7974121, 0xDEBE6808, 0xFF9F4929, 0xD6B66000);
+ const __m128i out_tr2 = _mm_set_epi32(
+ 0xE10D5DB1, 0xB05C0CE0, 0x01EDBD51, 0x50BCEC00);
k = _mm_shuffle_epi8(k, sr[round_no % 4]);
k = _mm_xor_si128(k, _mm_set1_epi8(0x5B));
@@ -120,10 +120,10 @@ __m128i aes_schedule_mangle_last(__m128i k, byte round_no)
__m128i aes_schedule_mangle_last_dec(__m128i k)
{
- const __m128i deskew1 = _mm_set_epi64x(
- 0x1DFEB95A5DBEF91A, 0x07E4A34047A4E300);
- const __m128i deskew2 = _mm_set_epi64x(
- 0x2841C2ABF49D1E77, 0x5F36B5DC83EA6900);
+ const __m128i deskew1 = _mm_set_epi32(
+ 0x1DFEB95A, 0x5DBEF91A, 0x07E4A340, 0x47A4E300);
+ const __m128i deskew2 = _mm_set_epi32(
+ 0x2841C2AB, 0xF49D1E77, 0x5F36B5DC, 0x83EA6900);
k = _mm_xor_si128(k, _mm_set1_epi8(0x5B));
return aes_schedule_transform(k, deskew1, deskew2);
@@ -166,21 +166,21 @@ __m128i aes_schedule_round(__m128i* rcon, __m128i input1, __m128i input2)
__m128i aes_ssse3_encrypt(__m128i B, const __m128i* keys, u32bit rounds)
{
- const __m128i sb2u = _mm_set_epi64x(
- 0x5EB7E955BC982FCD, 0xE27A93C60B712400);
- const __m128i sb2t = _mm_set_epi64x(
- 0xC2A163C8AB82234A, 0x69EB88400AE12900);
+ const __m128i sb2u = _mm_set_epi32(
+ 0x5EB7E955, 0xBC982FCD, 0xE27A93C6, 0x0B712400);
+ const __m128i sb2t = _mm_set_epi32(
+ 0xC2A163C8, 0xAB82234A, 0x69EB8840, 0x0AE12900);
- const __m128i sbou = _mm_set_epi64x(
- 0x15AABF7AC502A878, 0xD0D26D176FBDC700);
- const __m128i sbot = _mm_set_epi64x(
- 0x8E1E90D1412B35FA, 0xCFE474A55FBB6A00);
+ const __m128i sbou = _mm_set_epi32(
+ 0x15AABF7A, 0xC502A878, 0xD0D26D17, 0x6FBDC700);
+ const __m128i sbot = _mm_set_epi32(
+ 0x8E1E90D1, 0x412B35FA, 0xCFE474A5, 0x5FBB6A00);
const __m128i mc_backward[4] = {
- _mm_set_epi64x(0x0E0D0C0F0A09080B, 0x0605040702010003),
- _mm_set_epi64x(0x0A09080B06050407, 0x020100030E0D0C0F),
- _mm_set_epi64x(0x0605040702010003, 0x0E0D0C0F0A09080B),
- _mm_set_epi64x(0x020100030E0D0C0F, 0x0A09080B06050407),
+ _mm_set_epi32(0x0E0D0C0F, 0x0A09080B, 0x06050407, 0x02010003),
+ _mm_set_epi32(0x0A09080B, 0x06050407, 0x02010003, 0x0E0D0C0F),
+ _mm_set_epi32(0x06050407, 0x02010003, 0x0E0D0C0F, 0x0A09080B),
+ _mm_set_epi32(0x02010003, 0x0E0D0C0F, 0x0A09080B, 0x06050407),
};
B = mm_xor3(_mm_shuffle_epi8(k_ipt1, _mm_and_si128(low_nibs, B)),
@@ -235,30 +235,30 @@ __m128i aes_ssse3_encrypt(__m128i B, const __m128i* keys, u32bit rounds)
__m128i aes_ssse3_decrypt(__m128i B, const __m128i* keys, u32bit rounds)
{
- const __m128i k_dipt1 = _mm_set_epi64x(
- 0x154A411E114E451A, 0x0F505B040B545F00);
- const __m128i k_dipt2 = _mm_set_epi64x(
- 0x12771772F491F194, 0x86E383E660056500);
-
- const __m128i sb9u = _mm_set_epi64x(
- 0xCAD51F504F994CC9, 0x851C03539A86D600);
- const __m128i sb9t = _mm_set_epi64x(
- 0x725E2C9EB2FBA565, 0xC03B1789ECD74900);
-
- const __m128i sbeu = _mm_set_epi64x(
- 0x2242600464B4F6B0, 0x46F2929626D4D000);
- const __m128i sbet = _mm_set_epi64x(
- 0x9467F36B98593E32, 0x0C55A6CDFFAAC100);
-
- const __m128i sbdu = _mm_set_epi64x(
- 0xF56E9B13882A4439, 0x7D57CCDFE6B1A200);
- const __m128i sbdt = _mm_set_epi64x(
- 0x2931180D15DEEFD3, 0x3CE2FAF724C6CB00);
-
- const __m128i sbbu = _mm_set_epi64x(
- 0x602646F6B0F2D404, 0xD022649296B44200);
- const __m128i sbbt = _mm_set_epi64x(
- 0xF3FF0C3E3255AA6B, 0xC19498A6CD596700);
+ const __m128i k_dipt1 = _mm_set_epi32(
+ 0x154A411E, 0x114E451A, 0x0F505B04, 0x0B545F00);
+ const __m128i k_dipt2 = _mm_set_epi32(
+ 0x12771772, 0xF491F194, 0x86E383E6, 0x60056500);
+
+ const __m128i sb9u = _mm_set_epi32(
+ 0xCAD51F50, 0x4F994CC9, 0x851C0353, 0x9A86D600);
+ const __m128i sb9t = _mm_set_epi32(
+ 0x725E2C9E, 0xB2FBA565, 0xC03B1789, 0xECD74900);
+
+ const __m128i sbeu = _mm_set_epi32(
+ 0x22426004, 0x64B4F6B0, 0x46F29296, 0x26D4D000);
+ const __m128i sbet = _mm_set_epi32(
+ 0x9467F36B, 0x98593E32, 0x0C55A6CD, 0xFFAAC100);
+
+ const __m128i sbdu = _mm_set_epi32(
+ 0xF56E9B13, 0x882A4439, 0x7D57CCDF, 0xE6B1A200);
+ const __m128i sbdt = _mm_set_epi32(
+ 0x2931180D, 0x15DEEFD3, 0x3CE2FAF7, 0x24C6CB00);
+
+ const __m128i sbbu = _mm_set_epi32(
+ 0x602646F6, 0xB0F2D404, 0xD0226492, 0x96B44200);
+ const __m128i sbbt = _mm_set_epi32(
+ 0xF3FF0C3E, 0x3255AA6B, 0xC19498A6, 0xCD596700);
__m128i mc = mc_forward[3];
@@ -290,10 +290,10 @@ __m128i aes_ssse3_decrypt(__m128i B, const __m128i* keys, u32bit rounds)
if(r == rounds)
{
- const __m128i sbou = _mm_set_epi64x(
- 0xC7AA6DB9D4943E2D, 0x1387EA537EF94000);
- const __m128i sbot = _mm_set_epi64x(
- 0xCA4B8159D8C58E9C, 0x12D7560F93441D00);
+ const __m128i sbou = _mm_set_epi32(
+ 0xC7AA6DB9, 0xD4943E2D, 0x1387EA53, 0x7EF94000);
+ const __m128i sbot = _mm_set_epi32(
+ 0xCA4B8159, 0xD8C58E9C, 0x12D7560F, 0x93441D00);
__m128i x = _mm_shuffle_epi8(sbou, t5);
__m128i y = _mm_shuffle_epi8(sbot, t6);
@@ -366,8 +366,8 @@ void AES_128_SSSE3::decrypt_n(const byte in[], byte out[], u32bit blocks) const
*/
void AES_128_SSSE3::key_schedule(const byte keyb[], u32bit)
{
- __m128i rcon = _mm_set_epi64x(0x702A98084D7C7D81,
- 0x1F8391B9AF9DEEB6);
+ __m128i rcon = _mm_set_epi32(0x702A9808, 0x4D7C7D81,
+ 0x1F8391B9, 0xAF9DEEB6);
__m128i key = _mm_loadu_si128((const __m128i*)keyb);