author     lloyd <[email protected]>   2009-08-12 14:42:34 +0000
committer  lloyd <[email protected]>   2009-08-12 14:42:34 +0000
commit     8d2ceae6d43ab1e21604e112f437b1494def5ef8
tree       57330a89626c5e67fa12d1baefd611e6ee40637a
parent     89eb757b344d3605f3f8012079749f01ef23bb6b
Small code cleanups in SSE2 Serpent
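
The substantive change, visible throughout the diff below, is that the bitwise NOT of a vector is now written as an XOR against an all_ones constant that serpent_encrypt_4 materializes once, rather than _mm_andnot_si128(x, _mm_set1_epi8(0xFF)) re-created inside every S-box macro. Both spellings compute ~x: _mm_andnot_si128(a, b) evaluates (~a) & b, which with an all-ones second operand reduces to ~a, and XOR with all-ones flips every bit. The rest is cosmetic: the macro continuation backslashes are re-aligned, the encryption rounds are grouped eight per block with blank lines, a commented-out reference call is dropped, and the tail of SBoxE4 loses a redundant copy (B0 ^= B1 in place of B1 ^= B0; B0 = B1).

A minimal standalone sketch of the equivalence (not part of the patch; the main() harness and variable names here are illustrative only):

#include <emmintrin.h>
#include <cstdio>
#include <cstring>

int main()
   {
   // Hoisted once, as serpent_encrypt_4 now does
   const __m128i all_ones = _mm_set1_epi8(0xFF);

   const __m128i x = _mm_set1_epi32(0x0F0F0F0F);

   // Old idiom: _mm_andnot_si128(a, b) computes (~a) & b, so with an
   // all-ones second operand it yields ~x
   const __m128i via_andnot = _mm_andnot_si128(x, _mm_set1_epi8(0xFF));

   // New idiom: XOR against the shared all-ones constant, likewise ~x
   const __m128i via_xor = _mm_xor_si128(x, all_ones);

   unsigned char a[16], b[16];
   _mm_storeu_si128((__m128i*)(a), via_andnot);
   _mm_storeu_si128((__m128i*)(b), via_xor);
   std::printf("%s\n", std::memcmp(a, b, 16) == 0 ? "equal" : "differ");
   return 0;
   }

Either form should compile to a single SSE2 instruction (pandn vs. pxor), so the payoff is readability and not repeating the constant in every macro expansion, not raw speed.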
Diffstat (limited to 'src/block')
-rw-r--r--  src/block/serpent_sse2/serp_sse2.cpp    |   6
-rw-r--r--  src/block/serpent_sse2/serp_sse2_sbox.h | 381
2 files changed, 195 insertions(+), 192 deletions(-)
diff --git a/src/block/serpent_sse2/serp_sse2.cpp b/src/block/serpent_sse2/serp_sse2.cpp
index ea937c95a..5ce7d8f47 100644
--- a/src/block/serpent_sse2/serp_sse2.cpp
+++ b/src/block/serpent_sse2/serp_sse2.cpp
@@ -65,6 +65,8 @@ void serpent_encrypt_4(const byte in[64],
byte out[64],
const u32bit keys_32[132])
{
+ const __m128i all_ones = _mm_set1_epi8(0xFF);
+
const __m128i* keys = (const __m128i*)(keys_32);
__m128i* out_mm = (__m128i*)(out);
__m128i* in_mm = (__m128i*)(in);
@@ -84,6 +86,7 @@ void serpent_encrypt_4(const byte in[64],
key_xor( 5,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor( 6,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor( 7,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+
key_xor( 8,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor( 9,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(10,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
@@ -92,6 +95,7 @@ void serpent_encrypt_4(const byte in[64],
key_xor(13,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(14,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(15,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+
key_xor(16,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(17,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(18,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
@@ -100,6 +104,7 @@ void serpent_encrypt_4(const byte in[64],
key_xor(21,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(22,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(23,B0,B1,B2,B3); SBoxE8(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+
key_xor(24,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(25,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
key_xor(26,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
@@ -127,7 +132,6 @@ void Serpent_SSE2::encrypt_n(const byte in[], byte out[], u32bit blocks) const
while(blocks >= 4)
{
serpent_encrypt_4(in, out, this->round_key);
- //Serpent::encrypt_n(in, out, 4);
in += 4 * BLOCK_SIZE;
out += 4 * BLOCK_SIZE;
blocks -= 4;
diff --git a/src/block/serpent_sse2/serp_sse2_sbox.h b/src/block/serpent_sse2/serp_sse2_sbox.h
index bc8678a89..1660643ad 100644
--- a/src/block/serpent_sse2/serp_sse2_sbox.h
+++ b/src/block/serpent_sse2/serp_sse2_sbox.h
@@ -8,211 +8,210 @@
#ifndef SERPENT_SSE2_SBOXES_H__
#define SERPENT_SSE2_SBOXES_H__
-#define SBoxE1(B0, B1, B2, B3) \
- do { \
- B3 = _mm_xor_si128(B3, B0); \
- __m128i B4 = B1; \
- B1 = _mm_and_si128(B1, B3); \
- B4 = _mm_xor_si128(B4, B2); \
- B1 = _mm_xor_si128(B1, B0); \
- B0 = _mm_or_si128(B0, B3); \
- B0 = _mm_xor_si128(B0, B4); \
- B4 = _mm_xor_si128(B4, B3); \
- B3 = _mm_xor_si128(B3, B2); \
- B2 = _mm_or_si128(B2, B1); \
- B2 = _mm_xor_si128(B2, B4); \
- B4 = _mm_andnot_si128(B4, _mm_set1_epi8(0xFF)); \
- B4 = _mm_or_si128(B4, B1); \
- B1 = _mm_xor_si128(B1, B3); \
- B1 = _mm_xor_si128(B1, B4); \
- B3 = _mm_or_si128(B3, B0); \
- B1 = _mm_xor_si128(B1, B3); \
- B4 = _mm_xor_si128(B4, B3); \
- B3 = B0; \
- B0 = B1; \
- B1 = B4; \
+#define SBoxE1(B0, B1, B2, B3) \
+ do { \
+ B3 = _mm_xor_si128(B3, B0); \
+ __m128i B4 = B1; \
+ B1 = _mm_and_si128(B1, B3); \
+ B4 = _mm_xor_si128(B4, B2); \
+ B1 = _mm_xor_si128(B1, B0); \
+ B0 = _mm_or_si128(B0, B3); \
+ B0 = _mm_xor_si128(B0, B4); \
+ B4 = _mm_xor_si128(B4, B3); \
+ B3 = _mm_xor_si128(B3, B2); \
+ B2 = _mm_or_si128(B2, B1); \
+ B2 = _mm_xor_si128(B2, B4); \
+ B4 = _mm_xor_si128(B4, all_ones); \
+ B4 = _mm_or_si128(B4, B1); \
+ B1 = _mm_xor_si128(B1, B3); \
+ B1 = _mm_xor_si128(B1, B4); \
+ B3 = _mm_or_si128(B3, B0); \
+ B1 = _mm_xor_si128(B1, B3); \
+ B4 = _mm_xor_si128(B4, B3); \
+ B3 = B0; \
+ B0 = B1; \
+ B1 = B4; \
} while(0);
-#define SBoxE2(B0, B1, B2, B3) \
- do { \
- B0 = _mm_andnot_si128(B0, _mm_set1_epi8(0xFF)); \
- B2 = _mm_andnot_si128(B2, _mm_set1_epi8(0xFF)); \
- __m128i B4 = B0; \
- B0 = _mm_and_si128(B0, B1); \
- B2 = _mm_xor_si128(B2, B0); \
- B0 = _mm_or_si128(B0, B3); \
- B3 = _mm_xor_si128(B3, B2); \
- B1 = _mm_xor_si128(B1, B0); \
- B0 = _mm_xor_si128(B0, B4); \
- B4 = _mm_or_si128(B4, B1); \
- B1 = _mm_xor_si128(B1, B3); \
- B2 = _mm_or_si128(B2, B0); \
- B2 = _mm_and_si128(B2, B4); \
- B0 = _mm_xor_si128(B0, B1); \
- B1 = _mm_and_si128(B1, B2); \
- B1 = _mm_xor_si128(B1, B0); \
- B0 = _mm_and_si128(B0, B2); \
- B4 = _mm_xor_si128(B4, B0); \
- B0 = B2; \
- B2 = B3; \
- B3 = B1; \
- B1 = B4; \
+#define SBoxE2(B0, B1, B2, B3) \
+ do { \
+ B0 = _mm_xor_si128(B0, all_ones); \
+ B2 = _mm_xor_si128(B2, all_ones); \
+ __m128i B4 = B0; \
+ B0 = _mm_and_si128(B0, B1); \
+ B2 = _mm_xor_si128(B2, B0); \
+ B0 = _mm_or_si128(B0, B3); \
+ B3 = _mm_xor_si128(B3, B2); \
+ B1 = _mm_xor_si128(B1, B0); \
+ B0 = _mm_xor_si128(B0, B4); \
+ B4 = _mm_or_si128(B4, B1); \
+ B1 = _mm_xor_si128(B1, B3); \
+ B2 = _mm_or_si128(B2, B0); \
+ B2 = _mm_and_si128(B2, B4); \
+ B0 = _mm_xor_si128(B0, B1); \
+ B1 = _mm_and_si128(B1, B2); \
+ B1 = _mm_xor_si128(B1, B0); \
+ B0 = _mm_and_si128(B0, B2); \
+ B4 = _mm_xor_si128(B4, B0); \
+ B0 = B2; \
+ B2 = B3; \
+ B3 = B1; \
+ B1 = B4; \
} while(0);
-#define SBoxE3(B0, B1, B2, B3) \
- do { \
- __m128i B4 = B0; \
- B0 = _mm_and_si128(B0, B2); \
- B0 = _mm_xor_si128(B0, B3); \
- B2 = _mm_xor_si128(B2, B1); \
- B2 = _mm_xor_si128(B2, B0); \
- B3 = _mm_or_si128(B3, B4); \
- B3 = _mm_xor_si128(B3, B1); \
- B4 = _mm_xor_si128(B4, B2); \
- B1 = B3; \
- B3 = _mm_or_si128(B3, B4); \
- B3 = _mm_xor_si128(B3, B0); \
- B0 = _mm_and_si128(B0, B1); \
- B4 = _mm_xor_si128(B4, B0); \
- B1 = _mm_xor_si128(B1, B3); \
- B1 = _mm_xor_si128(B1, B4); \
- B4 = _mm_andnot_si128(B4, _mm_set1_epi8(0xFF)); \
- B0 = B2; \
- B2 = B1; \
- B1 = B3; \
- B3 = B4; \
+#define SBoxE3(B0, B1, B2, B3) \
+ do { \
+ __m128i B4 = B0; \
+ B0 = _mm_and_si128(B0, B2); \
+ B0 = _mm_xor_si128(B0, B3); \
+ B2 = _mm_xor_si128(B2, B1); \
+ B2 = _mm_xor_si128(B2, B0); \
+ B3 = _mm_or_si128(B3, B4); \
+ B3 = _mm_xor_si128(B3, B1); \
+ B4 = _mm_xor_si128(B4, B2); \
+ B1 = B3; \
+ B3 = _mm_or_si128(B3, B4); \
+ B3 = _mm_xor_si128(B3, B0); \
+ B0 = _mm_and_si128(B0, B1); \
+ B4 = _mm_xor_si128(B4, B0); \
+ B1 = _mm_xor_si128(B1, B3); \
+ B1 = _mm_xor_si128(B1, B4); \
+ B4 = _mm_xor_si128(B4, all_ones); \
+ B0 = B2; \
+ B2 = B1; \
+ B1 = B3; \
+ B3 = B4; \
} while(0);
-#define SBoxE4(B0, B1, B2, B3) \
- do { \
- __m128i B4 = B0; \
- B0 = _mm_or_si128(B0, B3); \
- B3 = _mm_xor_si128(B3, B1); \
- B1 = _mm_and_si128(B1, B4); \
- B4 = _mm_xor_si128(B4, B2); \
- B2 = _mm_xor_si128(B2, B3); \
- B3 = _mm_and_si128(B3, B0); \
- B4 = _mm_or_si128(B4, B1); \
- B3 = _mm_xor_si128(B3, B4); \
- B0 = _mm_xor_si128(B0, B1); \
- B4 = _mm_and_si128(B4, B0); \
- B1 = _mm_xor_si128(B1, B3); \
- B4 = _mm_xor_si128(B4, B2); \
- B1 = _mm_or_si128(B1, B0); \
- B1 = _mm_xor_si128(B1, B2); \
- B0 = _mm_xor_si128(B0, B3); \
- B2 = B1; \
- B1 = _mm_or_si128(B1, B3); \
- B1 = _mm_xor_si128(B1, B0); \
- B0 = B1; \
- B1 = B2; \
- B2 = B3; \
- B3 = B4; \
+#define SBoxE4(B0, B1, B2, B3) \
+ do { \
+ __m128i B4 = B0; \
+ B0 = _mm_or_si128(B0, B3); \
+ B3 = _mm_xor_si128(B3, B1); \
+ B1 = _mm_and_si128(B1, B4); \
+ B4 = _mm_xor_si128(B4, B2); \
+ B2 = _mm_xor_si128(B2, B3); \
+ B3 = _mm_and_si128(B3, B0); \
+ B4 = _mm_or_si128(B4, B1); \
+ B3 = _mm_xor_si128(B3, B4); \
+ B0 = _mm_xor_si128(B0, B1); \
+ B4 = _mm_and_si128(B4, B0); \
+ B1 = _mm_xor_si128(B1, B3); \
+ B4 = _mm_xor_si128(B4, B2); \
+ B1 = _mm_or_si128(B1, B0); \
+ B1 = _mm_xor_si128(B1, B2); \
+ B0 = _mm_xor_si128(B0, B3); \
+ B2 = B1; \
+ B1 = _mm_or_si128(B1, B3); \
+ B0 = _mm_xor_si128(B0, B1); \
+ B1 = B2; \
+ B2 = B3; \
+ B3 = B4; \
} while(0);
-#define SBoxE5(B0, B1, B2, B3) \
- do { \
- B1 = _mm_xor_si128(B1, B3); \
- B3 = _mm_andnot_si128(B3, _mm_set1_epi8(0xFF)); \
- B2 = _mm_xor_si128(B2, B3); \
- B3 = _mm_xor_si128(B3, B0); \
- __m128i B4 = B1; \
- B1 = _mm_and_si128(B1, B3); \
- B1 = _mm_xor_si128(B1, B2); \
- B4 = _mm_xor_si128(B4, B3); \
- B0 = _mm_xor_si128(B0, B4); \
- B2 = _mm_and_si128(B2, B4); \
- B2 = _mm_xor_si128(B2, B0); \
- B0 = _mm_and_si128(B0, B1); \
- B3 = _mm_xor_si128(B3, B0); \
- B4 = _mm_or_si128(B4, B1); \
- B4 = _mm_xor_si128(B4, B0); \
- B0 = _mm_or_si128(B0, B3); \
- B0 = _mm_xor_si128(B0, B2); \
- B2 = _mm_and_si128(B2, B3); \
- B0 = _mm_andnot_si128(B0, _mm_set1_epi8(0xFF)); \
- B4 = _mm_xor_si128(B4, B2); \
- B2 = B0; \
- B0 = B1; \
- B1 = B4; \
+#define SBoxE5(B0, B1, B2, B3) \
+ do { \
+ B1 = _mm_xor_si128(B1, B3); \
+ B3 = _mm_xor_si128(B3, all_ones); \
+ B2 = _mm_xor_si128(B2, B3); \
+ B3 = _mm_xor_si128(B3, B0); \
+ __m128i B4 = B1; \
+ B1 = _mm_and_si128(B1, B3); \
+ B1 = _mm_xor_si128(B1, B2); \
+ B4 = _mm_xor_si128(B4, B3); \
+ B0 = _mm_xor_si128(B0, B4); \
+ B2 = _mm_and_si128(B2, B4); \
+ B2 = _mm_xor_si128(B2, B0); \
+ B0 = _mm_and_si128(B0, B1); \
+ B3 = _mm_xor_si128(B3, B0); \
+ B4 = _mm_or_si128(B4, B1); \
+ B4 = _mm_xor_si128(B4, B0); \
+ B0 = _mm_or_si128(B0, B3); \
+ B0 = _mm_xor_si128(B0, B2); \
+ B2 = _mm_and_si128(B2, B3); \
+ B0 = _mm_xor_si128(B0, all_ones); \
+ B4 = _mm_xor_si128(B4, B2); \
+ B2 = B0; \
+ B0 = B1; \
+ B1 = B4; \
} while(0);
-#define SBoxE6(B0, B1, B2, B3) \
- do { \
- B0 = _mm_xor_si128(B0, B1); \
- B1 = _mm_xor_si128(B1, B3); \
- B3 = _mm_andnot_si128(B3, _mm_set1_epi8(0xFF)); \
- __m128i B4 = B1; \
- B1 = _mm_and_si128(B1, B0); \
- B2 = _mm_xor_si128(B2, B3); \
- B1 = _mm_xor_si128(B1, B2); \
- B2 = _mm_or_si128(B2, B4); \
- B4 = _mm_xor_si128(B4, B3); \
- B3 = _mm_and_si128(B3, B1); \
- B3 = _mm_xor_si128(B3, B0); \
- B4 = _mm_xor_si128(B4, B1); \
- B4 = _mm_xor_si128(B4, B2); \
- B2 = _mm_xor_si128(B2, B0); \
- B0 = _mm_and_si128(B0, B3); \
- B2 = _mm_andnot_si128(B2, _mm_set1_epi8(0xFF)); \
- B0 = _mm_xor_si128(B0, B4); \
- B4 = _mm_or_si128(B4, B3); \
- B4 = _mm_xor_si128(B4, B2); \
- B2 = B0; \
- B0 = B1; \
- B1 = B3; \
- B3 = B4; \
+#define SBoxE6(B0, B1, B2, B3) \
+ do { \
+ B0 = _mm_xor_si128(B0, B1); \
+ B1 = _mm_xor_si128(B1, B3); \
+ B3 = _mm_xor_si128(B3, all_ones); \
+ __m128i B4 = B1; \
+ B1 = _mm_and_si128(B1, B0); \
+ B2 = _mm_xor_si128(B2, B3); \
+ B1 = _mm_xor_si128(B1, B2); \
+ B2 = _mm_or_si128(B2, B4); \
+ B4 = _mm_xor_si128(B4, B3); \
+ B3 = _mm_and_si128(B3, B1); \
+ B3 = _mm_xor_si128(B3, B0); \
+ B4 = _mm_xor_si128(B4, B1); \
+ B4 = _mm_xor_si128(B4, B2); \
+ B2 = _mm_xor_si128(B2, B0); \
+ B0 = _mm_and_si128(B0, B3); \
+ B2 = _mm_xor_si128(B2, all_ones); \
+ B0 = _mm_xor_si128(B0, B4); \
+ B4 = _mm_or_si128(B4, B3); \
+ B4 = _mm_xor_si128(B4, B2); \
+ B2 = B0; \
+ B0 = B1; \
+ B1 = B3; \
+ B3 = B4; \
} while(0);
-#define SBoxE7(B0, B1, B2, B3) \
- do { \
- B2 = _mm_andnot_si128(B2, _mm_set1_epi8(0xFF)); \
- __m128i B4 = B3; \
- B3 = _mm_and_si128(B3, B0); \
- B0 = _mm_xor_si128(B0, B4); \
- B3 = _mm_xor_si128(B3, B2); \
- B2 = _mm_or_si128(B2, B4); \
- B1 = _mm_xor_si128(B1, B3); \
- B2 = _mm_xor_si128(B2, B0); \
- B0 = _mm_or_si128(B0, B1); \
- B2 = _mm_xor_si128(B2, B1); \
- B4 = _mm_xor_si128(B4, B0); \
- B0 = _mm_or_si128(B0, B3); \
- B0 = _mm_xor_si128(B0, B2); \
- B4 = _mm_xor_si128(B4, B3); \
- B4 = _mm_xor_si128(B4, B0); \
- B3 = _mm_andnot_si128(B3, _mm_set1_epi8(0xFF)); \
- B2 = _mm_and_si128(B2, B4); \
- B3 = _mm_xor_si128(B3, B2); \
- B2 = B4; \
+#define SBoxE7(B0, B1, B2, B3) \
+ do { \
+ B2 = _mm_xor_si128(B2, all_ones); \
+ __m128i B4 = B3; \
+ B3 = _mm_and_si128(B3, B0); \
+ B0 = _mm_xor_si128(B0, B4); \
+ B3 = _mm_xor_si128(B3, B2); \
+ B2 = _mm_or_si128(B2, B4); \
+ B1 = _mm_xor_si128(B1, B3); \
+ B2 = _mm_xor_si128(B2, B0); \
+ B0 = _mm_or_si128(B0, B1); \
+ B2 = _mm_xor_si128(B2, B1); \
+ B4 = _mm_xor_si128(B4, B0); \
+ B0 = _mm_or_si128(B0, B3); \
+ B0 = _mm_xor_si128(B0, B2); \
+ B4 = _mm_xor_si128(B4, B3); \
+ B4 = _mm_xor_si128(B4, B0); \
+ B3 = _mm_xor_si128(B3, all_ones); \
+ B2 = _mm_and_si128(B2, B4); \
+ B3 = _mm_xor_si128(B3, B2); \
+ B2 = B4; \
} while(0);
-#define SBoxE8(B0, B1, B2, B3) \
- do { \
- __m128i B4 = B1; \
- B1 = _mm_or_si128(B1, B2); \
- B1 = _mm_xor_si128(B1, B3); \
- B4 = _mm_xor_si128(B4, B2); \
- B2 = _mm_xor_si128(B2, B1); \
- B3 = _mm_or_si128(B3, B4); \
- B3 = _mm_and_si128(B3, B0); \
- B4 = _mm_xor_si128(B4, B2); \
- B3 = _mm_xor_si128(B3, B1); \
- B1 = _mm_or_si128(B1, B4); \
- B1 = _mm_xor_si128(B1, B0); \
- B0 = _mm_or_si128(B0, B4); \
- B0 = _mm_xor_si128(B0, B2); \
- B1 = _mm_xor_si128(B1, B4); \
- B2 = _mm_xor_si128(B2, B1); \
- B1 = _mm_and_si128(B1, B0); \
- B1 = _mm_xor_si128(B1, B4); \
- B2 = _mm_andnot_si128(B2, _mm_set1_epi8(0xFF)); \
- B2 = _mm_or_si128(B2, B0); \
- B4 = _mm_xor_si128(B4, B2); \
- B2 = B1; \
- B1 = B3; \
- B3 = B0; \
- B0 = B4; \
+#define SBoxE8(B0, B1, B2, B3) \
+ do { \
+ __m128i B4 = B1; \
+ B1 = _mm_or_si128(B1, B2); \
+ B1 = _mm_xor_si128(B1, B3); \
+ B4 = _mm_xor_si128(B4, B2); \
+ B2 = _mm_xor_si128(B2, B1); \
+ B3 = _mm_or_si128(B3, B4); \
+ B3 = _mm_and_si128(B3, B0); \
+ B4 = _mm_xor_si128(B4, B2); \
+ B3 = _mm_xor_si128(B3, B1); \
+ B1 = _mm_or_si128(B1, B4); \
+ B1 = _mm_xor_si128(B1, B0); \
+ B0 = _mm_or_si128(B0, B4); \
+ B0 = _mm_xor_si128(B0, B2); \
+ B1 = _mm_xor_si128(B1, B4); \
+ B2 = _mm_xor_si128(B2, B1); \
+ B1 = _mm_and_si128(B1, B0); \
+ B1 = _mm_xor_si128(B1, B4); \
+ B2 = _mm_xor_si128(B2, all_ones); \
+ B2 = _mm_or_si128(B2, B0); \
+ B4 = _mm_xor_si128(B4, B2); \
+ B2 = B1; \
+ B1 = B3; \
+ B3 = B0; \
+ B0 = B4; \
} while(0);
#endif