Diffstat (limited to 'src')
-rw-r--r--  src/block/serpent_simd/serp_simd_sbox.h  |  5
-rw-r--r--  src/hash/fork256/fork256.cpp             |  6
-rw-r--r--  src/hash/gost_3411/gost_3411.cpp         |  8
-rw-r--r--  src/hash/md4/md4.cpp                     | 55
-rw-r--r--  src/hash/sha1_amd64/sha1_amd64_imp.S     | 10
-rw-r--r--  src/hash/sha2/sha2_64.cpp                |  6
-rw-r--r--  src/hash/whirlpool/whrlpool.cpp          |  6
-rw-r--r--  src/utils/simd_32/simd_32.h              |  2
-rw-r--r--  src/utils/simd_32/simd_altivec.h         |  6
-rw-r--r--  src/utils/simd_32/simd_scalar.h          |  9
-rw-r--r--  src/utils/simd_32/simd_sse.h             |  9
11 files changed, 73 insertions, 49 deletions
diff --git a/src/block/serpent_simd/serp_simd_sbox.h b/src/block/serpent_simd/serp_simd_sbox.h
index 6e3da7359..fe70f0063 100644
--- a/src/block/serpent_simd/serp_simd_sbox.h
+++ b/src/block/serpent_simd/serp_simd_sbox.h
@@ -76,11 +76,10 @@
B4 ^= B0; \
B1 ^= B3; \
B1 ^= B4; \
- B4 = ~B4; \
B0 = B2; \
B2 = B1; \
B1 = B3; \
- B3 = B4; \
+ B3 = ~B4; \
} while(0);
#define SBoxE4(B0, B1, B2, B3) \
@@ -165,7 +164,7 @@
#define SBoxE7(B0, B1, B2, B3) \
do { \
- B2 = ~B2; \
+ B2 = ~B2; \
SIMD_32 B4 = B3; \
B3 &= B0; \
B0 ^= B4; \
diff --git a/src/hash/fork256/fork256.cpp b/src/hash/fork256/fork256.cpp
index 6718f9f97..bd85dfd7c 100644
--- a/src/hash/fork256/fork256.cpp
+++ b/src/hash/fork256/fork256.cpp
@@ -66,9 +66,7 @@ void FORK_256::compress_n(const byte input[], u32bit blocks)
G1 = G2 = G3 = G4 = digest[6];
H1 = H2 = H3 = H4 = digest[7];
- for(u32bit j = 0; j != 16; ++j)
- M[j] = load_be<u32bit>(input, j);
- input += HASH_BLOCK_SIZE;
+ load_be(M.begin(), input, M.size());
step(A1, B1, C1, D1, E1, F1, G1, H1, M[ 0], M[ 1], DELTA[ 0], DELTA[ 1]);
step(A2, B2, C2, D2, E2, F2, G2, H2, M[14], M[15], DELTA[15], DELTA[14]);
@@ -118,6 +116,8 @@ void FORK_256::compress_n(const byte input[], u32bit blocks)
digest[5] += (F1 + F2) ^ (F3 + F4);
digest[6] += (G1 + G2) ^ (G3 + G4);
digest[7] += (H1 + H2) ^ (H3 + H4);
+
+ input += HASH_BLOCK_SIZE;
}
}
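
The same pattern repeats in the hash hunks below: the per-word load loop is replaced by a single bulk load, and the input pointer advance moves to the bottom of the per-block loop. A minimal sketch of the semantics assumed for load_be(out, in, count); the helper body here is illustrative, not Botan's actual implementation:

#include <cstddef>

// Illustration: load 'count' big-endian words of type T from the byte array 'in'
template<typename T>
void load_be_sketch(T out[], const unsigned char in[], std::size_t count)
   {
   for(std::size_t i = 0; i != count; ++i)
      {
      T word = 0;
      for(std::size_t j = 0; j != sizeof(T); ++j)
         word = static_cast<T>((word << 8) | in[i*sizeof(T) + j]); // MSB first
      out[i] = word;
      }
   }

The call form used above, load_be(M.begin(), input, M.size()), is assumed to resolve to this same word-count overload, since M.begin() yields a pointer to the word buffer.
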
diff --git a/src/hash/gost_3411/gost_3411.cpp b/src/hash/gost_3411/gost_3411.cpp
index 90ef3e805..16b1311da 100644
--- a/src/hash/gost_3411/gost_3411.cpp
+++ b/src/hash/gost_3411/gost_3411.cpp
@@ -79,12 +79,8 @@ void GOST_34_11::compress_n(const byte input[], u32bit blocks)
byte S[32] = { 0 };
u64bit U[4], V[4];
-
- for(u32bit j = 0; j != 4; ++j)
- {
- U[j] = load_be<u64bit>(hash, j);
- V[j] = load_be<u64bit>(input + 32*i, j);
- }
+ load_be(U, hash, 4);
+ load_be(V, input + 32*i, 4);
for(u32bit j = 0; j != 4; ++j)
{
diff --git a/src/hash/md4/md4.cpp b/src/hash/md4/md4.cpp
index c50c73a8d..f573dae25 100644
--- a/src/hash/md4/md4.cpp
+++ b/src/hash/md4/md4.cpp
@@ -51,36 +51,41 @@ void MD4::compress_n(const byte input[], u32bit blocks)
for(u32bit i = 0; i != blocks; ++i)
{
- //load_le(M.begin(), input, M.size());
- for(u32bit j = 0; j != 16; ++j)
- M[j] = load_le<u32bit>(input, j);
- input += HASH_BLOCK_SIZE;
-
- FF(A,B,C,D,M[ 0], 3); FF(D,A,B,C,M[ 1], 7); FF(C,D,A,B,M[ 2],11);
- FF(B,C,D,A,M[ 3],19); FF(A,B,C,D,M[ 4], 3); FF(D,A,B,C,M[ 5], 7);
- FF(C,D,A,B,M[ 6],11); FF(B,C,D,A,M[ 7],19); FF(A,B,C,D,M[ 8], 3);
- FF(D,A,B,C,M[ 9], 7); FF(C,D,A,B,M[10],11); FF(B,C,D,A,M[11],19);
- FF(A,B,C,D,M[12], 3); FF(D,A,B,C,M[13], 7); FF(C,D,A,B,M[14],11);
- FF(B,C,D,A,M[15],19);
-
- GG(A,B,C,D,M[ 0], 3); GG(D,A,B,C,M[ 4], 5); GG(C,D,A,B,M[ 8], 9);
- GG(B,C,D,A,M[12],13); GG(A,B,C,D,M[ 1], 3); GG(D,A,B,C,M[ 5], 5);
- GG(C,D,A,B,M[ 9], 9); GG(B,C,D,A,M[13],13); GG(A,B,C,D,M[ 2], 3);
- GG(D,A,B,C,M[ 6], 5); GG(C,D,A,B,M[10], 9); GG(B,C,D,A,M[14],13);
- GG(A,B,C,D,M[ 3], 3); GG(D,A,B,C,M[ 7], 5); GG(C,D,A,B,M[11], 9);
- GG(B,C,D,A,M[15],13);
-
- HH(A,B,C,D,M[ 0], 3); HH(D,A,B,C,M[ 8], 9); HH(C,D,A,B,M[ 4],11);
- HH(B,C,D,A,M[12],15); HH(A,B,C,D,M[ 2], 3); HH(D,A,B,C,M[10], 9);
- HH(C,D,A,B,M[ 6],11); HH(B,C,D,A,M[14],15); HH(A,B,C,D,M[ 1], 3);
- HH(D,A,B,C,M[ 9], 9); HH(C,D,A,B,M[ 5],11); HH(B,C,D,A,M[13],15);
- HH(A,B,C,D,M[ 3], 3); HH(D,A,B,C,M[11], 9); HH(C,D,A,B,M[ 7],11);
- HH(B,C,D,A,M[15],15);
+ load_le(M.begin(), input, M.size());
+
+ FF(A,B,C,D,M[ 0], 3); FF(D,A,B,C,M[ 1], 7);
+ FF(C,D,A,B,M[ 2],11); FF(B,C,D,A,M[ 3],19);
+ FF(A,B,C,D,M[ 4], 3); FF(D,A,B,C,M[ 5], 7);
+ FF(C,D,A,B,M[ 6],11); FF(B,C,D,A,M[ 7],19);
+ FF(A,B,C,D,M[ 8], 3); FF(D,A,B,C,M[ 9], 7);
+ FF(C,D,A,B,M[10],11); FF(B,C,D,A,M[11],19);
+ FF(A,B,C,D,M[12], 3); FF(D,A,B,C,M[13], 7);
+ FF(C,D,A,B,M[14],11); FF(B,C,D,A,M[15],19);
+
+ GG(A,B,C,D,M[ 0], 3); GG(D,A,B,C,M[ 4], 5);
+ GG(C,D,A,B,M[ 8], 9); GG(B,C,D,A,M[12],13);
+ GG(A,B,C,D,M[ 1], 3); GG(D,A,B,C,M[ 5], 5);
+ GG(C,D,A,B,M[ 9], 9); GG(B,C,D,A,M[13],13);
+ GG(A,B,C,D,M[ 2], 3); GG(D,A,B,C,M[ 6], 5);
+ GG(C,D,A,B,M[10], 9); GG(B,C,D,A,M[14],13);
+ GG(A,B,C,D,M[ 3], 3); GG(D,A,B,C,M[ 7], 5);
+ GG(C,D,A,B,M[11], 9); GG(B,C,D,A,M[15],13);
+
+ HH(A,B,C,D,M[ 0], 3); HH(D,A,B,C,M[ 8], 9);
+ HH(C,D,A,B,M[ 4],11); HH(B,C,D,A,M[12],15);
+ HH(A,B,C,D,M[ 2], 3); HH(D,A,B,C,M[10], 9);
+ HH(C,D,A,B,M[ 6],11); HH(B,C,D,A,M[14],15);
+ HH(A,B,C,D,M[ 1], 3); HH(D,A,B,C,M[ 9], 9);
+ HH(C,D,A,B,M[ 5],11); HH(B,C,D,A,M[13],15);
+ HH(A,B,C,D,M[ 3], 3); HH(D,A,B,C,M[11], 9);
+ HH(C,D,A,B,M[ 7],11); HH(B,C,D,A,M[15],15);
A = (digest[0] += A);
B = (digest[1] += B);
C = (digest[2] += C);
D = (digest[3] += D);
+
+ input += HASH_BLOCK_SIZE;
}
}
diff --git a/src/hash/sha1_amd64/sha1_amd64_imp.S b/src/hash/sha1_amd64/sha1_amd64_imp.S
index 34a8318ed..ba4d4d5ef 100644
--- a/src/hash/sha1_amd64/sha1_amd64_imp.S
+++ b/src/hash/sha1_amd64/sha1_amd64_imp.S
@@ -103,10 +103,16 @@ ALIGN;
subq $320, W
+/*
+* Using negative values for SHA-1 constants > 2^31 to work around
+* a bug in binutils not accepting large lea displacements.
+* -0x70E44324 == 0x8F1BBCDC
+* -0x359D3E2A == 0xCA62C1D6
+*/
#define MAGIC1 0x5A827999
#define MAGIC2 0x6ED9EBA1
-#define MAGIC3 0x8F1BBCDC
-#define MAGIC4 0xCA62C1D6
+#define MAGIC3 -0x70E44324
+#define MAGIC4 -0x359D3E2A
#define T %esi
#define T2 %eax
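
The replacement values are the 32-bit two's-complement encodings of SHA-1's third and fourth round constants, so the bit pattern fed into the 32-bit round arithmetic is unchanged; only the lea displacement as seen by binutils becomes a small negative number. A standalone compile-time check of the equivalence (illustrative only):

#include <cstdint>

// -0x70E44324 and -0x359D3E2A wrap to the original SHA-1 round constants mod 2^32
static_assert(static_cast<std::uint32_t>(-0x70E44324LL) == 0x8F1BBCDCu,
              "MAGIC3 replacement is congruent mod 2^32");
static_assert(static_cast<std::uint32_t>(-0x359D3E2ALL) == 0xCA62C1D6u,
              "MAGIC4 replacement is congruent mod 2^32");
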
diff --git a/src/hash/sha2/sha2_64.cpp b/src/hash/sha2/sha2_64.cpp
index e260d8338..3e7c0e228 100644
--- a/src/hash/sha2/sha2_64.cpp
+++ b/src/hash/sha2/sha2_64.cpp
@@ -55,9 +55,7 @@ void SHA_384_512_BASE::compress_n(const byte input[], u32bit blocks)
for(u32bit i = 0; i != blocks; ++i)
{
- for(u32bit j = 0; j != 16; ++j)
- W[j] = load_be<u64bit>(input, j);
- input += HASH_BLOCK_SIZE;
+ load_be(W.begin(), input, 16);
for(u32bit j = 16; j != 80; j += 8)
{
@@ -160,6 +158,8 @@ void SHA_384_512_BASE::compress_n(const byte input[], u32bit blocks)
F = (digest[5] += F);
G = (digest[6] += G);
H = (digest[7] += H);
+
+ input += HASH_BLOCK_SIZE;
}
}
diff --git a/src/hash/whirlpool/whrlpool.cpp b/src/hash/whirlpool/whrlpool.cpp
index b7a02a9b6..06755fe77 100644
--- a/src/hash/whirlpool/whrlpool.cpp
+++ b/src/hash/whirlpool/whrlpool.cpp
@@ -25,9 +25,7 @@ void Whirlpool::compress_n(const byte in[], u32bit blocks)
for(u32bit i = 0; i != blocks; ++i)
{
- for(u32bit j = 0; j != 8; ++j)
- M[j] = load_be<u64bit>(in, j);
- in += HASH_BLOCK_SIZE;
+ load_be(M.begin(), in, M.size());
u64bit K0, K1, K2, K3, K4, K5, K6, K7;
K0 = digest[0]; K1 = digest[1]; K2 = digest[2]; K3 = digest[3];
@@ -121,6 +119,8 @@ void Whirlpool::compress_n(const byte in[], u32bit blocks)
digest[5] ^= B5 ^ M[5];
digest[6] ^= B6 ^ M[6];
digest[7] ^= B7 ^ M[7];
+
+ in += HASH_BLOCK_SIZE;
}
}
diff --git a/src/utils/simd_32/simd_32.h b/src/utils/simd_32/simd_32.h
index be426efd6..324db1a7d 100644
--- a/src/utils/simd_32/simd_32.h
+++ b/src/utils/simd_32/simd_32.h
@@ -10,8 +10,6 @@
#include <botan/types.h>
-//#define BOTAN_TARGET_CPU_HAS_SSE2
-
#if defined(BOTAN_TARGET_CPU_HAS_SSE2)
#include <botan/simd_sse.h>
diff --git a/src/utils/simd_32/simd_altivec.h b/src/utils/simd_32/simd_altivec.h
index e1aa62002..c6dd8a289 100644
--- a/src/utils/simd_32/simd_altivec.h
+++ b/src/utils/simd_32/simd_altivec.h
@@ -168,6 +168,12 @@ class SIMD_Altivec
return vec_nor(reg, reg);
}
+ SIMD_Altivec andc(const SIMD_Altivec& other)
+ {
+ // AltiVec does arg1 & ~arg2 rather than SSE's ~arg1 & arg2
+ return vec_andc(other.reg, reg);
+ }
+
SIMD_Altivec bswap() const
{
__vector unsigned char perm = vec_lvsl(0, (u32bit*)0);
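
The comment matters because all three backends expose the same contract, x.andc(y) == (~x) & y lane-wise, while the underlying primitives differ in argument order: vec_andc(a, b) computes a & ~b, so the AltiVec wrapper passes (other.reg, reg), whereas SSE's _mm_andnot_si128(a, b) already computes ~a & b. A scalar reference of the intended contract (illustrative, not library code):

#include <cstdint>
#include <cassert>

// x.andc(y) is meant to equal (~x) & y in every 32-bit lane
std::uint32_t andc_ref(std::uint32_t x, std::uint32_t y)
   {
   return ~x & y;
   }

int main()
   {
   assert(andc_ref(0xF0F0F0F0u, 0xFF00FF00u) == 0x0F000F00u);
   return 0;
   }
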
diff --git a/src/utils/simd_32/simd_scalar.h b/src/utils/simd_32/simd_scalar.h
index 5fc20b462..398503d33 100644
--- a/src/utils/simd_32/simd_scalar.h
+++ b/src/utils/simd_32/simd_scalar.h
@@ -171,6 +171,15 @@ class SIMD_Scalar
return SIMD_Scalar(~R0, ~R1, ~R2, ~R3);
}
+ // (~reg) & other
+ SIMD_Scalar andc(const SIMD_Scalar& other)
+ {
+ return SIMD_Scalar(~R0 & other.R0,
+ ~R1 & other.R1,
+ ~R2 & other.R2,
+ ~R3 & other.R3);
+ }
+
SIMD_Scalar bswap() const
{
return SIMD_Scalar(reverse_bytes(R0),
diff --git a/src/utils/simd_32/simd_sse.h b/src/utils/simd_32/simd_sse.h
index c45d8032f..81d8afe75 100644
--- a/src/utils/simd_32/simd_sse.h
+++ b/src/utils/simd_32/simd_sse.h
@@ -117,8 +117,13 @@ class SIMD_SSE2
SIMD_SSE2 operator~() const
{
- static const __m128i all_ones = _mm_set1_epi32(0xFFFFFFFF);
- return _mm_xor_si128(reg, all_ones);
+ return _mm_xor_si128(reg, _mm_set1_epi32(0xFFFFFFFF));
+ }
+
+ // (~reg) & other
+ SIMD_SSE2 andc(const SIMD_SSE2& other)
+ {
+ return _mm_andnot_si128(reg, other.reg);
}
SIMD_SSE2 bswap() const
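
As with the AltiVec version, the SSE wrapper keeps the (~reg) & other contract; here _mm_andnot_si128 provides it directly. A standalone spot check of that intrinsic's behaviour (illustrative only):

#include <emmintrin.h>
#include <cstdint>
#include <cstdio>

int main()
   {
   // _mm_andnot_si128(a, b) computes (~a) & b in each lane
   __m128i a = _mm_set1_epi32(static_cast<int>(0xF0F0F0F0u));
   __m128i b = _mm_set1_epi32(static_cast<int>(0xFF00FF00u));
   __m128i r = _mm_andnot_si128(a, b);

   std::uint32_t out[4];
   _mm_storeu_si128(reinterpret_cast<__m128i*>(out), r);
   std::printf("%08x\n", out[0]); // prints 0f000f00
   return 0;
   }
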