aboutsummaryrefslogtreecommitdiffstats
path: root/src/square.cpp
diff options
context:
space:
mode:
authorlloyd <[email protected]>2007-05-31 03:25:19 +0000
committerlloyd <[email protected]>2007-05-31 03:25:19 +0000
commit55608e7dd1aa593944f967f2549564e4f42b654e (patch)
treeec2ec03a762a6dac82eb608487d5394370135624 /src/square.cpp
parent22ecdc45a0efa4c444d0b7010b7cd743aeb68c57 (diff)
Write functions to handle loading and saving words a block at a time, taking into
account endian differences. The current code does not take advantage of the knowledge of which endianness we are running on; an optimization suggested by Yves Jerschow is to use (unsafe) casts to speed up the load/store operations. This turns out to provide large performance increases (30% or more) in some cases. Even without the unsafe casts, this version seems to average a few percent faster, probably because the longer loading loops have been partially or fully unrolled. This also makes the code implementing low-level algorithms like ciphers and hashes a bit more succinct.
Diffstat (limited to 'src/square.cpp')
-rw-r--r--src/square.cpp34
1 files changed, 16 insertions, 18 deletions
diff --git a/src/square.cpp b/src/square.cpp
index 7d7cf1da5..988e56ef5 100644
--- a/src/square.cpp
+++ b/src/square.cpp
@@ -117,7 +117,7 @@ void Square::key(const byte key[], u32bit)
{
SecureBuffer<u32bit, 36> XEK, XDK;
for(u32bit j = 0; j != 4; ++j)
- XEK[j] = make_u32bit(key[4*j], key[4*j+1], key[4*j+2], key[4*j+3]);
+ XEK[j] = load_be<u32bit>(key, j);
for(u32bit j = 0; j != 8; ++j)
{
XEK[4*j+4] = XEK[4*j ] ^ rotate_left(XEK[4*j+3], 8) ^ (0x01000000 << j);
@@ -149,27 +149,25 @@ void Square::transform(u32bit round_key[4])
{ 0x03, 0x02, 0x01, 0x01 },
{ 0x01, 0x03, 0x02, 0x01 },
{ 0x01, 0x01, 0x03, 0x02 } };
- SecureBuffer<byte, 4> A[4], B[4];
- for(u32bit j = 0; j != 4; ++j)
- for(u32bit k = 0; k != 4; ++k)
- A[j][k] = get_byte(k, round_key[j]);
+
for(u32bit j = 0; j != 4; ++j)
+ {
+ SecureBuffer<byte, 4> A, B;
+
+ store_be(round_key[j], A);
+
for(u32bit k = 0; k != 4; ++k)
for(u32bit l = 0; l != 4; ++l)
- B[j][k] ^= mul(A[j][l], G[l][k]);
- for(u32bit j = 0; j != 4; ++j)
- round_key[j] = make_u32bit(B[j][0], B[j][1], B[j][2], B[j][3]);
- }
+ {
+ const byte a = A[l];
+ const byte b = G[l][k];
-/*************************************************
-* Multiply in GF(2^8) *
-*************************************************/
-byte Square::mul(byte a, byte b)
- {
- if(a && b)
- return ALog[(Log[a] + Log[b]) % 255];
- else
- return 0;
+ if(a && b)
+ B[k] ^= ALog[(Log[a] + Log[b]) % 255];
+ }
+
+ round_key[j] = load_be<u32bit>(B.begin(), 0);
+ }
}
/*************************************************