diff options
author | lloyd <[email protected]> | 2007-05-31 03:25:19 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2007-05-31 03:25:19 +0000 |
commit | 55608e7dd1aa593944f967f2549564e4f42b654e (patch) | |
tree | ec2ec03a762a6dac82eb608487d5394370135624 /src/square.cpp | |
parent | 22ecdc45a0efa4c444d0b7010b7cd743aeb68c57 (diff) |
Write functions to handle loading and saving words a block at a time, taking into
account endian differences.
The current code does not take advantage of the knowledge of which endianness
we are running on; an optimization suggested by Yves Jerschow is to use (unsafe)
casts to speed up the load/store operations. This turns out to provide large
performance increases (30% or more) in some cases.
Even without the unsafe casts, this version seems to average a few percent
faster, probably because the longer loading loops have been partially or
fully unrolled.
This also makes the code implementing low-level algorithms like ciphers and
hashes a bit more succinct.
Diffstat (limited to 'src/square.cpp')
-rw-r--r-- | src/square.cpp | 34 |
1 files changed, 16 insertions, 18 deletions
diff --git a/src/square.cpp b/src/square.cpp index 7d7cf1da5..988e56ef5 100644 --- a/src/square.cpp +++ b/src/square.cpp @@ -117,7 +117,7 @@ void Square::key(const byte key[], u32bit) { SecureBuffer<u32bit, 36> XEK, XDK; for(u32bit j = 0; j != 4; ++j) - XEK[j] = make_u32bit(key[4*j], key[4*j+1], key[4*j+2], key[4*j+3]); + XEK[j] = load_be<u32bit>(key, j); for(u32bit j = 0; j != 8; ++j) { XEK[4*j+4] = XEK[4*j ] ^ rotate_left(XEK[4*j+3], 8) ^ (0x01000000 << j); @@ -149,27 +149,25 @@ void Square::transform(u32bit round_key[4]) { 0x03, 0x02, 0x01, 0x01 }, { 0x01, 0x03, 0x02, 0x01 }, { 0x01, 0x01, 0x03, 0x02 } }; - SecureBuffer<byte, 4> A[4], B[4]; - for(u32bit j = 0; j != 4; ++j) - for(u32bit k = 0; k != 4; ++k) - A[j][k] = get_byte(k, round_key[j]); + for(u32bit j = 0; j != 4; ++j) + { + SecureBuffer<byte, 4> A, B; + + store_be(round_key[j], A); + for(u32bit k = 0; k != 4; ++k) for(u32bit l = 0; l != 4; ++l) - B[j][k] ^= mul(A[j][l], G[l][k]); - for(u32bit j = 0; j != 4; ++j) - round_key[j] = make_u32bit(B[j][0], B[j][1], B[j][2], B[j][3]); - } + { + const byte a = A[l]; + const byte b = G[l][k]; -/************************************************* -* Multiply in GF(2^8) * -*************************************************/ -byte Square::mul(byte a, byte b) - { - if(a && b) - return ALog[(Log[a] + Log[b]) % 255]; - else - return 0; + if(a && b) + B[k] ^= ALog[(Log[a] + Log[b]) % 255]; + } + + round_key[j] = load_be<u32bit>(B.begin(), 0); + } } /************************************************* |