Write functions to handle loading and saving words a block at a time, taking into

account endian differences. The current code does not take advantage of the knowledge of which endianness we are running on; an optimization suggested by Yves Jerschow is to use (unsafe) casts to speed up the load/store operations. This turns out to provide large performance increases (30% or more) in some cases. Even without the unsafe casts, this version seems to average a few percent faster, probably because the longer loading loops have been partially or fully unrolled. This also makes the code implementing low-level algorithms like ciphers and hashes a bit more succinct.
author: lloyd <[email protected]> 2007-05-31 03:25:19 +0000
committer: lloyd <[email protected]> 2007-05-31 03:25:19 +0000
commit: 55608e7dd1aa593944f967f2549564e4f42b654e (patch)
tree: ec2ec03a762a6dac82eb608487d5394370135624 /src/idea.cpp
parent: 22ecdc45a0efa4c444d0b7010b7cd743aeb68c57 (diff)
1 files changed, 16 insertions, 13 deletions
diff --git a/src/idea.cpp b/src/idea.cpp
index 1f2facbb8..ed142ca9b 100644
--- a/src/idea.cpp
+++ b/src/idea.cpp
@@ -33,8 +33,10 @@ inline void mul(u16bit& a, u16bit b)
 *************************************************/
 void IDEA::enc(const byte in[], byte out[]) const
    {
-   u16bit X1 = make_u16bit(in[0], in[1]), X2 = make_u16bit(in[2], in[3]),
-          X3 = make_u16bit(in[4], in[5]), X4 = make_u16bit(in[6], in[7]);
+   u16bit X1 = load_be<u16bit>(in, 0);
+   u16bit X2 = load_be<u16bit>(in, 1);
+   u16bit X3 = load_be<u16bit>(in, 2);
+   u16bit X4 = load_be<u16bit>(in, 3);
 
    for(u32bit j = 0; j != 8; ++j)
       {
@@ -57,10 +59,7 @@ void IDEA::enc(const byte in[], byte out[]) const
 
    mul(X1, EK[48]); X2 += EK[50]; X3 += EK[49]; mul(X4, EK[51]);
 
-   out[0] = get_byte(0, X1); out[1] = get_byte(1, X1);
-   out[2] = get_byte(0, X3); out[3] = get_byte(1, X3);
-   out[4] = get_byte(0, X2); out[5] = get_byte(1, X2);
-   out[6] = get_byte(0, X4); out[7] = get_byte(1, X4);
+   store_be(out, X1, X3, X2, X4);
    }
 
 /*************************************************
@@ -68,8 +67,11 @@ void IDEA::enc(const byte in[], byte out[]) const
 *************************************************/
 void IDEA::dec(const byte in[], byte out[]) const
    {
-   u16bit X1 = make_u16bit(in[0], in[1]), X2 = make_u16bit(in[2], in[3]),
-          X3 = make_u16bit(in[4], in[5]), X4 = make_u16bit(in[6], in[7]);
+   u16bit X1 = load_be<u16bit>(in, 0);
+   u16bit X2 = load_be<u16bit>(in, 1);
+   u16bit X3 = load_be<u16bit>(in, 2);
+   u16bit X4 = load_be<u16bit>(in, 3);
+
    for(u32bit j = 0; j != 8; ++j)
       {
       mul(X1, DK[6*j+0]);
@@ -91,10 +93,7 @@ void IDEA::dec(const byte in[], byte out[]) const
 
    mul(X1, DK[48]); X2 += DK[50]; X3 += DK[49]; mul(X4, DK[51]);
 
-   out[0] = get_byte(0, X1); out[1] = get_byte(1, X1);
-   out[2] = get_byte(0, X3); out[3] = get_byte(1, X3);
-   out[4] = get_byte(0, X2); out[5] = get_byte(1, X2);
-   out[6] = get_byte(0, X4); out[7] = get_byte(1, X4);
+   store_be(out, X1, X3, X2, X4);
    }
 
 /*************************************************
@@ -125,17 +124,20 @@ u16bit IDEA::mul_inv(u16bit x)
 void IDEA::key(const byte key[], u32bit)
    {
    for(u32bit j = 0; j != 8; ++j)
-      EK[j] = make_u16bit(key[2*j], key[2*j+1]);
+      EK[j] = load_be<u16bit>(key, j);
+
    for(u32bit j = 1, k = 8, offset = 0; k != 52; j %= 8, ++j, ++k)
       {
       EK[j+7+offset] = (u16bit)((EK[(j     % 8) + offset] << 9) |
                                 (EK[((j+1) % 8) + offset] >> 7));
       offset += (j == 8) ? 8 : 0;
       }
+
    DK[51] = mul_inv(EK[3]);
    DK[50] = (u16bit)-EK[2];
    DK[49] = (u16bit)-EK[1];
    DK[48] = mul_inv(EK[0]);
+
    for(u32bit j = 1, k = 4, counter = 47; j != 8; ++j, k += 6)
       {
       DK[counter--] = EK[k+1];
@@ -145,6 +147,7 @@ void IDEA::key(const byte key[], u32bit)
       DK[counter--] = (u16bit)-EK[k+4];
       DK[counter--] = mul_inv(EK[k+2]);
       }
+
    DK[5] = EK[47];
    DK[4] = EK[46];
    DK[3] = mul_inv(EK[51]);
author	lloyd <[email protected]>	2007-05-31 03:25:19 +0000
committer	lloyd <[email protected]>	2007-05-31 03:25:19 +0000
commit	55608e7dd1aa593944f967f2549564e4f42b654e (patch)
tree	ec2ec03a762a6dac82eb608487d5394370135624 /src/idea.cpp
parent	22ecdc45a0efa4c444d0b7010b7cd743aeb68c57 (diff)