From 55608e7dd1aa593944f967f2549564e4f42b654e Mon Sep 17 00:00:00 2001
From: lloyd <lloyd@randombit.net>
Date: Thu, 31 May 2007 03:25:19 +0000
Subject: Write functions to handle loading and saving words a block at a time,
 taking into account endian differences.

The current code does not take advantage of the knowledge of which endianness
we are running on; an optimization suggested by Yves Jerschow is to use (unsafe)
casts to speed up the load/store operations. This turns out to provide large
performance increases (30% or more) in some cases.

Even without the unsafe casts, this version seems to average a few percent
faster, probably because the longer loading loops have been partially or
fully unrolled.

This also makes the code implementing low-level algorithms like ciphers and
hashes a bit more succinct.
---
 src/md4.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/md4.cpp')
diff --git a/src/md4.cpp b/src/md4.cpp
index a0a32b179..b4cc4ce17 100644
--- a/src/md4.cpp
+++ b/src/md4.cpp
@@ -45,7 +45,7 @@ inline void HH(u32bit& A, u32bit B, u32bit C, u32bit D, u32bit M, byte S)
 void MD4::hash(const byte input[])
    {
    for(u32bit j = 0; j != 16; ++j)
-      M[j] = make_u32bit(input[4*j+3], input[4*j+2], input[4*j+1], input[4*j]);
+      M[j] = load_le<u32bit>(input, j);
 
    u32bit A = digest[0], B = digest[1], C = digest[2], D = digest[3];
 
@@ -78,8 +78,8 @@ void MD4::hash(const byte input[])
 *************************************************/
 void MD4::copy_out(byte output[])
    {
-   for(u32bit j = 0; j != OUTPUT_LENGTH; ++j)
-      output[j] = get_byte(3 - (j % 4), digest[j/4]);
+   for(u32bit j = 0; j != OUTPUT_LENGTH; j += 4)
+      store_le(digest[j/4], output + j);
    }
 
 /*************************************************
-- 
cgit v1.2.3