diff options
author | lloyd <[email protected]> | 2010-04-30 16:25:01 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2010-04-30 16:25:01 +0000 |
commit | 1e10b45b171fde455d32ed34a3aafa0bf90f3b4e (patch) | |
tree | 8444300de580a32744ba32aec5a9e77c5d66e607 /src/block | |
parent | 18d5d5fa3f58d2ecd15ac130eda909f44d9c6f71 (diff) |
Modify the implementation of multiplication mod 65537 used in IDEA to
be branch-free. This reduces performance noticeably on my Core2 (from
32 MiB/s to a bit over 27 MiB/s), but so it goes.
The IDEA implementation using SSE2 is already branch-free here, and
runs at about 135 MiB/s on my machine.
Also add more IDEA tests, generated by OpenSSL.
Diffstat (limited to 'src/block')
-rw-r--r-- | src/block/idea/idea.cpp | 23 |
1 files changed, 13 insertions, 10 deletions
diff --git a/src/block/idea/idea.cpp b/src/block/idea/idea.cpp index 15ff7c0ec..0c5dfed42 100644 --- a/src/block/idea/idea.cpp +++ b/src/block/idea/idea.cpp @@ -1,6 +1,6 @@ /* * IDEA -* (C) 1999-2007 Jack Lloyd +* (C) 1999-2010 Jack Lloyd * * Distributed under the terms of the Botan license */ @@ -17,15 +17,18 @@ namespace { */ inline u16bit mul(u16bit x, u16bit y) { - if(x && y) - { - u32bit T = static_cast<u32bit>(x) * y; - x = static_cast<u16bit>(T >> 16); - y = static_cast<u16bit>(T & 0xFFFF); - return static_cast<u16bit>(y - x + ((y < x) ? 1 : 0)); - } - else - return static_cast<u16bit>(1 - x - y); + const u32bit P = static_cast<u32bit>(x) * y; + + // P ? 0xFFFF : 0 + const u16bit P_mask = !P - 1; + + const u32bit P_hi = P >> 16; + const u32bit P_lo = P & 0xFFFF; + + const u16bit r_1 = (P_lo - P_hi) + (P_lo < P_hi); + const u16bit r_2 = 1 - x - y; + + return (r_1 & P_mask) | (r_2 & ~P_mask); } /* |