aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorlloyd <[email protected]>2010-04-30 16:25:01 +0000
committerlloyd <[email protected]>2010-04-30 16:25:01 +0000
commit1e10b45b171fde455d32ed34a3aafa0bf90f3b4e (patch)
tree8444300de580a32744ba32aec5a9e77c5d66e607 /src
parent18d5d5fa3f58d2ecd15ac130eda909f44d9c6f71 (diff)
Modify the implementation of multiplication mod 65537 used in IDEA to
be branch-free. This reduces performance noticeably on my Core2 (from 32 MiB/s to a bit over 27 MiB/s), but so it goes. The IDEA implementation using SSE2 is already branch-free here, and runs at about 135 MiB/s on my machine. Also add more IDEA tests, generated by OpenSSL.
Diffstat (limited to 'src')
-rw-r--r--src/block/idea/idea.cpp23
1 files changed, 13 insertions, 10 deletions
diff --git a/src/block/idea/idea.cpp b/src/block/idea/idea.cpp
index 15ff7c0ec..0c5dfed42 100644
--- a/src/block/idea/idea.cpp
+++ b/src/block/idea/idea.cpp
@@ -1,6 +1,6 @@
/*
* IDEA
-* (C) 1999-2007 Jack Lloyd
+* (C) 1999-2010 Jack Lloyd
*
* Distributed under the terms of the Botan license
*/
@@ -17,15 +17,18 @@ namespace {
*/
inline u16bit mul(u16bit x, u16bit y)
{
- if(x && y)
- {
- u32bit T = static_cast<u32bit>(x) * y;
- x = static_cast<u16bit>(T >> 16);
- y = static_cast<u16bit>(T & 0xFFFF);
- return static_cast<u16bit>(y - x + ((y < x) ? 1 : 0));
- }
- else
- return static_cast<u16bit>(1 - x - y);
+ const u32bit P = static_cast<u32bit>(x) * y;
+
+ // P ? 0xFFFF : 0
+ const u16bit P_mask = !P - 1;
+
+ const u32bit P_hi = P >> 16;
+ const u32bit P_lo = P & 0xFFFF;
+
+ const u16bit r_1 = (P_lo - P_hi) + (P_lo < P_hi);
+ const u16bit r_2 = 1 - x - y;
+
+ return (r_1 & P_mask) | (r_2 & ~P_mask);
}
/*