aboutsummaryrefslogtreecommitdiffstats
path: root/src/utils/mul128.h
diff options
context:
space:
mode:
authorlloyd <[email protected]>2013-07-30 18:13:00 +0000
committerlloyd <[email protected]>2013-07-30 18:13:00 +0000
commit929a271f0c8e1eed79527d0663d75cd371b9841a (patch)
treec0c4d4027ed04c53e6a425107b1b7fcd2bc04803 /src/utils/mul128.h
parent1e420da500081dc11d60affc73933e980285d59e (diff)
Add a generic 64x64->128 multiplication op.
Use it to merge mp_msvc64 (was using MSVC _umul128 intrinsic) and mp_asm64 (was using inline asm) into mp_word64, which calls the new mul64x64_128 function. That function wraps any available compiler intrinsics or CPU instructions.
Diffstat (limited to 'src/utils/mul128.h')
-rw-r--r--src/utils/mul128.h123
1 files changed, 123 insertions, 0 deletions
diff --git a/src/utils/mul128.h b/src/utils/mul128.h
new file mode 100644
index 000000000..83d6f5aa6
--- /dev/null
+++ b/src/utils/mul128.h
@@ -0,0 +1,123 @@
+/*
+* 64x64->128 bit multiply operation
+* (C) 2013 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_UTIL_MUL128_H__
+#define BOTAN_UTIL_MUL128_H__
+
+#include <botan/types.h>
+
+namespace Botan {
+
+#if defined(__SIZEOF_INT128__) // preferred path: the compiler advertises a built-in __int128
+ #define BOTAN_TARGET_HAS_NATIVE_UINT128 // tested below to select the 128-bit-integer multiply
+ typedef unsigned __int128 uint128_t; // unsigned 128-bit integer via the __int128 extension
+
+#elif (BOTAN_GCC_VERSION > 440) && defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT) // presumably 440 means GCC 4.4.0 - confirm against the build config
+ #define BOTAN_TARGET_HAS_NATIVE_UINT128 // same flag as the branch above
+ typedef unsigned int uint128_t __attribute__((mode(TI))); // GCC machine-mode attribute: TI requests a 128-bit integer
+#endif // if neither branch is taken, uint128_t is not declared at all
+
+}
+
+/*
+* BOTAN_FAST_64X64_MUL(a, b, lo, hi)
+*
+* Multiply the 64-bit values a and b, storing the low 64 bits of the
+* product in *lo and the high 64 bits in *hi. The macro is only defined
+* when one of the branches below applies; users must test for it with
+* #if defined(BOTAN_FAST_64X64_MUL) and otherwise fall back to portable
+* code.
+*
+* All macro arguments are parenthesized on expansion so that expression
+* arguments (for example ptr + 1) keep their intended meaning.
+*/
+#if defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
+
+/*
+* Widen to the compiler's 128-bit integer and split the result. The
+* type is namespace-qualified so the macro also expands correctly
+* outside of namespace Botan, and the temporary has a reserved-looking
+* name so it cannot shadow a caller argument.
+*/
+#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) \
+   do { \
+      const Botan::uint128_t botan_mul128_r_ = (Botan::uint128_t)(a) * (b); \
+      *(hi) = (botan_mul128_r_ >> 64) & 0xFFFFFFFFFFFFFFFF; \
+      *(lo) = (botan_mul128_r_      ) & 0xFFFFFFFFFFFFFFFF; \
+   } while(0)
+
+#elif defined(BOTAN_BUILD_COMPILER_IS_MSVC) && defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT)
+
+#include <intrin.h>
+#pragma intrinsic(_umul128)
+
+/* _umul128 returns the low half and writes the high half through hi */
+#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) \
+   do { *(lo) = _umul128((a), (b), (hi)); } while(0)
+
+#elif defined(BOTAN_USE_GCC_INLINE_ASM)
+
+#if defined(BOTAN_TARGET_ARCH_IS_X86_64)
+
+/* mulq multiplies rax by the operand and leaves the product in rdx:rax */
+#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do { \
+   asm("mulq %3" : "=d" (*(hi)), "=a" (*(lo)) : "a" (a), "rm" (b) : "cc"); \
+   } while(0)
+
+#elif defined(BOTAN_TARGET_ARCH_IS_ALPHA)
+
+/* umulh yields the high half; the low half is an ordinary 64-bit multiply */
+#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do { \
+   asm("umulh %1,%2,%0" : "=r" (*(hi)) : "r" (a), "r" (b)); \
+   *(lo) = (a) * (b); \
+} while(0)
+
+#elif defined(BOTAN_TARGET_ARCH_IS_IA64)
+
+/* xmpy.hu yields the high half; the low half is an ordinary 64-bit multiply */
+#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do { \
+   asm("xmpy.hu %0=%1,%2" : "=f" (*(hi)) : "f" (a), "f" (b)); \
+   *(lo) = (a) * (b); \
+} while(0)
+
+#elif defined(BOTAN_TARGET_ARCH_IS_PPC64)
+
+/* mulhdu yields the high half; the low half is an ordinary 64-bit multiply */
+#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do { \
+   asm("mulhdu %0,%1,%2" : "=r" (*(hi)) : "r" (a), "r" (b) : "cc"); \
+   *(lo) = (a) * (b); \
+} while(0)
+
+#endif
+
+#endif
+
+namespace Botan {
+
+/**
+* Perform a 64x64->128 bit multiplication
+*
+* Uses BOTAN_FAST_64X64_MUL when that macro is defined, and otherwise a
+* portable computation built from 32x32->64 bit multiplies.
+*
+* @param a the first factor
+* @param b the second factor
+* @param lo receives the low 64 bits of a*b; dereferenced unconditionally,
+*        so it must not be null
+* @param hi receives the high 64 bits of a*b; dereferenced unconditionally,
+*        so it must not be null
+*/
+inline void mul64x64_128(u64bit a, u64bit b, u64bit* lo, u64bit* hi)
+   {
+#if defined(BOTAN_FAST_64X64_MUL)
+   BOTAN_FAST_64X64_MUL(a, b, lo, hi);
+#else
+
+   /*
+   * Do a 64x64->128 multiply using four 32x32->64 multiplies plus
+   * some adds and shifts. Last resort for CPUs like UltraSPARC (with
+   * 64-bit registers/ALU, but no 64x64->128 multiply) or 32-bit CPUs.
+   */
+   const size_t HWORD_BITS = 32;
+   const u32bit HWORD_MASK = 0xFFFFFFFF;
+
+   // Split both inputs into 32-bit halves
+   const u32bit a_hi = static_cast<u32bit>(a >> HWORD_BITS);
+   const u32bit a_lo = static_cast<u32bit>(a & HWORD_MASK);
+   const u32bit b_hi = static_cast<u32bit>(b >> HWORD_BITS);
+   const u32bit b_lo = static_cast<u32bit>(b & HWORD_MASK);
+
+   // Partial products: a*b = x0*2^64 + (x1 + x2)*2^32 + x3
+   u64bit x0 = static_cast<u64bit>(a_hi) * b_hi;
+   u64bit x1 = static_cast<u64bit>(a_lo) * b_hi;
+   u64bit x2 = static_cast<u64bit>(a_hi) * b_lo;
+   u64bit x3 = static_cast<u64bit>(a_lo) * b_lo;
+
+   // this cannot overflow as (2^32-1)^2 + 2^32-1 < 2^64-1
+   x2 += x3 >> HWORD_BITS;
+
+   // this one can overflow
+   x2 += x1;
+
+   // A wrapped sum is smaller than either addend. The lost carry is
+   // worth 2^64 in the middle column, i.e. 2^32 in the high word.
+   if(x2 < x1)
+      x0 += static_cast<u64bit>(1) << HWORD_BITS;
+
+   *hi = x0 + (x2 >> HWORD_BITS);
+   *lo = ((x2 & HWORD_MASK) << HWORD_BITS) + (x3 & HWORD_MASK);
+#endif
+   }
+
+}
+
+#endif