about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author lloyd <[email protected]> 2013-07-30 18:13:00 +0000
committer lloyd <[email protected]> 2013-07-30 18:13:00 +0000
commit 929a271f0c8e1eed79527d0663d75cd371b9841a (patch)
tree c0c4d4027ed04c53e6a425107b1b7fcd2bc04803
parent 1e420da500081dc11d60affc73933e980285d59e (diff)
Add a generic 64x64->128 multiplication op.
Use it to merge mp_msvc64 (was using MSVC _umul128 intrinsic) and mp_asm64 (was using inline asm) into mp_word64, which calls the new mul64x64_128 function. That function wraps any available compiler intrinsics or CPU instructions.
-rwxr-xr-x configure.py 12
-rw-r--r-- src/build-data/arch/alpha.txt 1
-rw-r--r-- src/build-data/arch/ia64.txt 2
-rw-r--r-- src/build-data/arch/mips64.txt 2
-rw-r--r-- src/build-data/arch/ppc64.txt 1
-rw-r--r-- src/build-data/arch/s390x.txt 1
-rw-r--r-- src/build-data/arch/sparc64.txt 1
-rw-r--r-- src/build-data/arch/x86_64.txt 1
-rw-r--r-- src/math/mp/info.txt 2
-rw-r--r-- src/math/mp/mp_asm64/info.txt 24
-rw-r--r-- src/math/mp/mp_asm64/mp_asm.h 120
-rw-r--r-- src/math/mp/mp_msvc64/mp_asm.h 61
-rw-r--r-- src/math/mp/mp_word64/info.txt (renamed from src/math/mp/mp_msvc64/info.txt) 15
-rw-r--r-- src/math/mp/mp_word64/mp_asm.h 57
-rw-r--r-- src/utils/info.txt 1
-rw-r--r-- src/utils/mul128.h 123
16 files changed, 210 insertions, 214 deletions
diff --git a/configure.py b/configure.py
index 338af8766..87ec8aa4a 100755
--- a/configure.py
+++ b/configure.py
@@ -645,13 +645,17 @@ class ArchInfo(object):
['aliases', 'submodels', 'submodel_aliases', 'isa_extensions'],
{ 'endian': None,
'family': None,
- 'unaligned': 'no'
+ 'unaligned': 'no',
+ 'wordsize': None
})
self.submodel_aliases = force_to_dict(self.submodel_aliases)
self.unaligned_ok = (1 if self.unaligned == 'ok' else 0)
+ if self.wordsize is not None:
+ self.wordsize = int(self.wordsize)
+
"""
Return a list of all submodels for this arch, ordered longest
to shortest
@@ -697,6 +701,12 @@ class ArchInfo(object):
if self.family is not None:
macros.append('TARGET_CPU_IS_%s_FAMILY' % (self.family.upper()))
+ if self.wordsize is not None:
+ macros.append('TARGET_CPU_NATIVE_WORD_SIZE %d' % (self.wordsize))
+
+ if self.wordsize == 64:
+ macros.append('TARGET_CPU_HAS_NATIVE_64BIT')
+
macros.append('TARGET_UNALIGNED_MEMORY_ACCESS_OK %d' % (unaligned_ok))
return macros
diff --git a/src/build-data/arch/alpha.txt b/src/build-data/arch/alpha.txt
index 2bf72edef..233691b9e 100644
--- a/src/build-data/arch/alpha.txt
+++ b/src/build-data/arch/alpha.txt
@@ -1,4 +1,5 @@
endian little
+wordsize 64
<aliases>
axp
diff --git a/src/build-data/arch/ia64.txt b/src/build-data/arch/ia64.txt
index 55967d5ab..46b40eff8 100644
--- a/src/build-data/arch/ia64.txt
+++ b/src/build-data/arch/ia64.txt
@@ -1,3 +1,5 @@
+wordsize 64
+
<aliases>
itanium
itanic
diff --git a/src/build-data/arch/mips64.txt b/src/build-data/arch/mips64.txt
index d6f481346..9a56a0334 100644
--- a/src/build-data/arch/mips64.txt
+++ b/src/build-data/arch/mips64.txt
@@ -1,3 +1,5 @@
+wordsize 64
+
<aliases>
mips64el
</aliases>
diff --git a/src/build-data/arch/ppc64.txt b/src/build-data/arch/ppc64.txt
index 07436c19d..fa1dab674 100644
--- a/src/build-data/arch/ppc64.txt
+++ b/src/build-data/arch/ppc64.txt
@@ -1,6 +1,7 @@
endian big
family ppc
+wordsize 64
<aliases>
powerpc64
diff --git a/src/build-data/arch/s390x.txt b/src/build-data/arch/s390x.txt
index 6f4271607..0fec592b4 100644
--- a/src/build-data/arch/s390x.txt
+++ b/src/build-data/arch/s390x.txt
@@ -1,5 +1,6 @@
endian big
unaligned ok
+wordsize 64
<submodels>
s390x
diff --git a/src/build-data/arch/sparc64.txt b/src/build-data/arch/sparc64.txt
index 3a6acd6c3..62dd69be9 100644
--- a/src/build-data/arch/sparc64.txt
+++ b/src/build-data/arch/sparc64.txt
@@ -1,5 +1,6 @@
family sparc
+wordsize 64
<submodels>
ultrasparc
diff --git a/src/build-data/arch/x86_64.txt b/src/build-data/arch/x86_64.txt
index 608249101..e3e6f18e1 100644
--- a/src/build-data/arch/x86_64.txt
+++ b/src/build-data/arch/x86_64.txt
@@ -1,5 +1,6 @@
endian little
unaligned ok
+wordsize 64
family x86
diff --git a/src/math/mp/info.txt b/src/math/mp/info.txt
index bf7f40d3c..531eee4e4 100644
--- a/src/math/mp/info.txt
+++ b/src/math/mp/info.txt
@@ -19,5 +19,5 @@ mp_core.h
</header:internal>
<requires>
-mp_x86_64|mp_msvc64|mp_asm64|mp_x86_32|mp_x86_32_msvc|mp_generic
+mp_x86_64|mp_word64|mp_x86_32|mp_x86_32_msvc|mp_generic
</requires>
diff --git a/src/math/mp/mp_asm64/info.txt b/src/math/mp/mp_asm64/info.txt
deleted file mode 100644
index 9af7c4ae7..000000000
--- a/src/math/mp/mp_asm64/info.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-mp_bits 64
-
-load_on dep
-
-<header:internal>
-mp_asm.h
-mp_generic:mp_asmi.h
-</header:internal>
-
-<arch>
-alpha
-ia64
-mips64
-ppc64
-sparc64
-</arch>
-
-# The inline asm only works with gcc, but it looks like (at least on
-# UltraSPARC), using 64-bit words and the synthesized multiply is a 5 to 25%
-# win, so it's probably worth using elsewhere.
-<cc>
-gcc
-sunstudio
-</cc>
diff --git a/src/math/mp/mp_asm64/mp_asm.h b/src/math/mp/mp_asm64/mp_asm.h
deleted file mode 100644
index 625ea1c4f..000000000
--- a/src/math/mp/mp_asm64/mp_asm.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
-* MPI Multiply-Add Core
-* (C) 1999-2007 Jack Lloyd
-*
-* Distributed under the terms of the Botan license
-*/
-
-#ifndef BOTAN_MP_MADD_H__
-#define BOTAN_MP_MADD_H__
-
-#include <botan/mp_types.h>
-
-namespace Botan {
-
-#if (BOTAN_MP_WORD_BITS != 64)
- #error The mp_asm64 module requires that BOTAN_MP_WORD_BITS == 64
-#endif
-
-#if defined(BOTAN_TARGET_ARCH_IS_ALPHA)
-
-#define BOTAN_WORD_MUL(a,b,z1,z0) do { \
- asm("umulh %1,%2,%0" : "=r" (z0) : "r" (a), "r" (b)); \
- z1 = a * b; \
-} while(0);
-
-#elif defined(BOTAN_TARGET_ARCH_IS_IA64)
-
-#define BOTAN_WORD_MUL(a,b,z1,z0) do { \
- asm("xmpy.hu %0=%1,%2" : "=f" (z0) : "f" (a), "f" (b)); \
- z1 = a * b; \
-} while(0);
-
-#elif defined(BOTAN_TARGET_ARCH_IS_PPC64)
-
-#define BOTAN_WORD_MUL(a,b,z1,z0) do { \
- asm("mulhdu %0,%1,%2" : "=r" (z0) : "r" (a), "r" (b) : "cc"); \
- z1 = a * b; \
-} while(0);
-
-#elif defined(BOTAN_TARGET_ARCH_IS_MIPS64)
-
-#define BOTAN_WORD_MUL(a,b,z1,z0) do { \
- typedef unsigned int uint128_t __attribute__((mode(TI))); \
- uint128_t r = (uint128_t)a * b; \
- z0 = (r >> 64) & 0xFFFFFFFFFFFFFFFF; \
- z1 = (r ) & 0xFFFFFFFFFFFFFFFF; \
-} while(0);
-
-#else
-
-// Do a 64x64->128 multiply using four 64x64->64 multiplies
-// plus some adds and shifts. Last resort for CPUs like UltraSPARC,
-// with 64-bit registers/ALU, but no 64x64->128 multiply.
-inline void bigint_2word_mul(word a, word b, word* z1, word* z0)
- {
- const size_t MP_HWORD_BITS = BOTAN_MP_WORD_BITS / 2;
- const word MP_HWORD_MASK = ((word)1 << MP_HWORD_BITS) - 1;
-
- const word a_hi = (a >> MP_HWORD_BITS);
- const word a_lo = (a & MP_HWORD_MASK);
- const word b_hi = (b >> MP_HWORD_BITS);
- const word b_lo = (b & MP_HWORD_MASK);
-
- word x0 = a_hi * b_hi;
- word x1 = a_lo * b_hi;
- word x2 = a_hi * b_lo;
- word x3 = a_lo * b_lo;
-
- x2 += x3 >> (MP_HWORD_BITS);
- x2 += x1;
-
- if(x2 < x1) // timing channel
- x0 += ((word)1 << MP_HWORD_BITS);
-
- *z0 = x0 + (x2 >> MP_HWORD_BITS);
- *z1 = ((x2 & MP_HWORD_MASK) << MP_HWORD_BITS) + (x3 & MP_HWORD_MASK);
- }
-
-#define BOTAN_WORD_MUL(a,b,z1,z0) bigint_2word_mul(a, b, &z1, &z0)
-
-#endif
-
-/*
-* Word Multiply/Add
-*/
-inline word word_madd2(word a, word b, word* c)
- {
- word z0 = 0, z1 = 0;
-
- BOTAN_WORD_MUL(a, b, z1, z0);
-
- z1 += *c;
- z0 += (z1 < *c);
-
- *c = z0;
- return z1;
- }
-
-/*
-* Word Multiply/Add
-*/
-inline word word_madd3(word a, word b, word c, word* d)
- {
- word z0 = 0, z1 = 0;
-
- BOTAN_WORD_MUL(a, b, z1, z0);
-
- z1 += c;
- z0 += (z1 < c);
-
- z1 += *d;
- z0 += (z1 < *d);
-
- *d = z0;
- return z1;
- }
-
-}
-
-#endif
diff --git a/src/math/mp/mp_msvc64/mp_asm.h b/src/math/mp/mp_msvc64/mp_asm.h
deleted file mode 100644
index 8e4535c35..000000000
--- a/src/math/mp/mp_msvc64/mp_asm.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
-* Multiply-Add for 64-bit MSVC
-* (C) 2010 Jack Lloyd
-*
-* Distributed under the terms of the Botan license
-*/
-
-#ifndef BOTAN_MP_ASM_H__
-#define BOTAN_MP_ASM_H__
-
-#include <botan/mp_types.h>
-#include <intrin.h>
-
-#if (BOTAN_MP_WORD_BITS != 64)
- #error The mp_msvc64 module requires that BOTAN_MP_WORD_BITS == 64
-#endif
-
-#pragma intrinsic(_umul128)
-
-namespace Botan {
-
-extern "C" {
-
-/*
-* Word Multiply
-*/
-inline word word_madd2(word a, word b, word* c)
- {
- word hi, lo;
- lo = _umul128(a, b, &hi);
-
- lo += *c;
- hi += (lo < *c); // carry?
-
- *c = hi;
- return lo;
- }
-
-/*
-* Word Multiply/Add
-*/
-inline word word_madd3(word a, word b, word c, word* d)
- {
- word hi, lo;
- lo = _umul128(a, b, &hi);
-
- lo += c;
- hi += (lo < c); // carry?
-
- lo += *d;
- hi += (lo < *d); // carry?
-
- *d = hi;
- return lo;
- }
-
-}
-
-}
-
-#endif
diff --git a/src/math/mp/mp_msvc64/info.txt b/src/math/mp/mp_word64/info.txt
index fa7d90fed..a12221f4e 100644
--- a/src/math/mp/mp_msvc64/info.txt
+++ b/src/math/mp/mp_word64/info.txt
@@ -1,17 +1,18 @@
-load_on dep
-
mp_bits 64
+load_on dep
+
<header:internal>
mp_asm.h
mp_generic:mp_asmi.h
</header:internal>
<arch>
-x86_64
+alpha
ia64
+mips64
+ppc64
+s390x
+sparc64
+x86_64
</arch>
-
-<cc>
-msvc
-</cc>
diff --git a/src/math/mp/mp_word64/mp_asm.h b/src/math/mp/mp_word64/mp_asm.h
new file mode 100644
index 000000000..76d2bb918
--- /dev/null
+++ b/src/math/mp/mp_word64/mp_asm.h
@@ -0,0 +1,57 @@
+/*
+* MPI Multiply-Add Core
+* (C) 1999-2007 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_MP_MADD_H__
+#define BOTAN_MP_MADD_H__
+
+#include <botan/mp_types.h>
+#include <botan/internal/mul128.h>
+
+namespace Botan {
+
+#if (BOTAN_MP_WORD_BITS != 64)
+ #error The mp_word64 module requires that BOTAN_MP_WORD_BITS == 64
+#endif
+
+/*
+* Word Multiply/Add
+*
+* Computes the double-word value a*b + *c.
+*
+* The product is obtained from mul64x64_128, with z1 receiving the low
+* word and z0 the high word. *c is then added to the low word; if that
+* addition wrapped around, the sum is smaller than *c, so the comparison
+* (z1 < *c) yields the carry that is added to the high word.
+*
+* a, b: the two words to multiply
+* c: on entry the word to add, on return the high word of the result
+* Returns the low word of the result.
+*/
+inline word word_madd2(word a, word b, word* c)
+ {
+ word z0 = 0, z1 = 0;
+
+ mul64x64_128(a, b, &z1, &z0);
+
+ z1 += *c;
+ z0 += (z1 < *c);
+
+ *c = z0;
+ return z1;
+ }
+
+/*
+* Word Multiply/Add with two addends
+*
+* Computes the double-word value a*b + c + *d.
+*
+* The product is obtained from mul64x64_128, with z1 receiving the low
+* word and z0 the high word. c and then *d are added to the low word one
+* after the other; after each addition a wrap-around is detected by
+* comparing the sum with the value just added, and the resulting carry
+* is added to the high word.
+*
+* a, b: the two words to multiply
+* c: first word to add
+* d: on entry the second word to add, on return the high word of the result
+* Returns the low word of the result.
+*/
+inline word word_madd3(word a, word b, word c, word* d)
+ {
+ word z0 = 0, z1 = 0;
+
+ mul64x64_128(a, b, &z1, &z0);
+
+ z1 += c;
+ z0 += (z1 < c);
+
+ z1 += *d;
+ z0 += (z1 < *d);
+
+ *d = z0;
+ return z1;
+ }
+
+}
+
+#endif
diff --git a/src/utils/info.txt b/src/utils/info.txt
index b8e72f42e..1d77b87a7 100644
--- a/src/utils/info.txt
+++ b/src/utils/info.txt
@@ -15,6 +15,7 @@ zero_mem.cpp
<header:internal>
bit_ops.h
+mul128.h
prefetch.h
rounding.h
semaphore.h
diff --git a/src/utils/mul128.h b/src/utils/mul128.h
new file mode 100644
index 000000000..83d6f5aa6
--- /dev/null
+++ b/src/utils/mul128.h
@@ -0,0 +1,123 @@
+/*
+* 64x64->128 bit multiply operation
+* (C) 2013 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_UTIL_MUL128_H__
+#define BOTAN_UTIL_MUL128_H__
+
+#include <botan/types.h>
+
+namespace Botan {
+
+/*
+* Pick a 128-bit unsigned integer type when the compiler offers one and
+* advertise it by defining BOTAN_TARGET_HAS_NATIVE_UINT128.
+*/
+#if defined(__SIZEOF_INT128__)
+   #define BOTAN_TARGET_HAS_NATIVE_UINT128
+   typedef unsigned __int128 uint128_t;
+
+#elif (BOTAN_GCC_VERSION > 440) && defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT)
+   #define BOTAN_TARGET_HAS_NATIVE_UINT128
+   typedef unsigned int uint128_t __attribute__((mode(TI)));
+#endif
+
+}
+
+/*
+* BOTAN_FAST_64X64_MUL(a,b,lo,hi)
+*
+* Stores the low 64 bits of a*b in *lo and the high 64 bits in *hi, where
+* lo and hi are pointers to 64-bit words. The variants are tried in this
+* order: a native 128-bit integer type, the MSVC _umul128 intrinsic, then
+* GCC-style inline asm for x86-64, Alpha, IA-64 and PPC64. The macro is
+* left undefined when none of them applies, so users must test for it
+* with #if defined(BOTAN_FAST_64X64_MUL).
+*
+* Every parameter is parenthesised in the expansion so that expressions
+* may be passed as arguments. Some variants use a and b more than once,
+* so arguments must not have side effects.
+*/
+#if defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
+
+#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) \
+   do { \
+      const Botan::uint128_t r = (Botan::uint128_t)(a) * (b); \
+      *(hi) = (r >> 64) & 0xFFFFFFFFFFFFFFFF; \
+      *(lo) = (r ) & 0xFFFFFFFFFFFFFFFF; \
+   } while(0)
+
+#elif defined(BOTAN_BUILD_COMPILER_IS_MSVC) && defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT)
+
+#include <intrin.h>
+#pragma intrinsic(_umul128)
+
+#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) \
+   do { *(lo) = _umul128((a), (b), (hi)); } while(0)
+
+#elif defined(BOTAN_USE_GCC_INLINE_ASM)
+
+#if defined(BOTAN_TARGET_ARCH_IS_X86_64)
+
+#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do { \
+   asm("mulq %3" : "=d" (*(hi)), "=a" (*(lo)) : "a" (a), "rm" (b) : "cc"); \
+   } while(0)
+
+#elif defined(BOTAN_TARGET_ARCH_IS_ALPHA)
+
+#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do { \
+   asm("umulh %1,%2,%0" : "=r" (*(hi)) : "r" (a), "r" (b)); \
+   *(lo) = (a) * (b); \
+} while(0)
+
+#elif defined(BOTAN_TARGET_ARCH_IS_IA64)
+
+#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do { \
+   asm("xmpy.hu %0=%1,%2" : "=f" (*(hi)) : "f" (a), "f" (b)); \
+   *(lo) = (a) * (b); \
+} while(0)
+
+#elif defined(BOTAN_TARGET_ARCH_IS_PPC64)
+
+#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do { \
+   asm("mulhdu %0,%1,%2" : "=r" (*(hi)) : "r" (a), "r" (b) : "cc"); \
+   *(lo) = (a) * (b); \
+} while(0)
+
+#endif
+
+#endif
+
+namespace Botan {
+
+/**
+* Perform a 64x64->128 bit multiplication
+*
+* Uses BOTAN_FAST_64X64_MUL when that macro is defined, otherwise a
+* portable computation built from four 32x32->64 multiplies.
+*
+* Note the order of the output arguments: low half first, high half second.
+*
+* @param a the first operand
+* @param b the second operand
+* @param lo receives the low 64 bits of a*b
+* @param hi receives the high 64 bits of a*b
+*/
+inline void mul64x64_128(u64bit a, u64bit b, u64bit* lo, u64bit* hi)
+   {
+#if defined(BOTAN_FAST_64X64_MUL)
+   BOTAN_FAST_64X64_MUL(a, b, lo, hi);
+#else
+
+   /*
+   * Do a 64x64->128 multiply using four 32x32->64 multiplies plus
+   * some adds and shifts. Last resort for CPUs like UltraSPARC (with
+   * 64-bit registers/ALU, but no 64x64->128 multiply) or 32-bit CPUs.
+   */
+   const size_t HWORD_BITS = 32;
+   const u32bit HWORD_MASK = 0xFFFFFFFF;
+
+   // split both operands into 32-bit halves
+   const u32bit a_hi = (a >> HWORD_BITS);
+   const u32bit a_lo = (a & HWORD_MASK);
+   const u32bit b_hi = (b >> HWORD_BITS);
+   const u32bit b_lo = (b & HWORD_MASK);
+
+   // the four partial products: x0 is hi*hi, x1 and x2 the cross terms, x3 is lo*lo
+   u64bit x0 = static_cast<u64bit>(a_hi) * b_hi;
+   u64bit x1 = static_cast<u64bit>(a_lo) * b_hi;
+   u64bit x2 = static_cast<u64bit>(a_hi) * b_lo;
+   u64bit x3 = static_cast<u64bit>(a_lo) * b_lo;
+
+   // this cannot overflow as (2^32-1)^2 + 2^32-1 < 2^64-1
+   x2 += x3 >> HWORD_BITS;
+
+   // this one can overflow
+   x2 += x1;
+
+   // propagate the carry if any: a wrapped sum is smaller than x1, and the
+   // lost bit has weight 2^32 relative to x0
+   x0 += static_cast<u64bit>(static_cast<bool>(x2 < x1)) << HWORD_BITS;
+
+   *hi = x0 + (x2 >> HWORD_BITS);
+   *lo = ((x2 & HWORD_MASK) << HWORD_BITS) + (x3 & HWORD_MASK);
+#endif
+   }
+
+}
+
+#endif