1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
/*
* 64x64->128 bit multiply operation
* (C) 2013 Jack Lloyd
*
* Distributed under the terms of the Botan license
*/
#ifndef BOTAN_UTIL_MUL128_H__
#define BOTAN_UTIL_MUL128_H__
#include <botan/types.h>
namespace Botan {

/*
* Pick a native 128-bit unsigned integer type if the compiler offers one.
* BOTAN_TARGET_HAS_NATIVE_UINT128 is defined exactly when the uint128_t
* typedef below exists.
*/
#if defined(__SIZEOF_INT128__)
   // The compiler reports a size for __int128, so the type can be used directly
   #define BOTAN_TARGET_HAS_NATIVE_UINT128
   typedef unsigned __int128 uint128_t;
#elif (BOTAN_GCC_VERSION > 440) && defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT)
   // GCC newer than 4.4 on a 64-bit CPU: request a TImode integer via attribute
   #define BOTAN_TARGET_HAS_NATIVE_UINT128
   typedef unsigned int uint128_t __attribute__((mode(TI)));
#endif

}

/*
* BOTAN_FAST_64X64_MUL(a, b, lo, hi)
*
* Statement-like macro that multiplies the 64-bit values a and b and
* stores the low and high 64-bit halves of the product through the
* pointers lo and hi. It is left undefined when none of the variants
* below applies, in which case callers must use a portable fallback.
*
* Every macro parameter is parenthesized in the expansion so arbitrary
* expressions (such as x + 1 or out + 1) may be passed. Note that the
* Alpha, IA-64 and PPC64 variants mention a and b twice, so arguments
* with side effects should be avoided.
*/
#if defined(BOTAN_TARGET_HAS_NATIVE_UINT128)

/*
* Native 128-bit arithmetic: widen a, multiply, then split the result.
* The type is fully qualified so the macro also expands correctly outside
* namespace Botan, and the temporary has a reserved-looking name so it
* cannot shadow an argument spelled like it.
*/
#define BOTAN_FAST_64X64_MUL(a,b,lo,hi)                                   \
   do {                                                                   \
      const ::Botan::uint128_t botan_mul128_prod_ =                       \
         static_cast< ::Botan::uint128_t >(a) * (b);                      \
      *(hi) = (botan_mul128_prod_ >> 64) & 0xFFFFFFFFFFFFFFFF;            \
      *(lo) = (botan_mul128_prod_      ) & 0xFFFFFFFFFFFFFFFF;            \
   } while(0)

#elif defined(BOTAN_BUILD_COMPILER_IS_MSVC) && defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT)

#include <intrin.h>
#pragma intrinsic(_umul128)

// MSVC intrinsic: returns the low half and writes the high half through hi
#define BOTAN_FAST_64X64_MUL(a,b,lo,hi)                                   \
   do { *(lo) = _umul128((a), (b), (hi)); } while(0)

#elif defined(BOTAN_USE_GCC_INLINE_ASM)

#if defined(BOTAN_TARGET_ARCH_IS_X86_64)

// Single mulq: a enters via the "a" register, the halves come back in "d" (hi) and "a" (lo)
#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do {                              \
   asm("mulq %3" : "=d" (*(hi)), "=a" (*(lo)) : "a" (a), "rm" (b) : "cc"); \
   } while(0)

#elif defined(BOTAN_TARGET_ARCH_IS_ALPHA)

// umulh supplies the high half; the low half is the ordinary wrapping product
#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do {                              \
   asm("umulh %1,%2,%0" : "=r" (*(hi)) : "r" (a), "r" (b));               \
   *(lo) = (a) * (b);                                                     \
   } while(0)

#elif defined(BOTAN_TARGET_ARCH_IS_IA64)

// xmpy.hu supplies the high half; the low half is the ordinary wrapping product
#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do {                              \
   asm("xmpy.hu %0=%1,%2" : "=f" (*(hi)) : "f" (a), "f" (b));             \
   *(lo) = (a) * (b);                                                     \
   } while(0)

#elif defined(BOTAN_TARGET_ARCH_IS_PPC64)

// mulhdu supplies the high half; the low half is the ordinary wrapping product
#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do {                              \
   asm("mulhdu %0,%1,%2" : "=r" (*(hi)) : "r" (a), "r" (b) : "cc");       \
   *(lo) = (a) * (b);                                                     \
   } while(0)

#endif

#endif
namespace Botan {

/**
* Compute the full 128-bit product of two 64-bit integers
* @param a the first factor
* @param b the second factor
* @param lo receives the low 64 bits of a*b
* @param hi receives the high 64 bits of a*b
*/
inline void mul64x64_128(u64bit a, u64bit b, u64bit* lo, u64bit* hi)
   {
#if !defined(BOTAN_FAST_64X64_MUL)

   /*
   * Portable path: split each operand into 32-bit halves and combine
   * the four 32x32->64 partial products with shifts and adds. Used
   * when no faster 64x64->128 primitive was selected, e.g. on 32-bit
   * CPUs or 64-bit CPUs lacking a widening multiply.
   */
   const size_t HALF_BITS = 32;
   const u64bit LOW_HALF = 0xFFFFFFFF;

   const u64bit a_top = a >> HALF_BITS;
   const u64bit a_bot = a & LOW_HALF;
   const u64bit b_top = b >> HALF_BITS;
   const u64bit b_bot = b & LOW_HALF;

   u64bit top = a_top * b_top;
   const u64bit cross = a_bot * b_top;
   u64bit middle = a_top * b_bot;
   const u64bit bottom = a_bot * b_bot;

   // safe from wraparound: (2^32-1)^2 + 2^32-1 < 2^64-1
   middle += (bottom >> HALF_BITS);

   // this addition may wrap; a wrapped sum is smaller than the addend
   middle += cross;

   if(middle < cross)
      {
      // the lost carry has weight 2^64 in middle, i.e. 2^32 in top
      top += (static_cast<u64bit>(1) << HALF_BITS);
      }

   *hi = top + (middle >> HALF_BITS);
   *lo = (middle << HALF_BITS) | (bottom & LOW_HALF);

#else

   // A compiler- or CPU-specific 64x64->128 multiply is available
   BOTAN_FAST_64X64_MUL(a, b, lo, hi);

#endif
   }

}
#endif
|