diff options
author | Jack Lloyd <[email protected]> | 2018-04-18 10:47:51 -0400 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2018-04-18 10:47:51 -0400 |
commit | b05abd8e76edac867204695ac959fc898e495ffd (patch) | |
tree | bb38d40e3fa44d22776fbe572c3f2e9e6e9e4734 /src/lib/math | |
parent | 812b100b5e6612a8916f03624ea51fbb24e87434 (diff) |
P-192 optimizations
5-7% faster for ECDSA and ECDH
Diffstat (limited to 'src/lib/math')
-rw-r--r-- | src/lib/math/numbertheory/nistp_redc.cpp | 98 |
1 file changed, 64 insertions, 34 deletions
diff --git a/src/lib/math/numbertheory/nistp_redc.cpp b/src/lib/math/numbertheory/nistp_redc.cpp index 63b2447d9..2f2b32163 100644 --- a/src/lib/math/numbertheory/nistp_redc.cpp +++ b/src/lib/math/numbertheory/nistp_redc.cpp @@ -139,60 +139,90 @@ const BigInt& prime_p192() void redc_p192(BigInt& x, secure_vector<word>& ws) { - const uint32_t X6 = get_uint32_t(x, 6); - const uint32_t X7 = get_uint32_t(x, 7); - const uint32_t X8 = get_uint32_t(x, 8); - const uint32_t X9 = get_uint32_t(x, 9); - const uint32_t X10 = get_uint32_t(x, 10); - const uint32_t X11 = get_uint32_t(x, 11); + static const size_t p192_limbs = 192 / BOTAN_MP_WORD_BITS; + + const uint64_t X00 = get_uint32_t(x, 0); + const uint64_t X01 = get_uint32_t(x, 1); + const uint64_t X02 = get_uint32_t(x, 2); + const uint64_t X03 = get_uint32_t(x, 3); + const uint64_t X04 = get_uint32_t(x, 4); + const uint64_t X05 = get_uint32_t(x, 5); + const uint64_t X06 = get_uint32_t(x, 6); + const uint64_t X07 = get_uint32_t(x, 7); + const uint64_t X08 = get_uint32_t(x, 8); + const uint64_t X09 = get_uint32_t(x, 9); + const uint64_t X10 = get_uint32_t(x, 10); + const uint64_t X11 = get_uint32_t(x, 11); + + const uint64_t S0 = X00 + X06 + X10; + const uint64_t S1 = X01 + X07 + X11; + const uint64_t S2 = X02 + X06 + X08 + X10; + const uint64_t S3 = X03 + X07 + X09 + X11; + const uint64_t S4 = X04 + X08 + X10; + const uint64_t S5 = X05 + X09 + X11; x.mask_bits(192); uint64_t S = 0; + uint32_t R0 = 0, R1 = 0; - S += get_uint32_t(x, 0); - S += X6; - S += X10; - set_uint32_t(x, 0, S); + S += S0; + R0 = static_cast<uint32_t>(S); S >>= 32; - S += get_uint32_t(x, 1); - S += X7; - S += X11; - set_uint32_t(x, 1, S); + S += S1; + R1 = static_cast<uint32_t>(S); S >>= 32; - S += get_uint32_t(x, 2); - S += X6; - S += X8; - S += X10; - set_uint32_t(x, 2, S); + set_words(x, 0, R0, R1); + + S += S2; + R0 = static_cast<uint32_t>(S); S >>= 32; - S += get_uint32_t(x, 3); - S += X7; - S += X9; - S += X11; - set_uint32_t(x, 3, S); + S += S3; + 
R1 = static_cast<uint32_t>(S); S >>= 32; - S += get_uint32_t(x, 4); - S += X8; - S += X10; - set_uint32_t(x, 4, S); + set_words(x, 2, R0, R1); + + S += S4; + R0 = static_cast<uint32_t>(S); S >>= 32; - S += get_uint32_t(x, 5); - S += X9; - S += X11; - set_uint32_t(x, 5, S); + S += S5; + R1 = static_cast<uint32_t>(S); S >>= 32; - set_uint32_t(x, 6, S); + set_words(x, 4, R0, R1); // No underflow possible - x.reduce_below(prime_p192(), ws); + BOTAN_ASSERT(S <= 2, "Expected overflow in P-192 reduce"); + + /* + This is a table of (i*P-192) % 2**192 for i in 1...3 + */ + static const word p192_mults[3][p192_limbs] = { +#if (BOTAN_MP_WORD_BITS == 64) + {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF}, + {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFF}, + {0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF}, +#else + {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}, + {0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}, + {0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}, +#endif + }; + + word borrow = bigint_sub2(x.mutable_data(), x.size(), p192_mults[S], p192_limbs); + + BOTAN_ASSERT(borrow == 0 || borrow == 1, "Expected borrow during P-192 reduction"); + + if(borrow) + { + bigint_add2(x.mutable_data(), x.size() - 1, p192_mults[0], p192_limbs); + } } const BigInt& prime_p224() |