From 1518c30f1c90c2d0e5e06731e3dffe21353b34db Mon Sep 17 00:00:00 2001 From: lloyd Date: Sat, 15 Nov 2014 14:35:19 +0000 Subject: Add specialized reduction for P-521 along with 9x9 Comba routines. Roughly 35-50% faster on my laptop (depending on if mlock is enabled, the overhead in that allocator is becoming much more of a hotspot). --- src/lib/math/bigint/bigint.cpp | 11 +- src/lib/math/bigint/bigint.h | 5 + src/lib/math/ec_gfp/curve_gfp.cpp | 32 ++++++ src/lib/math/ec_gfp/curve_gfp.h | 18 +++ src/lib/math/ec_gfp/curve_nistp.cpp | 95 ++++++++++++++++ src/lib/math/ec_gfp/curve_nistp.h | 75 +++++++++++++ src/lib/math/ec_gfp/info.txt | 4 + src/lib/math/ec_gfp/point_gfp.cpp | 8 +- src/lib/math/mp/mp_comba.cpp | 214 +++++++++++++++++++++++++++++++++++- src/lib/math/mp/mp_core.h | 2 + src/lib/math/mp/mp_karat.cpp | 9 ++ 11 files changed, 461 insertions(+), 12 deletions(-) create mode 100644 src/lib/math/ec_gfp/curve_nistp.cpp create mode 100644 src/lib/math/ec_gfp/curve_nistp.h (limited to 'src/lib') diff --git a/src/lib/math/bigint/bigint.cpp b/src/lib/math/bigint/bigint.cpp index 059b019e4..90a319c5a 100644 --- a/src/lib/math/bigint/bigint.cpp +++ b/src/lib/math/bigint/bigint.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace Botan { @@ -208,7 +209,6 @@ void BigInt::clear_bit(size_t n) void BigInt::mask_bits(size_t n) { if(n == 0) { clear(); return; } - if(n >= bits()) return; const size_t top_word = n / MP_WORD_BITS; const word mask = (static_cast(1) << (n % MP_WORD_BITS)) - 1; @@ -237,13 +237,8 @@ size_t BigInt::bits() const if(words == 0) return 0; - size_t full_words = words - 1, top_bits = MP_WORD_BITS; - word top_word = word_at(full_words), mask = MP_WORD_TOP_BIT; - - while(top_bits && ((top_word & mask) == 0)) - { mask >>= 1; top_bits--; } - - return (full_words * MP_WORD_BITS + top_bits); + const size_t full_words = words - 1; + return (full_words * MP_WORD_BITS + high_bit(word_at(full_words))); } /* diff --git a/src/lib/math/bigint/bigint.h b/src/lib/math/bigint/bigint.h index 0d9b43357..2205c7e83 100644 --- a/src/lib/math/bigint/bigint.h +++ b/src/lib/math/bigint/bigint.h @@ -120,6 +120,11 @@ class BOTAN_DLL BigInt std::swap(m_signedness, other.m_signedness); } + void swap_reg(secure_vector& reg) + { + m_reg.swap(reg); + } + /** * += operator * @param y the BigInt to add to this diff --git a/src/lib/math/ec_gfp/curve_gfp.cpp b/src/lib/math/ec_gfp/curve_gfp.cpp index 2fa9f391e..2ee4ae41e 100644 --- a/src/lib/math/ec_gfp/curve_gfp.cpp +++ b/src/lib/math/ec_gfp/curve_gfp.cpp @@ -6,6 +6,7 @@ */ #include +#include #include namespace Botan { @@ -37,6 +38,8 @@ class CurveGFp_Montgomery : public CurveGFp_Repr const BigInt& get_b_rep() const override { return m_b_r; } + size_t get_p_words() const override { return m_p_words; } + void to_curve_rep(BigInt& x, secure_vector& ws) const override; void from_curve_rep(BigInt& x, secure_vector& ws) const override; @@ -113,9 +116,38 @@ void CurveGFp_Montgomery::curve_sqr(BigInt& z, const BigInt& x, } +// Default implementation +void CurveGFp_Repr::normalize(BigInt& x, secure_vector& ws, size_t /*bound*/) const + { + const BigInt& p = get_p(); + + while(x.is_negative()) + x += p; + + const size_t p_words = get_p_words(); + const word* prime = p.data(); + + x.grow_to(p_words + 1); + + if(ws.size() < p_words + 1) + ws.resize(p_words + 1); + + //FIXME: take into account bound if > 0 + while(true) + { + if(bigint_sub3(&ws[0], x.data(), p_words, prime, p_words)) // borrow? + break; + + x.swap_reg(ws); + } + } + std::shared_ptr CurveGFp::choose_repr(const BigInt& p, const BigInt& a, const BigInt& b) { + if(p == CurveGFp_P521::prime()) + return std::shared_ptr(new CurveGFp_P521(a, b)); + return std::shared_ptr(new CurveGFp_Montgomery(p, a, b)); } diff --git a/src/lib/math/ec_gfp/curve_gfp.h b/src/lib/math/ec_gfp/curve_gfp.h index 67fb3d2cf..59639d537 100644 --- a/src/lib/math/ec_gfp/curve_gfp.h +++ b/src/lib/math/ec_gfp/curve_gfp.h @@ -24,6 +24,8 @@ class CurveGFp_Repr virtual const BigInt& get_a() const = 0; virtual const BigInt& get_b() const = 0; + virtual size_t get_p_words() const = 0; + /* * Returns to_curve_rep(get_a()) */ @@ -43,6 +45,10 @@ class CurveGFp_Repr virtual void curve_sqr(BigInt& z, const BigInt& x, secure_vector& ws) const = 0; + + virtual void normalize(BigInt& x, + secure_vector& ws, + size_t bound) const; }; /** @@ -109,6 +115,8 @@ class BOTAN_DLL CurveGFp return xt; } + // TODO: from_rep taking && ref + void mul(BigInt& z, const BigInt& x, const BigInt& y, secure_vector& ws) const { m_repr->curve_mul(z, x, y, ws); @@ -133,6 +141,16 @@ class BOTAN_DLL CurveGFp return z; } + /** + * Adjust x to be in [0,p) + * @param bound if greater than zero, assume that no more than bound + * additions or subtractions are required to move x into range. + */ + void normalize(BigInt& x, secure_vector& ws, size_t bound = 0) const + { + m_repr->normalize(x, ws, bound); + } + void swap(CurveGFp& other) { std::swap(m_repr, other.m_repr); diff --git a/src/lib/math/ec_gfp/curve_nistp.cpp b/src/lib/math/ec_gfp/curve_nistp.cpp new file mode 100644 index 000000000..1aac01f56 --- /dev/null +++ b/src/lib/math/ec_gfp/curve_nistp.cpp @@ -0,0 +1,95 @@ +/* +* NIST curve reduction +* (C) 2014 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#include +#include +#include +#include + +namespace Botan { + +void CurveGFp_NIST::curve_mul(BigInt& z, const BigInt& x, const BigInt& y, + secure_vector& ws) const + { + if(x.is_zero() || y.is_zero()) + { + z = 0; + return; + } + + const size_t p_words = get_p_words(); + const size_t output_size = 2*p_words + 1; + ws.resize(2*(p_words+2)); + + z.grow_to(output_size); + z.clear(); + + bigint_mul(z.mutable_data(), output_size, &ws[0], + x.data(), x.size(), x.sig_words(), + y.data(), y.size(), y.sig_words()); + + this->redc(z, ws); + } + +void CurveGFp_NIST::curve_sqr(BigInt& z, const BigInt& x, + secure_vector& ws) const + { + if(x.is_zero()) + { + z = 0; + return; + } + + const size_t p_words = get_p_words(); + const size_t output_size = 2*p_words + 1; + + ws.resize(2*(p_words+2)); + + z.grow_to(output_size); + z.clear(); + + bigint_sqr(z.mutable_data(), output_size, &ws[0], + x.data(), x.size(), x.sig_words()); + + this->redc(z, ws); + } + +//static +const BigInt& CurveGFp_P521::prime() + { + static const BigInt p521("0x1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"); + + return p521; + } + +void CurveGFp_P521::redc(BigInt& x, secure_vector& ws) const + { + const size_t p_words = get_p_words(); + + const size_t shift_words = 521 / MP_WORD_BITS, + shift_bits = 521 % MP_WORD_BITS; + + const size_t x_sw = x.sig_words(); + + if(x_sw < p_words) + return; // already smaller + + if(ws.size() < p_words + 1) + ws.resize(p_words + 1); + + clear_mem(&ws[0], ws.size()); + bigint_shr2(&ws[0], x.data(), x_sw, shift_words, shift_bits); + + x.mask_bits(521); + + bigint_add3(x.mutable_data(), x.data(), p_words, &ws[0], p_words); + + normalize(x, ws, max_redc_subtractions()); + } + +} diff --git a/src/lib/math/ec_gfp/curve_nistp.h b/src/lib/math/ec_gfp/curve_nistp.h new file mode 100644 index 000000000..ffa32d377 --- /dev/null +++ b/src/lib/math/ec_gfp/curve_nistp.h @@ -0,0 +1,75 @@ +/* +* NIST elliptic curves over GF(p) +* (C) 2014 Jack Lloyd +* +* Distributed under the terms of the Botan license +*/ + +#ifndef BOTAN_GFP_CURVE_NIST_H__ +#define BOTAN_GFP_CURVE_NIST_H__ + +#include +#include + +namespace Botan { + +class CurveGFp_NIST : public CurveGFp_Repr + { + public: + CurveGFp_NIST(size_t p_bits, const BigInt& a, const BigInt& b) : + m_a(a), m_b(b), m_p_words((p_bits + BOTAN_MP_WORD_BITS - 1) / BOTAN_MP_WORD_BITS) + { + } + + size_t get_p_words() const override { return m_p_words; } + + const BigInt& get_a() const override { return m_a; } + + const BigInt& get_b() const override { return m_b; } + + const BigInt& get_a_rep() const override { return m_a; } + + const BigInt& get_b_rep() const override { return m_b; } + + void to_curve_rep(BigInt& x, secure_vector& ws) const override + { redc(x, ws); } + + void from_curve_rep(BigInt& x, secure_vector& ws) const override + { redc(x, ws); } + + void curve_mul(BigInt& z, const BigInt& x, const BigInt& y, + secure_vector& ws) const override; + + void curve_sqr(BigInt& z, const BigInt& x, + secure_vector& ws) const override; + private: + virtual void redc(BigInt& x, secure_vector& ws) const = 0; + + virtual size_t max_redc_subtractions() const = 0; + + // Curve parameters + BigInt m_a, m_b; + size_t m_p_words; // cache of m_p.sig_words() + }; + +/** +* The NIST P-521 curve +*/ +class CurveGFp_P521 : public CurveGFp_NIST + { + public: + CurveGFp_P521(const BigInt& a, const BigInt& b) : CurveGFp_NIST(521, a, b) {} + + static const BigInt& prime(); + + const BigInt& get_p() const override { return CurveGFp_P521::prime(); } + + private: + void redc(BigInt& x, secure_vector& ws) const override; + + size_t max_redc_subtractions() const override { return 1; } + }; + +} + +#endif diff --git a/src/lib/math/ec_gfp/info.txt b/src/lib/math/ec_gfp/info.txt index 2e67f608e..9af40c752 100644 --- a/src/lib/math/ec_gfp/info.txt +++ b/src/lib/math/ec_gfp/info.txt @@ -7,6 +7,10 @@ curve_gfp.h point_gfp.h + +curve_nistp.h + + numbertheory diff --git a/src/lib/math/ec_gfp/point_gfp.cpp b/src/lib/math/ec_gfp/point_gfp.cpp index 3d244d0f0..6bae35e5f 100644 --- a/src/lib/math/ec_gfp/point_gfp.cpp +++ b/src/lib/math/ec_gfp/point_gfp.cpp @@ -479,18 +479,20 @@ BigInt decompress_point(bool yMod2, { BigInt xpow3 = x * x * x; + const BigInt& p = curve.get_p(); + BigInt g = curve.get_a() * x; g += xpow3; g += curve.get_b(); - g = g % curve.get_p(); + g = g % p; - BigInt z = ressol(g, curve.get_p()); + BigInt z = ressol(g, p); if(z < 0) throw Illegal_Point("error during EC point decompression"); if(z.get_bit(0) != yMod2) - z = curve.get_p() - z; + z = p - z; return z; } diff --git a/src/lib/math/mp/mp_comba.cpp b/src/lib/math/mp/mp_comba.cpp index 99dcda176..b56a59291 100644 --- a/src/lib/math/mp/mp_comba.cpp +++ b/src/lib/math/mp/mp_comba.cpp @@ -1,6 +1,6 @@ /* * Comba Multiplication and Squaring -* (C) 1999-2007,2011 Jack Lloyd +* (C) 1999-2007,2011,2014 Jack Lloyd * * Distributed under the terms of the Botan license */ @@ -381,6 +381,218 @@ void bigint_comba_mul8(word z[16], const word x[8], const word y[8]) z[15] = w0; } +/* +* Comba 9x9 Squaring +*/ +void bigint_comba_sqr9(word z[18], const word x[9]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], x[ 0]); + z[ 0] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 1]); + z[ 1] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 2]); + word3_muladd(&w1, &w0, &w2, x[ 1], x[ 1]); + z[ 2] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 3]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 2]); + z[ 3] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 4]); + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 3]); + word3_muladd(&w0, &w2, &w1, x[ 2], x[ 2]); + z[ 4] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 5]); + word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 4]); + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 3]); + z[ 5] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 6]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 5]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 3], x[ 3]); + z[ 6] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 7]); + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 6]); + word3_muladd_2(&w0, &w2, &w1, x[ 2], x[ 5]); + word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 4]); + z[ 7] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 8]); + word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 7]); + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 6]); + word3_muladd_2(&w1, &w0, &w2, x[ 3], x[ 5]); + word3_muladd(&w1, &w0, &w2, x[ 4], x[ 4]); + z[ 8] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 8]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 7]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 6]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 5]); + z[ 9] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 2], x[ 8]); + word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 7]); + word3_muladd_2(&w0, &w2, &w1, x[ 4], x[ 6]); + word3_muladd(&w0, &w2, &w1, x[ 5], x[ 5]); + z[10] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 3], x[ 8]); + word3_muladd_2(&w1, &w0, &w2, x[ 4], x[ 7]); + word3_muladd_2(&w1, &w0, &w2, x[ 5], x[ 6]); + z[11] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 8]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 6], x[ 6]); + z[12] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 5], x[ 8]); + word3_muladd_2(&w0, &w2, &w1, x[ 6], x[ 7]); + z[13] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 6], x[ 8]); + word3_muladd(&w1, &w0, &w2, x[ 7], x[ 7]); + z[14] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 7], x[ 8]); + z[15] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 8], x[ 8]); + z[16] = w1; + z[17] = w2; + } + +/* +* Comba 9x9 Multiplication +*/ +void bigint_comba_mul9(word z[18], const word x[9], const word y[9]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 0]); + z[ 0] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 0]); + z[ 1] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 0]); + z[ 2] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 0]); + z[ 3] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[ 0]); + z[ 4] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[ 0]); + z[ 5] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 0]); + z[ 6] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[ 5]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[ 5], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[ 0]); + z[ 7] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[ 8]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 6]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[ 6], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[ 8], y[ 0]); + z[ 8] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 1]); + z[ 9] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 2], y[ 8]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[ 5], y[ 5]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[ 8], y[ 2]); + z[10] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 3], y[ 8]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[ 6]); + word3_muladd(&w1, &w0, &w2, x[ 6], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[ 8], y[ 3]); + z[11] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 4]); + z[12] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 5], y[ 8]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[ 8], y[ 5]); + z[13] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 6], y[ 8]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[ 8], y[ 6]); + z[14] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 7]); + z[15] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 8], y[ 8]); + z[16] = w1; + z[17] = w2; + } + /* * Comba 16x16 Squaring */ diff --git a/src/lib/math/mp/mp_core.h b/src/lib/math/mp/mp_core.h index c25cb994f..64b29b869 100644 --- a/src/lib/math/mp/mp_core.h +++ b/src/lib/math/mp/mp_core.h @@ -151,11 +151,13 @@ word bigint_modop(word n1, word n0, word d); void bigint_comba_mul4(word z[8], const word x[4], const word y[4]); void bigint_comba_mul6(word z[12], const word x[6], const word y[6]); void bigint_comba_mul8(word z[16], const word x[8], const word y[8]); +void bigint_comba_mul9(word z[18], const word x[9], const word y[9]); void bigint_comba_mul16(word z[32], const word x[16], const word y[16]); void bigint_comba_sqr4(word out[8], const word in[4]); void bigint_comba_sqr6(word out[12], const word in[6]); void bigint_comba_sqr8(word out[16], const word in[8]); +void bigint_comba_sqr9(word out[18], const word in[9]); void bigint_comba_sqr16(word out[32], const word in[16]); } diff --git a/src/lib/math/mp/mp_karat.cpp b/src/lib/math/mp/mp_karat.cpp index b549a05c8..62620f83d 100644 --- a/src/lib/math/mp/mp_karat.cpp +++ b/src/lib/math/mp/mp_karat.cpp @@ -237,6 +237,11 @@ void bigint_mul(word z[], size_t z_size, word workspace[], { bigint_comba_mul8(z, x, y); } + else if(x_sw <= 9 && x_size >= 9 && + y_sw <= 9 && y_size >= 9 && z_size >= 18) + { + bigint_comba_mul9(z, x, y); + } else if(x_sw <= 16 && x_size >= 16 && y_sw <= 16 && y_size >= 16 && z_size >= 32) { @@ -281,6 +286,10 @@ void bigint_sqr(word z[], size_t z_size, word workspace[], { bigint_comba_sqr8(z, x); } + else if(x_sw == 9 && x_size >= 9 && z_size >= 18) + { + bigint_comba_sqr9(z, x); + } else if(x_sw <= 16 && x_size >= 16 && z_size >= 32) { bigint_comba_sqr16(z, x); -- cgit v1.2.3