aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib/math
diff options
context:
space:
mode:
authorlloyd <[email protected]>2014-11-15 14:35:19 +0000
committerlloyd <[email protected]>2014-11-15 14:35:19 +0000
commit1518c30f1c90c2d0e5e06731e3dffe21353b34db (patch)
treec2f819f2a2011a7af6052ede3b32638412b546d0 /src/lib/math
parent17349a1fc49d604f8160f2077538fdf397b702c6 (diff)
Add specialized reduction for P-521 along with 9x9 Comba routines.
Roughly 35-50% faster on my laptop (depending on if mlock is enabled, the overhead in that allocator is becoming much more of a hotspot).
Diffstat (limited to 'src/lib/math')
-rw-r--r--src/lib/math/bigint/bigint.cpp11
-rw-r--r--src/lib/math/bigint/bigint.h5
-rw-r--r--src/lib/math/ec_gfp/curve_gfp.cpp32
-rw-r--r--src/lib/math/ec_gfp/curve_gfp.h18
-rw-r--r--src/lib/math/ec_gfp/curve_nistp.cpp95
-rw-r--r--src/lib/math/ec_gfp/curve_nistp.h75
-rw-r--r--src/lib/math/ec_gfp/info.txt4
-rw-r--r--src/lib/math/ec_gfp/point_gfp.cpp8
-rw-r--r--src/lib/math/mp/mp_comba.cpp214
-rw-r--r--src/lib/math/mp/mp_core.h2
-rw-r--r--src/lib/math/mp/mp_karat.cpp9
11 files changed, 461 insertions, 12 deletions
diff --git a/src/lib/math/bigint/bigint.cpp b/src/lib/math/bigint/bigint.cpp
index 059b019e4..90a319c5a 100644
--- a/src/lib/math/bigint/bigint.cpp
+++ b/src/lib/math/bigint/bigint.cpp
@@ -10,6 +10,7 @@
#include <botan/get_byte.h>
#include <botan/parsing.h>
#include <botan/internal/rounding.h>
+#include <botan/internal/bit_ops.h>
namespace Botan {
@@ -208,7 +209,6 @@ void BigInt::clear_bit(size_t n)
void BigInt::mask_bits(size_t n)
{
if(n == 0) { clear(); return; }
- if(n >= bits()) return;
const size_t top_word = n / MP_WORD_BITS;
const word mask = (static_cast<word>(1) << (n % MP_WORD_BITS)) - 1;
@@ -237,13 +237,8 @@ size_t BigInt::bits() const
if(words == 0)
return 0;
- size_t full_words = words - 1, top_bits = MP_WORD_BITS;
- word top_word = word_at(full_words), mask = MP_WORD_TOP_BIT;
-
- while(top_bits && ((top_word & mask) == 0))
- { mask >>= 1; top_bits--; }
-
- return (full_words * MP_WORD_BITS + top_bits);
+ const size_t full_words = words - 1;
+ return (full_words * MP_WORD_BITS + high_bit(word_at(full_words)));
}
/*
diff --git a/src/lib/math/bigint/bigint.h b/src/lib/math/bigint/bigint.h
index 0d9b43357..2205c7e83 100644
--- a/src/lib/math/bigint/bigint.h
+++ b/src/lib/math/bigint/bigint.h
@@ -120,6 +120,11 @@ class BOTAN_DLL BigInt
std::swap(m_signedness, other.m_signedness);
}
+ void swap_reg(secure_vector<word>& reg)
+ {
+ m_reg.swap(reg);
+ }
+
/**
* += operator
* @param y the BigInt to add to this
diff --git a/src/lib/math/ec_gfp/curve_gfp.cpp b/src/lib/math/ec_gfp/curve_gfp.cpp
index 2fa9f391e..2ee4ae41e 100644
--- a/src/lib/math/ec_gfp/curve_gfp.cpp
+++ b/src/lib/math/ec_gfp/curve_gfp.cpp
@@ -6,6 +6,7 @@
*/
#include <botan/curve_gfp.h>
+#include <botan/internal/curve_nistp.h>
#include <botan/internal/mp_core.h>
namespace Botan {
@@ -37,6 +38,8 @@ class CurveGFp_Montgomery : public CurveGFp_Repr
const BigInt& get_b_rep() const override { return m_b_r; }
+ size_t get_p_words() const override { return m_p_words; }
+
void to_curve_rep(BigInt& x, secure_vector<word>& ws) const override;
void from_curve_rep(BigInt& x, secure_vector<word>& ws) const override;
@@ -113,9 +116,38 @@ void CurveGFp_Montgomery::curve_sqr(BigInt& z, const BigInt& x,
}
+// Default implementation
+void CurveGFp_Repr::normalize(BigInt& x, secure_vector<word>& ws, size_t /*bound*/) const
+ {
+ const BigInt& p = get_p();
+
+ while(x.is_negative())
+ x += p;
+
+ const size_t p_words = get_p_words();
+ const word* prime = p.data();
+
+ x.grow_to(p_words + 1);
+
+ if(ws.size() < p_words + 1)
+ ws.resize(p_words + 1);
+
+ //FIXME: take into account bound if > 0
+ while(true)
+ {
+ if(bigint_sub3(&ws[0], x.data(), p_words, prime, p_words)) // borrow?
+ break;
+
+ x.swap_reg(ws);
+ }
+ }
+
std::shared_ptr<CurveGFp_Repr>
CurveGFp::choose_repr(const BigInt& p, const BigInt& a, const BigInt& b)
{
+ if(p == CurveGFp_P521::prime())
+ return std::shared_ptr<CurveGFp_Repr>(new CurveGFp_P521(a, b));
+
return std::shared_ptr<CurveGFp_Repr>(new CurveGFp_Montgomery(p, a, b));
}
diff --git a/src/lib/math/ec_gfp/curve_gfp.h b/src/lib/math/ec_gfp/curve_gfp.h
index 67fb3d2cf..59639d537 100644
--- a/src/lib/math/ec_gfp/curve_gfp.h
+++ b/src/lib/math/ec_gfp/curve_gfp.h
@@ -24,6 +24,8 @@ class CurveGFp_Repr
virtual const BigInt& get_a() const = 0;
virtual const BigInt& get_b() const = 0;
+ virtual size_t get_p_words() const = 0;
+
/*
* Returns to_curve_rep(get_a())
*/
@@ -43,6 +45,10 @@ class CurveGFp_Repr
virtual void curve_sqr(BigInt& z, const BigInt& x,
secure_vector<word>& ws) const = 0;
+
+ virtual void normalize(BigInt& x,
+ secure_vector<word>& ws,
+ size_t bound) const;
};
/**
@@ -109,6 +115,8 @@ class BOTAN_DLL CurveGFp
return xt;
}
+ // TODO: from_rep taking && ref
+
void mul(BigInt& z, const BigInt& x, const BigInt& y, secure_vector<word>& ws) const
{
m_repr->curve_mul(z, x, y, ws);
@@ -133,6 +141,16 @@ class BOTAN_DLL CurveGFp
return z;
}
+ /**
+ * Adjust x to be in [0,p)
+ * @param bound if greater than zero, assume that no more than bound
+ * additions or subtractions are required to move x into range.
+ */
+ void normalize(BigInt& x, secure_vector<word>& ws, size_t bound = 0) const
+ {
+ m_repr->normalize(x, ws, bound);
+ }
+
void swap(CurveGFp& other)
{
std::swap(m_repr, other.m_repr);
diff --git a/src/lib/math/ec_gfp/curve_nistp.cpp b/src/lib/math/ec_gfp/curve_nistp.cpp
new file mode 100644
index 000000000..1aac01f56
--- /dev/null
+++ b/src/lib/math/ec_gfp/curve_nistp.cpp
@@ -0,0 +1,95 @@
+/*
+* NIST curve reduction
+* (C) 2014 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#include <botan/internal/curve_nistp.h>
+#include <botan/internal/mp_core.h>
+#include <botan/internal/rounding.h>
+#include <botan/hex.h>
+
+namespace Botan {
+
+void CurveGFp_NIST::curve_mul(BigInt& z, const BigInt& x, const BigInt& y,
+ secure_vector<word>& ws) const
+ {
+ if(x.is_zero() || y.is_zero())
+ {
+ z = 0;
+ return;
+ }
+
+ const size_t p_words = get_p_words();
+ const size_t output_size = 2*p_words + 1;
+ ws.resize(2*(p_words+2));
+
+ z.grow_to(output_size);
+ z.clear();
+
+ bigint_mul(z.mutable_data(), output_size, &ws[0],
+ x.data(), x.size(), x.sig_words(),
+ y.data(), y.size(), y.sig_words());
+
+ this->redc(z, ws);
+ }
+
+void CurveGFp_NIST::curve_sqr(BigInt& z, const BigInt& x,
+ secure_vector<word>& ws) const
+ {
+ if(x.is_zero())
+ {
+ z = 0;
+ return;
+ }
+
+ const size_t p_words = get_p_words();
+ const size_t output_size = 2*p_words + 1;
+
+ ws.resize(2*(p_words+2));
+
+ z.grow_to(output_size);
+ z.clear();
+
+ bigint_sqr(z.mutable_data(), output_size, &ws[0],
+ x.data(), x.size(), x.sig_words());
+
+ this->redc(z, ws);
+ }
+
+//static
+const BigInt& CurveGFp_P521::prime()
+ {
+ static const BigInt p521("0x1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF");
+
+ return p521;
+ }
+
+void CurveGFp_P521::redc(BigInt& x, secure_vector<word>& ws) const
+ {
+ const size_t p_words = get_p_words();
+
+ const size_t shift_words = 521 / MP_WORD_BITS,
+ shift_bits = 521 % MP_WORD_BITS;
+
+ const size_t x_sw = x.sig_words();
+
+ if(x_sw < p_words)
+ return; // already smaller
+
+ if(ws.size() < p_words + 1)
+ ws.resize(p_words + 1);
+
+ clear_mem(&ws[0], ws.size());
+ bigint_shr2(&ws[0], x.data(), x_sw, shift_words, shift_bits);
+
+ x.mask_bits(521);
+
+ bigint_add3(x.mutable_data(), x.data(), p_words, &ws[0], p_words);
+
+ normalize(x, ws, max_redc_subtractions());
+ }
+
+}
diff --git a/src/lib/math/ec_gfp/curve_nistp.h b/src/lib/math/ec_gfp/curve_nistp.h
new file mode 100644
index 000000000..ffa32d377
--- /dev/null
+++ b/src/lib/math/ec_gfp/curve_nistp.h
@@ -0,0 +1,75 @@
+/*
+* NIST elliptic curves over GF(p)
+* (C) 2014 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_GFP_CURVE_NIST_H__
+#define BOTAN_GFP_CURVE_NIST_H__
+
+#include <botan/curve_gfp.h>
+#include <memory>
+
+namespace Botan {
+
+class CurveGFp_NIST : public CurveGFp_Repr
+ {
+ public:
+ CurveGFp_NIST(size_t p_bits, const BigInt& a, const BigInt& b) :
+ m_a(a), m_b(b), m_p_words((p_bits + BOTAN_MP_WORD_BITS - 1) / BOTAN_MP_WORD_BITS)
+ {
+ }
+
+ size_t get_p_words() const override { return m_p_words; }
+
+ const BigInt& get_a() const override { return m_a; }
+
+ const BigInt& get_b() const override { return m_b; }
+
+ const BigInt& get_a_rep() const override { return m_a; }
+
+ const BigInt& get_b_rep() const override { return m_b; }
+
+ void to_curve_rep(BigInt& x, secure_vector<word>& ws) const override
+ { redc(x, ws); }
+
+ void from_curve_rep(BigInt& x, secure_vector<word>& ws) const override
+ { redc(x, ws); }
+
+ void curve_mul(BigInt& z, const BigInt& x, const BigInt& y,
+ secure_vector<word>& ws) const override;
+
+ void curve_sqr(BigInt& z, const BigInt& x,
+ secure_vector<word>& ws) const override;
+ private:
+ virtual void redc(BigInt& x, secure_vector<word>& ws) const = 0;
+
+ virtual size_t max_redc_subtractions() const = 0;
+
+ // Curve parameters
+ BigInt m_a, m_b;
+ size_t m_p_words; // cache of m_p.sig_words()
+ };
+
+/**
+* The NIST P-521 curve
+*/
+class CurveGFp_P521 : public CurveGFp_NIST
+ {
+ public:
+ CurveGFp_P521(const BigInt& a, const BigInt& b) : CurveGFp_NIST(521, a, b) {}
+
+ static const BigInt& prime();
+
+ const BigInt& get_p() const override { return CurveGFp_P521::prime(); }
+
+ private:
+ void redc(BigInt& x, secure_vector<word>& ws) const override;
+
+ size_t max_redc_subtractions() const override { return 1; }
+ };
+
+}
+
+#endif
diff --git a/src/lib/math/ec_gfp/info.txt b/src/lib/math/ec_gfp/info.txt
index 2e67f608e..9af40c752 100644
--- a/src/lib/math/ec_gfp/info.txt
+++ b/src/lib/math/ec_gfp/info.txt
@@ -7,6 +7,10 @@ curve_gfp.h
point_gfp.h
</header:public>
+<header:internal>
+curve_nistp.h
+</header:internal>
+
<requires>
numbertheory
</requires>
diff --git a/src/lib/math/ec_gfp/point_gfp.cpp b/src/lib/math/ec_gfp/point_gfp.cpp
index 3d244d0f0..6bae35e5f 100644
--- a/src/lib/math/ec_gfp/point_gfp.cpp
+++ b/src/lib/math/ec_gfp/point_gfp.cpp
@@ -479,18 +479,20 @@ BigInt decompress_point(bool yMod2,
{
BigInt xpow3 = x * x * x;
+ const BigInt& p = curve.get_p();
+
BigInt g = curve.get_a() * x;
g += xpow3;
g += curve.get_b();
- g = g % curve.get_p();
+ g = g % p;
- BigInt z = ressol(g, curve.get_p());
+ BigInt z = ressol(g, p);
if(z < 0)
throw Illegal_Point("error during EC point decompression");
if(z.get_bit(0) != yMod2)
- z = curve.get_p() - z;
+ z = p - z;
return z;
}
diff --git a/src/lib/math/mp/mp_comba.cpp b/src/lib/math/mp/mp_comba.cpp
index 99dcda176..b56a59291 100644
--- a/src/lib/math/mp/mp_comba.cpp
+++ b/src/lib/math/mp/mp_comba.cpp
@@ -1,6 +1,6 @@
/*
* Comba Multiplication and Squaring
-* (C) 1999-2007,2011 Jack Lloyd
+* (C) 1999-2007,2011,2014 Jack Lloyd
*
* Distributed under the terms of the Botan license
*/
@@ -382,6 +382,218 @@ void bigint_comba_mul8(word z[16], const word x[8], const word y[8])
}
/*
+* Comba 9x9 Squaring
+*/
+void bigint_comba_sqr9(word z[18], const word x[9])
+ {
+ word w2 = 0, w1 = 0, w0 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[ 0], x[ 0]);
+ z[ 0] = w0; w0 = 0;
+
+ word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 1]);
+ z[ 1] = w1; w1 = 0;
+
+ word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 2]);
+ word3_muladd(&w1, &w0, &w2, x[ 1], x[ 1]);
+ z[ 2] = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 3]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 2]);
+ z[ 3] = w0; w0 = 0;
+
+ word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 4]);
+ word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 3]);
+ word3_muladd(&w0, &w2, &w1, x[ 2], x[ 2]);
+ z[ 4] = w1; w1 = 0;
+
+ word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 5]);
+ word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 4]);
+ word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 3]);
+ z[ 5] = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 6]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 5]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 4]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], x[ 3]);
+ z[ 6] = w0; w0 = 0;
+
+ word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 7]);
+ word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 6]);
+ word3_muladd_2(&w0, &w2, &w1, x[ 2], x[ 5]);
+ word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 4]);
+ z[ 7] = w1; w1 = 0;
+
+ word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 8]);
+ word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 7]);
+ word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 6]);
+ word3_muladd_2(&w1, &w0, &w2, x[ 3], x[ 5]);
+ word3_muladd(&w1, &w0, &w2, x[ 4], x[ 4]);
+ z[ 8] = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 8]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 7]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 6]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 5]);
+ z[ 9] = w0; w0 = 0;
+
+ word3_muladd_2(&w0, &w2, &w1, x[ 2], x[ 8]);
+ word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 7]);
+ word3_muladd_2(&w0, &w2, &w1, x[ 4], x[ 6]);
+ word3_muladd(&w0, &w2, &w1, x[ 5], x[ 5]);
+ z[10] = w1; w1 = 0;
+
+ word3_muladd_2(&w1, &w0, &w2, x[ 3], x[ 8]);
+ word3_muladd_2(&w1, &w0, &w2, x[ 4], x[ 7]);
+ word3_muladd_2(&w1, &w0, &w2, x[ 5], x[ 6]);
+ z[11] = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 8]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 7]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], x[ 6]);
+ z[12] = w0; w0 = 0;
+
+ word3_muladd_2(&w0, &w2, &w1, x[ 5], x[ 8]);
+ word3_muladd_2(&w0, &w2, &w1, x[ 6], x[ 7]);
+ z[13] = w1; w1 = 0;
+
+ word3_muladd_2(&w1, &w0, &w2, x[ 6], x[ 8]);
+ word3_muladd(&w1, &w0, &w2, x[ 7], x[ 7]);
+ z[14] = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 7], x[ 8]);
+ z[15] = w0; w0 = 0;
+
+ word3_muladd(&w0, &w2, &w1, x[ 8], x[ 8]);
+ z[16] = w1;
+ z[17] = w2;
+ }
+
+/*
+* Comba 9x9 Multiplication
+*/
+void bigint_comba_mul9(word z[18], const word x[9], const word y[9])
+ {
+ word w2 = 0, w1 = 0, w0 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[ 0], y[ 0]);
+ z[ 0] = w0; w0 = 0;
+
+ word3_muladd(&w0, &w2, &w1, x[ 0], y[ 1]);
+ word3_muladd(&w0, &w2, &w1, x[ 1], y[ 0]);
+ z[ 1] = w1; w1 = 0;
+
+ word3_muladd(&w1, &w0, &w2, x[ 0], y[ 2]);
+ word3_muladd(&w1, &w0, &w2, x[ 1], y[ 1]);
+ word3_muladd(&w1, &w0, &w2, x[ 2], y[ 0]);
+ z[ 2] = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[ 0], y[ 3]);
+ word3_muladd(&w2, &w1, &w0, x[ 1], y[ 2]);
+ word3_muladd(&w2, &w1, &w0, x[ 2], y[ 1]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], y[ 0]);
+ z[ 3] = w0; w0 = 0;
+
+ word3_muladd(&w0, &w2, &w1, x[ 0], y[ 4]);
+ word3_muladd(&w0, &w2, &w1, x[ 1], y[ 3]);
+ word3_muladd(&w0, &w2, &w1, x[ 2], y[ 2]);
+ word3_muladd(&w0, &w2, &w1, x[ 3], y[ 1]);
+ word3_muladd(&w0, &w2, &w1, x[ 4], y[ 0]);
+ z[ 4] = w1; w1 = 0;
+
+ word3_muladd(&w1, &w0, &w2, x[ 0], y[ 5]);
+ word3_muladd(&w1, &w0, &w2, x[ 1], y[ 4]);
+ word3_muladd(&w1, &w0, &w2, x[ 2], y[ 3]);
+ word3_muladd(&w1, &w0, &w2, x[ 3], y[ 2]);
+ word3_muladd(&w1, &w0, &w2, x[ 4], y[ 1]);
+ word3_muladd(&w1, &w0, &w2, x[ 5], y[ 0]);
+ z[ 5] = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[ 0], y[ 6]);
+ word3_muladd(&w2, &w1, &w0, x[ 1], y[ 5]);
+ word3_muladd(&w2, &w1, &w0, x[ 2], y[ 4]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], y[ 3]);
+ word3_muladd(&w2, &w1, &w0, x[ 4], y[ 2]);
+ word3_muladd(&w2, &w1, &w0, x[ 5], y[ 1]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], y[ 0]);
+ z[ 6] = w0; w0 = 0;
+
+ word3_muladd(&w0, &w2, &w1, x[ 0], y[ 7]);
+ word3_muladd(&w0, &w2, &w1, x[ 1], y[ 6]);
+ word3_muladd(&w0, &w2, &w1, x[ 2], y[ 5]);
+ word3_muladd(&w0, &w2, &w1, x[ 3], y[ 4]);
+ word3_muladd(&w0, &w2, &w1, x[ 4], y[ 3]);
+ word3_muladd(&w0, &w2, &w1, x[ 5], y[ 2]);
+ word3_muladd(&w0, &w2, &w1, x[ 6], y[ 1]);
+ word3_muladd(&w0, &w2, &w1, x[ 7], y[ 0]);
+ z[ 7] = w1; w1 = 0;
+
+ word3_muladd(&w1, &w0, &w2, x[ 0], y[ 8]);
+ word3_muladd(&w1, &w0, &w2, x[ 1], y[ 7]);
+ word3_muladd(&w1, &w0, &w2, x[ 2], y[ 6]);
+ word3_muladd(&w1, &w0, &w2, x[ 3], y[ 5]);
+ word3_muladd(&w1, &w0, &w2, x[ 4], y[ 4]);
+ word3_muladd(&w1, &w0, &w2, x[ 5], y[ 3]);
+ word3_muladd(&w1, &w0, &w2, x[ 6], y[ 2]);
+ word3_muladd(&w1, &w0, &w2, x[ 7], y[ 1]);
+ word3_muladd(&w1, &w0, &w2, x[ 8], y[ 0]);
+ z[ 8] = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[ 1], y[ 8]);
+ word3_muladd(&w2, &w1, &w0, x[ 2], y[ 7]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], y[ 6]);
+ word3_muladd(&w2, &w1, &w0, x[ 4], y[ 5]);
+ word3_muladd(&w2, &w1, &w0, x[ 5], y[ 4]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], y[ 3]);
+ word3_muladd(&w2, &w1, &w0, x[ 7], y[ 2]);
+ word3_muladd(&w2, &w1, &w0, x[ 8], y[ 1]);
+ z[ 9] = w0; w0 = 0;
+
+ word3_muladd(&w0, &w2, &w1, x[ 2], y[ 8]);
+ word3_muladd(&w0, &w2, &w1, x[ 3], y[ 7]);
+ word3_muladd(&w0, &w2, &w1, x[ 4], y[ 6]);
+ word3_muladd(&w0, &w2, &w1, x[ 5], y[ 5]);
+ word3_muladd(&w0, &w2, &w1, x[ 6], y[ 4]);
+ word3_muladd(&w0, &w2, &w1, x[ 7], y[ 3]);
+ word3_muladd(&w0, &w2, &w1, x[ 8], y[ 2]);
+ z[10] = w1; w1 = 0;
+
+ word3_muladd(&w1, &w0, &w2, x[ 3], y[ 8]);
+ word3_muladd(&w1, &w0, &w2, x[ 4], y[ 7]);
+ word3_muladd(&w1, &w0, &w2, x[ 5], y[ 6]);
+ word3_muladd(&w1, &w0, &w2, x[ 6], y[ 5]);
+ word3_muladd(&w1, &w0, &w2, x[ 7], y[ 4]);
+ word3_muladd(&w1, &w0, &w2, x[ 8], y[ 3]);
+ z[11] = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[ 4], y[ 8]);
+ word3_muladd(&w2, &w1, &w0, x[ 5], y[ 7]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], y[ 6]);
+ word3_muladd(&w2, &w1, &w0, x[ 7], y[ 5]);
+ word3_muladd(&w2, &w1, &w0, x[ 8], y[ 4]);
+ z[12] = w0; w0 = 0;
+
+ word3_muladd(&w0, &w2, &w1, x[ 5], y[ 8]);
+ word3_muladd(&w0, &w2, &w1, x[ 6], y[ 7]);
+ word3_muladd(&w0, &w2, &w1, x[ 7], y[ 6]);
+ word3_muladd(&w0, &w2, &w1, x[ 8], y[ 5]);
+ z[13] = w1; w1 = 0;
+
+ word3_muladd(&w1, &w0, &w2, x[ 6], y[ 8]);
+ word3_muladd(&w1, &w0, &w2, x[ 7], y[ 7]);
+ word3_muladd(&w1, &w0, &w2, x[ 8], y[ 6]);
+ z[14] = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[ 7], y[ 8]);
+ word3_muladd(&w2, &w1, &w0, x[ 8], y[ 7]);
+ z[15] = w0; w0 = 0;
+
+ word3_muladd(&w0, &w2, &w1, x[ 8], y[ 8]);
+ z[16] = w1;
+ z[17] = w2;
+ }
+
+/*
* Comba 16x16 Squaring
*/
void bigint_comba_sqr16(word z[32], const word x[16])
diff --git a/src/lib/math/mp/mp_core.h b/src/lib/math/mp/mp_core.h
index c25cb994f..64b29b869 100644
--- a/src/lib/math/mp/mp_core.h
+++ b/src/lib/math/mp/mp_core.h
@@ -151,11 +151,13 @@ word bigint_modop(word n1, word n0, word d);
void bigint_comba_mul4(word z[8], const word x[4], const word y[4]);
void bigint_comba_mul6(word z[12], const word x[6], const word y[6]);
void bigint_comba_mul8(word z[16], const word x[8], const word y[8]);
+void bigint_comba_mul9(word z[18], const word x[9], const word y[9]);
void bigint_comba_mul16(word z[32], const word x[16], const word y[16]);
void bigint_comba_sqr4(word out[8], const word in[4]);
void bigint_comba_sqr6(word out[12], const word in[6]);
void bigint_comba_sqr8(word out[16], const word in[8]);
+void bigint_comba_sqr9(word out[18], const word in[9]);
void bigint_comba_sqr16(word out[32], const word in[16]);
}
diff --git a/src/lib/math/mp/mp_karat.cpp b/src/lib/math/mp/mp_karat.cpp
index b549a05c8..62620f83d 100644
--- a/src/lib/math/mp/mp_karat.cpp
+++ b/src/lib/math/mp/mp_karat.cpp
@@ -237,6 +237,11 @@ void bigint_mul(word z[], size_t z_size, word workspace[],
{
bigint_comba_mul8(z, x, y);
}
+ else if(x_sw <= 9 && x_size >= 9 &&
+ y_sw <= 9 && y_size >= 9 && z_size >= 18)
+ {
+ bigint_comba_mul9(z, x, y);
+ }
else if(x_sw <= 16 && x_size >= 16 &&
y_sw <= 16 && y_size >= 16 && z_size >= 32)
{
@@ -281,6 +286,10 @@ void bigint_sqr(word z[], size_t z_size, word workspace[],
{
bigint_comba_sqr8(z, x);
}
+ else if(x_sw == 9 && x_size >= 9 && z_size >= 18)
+ {
+ bigint_comba_sqr9(z, x);
+ }
else if(x_sw <= 16 && x_size >= 16 && z_size >= 32)
{
bigint_comba_sqr16(z, x);