Optimize P-256 and P-384 reduction

Precompute the multiples of the prime and then subtract directly.
author: Jack Lloyd <[email protected]> 2018-02-26 17:26:40 -0500
committer: Jack Lloyd <[email protected]> 2018-02-26 17:26:40 -0500
commit: a89255d933d02bb388f9a9fa1093b189f389732d (patch)
tree: c523d68e41698710f2e82e04a10612fe5145cdd5
parent: 72b12e25bfdacc2e9553f64b3d87a48cb46bd682 (diff)
4 files changed, 101 insertions, 36 deletions
diff --git a/news.rst b/news.rst
index 9026bcc5a..9f1ecde3f 100644
--- a/news.rst
+++ b/news.rst
@@ -6,9 +6,9 @@ Version 2.5.0, Not Yet Released
 
 * Add support for RSA-PSS signatures in TLS (GH #1285)
 
-* A faster algorithm for ECC point multiplications is now used, resulting in
-  ECDSA signature generation being 2-3 times fater than previous releases. Other
-  optimizations have improved ECDSA verification time by about 25%. (GH #1457)
+* Several optimizations in ECC operations have improved ECDSA signature
+  generation and ECDH key exchange performance by 2 to 4 times as compared to
+  previous releases. ECDSA verification is 1.25 to 2 times faster. (GH #1457)
 
 * Add a new Credentials_Manager callback that specifies which CAs the server
   has indicated it trusts (GH #1395 fixing #1261)
diff --git a/src/lib/math/bigint/bigint.cpp b/src/lib/math/bigint/bigint.cpp
index e99ddb50a..ec0df8f2d 100644
--- a/src/lib/math/bigint/bigint.cpp
+++ b/src/lib/math/bigint/bigint.cpp
@@ -291,7 +291,12 @@ BigInt BigInt::abs() const
 void BigInt::grow_to(size_t n)
    {
    if(n > size())
-      m_reg.resize(round_up(n, 8));
+      {
+      if(n <= m_reg.capacity())
+         m_reg.resize(m_reg.capacity());
+      else
+         m_reg.resize(round_up(n, 8));
+      }
    }
 
 /*
@@ -325,9 +330,10 @@ void BigInt::binary_decode(const uint8_t buf[], size_t length)
       m_reg[length / WORD_BYTES] = (m_reg[length / WORD_BYTES] << 8) | buf[i];
    }
 
-void BigInt::shrink_to_fit()
+void BigInt::shrink_to_fit(size_t min_size)
    {
-   m_reg.resize(sig_words());
+   const size_t words = std::max(min_size, sig_words());
+   m_reg.resize(words);
    }
 
 void BigInt::const_time_lookup(secure_vector<word>& output,
diff --git a/src/lib/math/bigint/bigint.h b/src/lib/math/bigint/bigint.h
index ca35bd07d..6e2f8dbde 100644
--- a/src/lib/math/bigint/bigint.h
+++ b/src/lib/math/bigint/bigint.h
@@ -480,7 +480,11 @@ class BOTAN_PUBLIC_API(2,0) BigInt final
      */
      void grow_to(size_t n);
 
-     void shrink_to_fit();
+     /**
+     * Resize the vector to the minimum word size to hold the integer, or
+     * min_size words, whichever is larger
+     */
+     void shrink_to_fit(size_t min_size = 0);
 
      /**
      * Fill BigInt with a random number with size of bitsize
diff --git a/src/lib/math/numbertheory/nistp_redc.cpp b/src/lib/math/numbertheory/nistp_redc.cpp
index 29771036d..a2420e975 100644
--- a/src/lib/math/numbertheory/nistp_redc.cpp
+++ b/src/lib/math/numbertheory/nistp_redc.cpp
@@ -1,6 +1,6 @@
 /*
 * NIST prime reductions
-* (C) 2014,2015 Jack Lloyd
+* (C) 2014,2015,2018 Jack Lloyd
 *
 * Botan is released under the Simplified BSD License (see license.txt)
 */
@@ -234,6 +234,10 @@ const BigInt& prime_p256()
 
 void redc_p256(BigInt& x, secure_vector<word>& ws)
    {
+   static const size_t p256_limbs = (BOTAN_MP_WORD_BITS == 32) ? 8 : 4;
+
+   BOTAN_UNUSED(ws);
+
    const uint32_t X8 = get_uint32_t(x, 8);
    const uint32_t X9 = get_uint32_t(x, 9);
    const uint32_t X10 = get_uint32_t(x, 10);
@@ -244,6 +248,7 @@ void redc_p256(BigInt& x, secure_vector<word>& ws)
    const uint32_t X15 = get_uint32_t(x, 15);
 
    x.mask_bits(256);
+   x.shrink_to_fit(p256_limbs + 1);
 
    int64_t S = 0;
 
@@ -342,36 +347,49 @@ void redc_p256(BigInt& x, secure_vector<word>& ws)
    set_uint32_t(x, 7, S);
    S >>= 32;
 
-   S += 5;
-   set_uint32_t(x, 8, S);
-
-   BOTAN_ASSERT_EQUAL(S >> 32, 0, "No underflow");
-
-#if 0
-   BOTAN_ASSERT(S <= 10, "Expected overflow");
-   static const BigInt P256_mults[11] = {
-      prime_p256(),
-      2*prime_p256(),
-      3*prime_p256(),
-      4*prime_p256(),
-      5*prime_p256(),
-      6*prime_p256(),
-      7*prime_p256(),
-      8*prime_p256(),
-      9*prime_p256(),
-      10*prime_p256(),
-      11*prime_p256()
+   S += 5; // final carry of 6*P-256
+
+   BOTAN_ASSERT(S >= 0 && S <= 10, "Expected overflow");
+
+   /*
+   This is a table of (i*P-256) % 2**256 for i in 1...10
+   */
+   static const word p256_mults[11][p256_limbs] = {
+#if (BOTAN_MP_WORD_BITS == 64)
+      {0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001},
+      {0xFFFFFFFFFFFFFFFE, 0x00000001FFFFFFFF, 0x0000000000000000, 0xFFFFFFFE00000002},
+      {0xFFFFFFFFFFFFFFFD, 0x00000002FFFFFFFF, 0x0000000000000000, 0xFFFFFFFD00000003},
+      {0xFFFFFFFFFFFFFFFC, 0x00000003FFFFFFFF, 0x0000000000000000, 0xFFFFFFFC00000004},
+      {0xFFFFFFFFFFFFFFFB, 0x00000004FFFFFFFF, 0x0000000000000000, 0xFFFFFFFB00000005},
+      {0xFFFFFFFFFFFFFFFA, 0x00000005FFFFFFFF, 0x0000000000000000, 0xFFFFFFFA00000006},
+      {0xFFFFFFFFFFFFFFF9, 0x00000006FFFFFFFF, 0x0000000000000000, 0xFFFFFFF900000007},
+      {0xFFFFFFFFFFFFFFF8, 0x00000007FFFFFFFF, 0x0000000000000000, 0xFFFFFFF800000008},
+      {0xFFFFFFFFFFFFFFF7, 0x00000008FFFFFFFF, 0x0000000000000000, 0xFFFFFFF700000009},
+      {0xFFFFFFFFFFFFFFF6, 0x00000009FFFFFFFF, 0x0000000000000000, 0xFFFFFFF60000000A},
+      {0xFFFFFFFFFFFFFFF5, 0x0000000AFFFFFFFF, 0x0000000000000000, 0xFFFFFFF50000000B},
+#else
+      {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0xFFFFFFFF},
+      {0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000001, 0x00000000, 0x00000000, 0x00000002, 0xFFFFFFFE},
+      {0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000002, 0x00000000, 0x00000000, 0x00000003, 0xFFFFFFFD},
+      {0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000003, 0x00000000, 0x00000000, 0x00000004, 0xFFFFFFFC},
+      {0xFFFFFFFB, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000004, 0x00000000, 0x00000000, 0x00000005, 0xFFFFFFFB},
+      {0xFFFFFFFA, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000005, 0x00000000, 0x00000000, 0x00000006, 0xFFFFFFFA},
+      {0xFFFFFFF9, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000006, 0x00000000, 0x00000000, 0x00000007, 0xFFFFFFF9},
+      {0xFFFFFFF8, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000007, 0x00000000, 0x00000000, 0x00000008, 0xFFFFFFF8},
+      {0xFFFFFFF7, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000008, 0x00000000, 0x00000000, 0x00000009, 0xFFFFFFF7},
+      {0xFFFFFFF6, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000009, 0x00000000, 0x00000000, 0x0000000A, 0xFFFFFFF6},
+      {0xFFFFFFF5, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000000A, 0x00000000, 0x00000000, 0x0000000B, 0xFFFFFFF5},
+#endif
    };
 
-   x -= P256_mults[S];
+   word borrow = bigint_sub2(x.mutable_data(), x.size(), p256_mults[S], p256_limbs);
 
-   while(x.is_negative())
+   BOTAN_ASSERT(borrow == 0 || borrow == 1, "Expected borrow during P-256 reduction");
+
+   if(borrow)
       {
-      x += prime_p256();
+      bigint_add2(x.mutable_data(), x.size() - 1, p256_mults[0], p256_limbs);
       }
-#else
-   x.reduce_below(prime_p256(), ws);
-   #endif
    }
 
 const BigInt& prime_p384()
@@ -382,6 +400,10 @@ const BigInt& prime_p384()
 
 void redc_p384(BigInt& x, secure_vector<word>& ws)
    {
+   BOTAN_UNUSED(ws);
+
+   static const size_t p384_limbs = (BOTAN_MP_WORD_BITS == 32) ? 12 : 6;
+
    const uint32_t X12 = get_uint32_t(x, 12);
    const uint32_t X13 = get_uint32_t(x, 13);
    const uint32_t X14 = get_uint32_t(x, 14);
@@ -396,6 +418,7 @@ void redc_p384(BigInt& x, secure_vector<word>& ws)
    const uint32_t X23 = get_uint32_t(x, 23);
 
    x.mask_bits(384);
+   x.shrink_to_fit(p384_limbs + 1);
 
    int64_t S = 0;
 
@@ -523,10 +546,42 @@ void redc_p384(BigInt& x, secure_vector<word>& ws)
    S -= X22;
    set_uint32_t(x, 11, S);
    S >>= 32;
-   BOTAN_ASSERT_EQUAL(S >> 32, 0, "No underflow");
-   set_uint32_t(x, 12, S);
 
-   x.reduce_below(prime_p384(), ws);
+   BOTAN_ASSERT(S >= 0 && S <= 4, "Expected overflow in P-384 reduction");
+
+   /*
+   This is a table of (i*P-384) % 2**384 for i in 1...4
+   */
+   static const word p384_mults[5][p384_limbs] = {
+#if (BOTAN_MP_WORD_BITS == 64)
+      {0x00000000FFFFFFFF, 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},
+      {0x00000001FFFFFFFE, 0xFFFFFFFE00000000, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},
+      {0x00000002FFFFFFFD, 0xFFFFFFFD00000000, 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},
+      {0x00000003FFFFFFFC, 0xFFFFFFFC00000000, 0xFFFFFFFFFFFFFFFB, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},
+      {0x00000004FFFFFFFB, 0xFFFFFFFB00000000, 0xFFFFFFFFFFFFFFFA, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},
+
+#else
+      {0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
+       0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
+      {0xFFFFFFFE, 0x00000001, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFF,
+       0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
+      {0xFFFFFFFD, 0x00000002, 0x00000000, 0xFFFFFFFD, 0xFFFFFFFC, 0xFFFFFFFF,
+       0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
+      {0xFFFFFFFC, 0x00000003, 0x00000000, 0xFFFFFFFC, 0xFFFFFFFB, 0xFFFFFFFF,
+       0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
+      {0xFFFFFFFB, 0x00000004, 0x00000000, 0xFFFFFFFB, 0xFFFFFFFA, 0xFFFFFFFF,
+       0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
+#endif
+   };
+
+   word borrow = bigint_sub2(x.mutable_data(), x.size(), p384_mults[S], p384_limbs);
+
+   BOTAN_ASSERT(borrow == 0 || borrow == 1, "Expected borrow during P-384 reduction");
+
+   if(borrow)
+      {
+      bigint_add2(x.mutable_data(), x.size() - 1, p384_mults[0], p384_limbs);
+      }
    }
 
 #endif
author	Jack Lloyd <[email protected]>	2018-02-26 17:26:40 -0500
committer	Jack Lloyd <[email protected]>	2018-02-26 17:26:40 -0500
commit	a89255d933d02bb388f9a9fa1093b189f389732d (patch)
tree	c523d68e41698710f2e82e04a10612fe5145cdd5
parent	72b12e25bfdacc2e9553f64b3d87a48cb46bd682 (diff)