Merge GH #1796 More const-time improvements

author: Jack Lloyd <[email protected]> 2018-12-24 14:32:03 -0500
committer: Jack Lloyd <[email protected]> 2018-12-24 14:32:03 -0500
commit: 2f9e7224f4b20d382776786f12a49091504f773a (patch)
tree: 482d4c36a474fbf499d652e639159b5d5044663f
parent: 58a434fb5b94451832b2ccb501567baae20af2e5 (diff)
parent: f29d725c6fc2cf301a8acd7daa03c9ccadccba9e (diff)
7 files changed, 79 insertions, 52 deletions
diff --git a/doc/manual/side_channels.rst b/doc/manual/side_channels.rst
index 32be498e7..8f8067004 100644
--- a/doc/manual/side_channels.rst
+++ b/doc/manual/side_channels.rst
@@ -9,6 +9,34 @@ obviously all open for future improvement.
 The following text assumes the reader is already familiar with cryptographic
 implementations, side channel attacks, and common countermeasures.
 
+Modular Exponentiation
+------------------------
+
+Modular exponentiation uses a fixed window algorithm with Montgomery
+representation. A side channel silent table lookup is used to access the
+precomputed powers. The caller provides the maximum possible bit length of the
+exponent, and the exponent is zero-padded as required. For example, in a DSA
+signature with 256-bit q, the caller will specify a maximum length of exponent
+of 256 bits, even if the k that was generated was 250 bits. This avoids leaking
+the length of the exponent through the number of loop iterations.
+See monty_exp.cpp and monty.cpp
+
+Karatsuba multiplication algorithm avoids any conditional branches; in
+cases where different operations must be performed it instead uses masked
+operations. See mp_karat.cpp for details.
+
+The Montgomery reduction is written to run in constant time.
+The final reduction is handled with a masked subtraction. See mp_monty.cpp.
+
+Barrett Reduction
+--------------------
+
+The Barrett reduction code is written to avoid input dependent branches. The
+Barrett algorithm only works for inputs that are most the square of the modulus;
+larger values fall back on a different (slower) division algorithm. This
+algorithm is also const time, but the branch allows detecting when a value
+larger than the square of the modulus was reduced.
+
 RSA
 ----------------------
 
@@ -105,33 +133,6 @@ coming from the length of the RSA key (which is public information).
 
 See eme_oaep.cpp.
 
-Modular Exponentiation
-------------------------
-
-Modular exponentiation uses a fixed window algorithm with Montgomery
-representation. A side channel silent table lookup is used to access the
-precomputed powers. The caller provides the maximum possible bit length of the
-exponent, and the exponent is zero-padded as required. For example, in a DSA
-signature with 256-bit q, the caller will specify a maximum length of exponent
-of 256 bits, even if the k that was generated was 250 bits. This avoids leaking
-the length of the exponent through the number of loop iterations.
-See monty_exp.cpp and monty.cpp
-
-Karatsuba multiplication algorithm avoids any conditional branches; in
-cases where different operations must be performed it instead uses masked
-operations. See mp_karat.cpp for details.
-
-The Montgomery reduction is written (and tested) to run in constant time.
-The final reduction is handled with a masked subtraction. See mp_monty.cpp.
-
-Barrett Reduction
---------------------
-
-The Barrett reduction code is written to avoid input dependent branches.
-However the Barrett algorithm only works for inputs that are most the
-square of the modulus; larger values fall back to the schoolbook
-division algorithm which is not const time.
-
 ECC point decoding
 ----------------------
 
@@ -170,9 +171,12 @@ The base point multiplication algorithm is a comb-like technique which
 precomputes ``P^i,(2*P)^i,(3*P)^i`` for all ``i`` in the range of valid scalars.
 This means the scalar multiplication involves only point additions and no
 doublings, which may help against attacks which rely on distinguishing between
-point doublings and point additions. The elements of the table are accessed
-by masked lookups, so as not to leak information about bits of the scalar
-via a cache side channel.
+point doublings and point additions. The elements of the table are accessed by
+masked lookups, so as not to leak information about bits of the scalar via a
+cache side channel. However, whenever 3 sequential bits of the scalar are all 0,
+no operation is performed in that iteration of the loop. This exposes the scalar
+multiply to a cache-based side channel attack; scalar blinding is necessary to
+prevent this attack from leaking information about the scalar.
 
 The variable point multiplication algorithm uses a fixed-window algorithm. Since
 this is normally invoked using untrusted points (eg during ECDH key exchange) it
diff --git a/src/lib/math/bigint/bigint.cpp b/src/lib/math/bigint/bigint.cpp
index 75c87e5b6..065c469b4 100644
--- a/src/lib/math/bigint/bigint.cpp
+++ b/src/lib/math/bigint/bigint.cpp
@@ -280,6 +280,7 @@ size_t BigInt::top_bits_free() const
 
    const word top_word = word_at(words - 1);
    const size_t bits_used = high_bit(top_word);
+   CT::unpoison(bits_used);
    return BOTAN_MP_WORD_BITS - bits_used;
    }
 
diff --git a/src/lib/math/numbertheory/nistp_redc.cpp b/src/lib/math/numbertheory/nistp_redc.cpp
index eca78d180..17089fcbe 100644
--- a/src/lib/math/numbertheory/nistp_redc.cpp
+++ b/src/lib/math/numbertheory/nistp_redc.cpp
@@ -176,8 +176,6 @@ void redc_p192(BigInt& x, secure_vector<word>& ws)
 
    // No underflow possible
 
-   BOTAN_ASSERT(S <= 2, "Expected overflow in P-192 reduce");
-
    /*
    This is a table of (i*P-192) % 2**192 for i in 1...3
    */
@@ -193,6 +191,9 @@ void redc_p192(BigInt& x, secure_vector<word>& ws)
 #endif
    };
 
+   CT::unpoison(S);
+   BOTAN_ASSERT(S <= 2, "Expected overflow");
+
    BOTAN_ASSERT_NOMSG(x.size() == p192_limbs + 1);
    word borrow = bigint_sub2(x.mutable_data(), p192_limbs + 1, p192_mults[S], p192_limbs);
    BOTAN_DEBUG_ASSERT(borrow == 0 || borrow == 1);
@@ -280,8 +281,6 @@ void redc_p224(BigInt& x, secure_vector<word>& ws)
 
    set_words(xw, 6, R0, 0);
 
-   BOTAN_ASSERT(S >= 0 && S <= 2, "Expected overflow in P-224 reduce");
-
    static const word p224_mults[3][p224_limbs] = {
 #if (BOTAN_MP_WORD_BITS == 64)
     {0x0000000000000001, 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF},
@@ -295,6 +294,9 @@ void redc_p224(BigInt& x, secure_vector<word>& ws)
 
    };
 
+   CT::unpoison(S);
+   BOTAN_ASSERT(S >= 0 && S <= 2, "Expected overflow");
+
    BOTAN_ASSERT_NOMSG(x.size() == p224_limbs + 1);
    word borrow = bigint_sub2(x.mutable_data(), p224_limbs + 1, p224_mults[S], p224_limbs);
    BOTAN_DEBUG_ASSERT(borrow == 0 || borrow == 1);
@@ -390,8 +392,6 @@ void redc_p256(BigInt& x, secure_vector<word>& ws)
 
    S += 5; // the top digits of 6*P-256
 
-   BOTAN_DEBUG_ASSERT(S >= 0 && S <= 10);
-
    /*
    This is a table of (i*P-256) % 2**256 for i in 1...10
    */
@@ -424,6 +424,7 @@ void redc_p256(BigInt& x, secure_vector<word>& ws)
    };
 
    CT::unpoison(S);
+   BOTAN_ASSERT(S >= 0 && S <= 10, "Expected overflow");
 
    BOTAN_ASSERT_NOMSG(x.size() == p256_limbs + 1);
    word borrow = bigint_sub2(x.mutable_data(), p256_limbs + 1, p256_mults[S], p256_limbs);
@@ -551,8 +552,6 @@ void redc_p384(BigInt& x, secure_vector<word>& ws)
 
    set_words(xw, 10, R0, R1);
 
-   BOTAN_ASSERT(S >= 0 && S <= 4, "Expected overflow in P-384 reduction");
-
    /*
    This is a table of (i*P-384) % 2**384 for i in 1...4
    */
@@ -578,6 +577,9 @@ void redc_p384(BigInt& x, secure_vector<word>& ws)
 #endif
    };
 
+   CT::unpoison(S);
+   BOTAN_ASSERT(S >= 0 && S <= 4, "Expected overflow");
+
    BOTAN_ASSERT_NOMSG(x.size() == p384_limbs + 1);
    word borrow = bigint_sub2(x.mutable_data(), p384_limbs + 1, p384_mults[S], p384_limbs);
    BOTAN_DEBUG_ASSERT(borrow == 0 || borrow == 1);
diff --git a/src/lib/math/numbertheory/reducer.cpp b/src/lib/math/numbertheory/reducer.cpp
index ec0071eac..c37a1daeb 100644
--- a/src/lib/math/numbertheory/reducer.cpp
+++ b/src/lib/math/numbertheory/reducer.cpp
@@ -7,6 +7,7 @@
 
 #include <botan/reducer.h>
 #include <botan/internal/ct_utils.h>
+#include <botan/internal/mp_core.h>
 #include <botan/divide.h>
 
 namespace Botan {
@@ -41,6 +42,31 @@ BigInt Modular_Reducer::reduce(const BigInt& x) const
    return r;
    }
 
+namespace {
+
+/*
+* Like if(cnd) x.rev_sub(...) but in const time
+*/
+void cnd_rev_sub(bool cnd, BigInt& x, const word y[], size_t y_sw, secure_vector<word>& ws)
+   {
+   if(x.sign() != BigInt::Positive)
+      throw Invalid_State("BigInt::sub_rev requires this is positive");
+
+   const size_t x_sw = x.sig_words();
+
+   const size_t max_words = std::max(x_sw, y_sw);
+   ws.resize(std::max(x_sw, y_sw));
+   clear_mem(ws.data(), ws.size());
+   x.grow_to(max_words);
+
+   const int32_t relative_size = bigint_sub_abs(ws.data(), x.data(), x_sw, y, y_sw);
+
+   x.cond_flip_sign((relative_size > 0) && cnd);
+   bigint_cnd_swap(cnd, x.mutable_data(), ws.data(), max_words);
+   }
+
+}
+
 void Modular_Reducer::reduce(BigInt& t1, const BigInt& x, secure_vector<word>& ws) const
    {
    if(&t1 == &x)
@@ -50,7 +76,7 @@ void Modular_Reducer::reduce(BigInt& t1, const BigInt& x, secure_vector<word>& w
 
    const size_t x_sw = x.sig_words();
 
-   if(x_sw >= (2*m_mod_words - 1) && x.cmp(m_modulus_2, false) >= 0)
+   if(x.cmp(m_modulus_2, false) >= 0)
       {
       // too big, fall back to slow boat division
       t1 = ct_modulo(x, m_modulus);
@@ -87,10 +113,7 @@ void Modular_Reducer::reduce(BigInt& t1, const BigInt& x, secure_vector<word>& w
    // Per HAC this step requires at most 2 subtractions
    t1.ct_reduce_below(m_modulus, ws, 2);
 
-   if(x.is_negative() && t1.is_nonzero())
-      {
-      t1.rev_sub(m_modulus.data(), m_modulus.size(), ws);
-      }
+   cnd_rev_sub(t1.is_nonzero() && x.is_negative(), t1, m_modulus.data(), m_modulus.size(), ws);
    }
 
 }
diff --git a/src/lib/pubkey/dsa/dsa.cpp b/src/lib/pubkey/dsa/dsa.cpp
index 412270173..bece42d06 100644
--- a/src/lib/pubkey/dsa/dsa.cpp
+++ b/src/lib/pubkey/dsa/dsa.cpp
@@ -10,6 +10,7 @@
 #include <botan/keypair.h>
 #include <botan/reducer.h>
 #include <botan/rng.h>
+#include <botan/divide.h>
 #include <botan/internal/pk_ops_impl.h>
 
 #if defined(BOTAN_HAS_RFC6979_GENERATOR)
@@ -124,7 +125,7 @@ DSA_Signature_Operation::raw_sign(const uint8_t msg[], size_t msg_len,
 
    const BigInt k_inv = m_group.inverse_mod_q(k);
 
-   const BigInt r = m_group.mod_q(m_group.power_g_p(k, m_group.q_bits()));
+   const BigInt r = ct_modulo(m_group.power_g_p(k, m_group.q_bits()), m_group.get_q());
 
    /*
    * Blind the input message and compute x*r+m as (x*r*b + m*b)/b
diff --git a/src/lib/pubkey/rsa/rsa.cpp b/src/lib/pubkey/rsa/rsa.cpp
index 9f14b9d6a..fd50ce539 100644
--- a/src/lib/pubkey/rsa/rsa.cpp
+++ b/src/lib/pubkey/rsa/rsa.cpp
@@ -247,7 +247,7 @@ class RSA_Private_Operation
          auto future_j1 = std::async(std::launch::async, [this, &m, &d1_mask, powm_window]() {
 #endif
          const BigInt masked_d1 = m_key.get_d1() + (d1_mask * (m_key.get_p() - 1));
-         auto powm_d1_p = monty_precompute(m_monty_p, m_mod_p.reduce(m), powm_window);
+         auto powm_d1_p = monty_precompute(m_monty_p, ct_modulo(m, m_key.get_p()), powm_window);
          BigInt j1 = monty_execute(*powm_d1_p, masked_d1, m_max_d1_bits);
 
 #if defined(BOTAN_RSA_USE_ASYNC)
@@ -257,7 +257,7 @@ class RSA_Private_Operation
 
          const BigInt d2_mask(m_blinder.rng(), m_blinding_bits);
          const BigInt masked_d2 = m_key.get_d2() + (d2_mask * (m_key.get_q() - 1));
-         auto powm_d2_q = monty_precompute(m_monty_q, m_mod_q.reduce(m), powm_window);
+         auto powm_d2_q = monty_precompute(m_monty_q, ct_modulo(m, m_key.get_q()), powm_window);
          const BigInt j2 = monty_execute(*powm_d2_q, masked_d2, m_max_d2_bits);
 
          /*
@@ -272,11 +272,7 @@ class RSA_Private_Operation
          BigInt j1 = future_j1.get();
 #endif
 
-         /*
-         To prevent a side channel that allows detecting case where j1 < j2,
-         add p to j1 before reducing [computing c*(p+j1-j2) mod p]
-         */
-         j1 = m_mod_p.reduce(sub_mul(m_key.get_p() + j1, j2, m_key.get_c()));
+         j1 = m_mod_p.multiply(j1 - j2, m_key.get_c());
          return mul_add(j1, m_key.get_q(), j2);
          }
 
diff --git a/src/lib/pubkey/sm2/sm2.cpp b/src/lib/pubkey/sm2/sm2.cpp
index cf08d0f1c..5ffd547cf 100644
--- a/src/lib/pubkey/sm2/sm2.cpp
+++ b/src/lib/pubkey/sm2/sm2.cpp
@@ -153,7 +153,7 @@ SM2_Signature_Operation::sign(RandomNumberGenerator& rng)
 
    const BigInt r = m_group.mod_order(
       m_group.blinded_base_point_multiply_x(k, rng, m_ws) + e);
-   const BigInt s = m_group.multiply_mod_order(m_da_inv, (k - r*m_x));
+   const BigInt s = m_group.multiply_mod_order(m_da_inv, m_group.mod_order(k - r*m_x));
 
    return BigInt::encode_fixed_length_int_pair(r, s, m_group.get_order().bytes());
    }
author	Jack Lloyd <[email protected]>	2018-12-24 14:32:03 -0500
committer	Jack Lloyd <[email protected]>	2018-12-24 14:32:03 -0500
commit	2f9e7224f4b20d382776786f12a49091504f773a (patch)
tree	482d4c36a474fbf499d652e639159b5d5044663f
parent	58a434fb5b94451832b2ccb501567baae20af2e5 (diff)
parent	f29d725c6fc2cf301a8acd7daa03c9ccadccba9e (diff)