Fix Barrett reduction input bound

In the long ago when I wrote the Barrett code I must have missed that Barrett works for any input < 2^2k where k is the word size of the modulus. Fixing this has several nice effects, it is faster because it replaces a multiprecision comparison with a single size_t compare, and now the branch does not reveal information about the input or modulus, but only their word lengths, which is not considered sensitive. Fixing this allows reverting the change make in a57ce5a4fd2 and now RSA signing is even slightly faster than in 2.8, rather than 30% slower.
author: Jack Lloyd <[email protected]> 2018-12-26 09:15:54 -0500
committer: Jack Lloyd <[email protected]> 2018-12-26 09:15:54 -0500
commit: e5477c449830e099afc7c495ba738570ab7aabf8 (patch)
tree: d53e08983d8dffac39cde7d8681bc563ca2a5359 /src/lib/pubkey/rsa
parent: 79ed5ea9aeafad3990076df8273fe9193078f4c1 (diff)
1 files changed, 18 insertions, 8 deletions
diff --git a/src/lib/pubkey/rsa/rsa.cpp b/src/lib/pubkey/rsa/rsa.cpp
index fd50ce539..441127984 100644
--- a/src/lib/pubkey/rsa/rsa.cpp
+++ b/src/lib/pubkey/rsa/rsa.cpp
@@ -234,6 +234,12 @@ class RSA_Private_Operation
 
       BigInt private_op(const BigInt& m) const
          {
+         /*
+         TODO
+           Consider using Montgomery reduction instead of Barrett, using
+           the "Smooth RSA-CRT" method. https://eprint.iacr.org/2007/039.pdf
+         */
+
          const size_t powm_window = 4;
 
          const BigInt d1_mask(m_blinder.rng(), m_blinding_bits);
@@ -242,12 +248,11 @@ class RSA_Private_Operation
    #define BOTAN_RSA_USE_ASYNC
 #endif
 
-
 #if defined(BOTAN_RSA_USE_ASYNC)
          auto future_j1 = std::async(std::launch::async, [this, &m, &d1_mask, powm_window]() {
 #endif
          const BigInt masked_d1 = m_key.get_d1() + (d1_mask * (m_key.get_p() - 1));
-         auto powm_d1_p = monty_precompute(m_monty_p, ct_modulo(m, m_key.get_p()), powm_window);
+         auto powm_d1_p = monty_precompute(m_monty_p, m_mod_p.reduce(m), powm_window);
          BigInt j1 = monty_execute(*powm_d1_p, masked_d1, m_max_d1_bits);
 
 #if defined(BOTAN_RSA_USE_ASYNC)
@@ -257,22 +262,27 @@ class RSA_Private_Operation
 
          const BigInt d2_mask(m_blinder.rng(), m_blinding_bits);
          const BigInt masked_d2 = m_key.get_d2() + (d2_mask * (m_key.get_q() - 1));
-         auto powm_d2_q = monty_precompute(m_monty_q, ct_modulo(m, m_key.get_q()), powm_window);
+         auto powm_d2_q = monty_precompute(m_monty_q, m_mod_q.reduce(m), powm_window);
          const BigInt j2 = monty_execute(*powm_d2_q, masked_d2, m_max_d2_bits);
 
+#if defined(BOTAN_RSA_USE_ASYNC)
+         BigInt j1 = future_j1.get();
+#endif
+
          /*
          * To recover the final value from the CRT representation (j1,j2)
          * we use Garner's algorithm:
          * c = q^-1 mod p (this is precomputed)
          * h = c*(j1-j2) mod p
          * m = j2 + h*q
+         *
+         * We must avoid leaking if j1 >= j2 or not, as doing so allows deriving
+         * information about the secret prime. Do this by first adding p to j1,
+         * which should ensure the subtraction of j2 does not underflow. But
+         * this may still underflow if p and q are imbalanced in size.
          */
 
-#if defined(BOTAN_RSA_USE_ASYNC)
-         BigInt j1 = future_j1.get();
-#endif
-
-         j1 = m_mod_p.multiply(j1 - j2, m_key.get_c());
+         j1 = m_mod_p.multiply(m_mod_p.reduce((m_key.get_p() + j1) - j2), m_key.get_c());
          return mul_add(j1, m_key.get_q(), j2);
          }
author	Jack Lloyd <[email protected]>	2018-12-26 09:15:54 -0500
committer	Jack Lloyd <[email protected]>	2018-12-26 09:15:54 -0500
commit	e5477c449830e099afc7c495ba738570ab7aabf8 (patch)
tree	d53e08983d8dffac39cde7d8681bc563ca2a5359 /src/lib/pubkey/rsa
parent	79ed5ea9aeafad3990076df8273fe9193078f4c1 (diff)