aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib/pubkey/curve25519
diff options
context:
space:
mode:
authorJack Lloyd <[email protected]>2018-01-06 17:30:01 -0500
committerJack Lloyd <[email protected]>2018-01-06 17:30:01 -0500
commitd6dae3f5f48605f102e9482a44ede89bb77568d1 (patch)
treea617eddbe477da8cc478beb70ae8ddaa0c8bd457 /src/lib/pubkey/curve25519
parent6af53b3f668bd82bbf454703c0e15590c447254c (diff)
Reformat donna.cpp
Was originally kept in the same format as upstream, but upstream is not maintained anymore so no reason to stick with it.
Diffstat (limited to 'src/lib/pubkey/curve25519')
-rw-r--r--src/lib/pubkey/curve25519/donna.cpp821
1 files changed, 412 insertions, 409 deletions
diff --git a/src/lib/pubkey/curve25519/donna.cpp b/src/lib/pubkey/curve25519/donna.cpp
index 6807d56f6..4305ea143 100644
--- a/src/lib/pubkey/curve25519/donna.cpp
+++ b/src/lib/pubkey/curve25519/donna.cpp
@@ -1,462 +1,465 @@
/*
-* curve25519-donna-c64.c from github.com/agl/curve25519-donna
+* Based on curve25519-donna-c64.c from github.com/agl/curve25519-donna
* revision 80ad9b9930c9baef5829dd2a235b6b7646d32a8e
+*
+* Further changes
+* (C) 2014,2018 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
*/
/* Copyright 2008, Google Inc.
- * All rights reserved.
- *
- * Code released into the public domain.
- *
- * curve25519-donna: Curve25519 elliptic curve, public key function
- *
- * https://code.google.com/p/curve25519-donna/
- *
- * Adam Langley <[email protected]>
- *
- * Derived from public domain C code by Daniel J. Bernstein <[email protected]>
- *
- * More information about curve25519 can be found here
- * https://cr.yp.to/ecdh.html
- *
- * djb's sample implementation of curve25519 is written in a special assembly
- * language called qhasm and uses the floating point registers.
- *
- * This is, almost, a clean room reimplementation from the curve25519 paper. It
- * uses many of the tricks described therein. Only the crecip function is taken
- * from the sample implementation.
- */
+* All rights reserved.
+*
+* Code released into the public domain.
+*
+* curve25519-donna: Curve25519 elliptic curve, public key function
+*
+* https://code.google.com/p/curve25519-donna/
+*
+* Adam Langley <[email protected]>
+*
+* Derived from public domain C code by Daniel J. Bernstein <[email protected]>
+*
+* More information about curve25519 can be found here
+* https://cr.yp.to/ecdh.html
+*
+* djb's sample implementation of curve25519 is written in a special assembly
+* language called qhasm and uses the floating point registers.
+*
+* This is, almost, a clean room reimplementation from the curve25519 paper. It
+* uses many of the tricks described therein. Only the crecip function is taken
+* from the sample implementation.
+*/
#include <botan/curve25519.h>
#include <botan/mul128.h>
-#include <botan/internal/donna128.h>
#include <botan/internal/ct_utils.h>
+#include <botan/internal/donna128.h>
#include <botan/loadstor.h>
namespace Botan {
-typedef uint8_t u8;
-typedef uint64_t limb;
-typedef limb felem[5];
-
-typedef struct
- {
- limb* x;
- limb* z;
- } fmonty_pair_t;
-
-typedef struct
- {
- fmonty_pair_t q;
- fmonty_pair_t q_dash;
- const limb* q_minus_q_dash;
- } fmonty_in_t;
-
-typedef struct
- {
- fmonty_pair_t two_q;
- fmonty_pair_t q_plus_q_dash;
- } fmonty_out_t;
-
+namespace {
#if !defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
typedef donna128 uint128_t;
#endif
-/* Sum two numbers: output += in */
-static inline void
-fsum(limb *output, const limb *in) {
- output[0] += in[0];
- output[1] += in[1];
- output[2] += in[2];
- output[3] += in[3];
- output[4] += in[4];
-}
-
-/* Find the difference of two numbers: output = in - output
- * (note the order of the arguments!)
- *
- * Assumes that out[i] < 2**52
- * On return, out[i] < 2**55
- */
-static inline void
-fdifference_backwards(felem out, const felem in) {
- /* 152 is 19 << 3 */
- static const limb two54m152 = (static_cast<limb>(1) << 54) - 152;
- static const limb two54m8 = (static_cast<limb>(1) << 54) - 8;
-
- out[0] = in[0] + two54m152 - out[0];
- out[1] = in[1] + two54m8 - out[1];
- out[2] = in[2] + two54m8 - out[2];
- out[3] = in[3] + two54m8 - out[3];
- out[4] = in[4] + two54m8 - out[4];
-}
-
-/* Multiply a number by a scalar: output = in * scalar */
-static inline void
-fscalar_product(felem output, const felem in, const limb scalar) {
- uint128_t a = uint128_t(in[0]) * scalar;
- output[0] = a & 0x7ffffffffffff;
+struct fmonty_pair_t
+ {
+ uint64_t* x;
+ uint64_t* z;
+ };
- a = uint128_t(in[1]) * scalar + carry_shift(a, 51);
- output[1] = a & 0x7ffffffffffff;
+struct fmonty_in_t
+ {
+ fmonty_pair_t q;
+ fmonty_pair_t q_dash;
+ const uint64_t* q_minus_q_dash;
+ };
- a = uint128_t(in[2]) * scalar + carry_shift(a, 51);
- output[2] = a & 0x7ffffffffffff;
+struct fmonty_out_t
+ {
+ fmonty_pair_t two_q;
+ fmonty_pair_t q_plus_q_dash;
+ };
- a = uint128_t(in[3]) * scalar + carry_shift(a, 51);
- output[3] = a & 0x7ffffffffffff;
+/* Sum two numbers: output += in */
+inline void fsum(uint64_t out[5], const uint64_t in[5])
+ {
+ out[0] += in[0];
+ out[1] += in[1];
+ out[2] += in[2];
+ out[3] += in[3];
+ out[4] += in[4];
+ }
+
+/* Find the difference of two numbers: out = in - out
+* (note the order of the arguments!)
+*
+* Assumes that out[i] < 2**52
+* On return, out[i] < 2**55
+*/
+inline void fdifference_backwards(uint64_t out[5], const uint64_t in[5])
+ {
+ /* 152 is 19 << 3 */
+ const uint64_t two54m152 = (static_cast<uint64_t>(1) << 54) - 152;
+ const uint64_t two54m8 = (static_cast<uint64_t>(1) << 54) - 8;
+
+ out[0] = in[0] + two54m152 - out[0];
+ out[1] = in[1] + two54m8 - out[1];
+ out[2] = in[2] + two54m8 - out[2];
+ out[3] = in[3] + two54m8 - out[3];
+ out[4] = in[4] + two54m8 - out[4];
+ }
+
+/* Multiply a number by a scalar: out = in * scalar */
+inline void fscalar_product(uint64_t out[5], const uint64_t in[5], const uint64_t scalar)
+ {
+ uint128_t a = uint128_t(in[0]) * scalar;
+ out[0] = a & 0x7ffffffffffff;
- a = uint128_t(in[4]) * scalar + carry_shift(a, 51);
- output[4] = a & 0x7ffffffffffff;
+ a = uint128_t(in[1]) * scalar + carry_shift(a, 51);
+ out[1] = a & 0x7ffffffffffff;
- output[0] += carry_shift(a, 51) * 19;
-}
+ a = uint128_t(in[2]) * scalar + carry_shift(a, 51);
+ out[2] = a & 0x7ffffffffffff;
-/* Multiply two numbers: output = in2 * in
- *
- * output must be distinct to both inputs. The inputs are reduced coefficient
- * form, the output is not.
- *
- * Assumes that in[i] < 2**55 and likewise for in2.
- * On return, output[i] < 2**52
- */
-static inline void
-fmul(felem output, const felem in2, const felem in) {
- uint128_t t[5];
- limb r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c;
-
- r0 = in[0];
- r1 = in[1];
- r2 = in[2];
- r3 = in[3];
- r4 = in[4];
-
- s0 = in2[0];
- s1 = in2[1];
- s2 = in2[2];
- s3 = in2[3];
- s4 = in2[4];
-
- t[0] = uint128_t(r0) * s0;
- t[1] = uint128_t(r0) * s1 + uint128_t(r1) * s0;
- t[2] = uint128_t(r0) * s2 + uint128_t(r2) * s0 + uint128_t(r1) * s1;
- t[3] = uint128_t(r0) * s3 + uint128_t(r3) * s0 + uint128_t(r1) * s2 + uint128_t(r2) * s1;
- t[4] = uint128_t(r0) * s4 + uint128_t(r4) * s0 + uint128_t(r3) * s1 + uint128_t(r1) * s3 + uint128_t(r2) * s2;
-
- r4 *= 19;
- r1 *= 19;
- r2 *= 19;
- r3 *= 19;
-
- t[0] += uint128_t(r4) * s1 + uint128_t(r1) * s4 + uint128_t(r2) * s3 + uint128_t(r3) * s2;
- t[1] += uint128_t(r4) * s2 + uint128_t(r2) * s4 + uint128_t(r3) * s3;
- t[2] += uint128_t(r4) * s3 + uint128_t(r3) * s4;
- t[3] += uint128_t(r4) * s4;
-
- r0 = t[0] & 0x7ffffffffffff; c = carry_shift(t[0], 51);
- t[1] += c; r1 = t[1] & 0x7ffffffffffff; c = carry_shift(t[1], 51);
- t[2] += c; r2 = t[2] & 0x7ffffffffffff; c = carry_shift(t[2], 51);
- t[3] += c; r3 = t[3] & 0x7ffffffffffff; c = carry_shift(t[3], 51);
- t[4] += c; r4 = t[4] & 0x7ffffffffffff; c = carry_shift(t[4], 51);
- r0 += c * 19; c = carry_shift(r0, 51); r0 = r0 & 0x7ffffffffffff;
- r1 += c; c = carry_shift(r1, 51); r1 = r1 & 0x7ffffffffffff;
- r2 += c;
-
- output[0] = r0;
- output[1] = r1;
- output[2] = r2;
- output[3] = r3;
- output[4] = r4;
-}
+ a = uint128_t(in[3]) * scalar + carry_shift(a, 51);
+ out[3] = a & 0x7ffffffffffff;
-static inline void fsquare_times(felem output, const felem in, limb count) {
- uint128_t t[5];
- limb r0,r1,r2,r3,r4,c;
- limb d0,d1,d2,d4,d419;
-
- r0 = in[0];
- r1 = in[1];
- r2 = in[2];
- r3 = in[3];
- r4 = in[4];
-
- do {
- d0 = r0 * 2;
- d1 = r1 * 2;
- d2 = r2 * 2 * 19;
- d419 = r4 * 19;
- d4 = d419 * 2;
-
- t[0] = uint128_t(r0) * r0 + uint128_t(d4) * r1 + uint128_t(d2) * (r3 );
- t[1] = uint128_t(d0) * r1 + uint128_t(d4) * r2 + uint128_t(r3) * (r3 * 19);
- t[2] = uint128_t(d0) * r2 + uint128_t(r1) * r1 + uint128_t(d4) * (r3 );
- t[3] = uint128_t(d0) * r3 + uint128_t(d1) * r2 + uint128_t(r4) * (d419 );
- t[4] = uint128_t(d0) * r4 + uint128_t(d1) * r3 + uint128_t(r2) * (r2 );
-
- r0 = t[0] & 0x7ffffffffffff; c = carry_shift(t[0], 51);
- t[1] += c; r1 = t[1] & 0x7ffffffffffff; c = carry_shift(t[1], 51);
- t[2] += c; r2 = t[2] & 0x7ffffffffffff; c = carry_shift(t[2], 51);
- t[3] += c; r3 = t[3] & 0x7ffffffffffff; c = carry_shift(t[3], 51);
- t[4] += c; r4 = t[4] & 0x7ffffffffffff; c = carry_shift(t[4], 51);
- r0 += c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffff;
- r1 += c; c = r1 >> 51; r1 = r1 & 0x7ffffffffffff;
- r2 += c;
- } while(--count);
-
- output[0] = r0;
- output[1] = r1;
- output[2] = r2;
- output[3] = r3;
- output[4] = r4;
-}
+ a = uint128_t(in[4]) * scalar + carry_shift(a, 51);
+ out[4] = a & 0x7ffffffffffff;
-/* Load a little-endian 64-bit number */
-static limb
-load_limb(const u8 *in) {
- return load_le<uint64_t>(in, 0);
-}
+ out[0] += carry_shift(a, 51) * 19;
+ }
-static void
-store_limb(u8 *out, limb in) {
- store_le(in, out);
-}
+/* Multiply two numbers: out = in2 * in
+*
+* out must be distinct to both inputs. The inputs are reduced coefficient
+* form, the out is not.
+*
+* Assumes that in[i] < 2**55 and likewise for in2.
+* On return, out[i] < 2**52
+*/
+inline void fmul(uint64_t out[5], const uint64_t in2[5], const uint64_t in[5])
+ {
+ uint128_t t[5];
+ uint64_t r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c;
+
+ r0 = in[0];
+ r1 = in[1];
+ r2 = in[2];
+ r3 = in[3];
+ r4 = in[4];
+
+ s0 = in2[0];
+ s1 = in2[1];
+ s2 = in2[2];
+ s3 = in2[3];
+ s4 = in2[4];
+
+ t[0] = uint128_t(r0) * s0;
+ t[1] = uint128_t(r0) * s1 + uint128_t(r1) * s0;
+ t[2] = uint128_t(r0) * s2 + uint128_t(r2) * s0 + uint128_t(r1) * s1;
+ t[3] = uint128_t(r0) * s3 + uint128_t(r3) * s0 + uint128_t(r1) * s2 + uint128_t(r2) * s1;
+ t[4] = uint128_t(r0) * s4 + uint128_t(r4) * s0 + uint128_t(r3) * s1 + uint128_t(r1) * s3 + uint128_t(r2) * s2;
+
+ r4 *= 19;
+ r1 *= 19;
+ r2 *= 19;
+ r3 *= 19;
+
+ t[0] += uint128_t(r4) * s1 + uint128_t(r1) * s4 + uint128_t(r2) * s3 + uint128_t(r3) * s2;
+ t[1] += uint128_t(r4) * s2 + uint128_t(r2) * s4 + uint128_t(r3) * s3;
+ t[2] += uint128_t(r4) * s3 + uint128_t(r3) * s4;
+ t[3] += uint128_t(r4) * s4;
+
+ r0 = t[0] & 0x7ffffffffffff; c = carry_shift(t[0], 51);
+ t[1] += c; r1 = t[1] & 0x7ffffffffffff; c = carry_shift(t[1], 51);
+ t[2] += c; r2 = t[2] & 0x7ffffffffffff; c = carry_shift(t[2], 51);
+ t[3] += c; r3 = t[3] & 0x7ffffffffffff; c = carry_shift(t[3], 51);
+ t[4] += c; r4 = t[4] & 0x7ffffffffffff; c = carry_shift(t[4], 51);
+ r0 += c * 19; c = carry_shift(r0, 51); r0 = r0 & 0x7ffffffffffff;
+ r1 += c; c = carry_shift(r1, 51); r1 = r1 & 0x7ffffffffffff;
+ r2 += c;
+
+ out[0] = r0;
+ out[1] = r1;
+ out[2] = r2;
+ out[3] = r3;
+ out[4] = r4;
+ }
+
+inline void fsquare_times(uint64_t out[5], const uint64_t in[5], size_t count)
+ {
+ uint64_t r0 = in[0];
+ uint64_t r1 = in[1];
+ uint64_t r2 = in[2];
+ uint64_t r3 = in[3];
+ uint64_t r4 = in[4];
+
+ for(size_t i = 0; i != count; ++i)
+ {
+ const uint64_t d0 = r0 * 2;
+ const uint64_t d1 = r1 * 2;
+ const uint64_t d2 = r2 * 2 * 19;
+ const uint64_t d419 = r4 * 19;
+ const uint64_t d4 = d419 * 2;
+
+ uint128_t t0 = uint128_t(r0) * r0 + uint128_t(d4) * r1 + uint128_t(d2) * (r3 );
+ uint128_t t1 = uint128_t(d0) * r1 + uint128_t(d4) * r2 + uint128_t(r3) * (r3 * 19);
+ uint128_t t2 = uint128_t(d0) * r2 + uint128_t(r1) * r1 + uint128_t(d4) * (r3 );
+ uint128_t t3 = uint128_t(d0) * r3 + uint128_t(d1) * r2 + uint128_t(r4) * (d419 );
+ uint128_t t4 = uint128_t(d0) * r4 + uint128_t(d1) * r3 + uint128_t(r2) * (r2 );
+
+ r0 = t0 & 0x7ffffffffffff; t1 += carry_shift(t0, 51);
+ r1 = t1 & 0x7ffffffffffff; t2 += carry_shift(t1, 51);
+ r2 = t2 & 0x7ffffffffffff; t3 += carry_shift(t2, 51);
+ r3 = t3 & 0x7ffffffffffff; t4 += carry_shift(t3, 51);
+ r4 = t4 & 0x7ffffffffffff;
+
+ uint64_t c = carry_shift(t4, 51);
+
+ r0 += c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffff;
+ r1 += c; c = r1 >> 51; r1 = r1 & 0x7ffffffffffff;
+ r2 += c;
+ }
+
+ out[0] = r0;
+ out[1] = r1;
+ out[2] = r2;
+ out[3] = r3;
+ out[4] = r4;
+ }
/* Take a little-endian, 32-byte number and expand it into polynomial form */
-static void
-fexpand(limb *output, const u8 *in) {
- output[0] = load_limb(in) & 0x7ffffffffffff;
- output[1] = (load_limb(in+6) >> 3) & 0x7ffffffffffff;
- output[2] = (load_limb(in+12) >> 6) & 0x7ffffffffffff;
- output[3] = (load_limb(in+19) >> 1) & 0x7ffffffffffff;
- output[4] = (load_limb(in+24) >> 12) & 0x7ffffffffffff;
-}
+inline void fexpand(uint64_t *out, const uint8_t *in)
+ {
+ out[0] = load_le<uint64_t>(in, 0) & 0x7ffffffffffff;
+ out[1] = (load_le<uint64_t>(in+6, 0) >> 3) & 0x7ffffffffffff;
+ out[2] = (load_le<uint64_t>(in+12, 0) >> 6) & 0x7ffffffffffff;
+ out[3] = (load_le<uint64_t>(in+19, 0) >> 1) & 0x7ffffffffffff;
+ out[4] = (load_le<uint64_t>(in+24, 0) >> 12) & 0x7ffffffffffff;
+ }
/* Take a fully reduced polynomial form number and contract it into a
- * little-endian, 32-byte array
- */
-static void
-fcontract(u8 *output, const felem input) {
- uint128_t t[5];
-
- t[0] = input[0];
- t[1] = input[1];
- t[2] = input[2];
- t[3] = input[3];
- t[4] = input[4];
-
- t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
- t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
- t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
- t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
- t[0] += (t[4] >> 51) * 19; t[4] &= 0x7ffffffffffff;
-
- t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
- t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
- t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
- t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
- t[0] += (t[4] >> 51) * 19; t[4] &= 0x7ffffffffffff;
-
- /* now t is between 0 and 2^255-1, properly carried. */
- /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
-
- t[0] += 19;
-
- t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
- t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
- t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
- t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
- t[0] += (t[4] >> 51) * 19; t[4] &= 0x7ffffffffffff;
-
- /* now between 19 and 2^255-1 in both cases, and offset by 19. */
-
- t[0] += 0x8000000000000 - 19;
- t[1] += 0x8000000000000 - 1;
- t[2] += 0x8000000000000 - 1;
- t[3] += 0x8000000000000 - 1;
- t[4] += 0x8000000000000 - 1;
-
- /* now between 2^255 and 2^256-20, and offset by 2^255. */
-
- t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
- t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
- t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
- t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
- t[4] &= 0x7ffffffffffff;
-
- store_limb(output, combine_lower(t[0], 0, t[1], 51));
- store_limb(output+8, combine_lower(t[1], 13, t[2], 38));
- store_limb(output+16, combine_lower(t[2], 26, t[3], 25));
- store_limb(output+24, combine_lower(t[3], 39, t[4], 12));
-}
+* little-endian, 32-byte array
+*/
+inline void fcontract(uint8_t *out, const uint64_t input[5])
+ {
+ uint128_t t0 = input[0];
+ uint128_t t1 = input[1];
+ uint128_t t2 = input[2];
+ uint128_t t3 = input[3];
+ uint128_t t4 = input[4];
+
+ t1 += t0 >> 51; t0 &= 0x7ffffffffffff;
+ t2 += t1 >> 51; t1 &= 0x7ffffffffffff;
+ t3 += t2 >> 51; t2 &= 0x7ffffffffffff;
+ t4 += t3 >> 51; t3 &= 0x7ffffffffffff;
+ t0 += (t4 >> 51) * 19; t4 &= 0x7ffffffffffff;
+
+ t1 += t0 >> 51; t0 &= 0x7ffffffffffff;
+ t2 += t1 >> 51; t1 &= 0x7ffffffffffff;
+ t3 += t2 >> 51; t2 &= 0x7ffffffffffff;
+ t4 += t3 >> 51; t3 &= 0x7ffffffffffff;
+ t0 += (t4 >> 51) * 19; t4 &= 0x7ffffffffffff;
+
+ /* now t is between 0 and 2^255-1, properly carried. */
+ /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
+
+ t0 += 19;
+
+ t1 += t0 >> 51; t0 &= 0x7ffffffffffff;
+ t2 += t1 >> 51; t1 &= 0x7ffffffffffff;
+ t3 += t2 >> 51; t2 &= 0x7ffffffffffff;
+ t4 += t3 >> 51; t3 &= 0x7ffffffffffff;
+ t0 += (t4 >> 51) * 19; t4 &= 0x7ffffffffffff;
+
+ /* now between 19 and 2^255-1 in both cases, and offset by 19. */
+
+ t0 += 0x8000000000000 - 19;
+ t1 += 0x8000000000000 - 1;
+ t2 += 0x8000000000000 - 1;
+ t3 += 0x8000000000000 - 1;
+ t4 += 0x8000000000000 - 1;
+
+ /* now between 2^255 and 2^256-20, and offset by 2^255. */
+
+ t1 += t0 >> 51; t0 &= 0x7ffffffffffff;
+ t2 += t1 >> 51; t1 &= 0x7ffffffffffff;
+ t3 += t2 >> 51; t2 &= 0x7ffffffffffff;
+ t4 += t3 >> 51; t3 &= 0x7ffffffffffff;
+ t4 &= 0x7ffffffffffff;
+
+ store_le(out,
+ combine_lower(t0, 0, t1, 51),
+ combine_lower(t1, 13, t2, 38),
+ combine_lower(t2, 26, t3, 25),
+ combine_lower(t3, 39, t4, 12));
+ }
/* Input: Q, Q', Q-Q'
- * Output: 2Q, Q+Q'
- *
- * result.two_q (2*Q): long form
- * result.q_plus_q_dash (Q + Q): long form
- * in.q: short form, destroyed
- * in.q_dash: short form, destroyed
- * in.q_minus_q_dash: short form, preserved
- */
-static void
+* Out: 2Q, Q+Q'
+*
+* result.two_q (2*Q): long form
+* result.q_plus_q_dash (Q + Q): long form
+* in.q: short form, destroyed
+* in.q_dash: short form, destroyed
+* in.q_minus_q_dash: short form, preserved
+*/
+void
fmonty(fmonty_out_t& result, fmonty_in_t& in)
-{
- limb origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5],
- zzprime[5], zzzprime[5];
-
- copy_mem(origx, in.q.x, 5);
- fsum(in.q.x, in.q.z);
- fdifference_backwards(in.q.z, origx); // does x - z
-
- copy_mem(origxprime, in.q_dash.x, 5);
- fsum(in.q_dash.x, in.q_dash.z);
- fdifference_backwards(in.q_dash.z, origxprime);
- fmul(xxprime, in.q_dash.x, in.q.z);
- fmul(zzprime, in.q.x, in.q_dash.z);
- copy_mem(origxprime, xxprime, 5);
- fsum(xxprime, zzprime);
- fdifference_backwards(zzprime, origxprime);
- fsquare_times(result.q_plus_q_dash.x, xxprime, 1);
- fsquare_times(zzzprime, zzprime, 1);
- fmul(result.q_plus_q_dash.z, zzzprime, in.q_minus_q_dash);
-
- fsquare_times(xx, in.q.x, 1);
- fsquare_times(zz, in.q.z, 1);
- fmul(result.two_q.x, xx, zz);
- fdifference_backwards(zz, xx); // does zz = xx - zz
- fscalar_product(zzz, zz, 121665);
- fsum(zzz, xx);
- fmul(result.two_q.z, zz, zzz);
-}
+ {
+ uint64_t origx[5];
+ uint64_t origxprime[5];
+ uint64_t zzz[5];
+ uint64_t xx[5];
+ uint64_t zz[5];
+ uint64_t xxprime[5];
+ uint64_t zzprime[5];
+ uint64_t zzzprime[5];
+
+ copy_mem(origx, in.q.x, 5);
+ fsum(in.q.x, in.q.z);
+ fdifference_backwards(in.q.z, origx); // does x - z
+
+ copy_mem(origxprime, in.q_dash.x, 5);
+ fsum(in.q_dash.x, in.q_dash.z);
+ fdifference_backwards(in.q_dash.z, origxprime);
+ fmul(xxprime, in.q_dash.x, in.q.z);
+ fmul(zzprime, in.q.x, in.q_dash.z);
+ copy_mem(origxprime, xxprime, 5);
+ fsum(xxprime, zzprime);
+ fdifference_backwards(zzprime, origxprime);
+ fsquare_times(result.q_plus_q_dash.x, xxprime, 1);
+ fsquare_times(zzzprime, zzprime, 1);
+ fmul(result.q_plus_q_dash.z, zzzprime, in.q_minus_q_dash);
+
+ fsquare_times(xx, in.q.x, 1);
+ fsquare_times(zz, in.q.z, 1);
+ fmul(result.two_q.x, xx, zz);
+ fdifference_backwards(zz, xx); // does zz = xx - zz
+ fscalar_product(zzz, zz, 121665);
+ fsum(zzz, xx);
+ fmul(result.two_q.z, zz, zzz);
+ }
// -----------------------------------------------------------------------------
-// Maybe swap the contents of two limb arrays (@a and @b), each @len elements
+// Maybe swap the contents of two uint64_t arrays (@a and @b), each @len elements
// long. Perform the swap iff @swap is non-zero.
//
// This function performs the swap without leaking any side-channel
// information.
// -----------------------------------------------------------------------------
-static void
-swap_conditional(limb a[5], limb b[5], limb iswap) {
- unsigned i;
- const limb swap = static_cast<limb>(-iswap);
-
- for (i = 0; i < 5; ++i) {
- const limb x = swap & (a[i] ^ b[i]);
- a[i] ^= x;
- b[i] ^= x;
- }
-}
+void swap_conditional(uint64_t a[5], uint64_t b[5], uint64_t iswap)
+ {
+ const uint64_t swap = static_cast<uint64_t>(-iswap);
+
+ for(size_t i = 0; i < 5; ++i)
+ {
+ const uint64_t x = swap & (a[i] ^ b[i]);
+ a[i] ^= x;
+ b[i] ^= x;
+ }
+ }
/* Calculates nQ where Q is the x-coordinate of a point on the curve
- *
- * resultx/resultz: the x coordinate of the resulting curve point (short form)
- * n: a little endian, 32-byte number
- * q: a point of the curve (short form)
- */
-static void
-cmult(limb *resultx, limb *resultz, const u8 *n, const limb *q) {
- limb a[5] = {0}, b[5] = {1}, c[5] = {1}, d[5] = {0};
- limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
- limb e[5] = {0}, f[5] = {1}, g[5] = {0}, h[5] = {1};
- limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
-
- unsigned i, j;
-
- copy_mem(nqpqx, q, 5);
-
- for (i = 0; i < 32; ++i) {
- u8 byteval = n[31 - i];
- for (j = 0; j < 8; ++j) {
- const limb bit = byteval >> 7;
-
- swap_conditional(nqx, nqpqx, bit);
- swap_conditional(nqz, nqpqz, bit);
-
- fmonty_out_t result { {nqx2, nqz2}, {nqpqx2, nqpqz2} };
- fmonty_in_t in { { nqx, nqz }, { nqpqx, nqpqz }, q };
- fmonty(result, in);
- swap_conditional(nqx2, nqpqx2, bit);
- swap_conditional(nqz2, nqpqz2, bit);
-
- t = nqx;
- nqx = nqx2;
- nqx2 = t;
- t = nqz;
- nqz = nqz2;
- nqz2 = t;
- t = nqpqx;
- nqpqx = nqpqx2;
- nqpqx2 = t;
- t = nqpqz;
- nqpqz = nqpqz2;
- nqpqz2 = t;
-
- byteval <<= 1;
- }
- }
-
- copy_mem(resultx, nqx, 5);
- copy_mem(resultz, nqz, 5);
-}
+*
+* resultx/resultz: the x coordinate of the resulting curve point (short form)
+* n: a little endian, 32-byte number
+* q: a point of the curve (short form)
+*/
+void cmult(uint64_t *resultx, uint64_t *resultz, const uint8_t *n, const uint64_t *q)
+ {
+ uint64_t a[5] = {0};
+ uint64_t b[5] = {1};
+ uint64_t c[5] = {1};
+ uint64_t d[5] = {0};
+ uint64_t e[5] = {0};
+ uint64_t f[5] = {1};
+ uint64_t g[5] = {0};
+ uint64_t h[5] = {1};
+
+ uint64_t *nqpqx = a;
+ uint64_t *nqpqz = b;
+ uint64_t *nqx = c;
+ uint64_t *nqz = d;
+ uint64_t *nqpqx2 = e;
+ uint64_t *nqpqz2 = f;
+ uint64_t *nqx2 = g;
+ uint64_t *nqz2 = h;
+
+ copy_mem(nqpqx, q, 5);
+
+ for(size_t i = 0; i < 32; ++i)
+ {
+ for(size_t j = 0; j < 8; ++j)
+ {
+ const uint64_t bit = (n[31 - i] >> (7-j)) & 1;
+
+ swap_conditional(nqx, nqpqx, bit);
+ swap_conditional(nqz, nqpqz, bit);
+
+ fmonty_out_t result { {nqx2, nqz2}, {nqpqx2, nqpqz2} };
+ fmonty_in_t in { { nqx, nqz }, { nqpqx, nqpqz }, q };
+ fmonty(result, in);
+ swap_conditional(nqx2, nqpqx2, bit);
+ swap_conditional(nqz2, nqpqz2, bit);
+
+ std::swap(nqx, nqx2);
+ std::swap(nqz, nqz2);
+ std::swap(nqpqx, nqpqx2);
+ std::swap(nqpqz, nqpqz2);
+ }
+ }
+
+ copy_mem(resultx, nqx, 5);
+ copy_mem(resultz, nqz, 5);
+ }
// -----------------------------------------------------------------------------
// Shamelessly copied from djb's code, tightened a little
// -----------------------------------------------------------------------------
-static void
-crecip(felem out, const felem z) {
- felem a,t0,b,c;
-
- /* 2 */ fsquare_times(a, z, 1); // a = 2
- /* 8 */ fsquare_times(t0, a, 2);
- /* 9 */ fmul(b, t0, z); // b = 9
- /* 11 */ fmul(a, b, a); // a = 11
- /* 22 */ fsquare_times(t0, a, 1);
- /* 2^5 - 2^0 = 31 */ fmul(b, t0, b);
- /* 2^10 - 2^5 */ fsquare_times(t0, b, 5);
- /* 2^10 - 2^0 */ fmul(b, t0, b);
- /* 2^20 - 2^10 */ fsquare_times(t0, b, 10);
- /* 2^20 - 2^0 */ fmul(c, t0, b);
- /* 2^40 - 2^20 */ fsquare_times(t0, c, 20);
- /* 2^40 - 2^0 */ fmul(t0, t0, c);
- /* 2^50 - 2^10 */ fsquare_times(t0, t0, 10);
- /* 2^50 - 2^0 */ fmul(b, t0, b);
- /* 2^100 - 2^50 */ fsquare_times(t0, b, 50);
- /* 2^100 - 2^0 */ fmul(c, t0, b);
- /* 2^200 - 2^100 */ fsquare_times(t0, c, 100);
- /* 2^200 - 2^0 */ fmul(t0, t0, c);
- /* 2^250 - 2^50 */ fsquare_times(t0, t0, 50);
- /* 2^250 - 2^0 */ fmul(t0, t0, b);
- /* 2^255 - 2^5 */ fsquare_times(t0, t0, 5);
- /* 2^255 - 21 */ fmul(out, t0, a);
+void crecip(uint64_t out[5], const uint64_t z[5])
+ {
+ uint64_t a[5];
+ uint64_t b[5];
+ uint64_t c[5];
+ uint64_t t0[5];
+
+ /* 2 */ fsquare_times(a, z, 1); // a = 2
+ /* 8 */ fsquare_times(t0, a, 2);
+ /* 9 */ fmul(b, t0, z); // b = 9
+ /* 11 */ fmul(a, b, a); // a = 11
+ /* 22 */ fsquare_times(t0, a, 1);
+ /* 2^5 - 2^0 = 31 */ fmul(b, t0, b);
+ /* 2^10 - 2^5 */ fsquare_times(t0, b, 5);
+ /* 2^10 - 2^0 */ fmul(b, t0, b);
+ /* 2^20 - 2^10 */ fsquare_times(t0, b, 10);
+ /* 2^20 - 2^0 */ fmul(c, t0, b);
+ /* 2^40 - 2^20 */ fsquare_times(t0, c, 20);
+ /* 2^40 - 2^0 */ fmul(t0, t0, c);
+ /* 2^50 - 2^10 */ fsquare_times(t0, t0, 10);
+ /* 2^50 - 2^0 */ fmul(b, t0, b);
+ /* 2^100 - 2^50 */ fsquare_times(t0, b, 50);
+ /* 2^100 - 2^0 */ fmul(c, t0, b);
+ /* 2^200 - 2^100 */ fsquare_times(t0, c, 100);
+ /* 2^200 - 2^0 */ fmul(t0, t0, c);
+ /* 2^250 - 2^50 */ fsquare_times(t0, t0, 50);
+ /* 2^250 - 2^0 */ fmul(t0, t0, b);
+ /* 2^255 - 2^5 */ fsquare_times(t0, t0, 5);
+ /* 2^255 - 21 */ fmul(out, t0, a);
+ }
+
}
void
-curve25519_donna(u8 *mypublic, const u8 *secret, const u8 *basepoint) {
-
- CT::poison(secret, 32);
- CT::poison(basepoint, 32);
-
- limb bp[5], x[5], z[5], zmone[5];
- uint8_t e[32];
- int i;
-
- for (i = 0;i < 32;++i) e[i] = secret[i];
- e[0] &= 248;
- e[31] &= 127;
- e[31] |= 64;
-
- fexpand(bp, basepoint);
- cmult(x, z, e, bp);
- crecip(zmone, z);
- fmul(z, x, zmone);
- fcontract(mypublic, z);
-
- CT::unpoison(secret, 32);
- CT::unpoison(basepoint, 32);
- CT::unpoison(mypublic, 32);
-}
+curve25519_donna(uint8_t *mypublic, const uint8_t *secret, const uint8_t *basepoint)
+ {
+ CT::poison(secret, 32);
+ CT::poison(basepoint, 32);
+
+ uint64_t bp[5], x[5], z[5], zmone[5];
+ uint8_t e[32];
+
+ copy_mem(e, secret, 32);
+ e[ 0] &= 248;
+ e[31] &= 127;
+ e[31] |= 64;
+
+ fexpand(bp, basepoint);
+ cmult(x, z, e, bp);
+ crecip(zmone, z);
+ fmul(z, x, zmone);
+ fcontract(mypublic, z);
+
+ CT::unpoison(secret, 32);
+ CT::unpoison(basepoint, 32);
+ CT::unpoison(mypublic, 32);
+ }
}