aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorlloyd <[email protected]>2010-09-28 14:50:16 +0000
committerlloyd <[email protected]>2010-09-28 14:50:16 +0000
commit526f7067844eac0f1dccf2d0c4489b9c68e34632 (patch)
tree676bd818e3a98eb9157992da9a6b9d4787eb526a /src
parent7c0a8f58cfbbefcb37d0337ba4f8c592998bf38e (diff)
Cleanup Karatsuba a bit
Diffstat (limited to 'src')
-rw-r--r--src/math/mp/mp_karat.cpp210
1 files changed, 107 insertions, 103 deletions
diff --git a/src/math/mp/mp_karat.cpp b/src/math/mp/mp_karat.cpp
index 8ae346f1e..1cb278367 100644
--- a/src/math/mp/mp_karat.cpp
+++ b/src/math/mp/mp_karat.cpp
@@ -1,13 +1,13 @@
/*
* Karatsuba Multiplication/Squaring
-* (C) 1999-2008 Jack Lloyd
+* (C) 1999-2010 Jack Lloyd
*
* Distributed under the terms of the Botan license
*/
#include <botan/internal/mp_core.h>
-#include <botan/mem_ops.h>
#include <botan/internal/mp_asmi.h>
+#include <botan/mem_ops.h>
namespace Botan {
@@ -19,78 +19,79 @@ namespace {
void karatsuba_mul(word z[], const word x[], const word y[], u32bit N,
word workspace[])
{
- if(N == 6)
- bigint_comba_mul6(z, x, y);
- else if(N == 8)
- bigint_comba_mul8(z, x, y);
- else if(N == 16)
- bigint_comba_mul16(z, x, y);
- else if(N < BOTAN_KARAT_MUL_THRESHOLD || N % 2)
- bigint_simple_mul(z, x, N, y, N);
- else
+ if(N < BOTAN_KARAT_MUL_THRESHOLD || N % 2)
{
- const u32bit N2 = N / 2;
+ if(N == 6)
+ return bigint_comba_mul6(z, x, y);
+ else if(N == 8)
+ return bigint_comba_mul8(z, x, y);
+ else if(N == 16)
+ return bigint_comba_mul16(z, x, y);
+ else
+ return bigint_simple_mul(z, x, N, y, N);
+ }
- const word* x0 = x;
- const word* x1 = x + N2;
- const word* y0 = y;
- const word* y1 = y + N2;
- word* z0 = z;
- word* z1 = z + N;
+ const u32bit N2 = N / 2;
- const s32bit cmp0 = bigint_cmp(x0, N2, x1, N2);
- const s32bit cmp1 = bigint_cmp(y1, N2, y0, N2);
+ const word* x0 = x;
+ const word* x1 = x + N2;
+ const word* y0 = y;
+ const word* y1 = y + N2;
+ word* z0 = z;
+ word* z1 = z + N;
- clear_mem(workspace, 2*N);
+ const s32bit cmp0 = bigint_cmp(x0, N2, x1, N2);
+ const s32bit cmp1 = bigint_cmp(y1, N2, y0, N2);
- if(cmp0 && cmp1)
- {
- if(cmp0 > 0)
- bigint_sub3(z0, x0, N2, x1, N2);
- else
- bigint_sub3(z0, x1, N2, x0, N2);
+ clear_mem(workspace, 2*N);
- if(cmp1 > 0)
- bigint_sub3(z1, y1, N2, y0, N2);
- else
- bigint_sub3(z1, y0, N2, y1, N2);
+ if(cmp0 && cmp1)
+ {
+ if(cmp0 > 0)
+ bigint_sub3(z0, x0, N2, x1, N2);
+ else
+ bigint_sub3(z0, x1, N2, x0, N2);
- karatsuba_mul(workspace, z0, z1, N2, workspace+N);
- }
+ if(cmp1 > 0)
+ bigint_sub3(z1, y1, N2, y0, N2);
+ else
+ bigint_sub3(z1, y0, N2, y1, N2);
- karatsuba_mul(z0, x0, y0, N2, workspace+N);
- karatsuba_mul(z1, x1, y1, N2, workspace+N);
+ karatsuba_mul(workspace, z0, z1, N2, workspace+N);
+ }
- const u32bit blocks_of_8 = N - (N % 8);
+ karatsuba_mul(z0, x0, y0, N2, workspace+N);
+ karatsuba_mul(z1, x1, y1, N2, workspace+N);
- word carry = 0;
+ const u32bit blocks_of_8 = N - (N % 8);
- for(u32bit j = 0; j != blocks_of_8; j += 8)
- carry = word8_add3(workspace + N + j, z0 + j, z1 + j, carry);
+ word ws_carry = 0;
- for(u32bit j = blocks_of_8; j != N; ++j)
- workspace[N + j] = word_add(z0[j], z1[j], &carry);
+ for(u32bit j = 0; j != blocks_of_8; j += 8)
+ ws_carry = word8_add3(workspace + N + j, z0 + j, z1 + j, ws_carry);
- word carry2 = 0;
+ for(u32bit j = blocks_of_8; j != N; ++j)
+ workspace[N + j] = word_add(z0[j], z1[j], &ws_carry);
- for(u32bit j = 0; j != blocks_of_8; j += 8)
- carry2 = word8_add2(z + N2 + j, workspace + N + j, carry2);
+ word z_carry = 0;
- for(u32bit j = blocks_of_8; j != N; ++j)
- z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &carry2);
+ for(u32bit j = 0; j != blocks_of_8; j += 8)
+ z_carry = word8_add2(z + N2 + j, workspace + N + j, z_carry);
- z[N + N2] = word_add(z[N + N2], carry2, &carry);
+ for(u32bit j = blocks_of_8; j != N; ++j)
+ z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &z_carry);
- if(carry)
- for(u32bit j = 1; j != N2; ++j)
- if(++z[N + N2 + j])
- break;
+ z[N + N2] = word_add(z[N + N2], ws_carry, &z_carry);
- if((cmp0 == cmp1) || (cmp0 == 0) || (cmp1 == 0))
- bigint_add2(z + N2, 2*N-N2, workspace, N);
- else
- bigint_sub2(z + N2, 2*N-N2, workspace, N);
- }
+ if(z_carry)
+ for(u32bit j = 1; j != N2; ++j)
+ if(++z[N + N2 + j])
+ break;
+
+ if((cmp0 == cmp1) || (cmp0 == 0) || (cmp1 == 0))
+ bigint_add2(z + N2, 2*N-N2, workspace, N);
+ else
+ bigint_sub2(z + N2, 2*N-N2, workspace, N);
}
/*
@@ -98,70 +99,73 @@ void karatsuba_mul(word z[], const word x[], const word y[], u32bit N,
*/
void karatsuba_sqr(word z[], const word x[], u32bit N, word workspace[])
{
- if(N == 6)
- bigint_comba_sqr6(z, x);
- else if(N == 8)
- bigint_comba_sqr8(z, x);
- else if(N == 16)
- bigint_comba_sqr16(z, x);
- else if(N < BOTAN_KARAT_SQR_THRESHOLD || N % 2)
- bigint_simple_sqr(z, x, N);
- else
+ if(N < BOTAN_KARAT_SQR_THRESHOLD || N % 2)
{
- const u32bit N2 = N / 2;
+ if(N == 6)
+ return bigint_comba_sqr6(z, x);
+ else if(N == 8)
+ return bigint_comba_sqr8(z, x);
+ else if(N == 16)
+ return bigint_comba_sqr16(z, x);
+ else
+ return bigint_simple_sqr(z, x, N);
+ }
- const word* x0 = x;
- const word* x1 = x + N2;
- word* z0 = z;
- word* z1 = z + N;
+ const u32bit N2 = N / 2;
- const s32bit cmp = bigint_cmp(x0, N2, x1, N2);
+ const word* x0 = x;
+ const word* x1 = x + N2;
+ word* z0 = z;
+ word* z1 = z + N;
- clear_mem(workspace, 2*N);
+ const s32bit cmp = bigint_cmp(x0, N2, x1, N2);
- if(cmp)
- {
- if(cmp > 0)
- bigint_sub3(z0, x0, N2, x1, N2);
- else
- bigint_sub3(z0, x1, N2, x0, N2);
+ clear_mem(workspace, 2*N);
- karatsuba_sqr(workspace, z0, N2, workspace+N);
- }
+ if(cmp)
+ {
+ if(cmp > 0)
+ bigint_sub3(z0, x0, N2, x1, N2);
+ else
+ bigint_sub3(z0, x1, N2, x0, N2);
- karatsuba_sqr(z0, x0, N2, workspace+N);
- karatsuba_sqr(z1, x1, N2, workspace+N);
+ karatsuba_sqr(workspace, z0, N2, workspace+N);
+ }
- const u32bit blocks_of_8 = N - (N % 8);
+ karatsuba_sqr(z0, x0, N2, workspace+N);
+ karatsuba_sqr(z1, x1, N2, workspace+N);
- word carry = 0;
+ const u32bit blocks_of_8 = N - (N % 8);
- for(u32bit j = 0; j != blocks_of_8; j += 8)
- carry = word8_add3(workspace + N + j, z0 + j, z1 + j, carry);
+ word ws_carry = 0;
- for(u32bit j = blocks_of_8; j != N; ++j)
- workspace[N + j] = word_add(z0[j], z1[j], &carry);
+ for(u32bit j = 0; j != blocks_of_8; j += 8)
+ ws_carry = word8_add3(workspace + N + j, z0 + j, z1 + j, ws_carry);
- word carry2 = 0;
+ for(u32bit j = blocks_of_8; j != N; ++j)
+ workspace[N + j] = word_add(z0[j], z1[j], &ws_carry);
- for(u32bit j = 0; j != blocks_of_8; j += 8)
- carry2 = word8_add2(z + N2 + j, workspace + N + j, carry2);
+ word z_carry = 0;
- for(u32bit j = blocks_of_8; j != N; ++j)
- z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &carry2);
+ for(u32bit j = 0; j != blocks_of_8; j += 8)
+ z_carry = word8_add2(z + N2 + j, workspace + N + j, z_carry);
- z[N + N2] = word_add(z[N + N2], carry2, &carry);
+ for(u32bit j = blocks_of_8; j != N; ++j)
+ z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &z_carry);
- if(carry)
- for(u32bit j = 1; j != N2; ++j)
- if(++z[N + N2 + j])
- break;
+ z[N + N2] = word_add(z[N + N2], ws_carry, &z_carry);
- if(cmp == 0)
- bigint_add2(z + N2, 2*N-N2, workspace, N);
- else
- bigint_sub2(z + N2, 2*N-N2, workspace, N);
- }
+ if(z_carry)
+ for(u32bit j = 1; j != N2; ++j)
+ if(++z[N + N2 + j])
+ break;
+
+ /*
+ * This is only actually required if cmp is != 0, however
+ * if cmp==0 then workspace[0:N] == 0 and avoiding the jump
+ * hides a timing channel.
+ */
+ bigint_sub2(z + N2, 2*N-N2, workspace, N);
}
/*