aboutsummaryrefslogtreecommitdiffstats
path: root/src/lib/math/mp/mp_karat.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/math/mp/mp_karat.cpp')
-rw-r--r--src/lib/math/mp/mp_karat.cpp303
1 files changed, 303 insertions, 0 deletions
diff --git a/src/lib/math/mp/mp_karat.cpp b/src/lib/math/mp/mp_karat.cpp
new file mode 100644
index 000000000..b549a05c8
--- /dev/null
+++ b/src/lib/math/mp/mp_karat.cpp
@@ -0,0 +1,303 @@
+/*
+* Karatsuba Multiplication/Squaring
+* (C) 1999-2010 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#include <botan/internal/mp_core.h>
+#include <botan/internal/mp_asmi.h>
+#include <botan/mem_ops.h>
+
+namespace Botan {
+
+namespace {
+
+static const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32;
+static const size_t KARATSUBA_SQUARE_THRESHOLD = 32;
+
+/*
+* Karatsuba Multiplication Operation
+*/
+void karatsuba_mul(word z[], const word x[], const word y[], size_t N,
+ word workspace[])
+ {
+ if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2)
+ {
+ if(N == 6)
+ return bigint_comba_mul6(z, x, y);
+ else if(N == 8)
+ return bigint_comba_mul8(z, x, y);
+ else if(N == 16)
+ return bigint_comba_mul16(z, x, y);
+ else
+ return bigint_simple_mul(z, x, N, y, N);
+ }
+
+ const size_t N2 = N / 2;
+
+ const word* x0 = x;
+ const word* x1 = x + N2;
+ const word* y0 = y;
+ const word* y1 = y + N2;
+ word* z0 = z;
+ word* z1 = z + N;
+
+ const s32bit cmp0 = bigint_cmp(x0, N2, x1, N2);
+ const s32bit cmp1 = bigint_cmp(y1, N2, y0, N2);
+
+ clear_mem(workspace, 2*N);
+
+ //if(cmp0 && cmp1)
+ {
+ if(cmp0 > 0)
+ bigint_sub3(z0, x0, N2, x1, N2);
+ else
+ bigint_sub3(z0, x1, N2, x0, N2);
+
+ if(cmp1 > 0)
+ bigint_sub3(z1, y1, N2, y0, N2);
+ else
+ bigint_sub3(z1, y0, N2, y1, N2);
+
+ karatsuba_mul(workspace, z0, z1, N2, workspace+N);
+ }
+
+ karatsuba_mul(z0, x0, y0, N2, workspace+N);
+ karatsuba_mul(z1, x1, y1, N2, workspace+N);
+
+ const word ws_carry = bigint_add3_nc(workspace + N, z0, N, z1, N);
+ word z_carry = bigint_add2_nc(z + N2, N, workspace + N, N);
+
+ z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
+ bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
+
+ if((cmp0 == cmp1) || (cmp0 == 0) || (cmp1 == 0))
+ bigint_add2(z + N2, 2*N-N2, workspace, N);
+ else
+ bigint_sub2(z + N2, 2*N-N2, workspace, N);
+ }
+
+/*
+* Karatsuba Squaring Operation
+*/
+void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[])
+ {
+ if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2)
+ {
+ if(N == 6)
+ return bigint_comba_sqr6(z, x);
+ else if(N == 8)
+ return bigint_comba_sqr8(z, x);
+ else if(N == 16)
+ return bigint_comba_sqr16(z, x);
+ else
+ return bigint_simple_sqr(z, x, N);
+ }
+
+ const size_t N2 = N / 2;
+
+ const word* x0 = x;
+ const word* x1 = x + N2;
+ word* z0 = z;
+ word* z1 = z + N;
+
+ const s32bit cmp = bigint_cmp(x0, N2, x1, N2);
+
+ clear_mem(workspace, 2*N);
+
+ //if(cmp)
+ {
+ if(cmp > 0)
+ bigint_sub3(z0, x0, N2, x1, N2);
+ else
+ bigint_sub3(z0, x1, N2, x0, N2);
+
+ karatsuba_sqr(workspace, z0, N2, workspace+N);
+ }
+
+ karatsuba_sqr(z0, x0, N2, workspace+N);
+ karatsuba_sqr(z1, x1, N2, workspace+N);
+
+ const word ws_carry = bigint_add3_nc(workspace + N, z0, N, z1, N);
+ word z_carry = bigint_add2_nc(z + N2, N, workspace + N, N);
+
+ z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
+ bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
+
+ /*
+ * This is only actually required if cmp is != 0, however
+ * if cmp==0 then workspace[0:N] == 0 and avoiding the jump
+ * hides a timing channel.
+ */
+ bigint_sub2(z + N2, 2*N-N2, workspace, N);
+ }
+
+/*
+* Pick a good size for the Karatsuba multiply
+*/
+size_t karatsuba_size(size_t z_size,
+ size_t x_size, size_t x_sw,
+ size_t y_size, size_t y_sw)
+ {
+ if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size)
+ return 0;
+
+ if(((x_size == x_sw) && (x_size % 2)) ||
+ ((y_size == y_sw) && (y_size % 2)))
+ return 0;
+
+ const size_t start = (x_sw > y_sw) ? x_sw : y_sw;
+ const size_t end = (x_size < y_size) ? x_size : y_size;
+
+ if(start == end)
+ {
+ if(start % 2)
+ return 0;
+ return start;
+ }
+
+ for(size_t j = start; j <= end; ++j)
+ {
+ if(j % 2)
+ continue;
+
+ if(2*j > z_size)
+ return 0;
+
+ if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size)
+ {
+ if(j % 4 == 2 &&
+ (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size)
+ return j+2;
+ return j;
+ }
+ }
+
+ return 0;
+ }
+
+/*
+* Pick a good size for the Karatsuba squaring
+*/
+size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw)
+ {
+ if(x_sw == x_size)
+ {
+ if(x_sw % 2)
+ return 0;
+ return x_sw;
+ }
+
+ for(size_t j = x_sw; j <= x_size; ++j)
+ {
+ if(j % 2)
+ continue;
+
+ if(2*j > z_size)
+ return 0;
+
+ if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size)
+ return j+2;
+ return j;
+ }
+
+ return 0;
+ }
+
+}
+
+/*
+* Multiplication Algorithm Dispatcher
+*/
+void bigint_mul(word z[], size_t z_size, word workspace[],
+ const word x[], size_t x_size, size_t x_sw,
+ const word y[], size_t y_size, size_t y_sw)
+ {
+ if(x_sw == 1)
+ {
+ bigint_linmul3(z, y, y_sw, x[0]);
+ }
+ else if(y_sw == 1)
+ {
+ bigint_linmul3(z, x, x_sw, y[0]);
+ }
+ else if(x_sw <= 4 && x_size >= 4 &&
+ y_sw <= 4 && y_size >= 4 && z_size >= 8)
+ {
+ bigint_comba_mul4(z, x, y);
+ }
+ else if(x_sw <= 6 && x_size >= 6 &&
+ y_sw <= 6 && y_size >= 6 && z_size >= 12)
+ {
+ bigint_comba_mul6(z, x, y);
+ }
+ else if(x_sw <= 8 && x_size >= 8 &&
+ y_sw <= 8 && y_size >= 8 && z_size >= 16)
+ {
+ bigint_comba_mul8(z, x, y);
+ }
+ else if(x_sw <= 16 && x_size >= 16 &&
+ y_sw <= 16 && y_size >= 16 && z_size >= 32)
+ {
+ bigint_comba_mul16(z, x, y);
+ }
+ else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
+ y_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
+ !workspace)
+ {
+ bigint_simple_mul(z, x, x_sw, y, y_sw);
+ }
+ else
+ {
+ const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
+
+ if(N)
+ karatsuba_mul(z, x, y, N, workspace);
+ else
+ bigint_simple_mul(z, x, x_sw, y, y_sw);
+ }
+ }
+
+/*
+* Squaring Algorithm Dispatcher
+*/
+void bigint_sqr(word z[], size_t z_size, word workspace[],
+ const word x[], size_t x_size, size_t x_sw)
+ {
+ if(x_sw == 1)
+ {
+ bigint_linmul3(z, x, x_sw, x[0]);
+ }
+ else if(x_sw <= 4 && x_size >= 4 && z_size >= 8)
+ {
+ bigint_comba_sqr4(z, x);
+ }
+ else if(x_sw <= 6 && x_size >= 6 && z_size >= 12)
+ {
+ bigint_comba_sqr6(z, x);
+ }
+ else if(x_sw <= 8 && x_size >= 8 && z_size >= 16)
+ {
+ bigint_comba_sqr8(z, x);
+ }
+ else if(x_sw <= 16 && x_size >= 16 && z_size >= 32)
+ {
+ bigint_comba_sqr16(z, x);
+ }
+ else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace)
+ {
+ bigint_simple_sqr(z, x, x_sw);
+ }
+ else
+ {
+ const size_t N = karatsuba_size(z_size, x_size, x_sw);
+
+ if(N)
+ karatsuba_sqr(z, x, N, workspace);
+ else
+ bigint_simple_sqr(z, x, x_sw);
+ }
+ }
+
+}