aboutsummaryrefslogtreecommitdiffstats
path: root/src/bigint/mp_karat.cpp
diff options
context:
space:
mode:
authorlloyd <[email protected]>2008-09-28 22:40:27 +0000
committerlloyd <[email protected]>2008-09-28 22:40:27 +0000
commitc32a8e6c7ecf97fc9423e6a967ce3d98b0689404 (patch)
treed9d41c74dd0f99f43119ae355f461fae1f3bf32c /src/bigint/mp_karat.cpp
parent31204986023619c385d378e79a6511bb81ef7b78 (diff)
Move all BigInt stuff into bigint/. Currently all asm modules are disabled;
configure.pl doesn't understand how to handle this yet (replace logic only understands stuff in src, not how one module can replace another modules src, or anything about prioritizing). Move some hex and base64 stuff out of charset.cpp and into their codec directories.
Diffstat (limited to 'src/bigint/mp_karat.cpp')
-rw-r--r--src/bigint/mp_karat.cpp334
1 files changed, 334 insertions, 0 deletions
diff --git a/src/bigint/mp_karat.cpp b/src/bigint/mp_karat.cpp
new file mode 100644
index 000000000..15b0551fd
--- /dev/null
+++ b/src/bigint/mp_karat.cpp
@@ -0,0 +1,334 @@
+/*************************************************
+* Karatsuba Multiplication/Squaring Source File *
+* (C) 1999-2008 Jack Lloyd *
+*************************************************/
+
+#include <botan/mp_core.h>
+#include <botan/mem_ops.h>
+#include <botan/mp_asmi.h>
+
+namespace Botan {
+
+namespace {
+
+/*************************************************
+* Karatsuba Multiplication Operation *
+*************************************************/
+void karatsuba_mul(word z[], const word x[], const word y[], u32bit N,
+ word workspace[])
+ {
+ if(N == 6)
+ bigint_comba_mul6(z, x, y);
+ else if(N == 8)
+ bigint_comba_mul8(z, x, y);
+ else if(N == 16)
+ bigint_comba_mul16(z, x, y);
+ else if(N < BOTAN_KARAT_MUL_THRESHOLD || N % 2)
+ bigint_simple_mul(z, x, N, y, N);
+ else
+ {
+ const u32bit N2 = N / 2;
+
+ const word* x0 = x;
+ const word* x1 = x + N2;
+ const word* y0 = y;
+ const word* y1 = y + N2;
+ word* z0 = z;
+ word* z1 = z + N;
+
+ const s32bit cmp0 = bigint_cmp(x0, N2, x1, N2);
+ const s32bit cmp1 = bigint_cmp(y1, N2, y0, N2);
+
+ clear_mem(workspace, 2*N);
+
+ if(cmp0 && cmp1)
+ {
+ if(cmp0 > 0)
+ bigint_sub3(z0, x0, N2, x1, N2);
+ else
+ bigint_sub3(z0, x1, N2, x0, N2);
+
+ if(cmp1 > 0)
+ bigint_sub3(z1, y1, N2, y0, N2);
+ else
+ bigint_sub3(z1, y0, N2, y1, N2);
+
+ karatsuba_mul(workspace, z0, z1, N2, workspace+N);
+ }
+
+ karatsuba_mul(z0, x0, y0, N2, workspace+N);
+ karatsuba_mul(z1, x1, y1, N2, workspace+N);
+
+ const u32bit blocks_of_8 = N - (N % 8);
+
+ word carry = 0;
+
+ for(u32bit j = 0; j != blocks_of_8; j += 8)
+ carry = word8_add3(workspace + N + j, z0 + j, z1 + j, carry);
+
+ for(u32bit j = blocks_of_8; j != N; ++j)
+ workspace[N + j] = word_add(z0[j], z1[j], &carry);
+
+ word carry2 = 0;
+
+ for(u32bit j = 0; j != blocks_of_8; j += 8)
+ carry2 = word8_add2(z + N2 + j, workspace + N + j, carry2);
+
+ for(u32bit j = blocks_of_8; j != N; ++j)
+ z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &carry2);
+
+ z[N + N2] = word_add(z[N + N2], carry2, &carry);
+
+ if(carry)
+ for(u32bit j = 1; j != N2; ++j)
+ if(++z[N + N2 + j])
+ break;
+
+ if((cmp0 == cmp1) || (cmp0 == 0) || (cmp1 == 0))
+ bigint_add2(z + N2, 2*N-N2, workspace, N);
+ else
+ bigint_sub2(z + N2, 2*N-N2, workspace, N);
+ }
+ }
+
+/*************************************************
+* Karatsuba Squaring Operation *
+*************************************************/
+void karatsuba_sqr(word z[], const word x[], u32bit N, word workspace[])
+ {
+ if(N == 6)
+ bigint_comba_sqr6(z, x);
+ else if(N == 8)
+ bigint_comba_sqr8(z, x);
+ else if(N == 16)
+ bigint_comba_sqr16(z, x);
+ else if(N < BOTAN_KARAT_SQR_THRESHOLD || N % 2)
+ bigint_simple_sqr(z, x, N);
+ else
+ {
+ const u32bit N2 = N / 2;
+
+ const word* x0 = x;
+ const word* x1 = x + N2;
+ word* z0 = z;
+ word* z1 = z + N;
+
+ const s32bit cmp = bigint_cmp(x0, N2, x1, N2);
+
+ clear_mem(workspace, 2*N);
+
+ if(cmp)
+ {
+ if(cmp > 0)
+ bigint_sub3(z0, x0, N2, x1, N2);
+ else
+ bigint_sub3(z0, x1, N2, x0, N2);
+
+ karatsuba_sqr(workspace, z0, N2, workspace+N);
+ }
+
+ karatsuba_sqr(z0, x0, N2, workspace+N);
+ karatsuba_sqr(z1, x1, N2, workspace+N);
+
+ const u32bit blocks_of_8 = N - (N % 8);
+
+ word carry = 0;
+
+ for(u32bit j = 0; j != blocks_of_8; j += 8)
+ carry = word8_add3(workspace + N + j, z0 + j, z1 + j, carry);
+
+ for(u32bit j = blocks_of_8; j != N; ++j)
+ workspace[N + j] = word_add(z0[j], z1[j], &carry);
+
+ word carry2 = 0;
+
+ for(u32bit j = 0; j != blocks_of_8; j += 8)
+ carry2 = word8_add2(z + N2 + j, workspace + N + j, carry2);
+
+ for(u32bit j = blocks_of_8; j != N; ++j)
+ z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &carry2);
+
+ z[N + N2] = word_add(z[N + N2], carry2, &carry);
+
+ if(carry)
+ for(u32bit j = 1; j != N2; ++j)
+ if(++z[N + N2 + j])
+ break;
+
+ if(cmp == 0)
+ bigint_add2(z + N2, 2*N-N2, workspace, N);
+ else
+ bigint_sub2(z + N2, 2*N-N2, workspace, N);
+ }
+ }
+
+/*************************************************
+* Pick a good size for the Karatsuba multiply *
+*************************************************/
+u32bit karatsuba_size(u32bit z_size,
+ u32bit x_size, u32bit x_sw,
+ u32bit y_size, u32bit y_sw)
+ {
+ if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size)
+ return 0;
+
+ if(((x_size == x_sw) && (x_size % 2)) ||
+ ((y_size == y_sw) && (y_size % 2)))
+ return 0;
+
+ const u32bit start = (x_sw > y_sw) ? x_sw : y_sw;
+ const u32bit end = (x_size < y_size) ? x_size : y_size;
+
+ if(start == end)
+ {
+ if(start % 2)
+ return 0;
+ return start;
+ }
+
+ for(u32bit j = start; j <= end; ++j)
+ {
+ if(j % 2)
+ continue;
+
+ if(2*j > z_size)
+ return 0;
+
+ if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size)
+ {
+ if(j % 4 == 2 &&
+ (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size)
+ return j+2;
+ return j;
+ }
+ }
+
+ return 0;
+ }
+
+/*************************************************
+* Pick a good size for the Karatsuba squaring *
+*************************************************/
+u32bit karatsuba_size(u32bit z_size, u32bit x_size, u32bit x_sw)
+ {
+ if(x_sw == x_size)
+ {
+ if(x_sw % 2)
+ return 0;
+ return x_sw;
+ }
+
+ for(u32bit j = x_sw; j <= x_size; ++j)
+ {
+ if(j % 2)
+ continue;
+
+ if(2*j > z_size)
+ return 0;
+
+ if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size)
+ return j+2;
+ return j;
+ }
+
+ return 0;
+ }
+
+}
+
+/*************************************************
+* Multiplication Algorithm Dispatcher *
+*************************************************/
+void bigint_mul(word z[], u32bit z_size, word workspace[],
+ const word x[], u32bit x_size, u32bit x_sw,
+ const word y[], u32bit y_size, u32bit y_sw)
+ {
+ if(x_sw == 1)
+ {
+ bigint_linmul3(z, y, y_sw, x[0]);
+ }
+ else if(y_sw == 1)
+ {
+ bigint_linmul3(z, x, x_sw, y[0]);
+ }
+ else if(x_sw <= 4 && x_size >= 4 &&
+ y_sw <= 4 && y_size >= 4 && z_size >= 8)
+ {
+ bigint_comba_mul4(z, x, y);
+ }
+ else if(x_sw <= 6 && x_size >= 6 &&
+ y_sw <= 6 && y_size >= 6 && z_size >= 12)
+ {
+ bigint_comba_mul6(z, x, y);
+ }
+ else if(x_sw <= 8 && x_size >= 8 &&
+ y_sw <= 8 && y_size >= 8 && z_size >= 16)
+ {
+ bigint_comba_mul8(z, x, y);
+ }
+ else if(x_sw <= 16 && x_size >= 16 &&
+ y_sw <= 16 && y_size >= 16 && z_size >= 32)
+ {
+ bigint_comba_mul16(z, x, y);
+ }
+ else if(x_sw < BOTAN_KARAT_MUL_THRESHOLD || y_sw < BOTAN_KARAT_MUL_THRESHOLD)
+ bigint_simple_mul(z, x, x_sw, y, y_sw);
+ else
+ {
+ const u32bit N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
+
+ if(N)
+ {
+ clear_mem(workspace, 2*N);
+ karatsuba_mul(z, x, y, N, workspace);
+ }
+ else
+ bigint_simple_mul(z, x, x_sw, y, y_sw);
+ }
+ }
+
+/*************************************************
+* Squaring Algorithm Dispatcher *
+*************************************************/
+void bigint_sqr(word z[], u32bit z_size, word workspace[],
+ const word x[], u32bit x_size, u32bit x_sw)
+ {
+ if(x_sw == 1)
+ {
+ bigint_linmul3(z, x, x_sw, x[0]);
+ }
+ else if(x_sw <= 4 && x_size >= 4 && z_size >= 8)
+ {
+ bigint_comba_sqr4(z, x);
+ }
+ else if(x_sw <= 6 && x_size >= 6 && z_size >= 12)
+ {
+ bigint_comba_sqr6(z, x);
+ }
+ else if(x_sw <= 8 && x_size >= 8 && z_size >= 16)
+ {
+ bigint_comba_sqr8(z, x);
+ }
+ else if(x_sw <= 16 && x_size >= 16 && z_size >= 32)
+ {
+ bigint_comba_sqr16(z, x);
+ }
+ else if(x_size < BOTAN_KARAT_SQR_THRESHOLD)
+ {
+ bigint_simple_sqr(z, x, x_sw);
+ }
+ else
+ {
+ const u32bit N = karatsuba_size(z_size, x_size, x_sw);
+
+ if(N)
+ {
+ clear_mem(workspace, 2*N);
+ karatsuba_sqr(z, x, N, workspace);
+ }
+ else
+ bigint_simple_sqr(z, x, x_sw);
+ }
+ }
+
+}