diff options
author | lloyd <[email protected]> | 2010-09-28 14:50:16 +0000 |
---|---|---|
committer | lloyd <[email protected]> | 2010-09-28 14:50:16 +0000 |
commit | 526f7067844eac0f1dccf2d0c4489b9c68e34632 (patch) | |
tree | 676bd818e3a98eb9157992da9a6b9d4787eb526a /src | |
parent | 7c0a8f58cfbbefcb37d0337ba4f8c592998bf38e (diff) |
Cleanup Karatsuba a bit
Diffstat (limited to 'src')
-rw-r--r-- | src/math/mp/mp_karat.cpp | 210 |
1 files changed, 107 insertions, 103 deletions
diff --git a/src/math/mp/mp_karat.cpp b/src/math/mp/mp_karat.cpp index 8ae346f1e..1cb278367 100644 --- a/src/math/mp/mp_karat.cpp +++ b/src/math/mp/mp_karat.cpp @@ -1,13 +1,13 @@ /* * Karatsuba Multiplication/Squaring -* (C) 1999-2008 Jack Lloyd +* (C) 1999-2010 Jack Lloyd * * Distributed under the terms of the Botan license */ #include <botan/internal/mp_core.h> -#include <botan/mem_ops.h> #include <botan/internal/mp_asmi.h> +#include <botan/mem_ops.h> namespace Botan { @@ -19,78 +19,79 @@ namespace { void karatsuba_mul(word z[], const word x[], const word y[], u32bit N, word workspace[]) { - if(N == 6) - bigint_comba_mul6(z, x, y); - else if(N == 8) - bigint_comba_mul8(z, x, y); - else if(N == 16) - bigint_comba_mul16(z, x, y); - else if(N < BOTAN_KARAT_MUL_THRESHOLD || N % 2) - bigint_simple_mul(z, x, N, y, N); - else + if(N < BOTAN_KARAT_MUL_THRESHOLD || N % 2) { - const u32bit N2 = N / 2; + if(N == 6) + return bigint_comba_mul6(z, x, y); + else if(N == 8) + return bigint_comba_mul8(z, x, y); + else if(N == 16) + return bigint_comba_mul16(z, x, y); + else + return bigint_simple_mul(z, x, N, y, N); + } - const word* x0 = x; - const word* x1 = x + N2; - const word* y0 = y; - const word* y1 = y + N2; - word* z0 = z; - word* z1 = z + N; + const u32bit N2 = N / 2; - const s32bit cmp0 = bigint_cmp(x0, N2, x1, N2); - const s32bit cmp1 = bigint_cmp(y1, N2, y0, N2); + const word* x0 = x; + const word* x1 = x + N2; + const word* y0 = y; + const word* y1 = y + N2; + word* z0 = z; + word* z1 = z + N; - clear_mem(workspace, 2*N); + const s32bit cmp0 = bigint_cmp(x0, N2, x1, N2); + const s32bit cmp1 = bigint_cmp(y1, N2, y0, N2); - if(cmp0 && cmp1) - { - if(cmp0 > 0) - bigint_sub3(z0, x0, N2, x1, N2); - else - bigint_sub3(z0, x1, N2, x0, N2); + clear_mem(workspace, 2*N); - if(cmp1 > 0) - bigint_sub3(z1, y1, N2, y0, N2); - else - bigint_sub3(z1, y0, N2, y1, N2); + if(cmp0 && cmp1) + { + if(cmp0 > 0) + bigint_sub3(z0, x0, N2, x1, N2); + else + bigint_sub3(z0, x1, N2, x0, N2); - karatsuba_mul(workspace, z0, z1, N2, workspace+N); - } + if(cmp1 > 0) + bigint_sub3(z1, y1, N2, y0, N2); + else + bigint_sub3(z1, y0, N2, y1, N2); - karatsuba_mul(z0, x0, y0, N2, workspace+N); - karatsuba_mul(z1, x1, y1, N2, workspace+N); + karatsuba_mul(workspace, z0, z1, N2, workspace+N); + } - const u32bit blocks_of_8 = N - (N % 8); + karatsuba_mul(z0, x0, y0, N2, workspace+N); + karatsuba_mul(z1, x1, y1, N2, workspace+N); - word carry = 0; + const u32bit blocks_of_8 = N - (N % 8); - for(u32bit j = 0; j != blocks_of_8; j += 8) - carry = word8_add3(workspace + N + j, z0 + j, z1 + j, carry); + word ws_carry = 0; - for(u32bit j = blocks_of_8; j != N; ++j) - workspace[N + j] = word_add(z0[j], z1[j], &carry); + for(u32bit j = 0; j != blocks_of_8; j += 8) + ws_carry = word8_add3(workspace + N + j, z0 + j, z1 + j, ws_carry); - word carry2 = 0; + for(u32bit j = blocks_of_8; j != N; ++j) + workspace[N + j] = word_add(z0[j], z1[j], &ws_carry); - for(u32bit j = 0; j != blocks_of_8; j += 8) - carry2 = word8_add2(z + N2 + j, workspace + N + j, carry2); + word z_carry = 0; - for(u32bit j = blocks_of_8; j != N; ++j) - z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &carry2); + for(u32bit j = 0; j != blocks_of_8; j += 8) + z_carry = word8_add2(z + N2 + j, workspace + N + j, z_carry); - z[N + N2] = word_add(z[N + N2], carry2, &carry); + for(u32bit j = blocks_of_8; j != N; ++j) + z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &z_carry); - if(carry) - for(u32bit j = 1; j != N2; ++j) - if(++z[N + N2 + j]) - break; + z[N + N2] = word_add(z[N + N2], ws_carry, &z_carry); - if((cmp0 == cmp1) || (cmp0 == 0) || (cmp1 == 0)) - bigint_add2(z + N2, 2*N-N2, workspace, N); - else - bigint_sub2(z + N2, 2*N-N2, workspace, N); - } + if(z_carry) + for(u32bit j = 1; j != N2; ++j) + if(++z[N + N2 + j]) + break; + + if((cmp0 == cmp1) || (cmp0 == 0) || (cmp1 == 0)) + bigint_add2(z + N2, 2*N-N2, workspace, N); + else + bigint_sub2(z + N2, 2*N-N2, workspace, N); } /* @@ -98,70 +99,73 @@ void karatsuba_mul(word z[], const word x[], const word y[], u32bit N, */ void karatsuba_sqr(word z[], const word x[], u32bit N, word workspace[]) { - if(N == 6) - bigint_comba_sqr6(z, x); - else if(N == 8) - bigint_comba_sqr8(z, x); - else if(N == 16) - bigint_comba_sqr16(z, x); - else if(N < BOTAN_KARAT_SQR_THRESHOLD || N % 2) - bigint_simple_sqr(z, x, N); - else + if(N < BOTAN_KARAT_SQR_THRESHOLD || N % 2) { - const u32bit N2 = N / 2; + if(N == 6) + return bigint_comba_sqr6(z, x); + else if(N == 8) + return bigint_comba_sqr8(z, x); + else if(N == 16) + return bigint_comba_sqr16(z, x); + else + return bigint_simple_sqr(z, x, N); + } - const word* x0 = x; - const word* x1 = x + N2; - word* z0 = z; - word* z1 = z + N; + const u32bit N2 = N / 2; - const s32bit cmp = bigint_cmp(x0, N2, x1, N2); + const word* x0 = x; + const word* x1 = x + N2; + word* z0 = z; + word* z1 = z + N; - clear_mem(workspace, 2*N); + const s32bit cmp = bigint_cmp(x0, N2, x1, N2); - if(cmp) - { - if(cmp > 0) - bigint_sub3(z0, x0, N2, x1, N2); - else - bigint_sub3(z0, x1, N2, x0, N2); + clear_mem(workspace, 2*N); - karatsuba_sqr(workspace, z0, N2, workspace+N); - } + if(cmp) + { + if(cmp > 0) + bigint_sub3(z0, x0, N2, x1, N2); + else + bigint_sub3(z0, x1, N2, x0, N2); - karatsuba_sqr(z0, x0, N2, workspace+N); - karatsuba_sqr(z1, x1, N2, workspace+N); + karatsuba_sqr(workspace, z0, N2, workspace+N); + } - const u32bit blocks_of_8 = N - (N % 8); + karatsuba_sqr(z0, x0, N2, workspace+N); + karatsuba_sqr(z1, x1, N2, workspace+N); - word carry = 0; + const u32bit blocks_of_8 = N - (N % 8); - for(u32bit j = 0; j != blocks_of_8; j += 8) - carry = word8_add3(workspace + N + j, z0 + j, z1 + j, carry); + word ws_carry = 0; - for(u32bit j = blocks_of_8; j != N; ++j) - workspace[N + j] = word_add(z0[j], z1[j], &carry); + for(u32bit j = 0; j != blocks_of_8; j += 8) + ws_carry = word8_add3(workspace + N + j, z0 + j, z1 + j, ws_carry); - word carry2 = 0; + for(u32bit j = blocks_of_8; j != N; ++j) + workspace[N + j] = word_add(z0[j], z1[j], &ws_carry); - for(u32bit j = 0; j != blocks_of_8; j += 8) - carry2 = word8_add2(z + N2 + j, workspace + N + j, carry2); + word z_carry = 0; - for(u32bit j = blocks_of_8; j != N; ++j) - z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &carry2); + for(u32bit j = 0; j != blocks_of_8; j += 8) + z_carry = word8_add2(z + N2 + j, workspace + N + j, z_carry); - z[N + N2] = word_add(z[N + N2], carry2, &carry); + for(u32bit j = blocks_of_8; j != N; ++j) + z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &z_carry); - if(carry) - for(u32bit j = 1; j != N2; ++j) - if(++z[N + N2 + j]) - break; + z[N + N2] = word_add(z[N + N2], ws_carry, &z_carry); - if(cmp == 0) - bigint_add2(z + N2, 2*N-N2, workspace, N); - else - bigint_sub2(z + N2, 2*N-N2, workspace, N); - } + if(z_carry) + for(u32bit j = 1; j != N2; ++j) + if(++z[N + N2 + j]) + break; + + /* + * This is only actually required if cmp is != 0, however + * if cmp==0 then workspace[0:N] == 0 and avoiding the jump + * hides a timing channel. + */ + bigint_sub2(z + N2, 2*N-N2, workspace, N); } /* |