From 57e8e85be6342b9495eb7ba9a3d96e508b984418 Mon Sep 17 00:00:00 2001 From: lloyd Date: Tue, 9 Sep 2008 22:27:16 +0000 Subject: Add 16x16->32 word Comba multiply and square --- src/mp_karat.cpp | 130 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 67 insertions(+), 63 deletions(-) (limited to 'src/mp_karat.cpp') diff --git a/src/mp_karat.cpp b/src/mp_karat.cpp index 770846b4e..15b0551fd 100644 --- a/src/mp_karat.cpp +++ b/src/mp_karat.cpp @@ -17,13 +17,13 @@ namespace { void karatsuba_mul(word z[], const word x[], const word y[], u32bit N, word workspace[]) { - const u32bit KARATSUBA_MUL_LOWER_SIZE = BOTAN_KARAT_MUL_THRESHOLD; - if(N == 6) bigint_comba_mul6(z, x, y); else if(N == 8) bigint_comba_mul8(z, x, y); - else if(N < KARATSUBA_MUL_LOWER_SIZE || N % 2) + else if(N == 16) + bigint_comba_mul16(z, x, y); + else if(N < BOTAN_KARAT_MUL_THRESHOLD || N % 2) bigint_simple_mul(z, x, N, y, N); else { @@ -96,13 +96,13 @@ void karatsuba_mul(word z[], const word x[], const word y[], u32bit N, *************************************************/ void karatsuba_sqr(word z[], const word x[], u32bit N, word workspace[]) { - const u32bit KARATSUBA_SQR_LOWER_SIZE = BOTAN_KARAT_SQR_THRESHOLD; - if(N == 6) bigint_comba_sqr6(z, x); else if(N == 8) bigint_comba_sqr8(z, x); - else if(N < KARATSUBA_SQR_LOWER_SIZE || N % 2) + else if(N == 16) + bigint_comba_sqr16(z, x); + else if(N < BOTAN_KARAT_SQR_THRESHOLD || N % 2) bigint_simple_sqr(z, x, N); else { @@ -234,97 +234,101 @@ u32bit karatsuba_size(u32bit z_size, u32bit x_size, u32bit x_sw) return 0; } +} + /************************************************* -* Handle small operand multiplies * +* Multiplication Algorithm Dispatcher * *************************************************/ -void handle_small_mul(word z[], u32bit z_size, - const word x[], u32bit x_size, u32bit x_sw, - const word y[], u32bit y_size, u32bit y_sw) +void bigint_mul(word z[], u32bit z_size, word workspace[], + const word x[], u32bit x_size, u32bit x_sw, + const word y[], u32bit y_size, u32bit y_sw) { - if(x_sw == 1) bigint_linmul3(z, y, y_sw, x[0]); - else if(y_sw == 1) bigint_linmul3(z, x, x_sw, y[0]); - + if(x_sw == 1) + { + bigint_linmul3(z, y, y_sw, x[0]); + } + else if(y_sw == 1) + { + bigint_linmul3(z, x, x_sw, y[0]); + } else if(x_sw <= 4 && x_size >= 4 && y_sw <= 4 && y_size >= 4 && z_size >= 8) + { bigint_comba_mul4(z, x, y); - + } else if(x_sw <= 6 && x_size >= 6 && y_sw <= 6 && y_size >= 6 && z_size >= 12) + { bigint_comba_mul6(z, x, y); - + } else if(x_sw <= 8 && x_size >= 8 && y_sw <= 8 && y_size >= 8 && z_size >= 16) + { bigint_comba_mul8(z, x, y); - - else + } + else if(x_sw <= 16 && x_size >= 16 && + y_sw <= 16 && y_size >= 16 && z_size >= 32) + { + bigint_comba_mul16(z, x, y); + } + else if(x_sw < BOTAN_KARAT_MUL_THRESHOLD || y_sw < BOTAN_KARAT_MUL_THRESHOLD) bigint_simple_mul(z, x, x_sw, y, y_sw); + else + { + const u32bit N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw); + + if(N) + { + clear_mem(workspace, 2*N); + karatsuba_mul(z, x, y, N, workspace); + } + else + bigint_simple_mul(z, x, x_sw, y, y_sw); + } } /************************************************* -* Handle small operand squarings * +* Squaring Algorithm Dispatcher * *************************************************/ -void handle_small_sqr(word z[], u32bit z_size, - const word x[], u32bit x_size, u32bit x_sw) +void bigint_sqr(word z[], u32bit z_size, word workspace[], + const word x[], u32bit x_size, u32bit x_sw) { if(x_sw == 1) + { bigint_linmul3(z, x, x_sw, x[0]); + } else if(x_sw <= 4 && x_size >= 4 && z_size >= 8) + { bigint_comba_sqr4(z, x); + } else if(x_sw <= 6 && x_size >= 6 && z_size >= 12) + { bigint_comba_sqr6(z, x); + } else if(x_sw <= 8 && x_size >= 8 && z_size >= 16) + { bigint_comba_sqr8(z, x); - else - bigint_simple_sqr(z, x, x_sw); - } - -} - -/************************************************* -* Multiplication Algorithm Dispatcher * -*************************************************/ -void bigint_mul(word z[], u32bit z_size, word workspace[], - const word x[], u32bit x_size, u32bit x_sw, - const word y[], u32bit y_size, u32bit y_sw) - { - if(x_size <= 8 || y_size <= 8) + } + else if(x_sw <= 16 && x_size >= 16 && z_size >= 32) { - handle_small_mul(z, z_size, x, x_size, x_sw, y, y_size, y_sw); - return; + bigint_comba_sqr16(z, x); } - - const u32bit N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw); - - if(N) + else if(x_size < BOTAN_KARAT_SQR_THRESHOLD) { - clear_mem(workspace, 2*N); - karatsuba_mul(z, x, y, N, workspace); + bigint_simple_sqr(z, x, x_sw); } else - bigint_simple_mul(z, x, x_sw, y, y_sw); - } - -/************************************************* -* Squaring Algorithm Dispatcher * -*************************************************/ -void bigint_sqr(word z[], u32bit z_size, word workspace[], - const word x[], u32bit x_size, u32bit x_sw) - { - if(x_size <= 8 || x_sw <= 8) { - handle_small_sqr(z, z_size, x, x_size, x_sw); - return; - } - - const u32bit N = karatsuba_size(z_size, x_size, x_sw); + const u32bit N = karatsuba_size(z_size, x_size, x_sw); - if(N) - { - clear_mem(workspace, 2*N); - karatsuba_sqr(z, x, N, workspace); + if(N) + { + clear_mem(workspace, 2*N); + karatsuba_sqr(z, x, N, workspace); + } + else + bigint_simple_sqr(z, x, x_sw); } - else - bigint_simple_sqr(z, x, x_sw); } } -- cgit v1.2.3