aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorlloyd <[email protected]>2008-09-09 22:27:16 +0000
committerlloyd <[email protected]>2008-09-09 22:27:16 +0000
commit57e8e85be6342b9495eb7ba9a3d96e508b984418 (patch)
tree7b35bc430f56c9b5848fd2a3e09c667e709c1780 /src
parent456c1ef79167ef0cba5748842ca3748fa9ed7fc0 (diff)
Add 16x16->32 word Comba multiply and square
Diffstat (limited to 'src')
-rw-r--r--src/mp_comba.cpp762
-rw-r--r--src/mp_karat.cpp130
2 files changed, 716 insertions, 176 deletions
diff --git a/src/mp_comba.cpp b/src/mp_comba.cpp
index 589a4037c..635a7547a 100644
--- a/src/mp_comba.cpp
+++ b/src/mp_comba.cpp
@@ -11,6 +11,39 @@ namespace Botan {
extern "C" {
/*************************************************
+* Comba 4x4 Squaring *
+*************************************************/
+void bigint_comba_sqr4(word z[8], const word x[4])
+ {
+ word w2 = 0, w1 = 0, w0 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[0], x[0]);
+ z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[0], x[1]);
+ z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[0], x[2]);
+ word3_muladd(&w2, &w1, &w0, x[1], x[1]);
+ z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[0], x[3]);
+ word3_muladd_2(&w2, &w1, &w0, x[1], x[2]);
+ z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[1], x[3]);
+ word3_muladd(&w2, &w1, &w0, x[2], x[2]);
+ z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[2], x[3]);
+ z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[3], x[3]);
+ z[6] = w0;
+ z[7] = w1;
+ }
+
+/*************************************************
* Comba 4x4 Multiplication *
*************************************************/
void bigint_comba_mul4(word z[8], const word x[4], const word y[4])
@@ -50,6 +83,58 @@ void bigint_comba_mul4(word z[8], const word x[4], const word y[4])
}
/*************************************************
+* Comba 6x6 Squaring *
+*************************************************/
+void bigint_comba_sqr6(word z[12], const word x[6])
+ {
+ word w2 = 0, w1 = 0, w0 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[0], x[0]);
+ z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[0], x[1]);
+ z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[0], x[2]);
+ word3_muladd(&w2, &w1, &w0, x[1], x[1]);
+ z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[0], x[3]);
+ word3_muladd_2(&w2, &w1, &w0, x[1], x[2]);
+ z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[0], x[4]);
+ word3_muladd_2(&w2, &w1, &w0, x[1], x[3]);
+ word3_muladd(&w2, &w1, &w0, x[2], x[2]);
+ z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[0], x[5]);
+ word3_muladd_2(&w2, &w1, &w0, x[1], x[4]);
+ word3_muladd_2(&w2, &w1, &w0, x[2], x[3]);
+ z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[1], x[5]);
+ word3_muladd_2(&w2, &w1, &w0, x[2], x[4]);
+ word3_muladd(&w2, &w1, &w0, x[3], x[3]);
+ z[6] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[2], x[5]);
+ word3_muladd_2(&w2, &w1, &w0, x[3], x[4]);
+ z[7] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[3], x[5]);
+ word3_muladd(&w2, &w1, &w0, x[4], x[4]);
+ z[8] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[4], x[5]);
+ z[9] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[5], x[5]);
+ z[10] = w0;
+ z[11] = w1;
+ }
+
+/*************************************************
* Comba 6x6 Multiplication *
*************************************************/
void bigint_comba_mul6(word z[12], const word x[6], const word y[6])
@@ -117,6 +202,81 @@ void bigint_comba_mul6(word z[12], const word x[6], const word y[6])
}
/*************************************************
+* Comba 8x8 Squaring *
+*************************************************/
+void bigint_comba_sqr8(word z[16], const word x[8])
+ {
+ word w2 = 0, w1 = 0, w0 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[0], x[0]);
+ z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[0], x[1]);
+ z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[0], x[2]);
+ word3_muladd(&w2, &w1, &w0, x[1], x[1]);
+ z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[0], x[3]);
+ word3_muladd_2(&w2, &w1, &w0, x[1], x[2]);
+ z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[0], x[4]);
+ word3_muladd_2(&w2, &w1, &w0, x[1], x[3]);
+ word3_muladd(&w2, &w1, &w0, x[2], x[2]);
+ z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[0], x[5]);
+ word3_muladd_2(&w2, &w1, &w0, x[1], x[4]);
+ word3_muladd_2(&w2, &w1, &w0, x[2], x[3]);
+ z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[0], x[6]);
+ word3_muladd_2(&w2, &w1, &w0, x[1], x[5]);
+ word3_muladd_2(&w2, &w1, &w0, x[2], x[4]);
+ word3_muladd(&w2, &w1, &w0, x[3], x[3]);
+ z[6] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[0], x[7]);
+ word3_muladd_2(&w2, &w1, &w0, x[1], x[6]);
+ word3_muladd_2(&w2, &w1, &w0, x[2], x[5]);
+ word3_muladd_2(&w2, &w1, &w0, x[3], x[4]);
+ z[7] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[1], x[7]);
+ word3_muladd_2(&w2, &w1, &w0, x[2], x[6]);
+ word3_muladd_2(&w2, &w1, &w0, x[3], x[5]);
+ word3_muladd(&w2, &w1, &w0, x[4], x[4]);
+ z[8] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[2], x[7]);
+ word3_muladd_2(&w2, &w1, &w0, x[3], x[6]);
+ word3_muladd_2(&w2, &w1, &w0, x[4], x[5]);
+ z[9] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[3], x[7]);
+ word3_muladd_2(&w2, &w1, &w0, x[4], x[6]);
+ word3_muladd(&w2, &w1, &w0, x[5], x[5]);
+ z[10] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[4], x[7]);
+ word3_muladd_2(&w2, &w1, &w0, x[5], x[6]);
+ z[11] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[5], x[7]);
+ word3_muladd(&w2, &w1, &w0, x[6], x[6]);
+ z[12] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[6], x[7]);
+ z[13] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[7], x[7]);
+ z[14] = w0;
+ z[15] = w1;
+ }
+
+/*************************************************
* Comba 8x8 Multiplication *
*************************************************/
void bigint_comba_mul8(word z[16], const word x[8], const word y[8])
@@ -220,163 +380,539 @@ void bigint_comba_mul8(word z[16], const word x[8], const word y[8])
}
/*************************************************
-* Comba 4x4 Squaring *
+* Comba 16x16 Squaring *
*************************************************/
-void bigint_comba_sqr4(word z[8], const word x[4])
+void bigint_comba_sqr16(word z[32], const word x[16])
{
- word w2 = 0, w1 = 0, w0 = 0;
-
- word3_muladd(&w2, &w1, &w0, x[0], x[0]);
- z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
+ return bigint_comba_mul16(z, x, x);
- word3_muladd_2(&w2, &w1, &w0, x[0], x[1]);
- z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
-
- word3_muladd_2(&w2, &w1, &w0, x[0], x[2]);
- word3_muladd(&w2, &w1, &w0, x[1], x[1]);
- z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
-
- word3_muladd_2(&w2, &w1, &w0, x[0], x[3]);
- word3_muladd_2(&w2, &w1, &w0, x[1], x[2]);
- z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
-
- word3_muladd_2(&w2, &w1, &w0, x[1], x[3]);
- word3_muladd(&w2, &w1, &w0, x[2], x[2]);
- z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
-
- word3_muladd_2(&w2, &w1, &w0, x[2], x[3]);
- z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
-
- word3_muladd(&w2, &w1, &w0, x[3], x[3]);
- z[6] = w0;
- z[7] = w1;
- }
-
-/*************************************************
-* Comba 6x6 Squaring *
-*************************************************/
-void bigint_comba_sqr6(word z[12], const word x[6])
- {
word w2 = 0, w1 = 0, w0 = 0;
- word3_muladd(&w2, &w1, &w0, x[0], x[0]);
- z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
-
- word3_muladd_2(&w2, &w1, &w0, x[0], x[1]);
- z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
-
- word3_muladd_2(&w2, &w1, &w0, x[0], x[2]);
- word3_muladd(&w2, &w1, &w0, x[1], x[1]);
- z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
-
- word3_muladd_2(&w2, &w1, &w0, x[0], x[3]);
- word3_muladd_2(&w2, &w1, &w0, x[1], x[2]);
- z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
-
- word3_muladd_2(&w2, &w1, &w0, x[0], x[4]);
- word3_muladd_2(&w2, &w1, &w0, x[1], x[3]);
- word3_muladd(&w2, &w1, &w0, x[2], x[2]);
- z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
-
- word3_muladd_2(&w2, &w1, &w0, x[0], x[5]);
- word3_muladd_2(&w2, &w1, &w0, x[1], x[4]);
- word3_muladd_2(&w2, &w1, &w0, x[2], x[3]);
- z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
-
- word3_muladd_2(&w2, &w1, &w0, x[1], x[5]);
- word3_muladd_2(&w2, &w1, &w0, x[2], x[4]);
- word3_muladd(&w2, &w1, &w0, x[3], x[3]);
- z[6] = w0; w0 = w1; w1 = w2; w2 = 0;
+ word3_muladd(&w2, &w1, &w0, x[ 0], x[ 0]);
+ z[ 0] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 1]);
+ z[ 1] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 2]);
+ word3_muladd(&w2, &w1, &w0, x[ 1], x[ 1]);
+ z[ 2] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 3]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 2]);
+ z[ 3] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 4]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 3]);
+ word3_muladd(&w2, &w1, &w0, x[ 2], x[ 2]);
+ z[ 4] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 5]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 4]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 3]);
+ z[ 5] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 6]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 5]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 4]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], x[ 3]);
+ z[ 6] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 7]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 6]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 5]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 4]);
+ z[ 7] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 8]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 7]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 6]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 5]);
+ word3_muladd(&w2, &w1, &w0, x[ 4], x[ 4]);
+ z[ 8] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 9]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 8]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 7]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 6]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 5]);
+ z[ 9] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 0], x[10]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 9]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 8]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 7]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 6]);
+ word3_muladd(&w2, &w1, &w0, x[ 5], x[ 5]);
+ z[10] = w0; w0 = w1; w1 = w2; w2 = 0;
- word3_muladd_2(&w2, &w1, &w0, x[2], x[5]);
- word3_muladd_2(&w2, &w1, &w0, x[3], x[4]);
- z[7] = w0; w0 = w1; w1 = w2; w2 = 0;
+ word3_muladd_2(&w2, &w1, &w0, x[ 0], x[11]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 1], x[10]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 9]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 8]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 7]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 6]);
+ z[11] = w0; w0 = w1; w1 = w2; w2 = 0;
- word3_muladd_2(&w2, &w1, &w0, x[3], x[5]);
- word3_muladd(&w2, &w1, &w0, x[4], x[4]);
- z[8] = w0; w0 = w1; w1 = w2; w2 = 0;
+ word3_muladd_2(&w2, &w1, &w0, x[ 0], x[12]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 1], x[11]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 2], x[10]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 9]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 8]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 7]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], x[ 6]);
+ z[12] = w0; w0 = w1; w1 = w2; w2 = 0;
- word3_muladd_2(&w2, &w1, &w0, x[4], x[5]);
- z[9] = w0; w0 = w1; w1 = w2; w2 = 0;
+ word3_muladd_2(&w2, &w1, &w0, x[ 0], x[13]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 1], x[12]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 2], x[11]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 3], x[10]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 9]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 8]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 7]);
+ z[13] = w0; w0 = w1; w1 = w2; w2 = 0;
- word3_muladd(&w2, &w1, &w0, x[5], x[5]);
- z[10] = w0;
- z[11] = w1;
+ word3_muladd_2(&w2, &w1, &w0, x[ 0], x[14]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 1], x[13]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 2], x[12]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 3], x[11]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 4], x[10]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 9]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 8]);
+ word3_muladd(&w2, &w1, &w0, x[ 7], x[ 7]);
+ z[14] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 0], x[15]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 1], x[14]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 2], x[13]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 3], x[12]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 4], x[11]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 5], x[10]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 9]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 7], x[ 8]);
+ z[15] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 1], x[15]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 2], x[14]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 3], x[13]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 4], x[12]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 5], x[11]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 6], x[10]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 7], x[ 9]);
+ word3_muladd(&w2, &w1, &w0, x[ 8], x[ 8]);
+ z[16] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 2], x[15]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 3], x[14]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 4], x[13]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 5], x[12]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 6], x[11]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 7], x[10]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 8], x[ 9]);
+ z[17] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 3], x[15]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 4], x[14]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 5], x[13]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 6], x[12]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 7], x[11]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 8], x[10]);
+ word3_muladd(&w2, &w1, &w0, x[ 9], x[ 9]);
+ z[18] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 4], x[15]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 5], x[14]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 6], x[13]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 7], x[12]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 8], x[11]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 9], x[10]);
+ z[19] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 5], x[15]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 6], x[14]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 7], x[13]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 8], x[12]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 9], x[11]);
+ word3_muladd(&w2, &w1, &w0, x[10], x[10]);
+ z[20] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 6], x[15]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 7], x[14]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 8], x[13]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 9], x[12]);
+ word3_muladd_2(&w2, &w1, &w0, x[10], x[11]);
+ z[21] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 7], x[15]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 8], x[14]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 9], x[13]);
+ word3_muladd_2(&w2, &w1, &w0, x[10], x[12]);
+ word3_muladd(&w2, &w1, &w0, x[11], x[11]);
+ z[22] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 8], x[15]);
+ word3_muladd_2(&w2, &w1, &w0, x[ 9], x[14]);
+ word3_muladd_2(&w2, &w1, &w0, x[10], x[13]);
+ word3_muladd_2(&w2, &w1, &w0, x[11], x[12]);
+ z[23] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[ 9], x[15]);
+ word3_muladd_2(&w2, &w1, &w0, x[10], x[14]);
+ word3_muladd_2(&w2, &w1, &w0, x[11], x[13]);
+ word3_muladd(&w2, &w1, &w0, x[12], x[12]);
+ z[24] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[10], x[15]);
+ word3_muladd_2(&w2, &w1, &w0, x[11], x[14]);
+ word3_muladd_2(&w2, &w1, &w0, x[12], x[13]);
+ z[25] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[11], x[15]);
+ word3_muladd_2(&w2, &w1, &w0, x[12], x[14]);
+ word3_muladd(&w2, &w1, &w0, x[13], x[13]);
+ z[26] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[12], x[15]);
+ word3_muladd_2(&w2, &w1, &w0, x[13], x[14]);
+ z[27] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[13], x[15]);
+ word3_muladd(&w2, &w1, &w0, x[14], x[14]);
+ z[28] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd_2(&w2, &w1, &w0, x[14], x[15]);
+ z[29] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[15], x[15]);
+ z[30] = w0;
+ z[31] = w1;
}
/*************************************************
-* Comba 8x8 Squaring *
+* Comba 16x16 Multiplication *
*************************************************/
-void bigint_comba_sqr8(word z[16], const word x[8])
+void bigint_comba_mul16(word z[32], const word x[16], const word y[16])
{
word w2 = 0, w1 = 0, w0 = 0;
- word3_muladd(&w2, &w1, &w0, x[0], x[0]);
+ word3_muladd(&w2, &w1, &w0, x[ 0], y[ 0]);
z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
- word3_muladd_2(&w2, &w1, &w0, x[0], x[1]);
+ word3_muladd(&w2, &w1, &w0, x[ 0], y[ 1]);
+ word3_muladd(&w2, &w1, &w0, x[ 1], y[ 0]);
z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
- word3_muladd_2(&w2, &w1, &w0, x[0], x[2]);
- word3_muladd(&w2, &w1, &w0, x[1], x[1]);
+ word3_muladd(&w2, &w1, &w0, x[ 0], y[ 2]);
+ word3_muladd(&w2, &w1, &w0, x[ 1], y[ 1]);
+ word3_muladd(&w2, &w1, &w0, x[ 2], y[ 0]);
z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
- word3_muladd_2(&w2, &w1, &w0, x[0], x[3]);
- word3_muladd_2(&w2, &w1, &w0, x[1], x[2]);
+ word3_muladd(&w2, &w1, &w0, x[ 0], y[ 3]);
+ word3_muladd(&w2, &w1, &w0, x[ 1], y[ 2]);
+ word3_muladd(&w2, &w1, &w0, x[ 2], y[ 1]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], y[ 0]);
z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
- word3_muladd_2(&w2, &w1, &w0, x[0], x[4]);
- word3_muladd_2(&w2, &w1, &w0, x[1], x[3]);
- word3_muladd(&w2, &w1, &w0, x[2], x[2]);
+ word3_muladd(&w2, &w1, &w0, x[ 0], y[ 4]);
+ word3_muladd(&w2, &w1, &w0, x[ 1], y[ 3]);
+ word3_muladd(&w2, &w1, &w0, x[ 2], y[ 2]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], y[ 1]);
+ word3_muladd(&w2, &w1, &w0, x[ 4], y[ 0]);
z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
- word3_muladd_2(&w2, &w1, &w0, x[0], x[5]);
- word3_muladd_2(&w2, &w1, &w0, x[1], x[4]);
- word3_muladd_2(&w2, &w1, &w0, x[2], x[3]);
+ word3_muladd(&w2, &w1, &w0, x[ 0], y[ 5]);
+ word3_muladd(&w2, &w1, &w0, x[ 1], y[ 4]);
+ word3_muladd(&w2, &w1, &w0, x[ 2], y[ 3]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], y[ 2]);
+ word3_muladd(&w2, &w1, &w0, x[ 4], y[ 1]);
+ word3_muladd(&w2, &w1, &w0, x[ 5], y[ 0]);
z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
- word3_muladd_2(&w2, &w1, &w0, x[0], x[6]);
- word3_muladd_2(&w2, &w1, &w0, x[1], x[5]);
- word3_muladd_2(&w2, &w1, &w0, x[2], x[4]);
- word3_muladd(&w2, &w1, &w0, x[3], x[3]);
+ word3_muladd(&w2, &w1, &w0, x[ 0], y[ 6]);
+ word3_muladd(&w2, &w1, &w0, x[ 1], y[ 5]);
+ word3_muladd(&w2, &w1, &w0, x[ 2], y[ 4]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], y[ 3]);
+ word3_muladd(&w2, &w1, &w0, x[ 4], y[ 2]);
+ word3_muladd(&w2, &w1, &w0, x[ 5], y[ 1]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], y[ 0]);
z[6] = w0; w0 = w1; w1 = w2; w2 = 0;
- word3_muladd_2(&w2, &w1, &w0, x[0], x[7]);
- word3_muladd_2(&w2, &w1, &w0, x[1], x[6]);
- word3_muladd_2(&w2, &w1, &w0, x[2], x[5]);
- word3_muladd_2(&w2, &w1, &w0, x[3], x[4]);
+ word3_muladd(&w2, &w1, &w0, x[ 0], y[ 7]);
+ word3_muladd(&w2, &w1, &w0, x[ 1], y[ 6]);
+ word3_muladd(&w2, &w1, &w0, x[ 2], y[ 5]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], y[ 4]);
+ word3_muladd(&w2, &w1, &w0, x[ 4], y[ 3]);
+ word3_muladd(&w2, &w1, &w0, x[ 5], y[ 2]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], y[ 1]);
+ word3_muladd(&w2, &w1, &w0, x[ 7], y[ 0]);
z[7] = w0; w0 = w1; w1 = w2; w2 = 0;
- word3_muladd_2(&w2, &w1, &w0, x[1], x[7]);
- word3_muladd_2(&w2, &w1, &w0, x[2], x[6]);
- word3_muladd_2(&w2, &w1, &w0, x[3], x[5]);
- word3_muladd(&w2, &w1, &w0, x[4], x[4]);
+ word3_muladd(&w2, &w1, &w0, x[ 0], y[ 8]);
+ word3_muladd(&w2, &w1, &w0, x[ 1], y[ 7]);
+ word3_muladd(&w2, &w1, &w0, x[ 2], y[ 6]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], y[ 5]);
+ word3_muladd(&w2, &w1, &w0, x[ 4], y[ 4]);
+ word3_muladd(&w2, &w1, &w0, x[ 5], y[ 3]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], y[ 2]);
+ word3_muladd(&w2, &w1, &w0, x[ 7], y[ 1]);
+ word3_muladd(&w2, &w1, &w0, x[ 8], y[ 0]);
z[8] = w0; w0 = w1; w1 = w2; w2 = 0;
- word3_muladd_2(&w2, &w1, &w0, x[2], x[7]);
- word3_muladd_2(&w2, &w1, &w0, x[3], x[6]);
- word3_muladd_2(&w2, &w1, &w0, x[4], x[5]);
+ word3_muladd(&w2, &w1, &w0, x[ 0], y[ 9]);
+ word3_muladd(&w2, &w1, &w0, x[ 1], y[ 8]);
+ word3_muladd(&w2, &w1, &w0, x[ 2], y[ 7]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], y[ 6]);
+ word3_muladd(&w2, &w1, &w0, x[ 4], y[ 5]);
+ word3_muladd(&w2, &w1, &w0, x[ 5], y[ 4]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], y[ 3]);
+ word3_muladd(&w2, &w1, &w0, x[ 7], y[ 2]);
+ word3_muladd(&w2, &w1, &w0, x[ 8], y[ 1]);
+ word3_muladd(&w2, &w1, &w0, x[ 9], y[ 0]);
z[9] = w0; w0 = w1; w1 = w2; w2 = 0;
- word3_muladd_2(&w2, &w1, &w0, x[3], x[7]);
- word3_muladd_2(&w2, &w1, &w0, x[4], x[6]);
- word3_muladd(&w2, &w1, &w0, x[5], x[5]);
+ word3_muladd(&w2, &w1, &w0, x[ 0], y[10]);
+ word3_muladd(&w2, &w1, &w0, x[ 1], y[ 9]);
+ word3_muladd(&w2, &w1, &w0, x[ 2], y[ 8]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], y[ 7]);
+ word3_muladd(&w2, &w1, &w0, x[ 4], y[ 6]);
+ word3_muladd(&w2, &w1, &w0, x[ 5], y[ 5]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], y[ 4]);
+ word3_muladd(&w2, &w1, &w0, x[ 7], y[ 3]);
+ word3_muladd(&w2, &w1, &w0, x[ 8], y[ 2]);
+ word3_muladd(&w2, &w1, &w0, x[ 9], y[ 1]);
+ word3_muladd(&w2, &w1, &w0, x[10], y[ 0]);
z[10] = w0; w0 = w1; w1 = w2; w2 = 0;
- word3_muladd_2(&w2, &w1, &w0, x[4], x[7]);
- word3_muladd_2(&w2, &w1, &w0, x[5], x[6]);
+ word3_muladd(&w2, &w1, &w0, x[ 0], y[11]);
+ word3_muladd(&w2, &w1, &w0, x[ 1], y[10]);
+ word3_muladd(&w2, &w1, &w0, x[ 2], y[ 9]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], y[ 8]);
+ word3_muladd(&w2, &w1, &w0, x[ 4], y[ 7]);
+ word3_muladd(&w2, &w1, &w0, x[ 5], y[ 6]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], y[ 5]);
+ word3_muladd(&w2, &w1, &w0, x[ 7], y[ 4]);
+ word3_muladd(&w2, &w1, &w0, x[ 8], y[ 3]);
+ word3_muladd(&w2, &w1, &w0, x[ 9], y[ 2]);
+ word3_muladd(&w2, &w1, &w0, x[10], y[ 1]);
+ word3_muladd(&w2, &w1, &w0, x[11], y[ 0]);
z[11] = w0; w0 = w1; w1 = w2; w2 = 0;
- word3_muladd_2(&w2, &w1, &w0, x[5], x[7]);
- word3_muladd(&w2, &w1, &w0, x[6], x[6]);
+ word3_muladd(&w2, &w1, &w0, x[ 0], y[12]);
+ word3_muladd(&w2, &w1, &w0, x[ 1], y[11]);
+ word3_muladd(&w2, &w1, &w0, x[ 2], y[10]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], y[ 9]);
+ word3_muladd(&w2, &w1, &w0, x[ 4], y[ 8]);
+ word3_muladd(&w2, &w1, &w0, x[ 5], y[ 7]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], y[ 6]);
+ word3_muladd(&w2, &w1, &w0, x[ 7], y[ 5]);
+ word3_muladd(&w2, &w1, &w0, x[ 8], y[ 4]);
+ word3_muladd(&w2, &w1, &w0, x[ 9], y[ 3]);
+ word3_muladd(&w2, &w1, &w0, x[10], y[ 2]);
+ word3_muladd(&w2, &w1, &w0, x[11], y[ 1]);
+ word3_muladd(&w2, &w1, &w0, x[12], y[ 0]);
z[12] = w0; w0 = w1; w1 = w2; w2 = 0;
- word3_muladd_2(&w2, &w1, &w0, x[6], x[7]);
+ word3_muladd(&w2, &w1, &w0, x[ 0], y[13]);
+ word3_muladd(&w2, &w1, &w0, x[ 1], y[12]);
+ word3_muladd(&w2, &w1, &w0, x[ 2], y[11]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], y[10]);
+ word3_muladd(&w2, &w1, &w0, x[ 4], y[ 9]);
+ word3_muladd(&w2, &w1, &w0, x[ 5], y[ 8]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], y[ 7]);
+ word3_muladd(&w2, &w1, &w0, x[ 7], y[ 6]);
+ word3_muladd(&w2, &w1, &w0, x[ 8], y[ 5]);
+ word3_muladd(&w2, &w1, &w0, x[ 9], y[ 4]);
+ word3_muladd(&w2, &w1, &w0, x[10], y[ 3]);
+ word3_muladd(&w2, &w1, &w0, x[11], y[ 2]);
+ word3_muladd(&w2, &w1, &w0, x[12], y[ 1]);
+ word3_muladd(&w2, &w1, &w0, x[13], y[ 0]);
z[13] = w0; w0 = w1; w1 = w2; w2 = 0;
- word3_muladd(&w2, &w1, &w0, x[7], x[7]);
- z[14] = w0;
- z[15] = w1;
+ word3_muladd(&w2, &w1, &w0, x[ 0], y[14]);
+ word3_muladd(&w2, &w1, &w0, x[ 1], y[13]);
+ word3_muladd(&w2, &w1, &w0, x[ 2], y[12]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], y[11]);
+ word3_muladd(&w2, &w1, &w0, x[ 4], y[10]);
+ word3_muladd(&w2, &w1, &w0, x[ 5], y[ 9]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], y[ 8]);
+ word3_muladd(&w2, &w1, &w0, x[ 7], y[ 7]);
+ word3_muladd(&w2, &w1, &w0, x[ 8], y[ 6]);
+ word3_muladd(&w2, &w1, &w0, x[ 9], y[ 5]);
+ word3_muladd(&w2, &w1, &w0, x[10], y[ 4]);
+ word3_muladd(&w2, &w1, &w0, x[11], y[ 3]);
+ word3_muladd(&w2, &w1, &w0, x[12], y[ 2]);
+ word3_muladd(&w2, &w1, &w0, x[13], y[ 1]);
+ word3_muladd(&w2, &w1, &w0, x[14], y[ 0]);
+ z[14] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[ 0], y[15]);
+ word3_muladd(&w2, &w1, &w0, x[ 1], y[14]);
+ word3_muladd(&w2, &w1, &w0, x[ 2], y[13]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], y[12]);
+ word3_muladd(&w2, &w1, &w0, x[ 4], y[11]);
+ word3_muladd(&w2, &w1, &w0, x[ 5], y[10]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], y[ 9]);
+ word3_muladd(&w2, &w1, &w0, x[ 7], y[ 8]);
+ word3_muladd(&w2, &w1, &w0, x[ 8], y[ 7]);
+ word3_muladd(&w2, &w1, &w0, x[ 9], y[ 6]);
+ word3_muladd(&w2, &w1, &w0, x[10], y[ 5]);
+ word3_muladd(&w2, &w1, &w0, x[11], y[ 4]);
+ word3_muladd(&w2, &w1, &w0, x[12], y[ 3]);
+ word3_muladd(&w2, &w1, &w0, x[13], y[ 2]);
+ word3_muladd(&w2, &w1, &w0, x[14], y[ 1]);
+ word3_muladd(&w2, &w1, &w0, x[15], y[ 0]);
+ z[15] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[ 1], y[15]);
+ word3_muladd(&w2, &w1, &w0, x[ 2], y[14]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], y[13]);
+ word3_muladd(&w2, &w1, &w0, x[ 4], y[12]);
+ word3_muladd(&w2, &w1, &w0, x[ 5], y[11]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], y[10]);
+ word3_muladd(&w2, &w1, &w0, x[ 7], y[ 9]);
+ word3_muladd(&w2, &w1, &w0, x[ 8], y[ 8]);
+ word3_muladd(&w2, &w1, &w0, x[ 9], y[ 7]);
+ word3_muladd(&w2, &w1, &w0, x[10], y[ 6]);
+ word3_muladd(&w2, &w1, &w0, x[11], y[ 5]);
+ word3_muladd(&w2, &w1, &w0, x[12], y[ 4]);
+ word3_muladd(&w2, &w1, &w0, x[13], y[ 3]);
+ word3_muladd(&w2, &w1, &w0, x[14], y[ 2]);
+ word3_muladd(&w2, &w1, &w0, x[15], y[ 1]);
+ z[16] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[ 2], y[15]);
+ word3_muladd(&w2, &w1, &w0, x[ 3], y[14]);
+ word3_muladd(&w2, &w1, &w0, x[ 4], y[13]);
+ word3_muladd(&w2, &w1, &w0, x[ 5], y[12]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], y[11]);
+ word3_muladd(&w2, &w1, &w0, x[ 7], y[10]);
+ word3_muladd(&w2, &w1, &w0, x[ 8], y[ 9]);
+ word3_muladd(&w2, &w1, &w0, x[ 9], y[ 8]);
+ word3_muladd(&w2, &w1, &w0, x[10], y[ 7]);
+ word3_muladd(&w2, &w1, &w0, x[11], y[ 6]);
+ word3_muladd(&w2, &w1, &w0, x[12], y[ 5]);
+ word3_muladd(&w2, &w1, &w0, x[13], y[ 4]);
+ word3_muladd(&w2, &w1, &w0, x[14], y[ 3]);
+ word3_muladd(&w2, &w1, &w0, x[15], y[ 2]);
+ z[17] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[ 3], y[15]);
+ word3_muladd(&w2, &w1, &w0, x[ 4], y[14]);
+ word3_muladd(&w2, &w1, &w0, x[ 5], y[13]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], y[12]);
+ word3_muladd(&w2, &w1, &w0, x[ 7], y[11]);
+ word3_muladd(&w2, &w1, &w0, x[ 8], y[10]);
+ word3_muladd(&w2, &w1, &w0, x[ 9], y[ 9]);
+ word3_muladd(&w2, &w1, &w0, x[10], y[ 8]);
+ word3_muladd(&w2, &w1, &w0, x[11], y[ 7]);
+ word3_muladd(&w2, &w1, &w0, x[12], y[ 6]);
+ word3_muladd(&w2, &w1, &w0, x[13], y[ 5]);
+ word3_muladd(&w2, &w1, &w0, x[14], y[ 4]);
+ word3_muladd(&w2, &w1, &w0, x[15], y[ 3]);
+ z[18] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[ 4], y[15]);
+ word3_muladd(&w2, &w1, &w0, x[ 5], y[14]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], y[13]);
+ word3_muladd(&w2, &w1, &w0, x[ 7], y[12]);
+ word3_muladd(&w2, &w1, &w0, x[ 8], y[11]);
+ word3_muladd(&w2, &w1, &w0, x[ 9], y[10]);
+ word3_muladd(&w2, &w1, &w0, x[10], y[ 9]);
+ word3_muladd(&w2, &w1, &w0, x[11], y[ 8]);
+ word3_muladd(&w2, &w1, &w0, x[12], y[ 7]);
+ word3_muladd(&w2, &w1, &w0, x[13], y[ 6]);
+ word3_muladd(&w2, &w1, &w0, x[14], y[ 5]);
+ word3_muladd(&w2, &w1, &w0, x[15], y[ 4]);
+ z[19] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[ 5], y[15]);
+ word3_muladd(&w2, &w1, &w0, x[ 6], y[14]);
+ word3_muladd(&w2, &w1, &w0, x[ 7], y[13]);
+ word3_muladd(&w2, &w1, &w0, x[ 8], y[12]);
+ word3_muladd(&w2, &w1, &w0, x[ 9], y[11]);
+ word3_muladd(&w2, &w1, &w0, x[10], y[10]);
+ word3_muladd(&w2, &w1, &w0, x[11], y[ 9]);
+ word3_muladd(&w2, &w1, &w0, x[12], y[ 8]);
+ word3_muladd(&w2, &w1, &w0, x[13], y[ 7]);
+ word3_muladd(&w2, &w1, &w0, x[14], y[ 6]);
+ word3_muladd(&w2, &w1, &w0, x[15], y[ 5]);
+ z[20] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[ 6], y[15]);
+ word3_muladd(&w2, &w1, &w0, x[ 7], y[14]);
+ word3_muladd(&w2, &w1, &w0, x[ 8], y[13]);
+ word3_muladd(&w2, &w1, &w0, x[ 9], y[12]);
+ word3_muladd(&w2, &w1, &w0, x[10], y[11]);
+ word3_muladd(&w2, &w1, &w0, x[11], y[10]);
+ word3_muladd(&w2, &w1, &w0, x[12], y[ 9]);
+ word3_muladd(&w2, &w1, &w0, x[13], y[ 8]);
+ word3_muladd(&w2, &w1, &w0, x[14], y[ 7]);
+ word3_muladd(&w2, &w1, &w0, x[15], y[ 6]);
+ z[21] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[ 7], y[15]);
+ word3_muladd(&w2, &w1, &w0, x[ 8], y[14]);
+ word3_muladd(&w2, &w1, &w0, x[ 9], y[13]);
+ word3_muladd(&w2, &w1, &w0, x[10], y[12]);
+ word3_muladd(&w2, &w1, &w0, x[11], y[11]);
+ word3_muladd(&w2, &w1, &w0, x[12], y[10]);
+ word3_muladd(&w2, &w1, &w0, x[13], y[ 9]);
+ word3_muladd(&w2, &w1, &w0, x[14], y[ 8]);
+ word3_muladd(&w2, &w1, &w0, x[15], y[ 7]);
+ z[22] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[ 8], y[15]);
+ word3_muladd(&w2, &w1, &w0, x[ 9], y[14]);
+ word3_muladd(&w2, &w1, &w0, x[10], y[13]);
+ word3_muladd(&w2, &w1, &w0, x[11], y[12]);
+ word3_muladd(&w2, &w1, &w0, x[12], y[11]);
+ word3_muladd(&w2, &w1, &w0, x[13], y[10]);
+ word3_muladd(&w2, &w1, &w0, x[14], y[ 9]);
+ word3_muladd(&w2, &w1, &w0, x[15], y[ 8]);
+ z[23] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[ 9], y[15]);
+ word3_muladd(&w2, &w1, &w0, x[10], y[14]);
+ word3_muladd(&w2, &w1, &w0, x[11], y[13]);
+ word3_muladd(&w2, &w1, &w0, x[12], y[12]);
+ word3_muladd(&w2, &w1, &w0, x[13], y[11]);
+ word3_muladd(&w2, &w1, &w0, x[14], y[10]);
+ word3_muladd(&w2, &w1, &w0, x[15], y[ 9]);
+ z[24] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[10], y[15]);
+ word3_muladd(&w2, &w1, &w0, x[11], y[14]);
+ word3_muladd(&w2, &w1, &w0, x[12], y[13]);
+ word3_muladd(&w2, &w1, &w0, x[13], y[12]);
+ word3_muladd(&w2, &w1, &w0, x[14], y[11]);
+ word3_muladd(&w2, &w1, &w0, x[15], y[10]);
+ z[25] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[11], y[15]);
+ word3_muladd(&w2, &w1, &w0, x[12], y[14]);
+ word3_muladd(&w2, &w1, &w0, x[13], y[13]);
+ word3_muladd(&w2, &w1, &w0, x[14], y[12]);
+ word3_muladd(&w2, &w1, &w0, x[15], y[11]);
+ z[26] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[12], y[15]);
+ word3_muladd(&w2, &w1, &w0, x[13], y[14]);
+ word3_muladd(&w2, &w1, &w0, x[14], y[13]);
+ word3_muladd(&w2, &w1, &w0, x[15], y[12]);
+ z[27] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[13], y[15]);
+ word3_muladd(&w2, &w1, &w0, x[14], y[14]);
+ word3_muladd(&w2, &w1, &w0, x[15], y[13]);
+ z[28] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[14], y[15]);
+ word3_muladd(&w2, &w1, &w0, x[15], y[14]);
+ z[29] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+ word3_muladd(&w2, &w1, &w0, x[15], y[15]);
+ z[30] = w0;
+ z[31] = w1;
}
}
diff --git a/src/mp_karat.cpp b/src/mp_karat.cpp
index 770846b4e..15b0551fd 100644
--- a/src/mp_karat.cpp
+++ b/src/mp_karat.cpp
@@ -17,13 +17,13 @@ namespace {
void karatsuba_mul(word z[], const word x[], const word y[], u32bit N,
word workspace[])
{
- const u32bit KARATSUBA_MUL_LOWER_SIZE = BOTAN_KARAT_MUL_THRESHOLD;
-
if(N == 6)
bigint_comba_mul6(z, x, y);
else if(N == 8)
bigint_comba_mul8(z, x, y);
- else if(N < KARATSUBA_MUL_LOWER_SIZE || N % 2)
+ else if(N == 16)
+ bigint_comba_mul16(z, x, y);
+ else if(N < BOTAN_KARAT_MUL_THRESHOLD || N % 2)
bigint_simple_mul(z, x, N, y, N);
else
{
@@ -96,13 +96,13 @@ void karatsuba_mul(word z[], const word x[], const word y[], u32bit N,
*************************************************/
void karatsuba_sqr(word z[], const word x[], u32bit N, word workspace[])
{
- const u32bit KARATSUBA_SQR_LOWER_SIZE = BOTAN_KARAT_SQR_THRESHOLD;
-
if(N == 6)
bigint_comba_sqr6(z, x);
else if(N == 8)
bigint_comba_sqr8(z, x);
- else if(N < KARATSUBA_SQR_LOWER_SIZE || N % 2)
+ else if(N == 16)
+ bigint_comba_sqr16(z, x);
+ else if(N < BOTAN_KARAT_SQR_THRESHOLD || N % 2)
bigint_simple_sqr(z, x, N);
else
{
@@ -234,97 +234,101 @@ u32bit karatsuba_size(u32bit z_size, u32bit x_size, u32bit x_sw)
return 0;
}
+}
+
/*************************************************
-* Handle small operand multiplies *
+* Multiplication Algorithm Dispatcher *
*************************************************/
-void handle_small_mul(word z[], u32bit z_size,
- const word x[], u32bit x_size, u32bit x_sw,
- const word y[], u32bit y_size, u32bit y_sw)
+void bigint_mul(word z[], u32bit z_size, word workspace[],
+ const word x[], u32bit x_size, u32bit x_sw,
+ const word y[], u32bit y_size, u32bit y_sw)
{
- if(x_sw == 1) bigint_linmul3(z, y, y_sw, x[0]);
- else if(y_sw == 1) bigint_linmul3(z, x, x_sw, y[0]);
-
+ if(x_sw == 1)
+ {
+ bigint_linmul3(z, y, y_sw, x[0]);
+ }
+ else if(y_sw == 1)
+ {
+ bigint_linmul3(z, x, x_sw, y[0]);
+ }
else if(x_sw <= 4 && x_size >= 4 &&
y_sw <= 4 && y_size >= 4 && z_size >= 8)
+ {
bigint_comba_mul4(z, x, y);
-
+ }
else if(x_sw <= 6 && x_size >= 6 &&
y_sw <= 6 && y_size >= 6 && z_size >= 12)
+ {
bigint_comba_mul6(z, x, y);
-
+ }
else if(x_sw <= 8 && x_size >= 8 &&
y_sw <= 8 && y_size >= 8 && z_size >= 16)
+ {
bigint_comba_mul8(z, x, y);
-
- else
+ }
+ else if(x_sw <= 16 && x_size >= 16 &&
+ y_sw <= 16 && y_size >= 16 && z_size >= 32)
+ {
+ bigint_comba_mul16(z, x, y);
+ }
+ else if(x_sw < BOTAN_KARAT_MUL_THRESHOLD || y_sw < BOTAN_KARAT_MUL_THRESHOLD)
bigint_simple_mul(z, x, x_sw, y, y_sw);
+ else
+ {
+ const u32bit N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
+
+ if(N)
+ {
+ clear_mem(workspace, 2*N);
+ karatsuba_mul(z, x, y, N, workspace);
+ }
+ else
+ bigint_simple_mul(z, x, x_sw, y, y_sw);
+ }
}
/*************************************************
-* Handle small operand squarings *
+* Squaring Algorithm Dispatcher *
*************************************************/
-void handle_small_sqr(word z[], u32bit z_size,
- const word x[], u32bit x_size, u32bit x_sw)
+void bigint_sqr(word z[], u32bit z_size, word workspace[],
+ const word x[], u32bit x_size, u32bit x_sw)
{
if(x_sw == 1)
+ {
bigint_linmul3(z, x, x_sw, x[0]);
+ }
else if(x_sw <= 4 && x_size >= 4 && z_size >= 8)
+ {
bigint_comba_sqr4(z, x);
+ }
else if(x_sw <= 6 && x_size >= 6 && z_size >= 12)
+ {
bigint_comba_sqr6(z, x);
+ }
else if(x_sw <= 8 && x_size >= 8 && z_size >= 16)
+ {
bigint_comba_sqr8(z, x);
- else
- bigint_simple_sqr(z, x, x_sw);
- }
-
-}
-
-/*************************************************
-* Multiplication Algorithm Dispatcher *
-*************************************************/
-void bigint_mul(word z[], u32bit z_size, word workspace[],
- const word x[], u32bit x_size, u32bit x_sw,
- const word y[], u32bit y_size, u32bit y_sw)
- {
- if(x_size <= 8 || y_size <= 8)
+ }
+ else if(x_sw <= 16 && x_size >= 16 && z_size >= 32)
{
- handle_small_mul(z, z_size, x, x_size, x_sw, y, y_size, y_sw);
- return;
+ bigint_comba_sqr16(z, x);
}
-
- const u32bit N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
-
- if(N)
+ else if(x_size < BOTAN_KARAT_SQR_THRESHOLD)
{
- clear_mem(workspace, 2*N);
- karatsuba_mul(z, x, y, N, workspace);
+ bigint_simple_sqr(z, x, x_sw);
}
else
- bigint_simple_mul(z, x, x_sw, y, y_sw);
- }
-
-/*************************************************
-* Squaring Algorithm Dispatcher *
-*************************************************/
-void bigint_sqr(word z[], u32bit z_size, word workspace[],
- const word x[], u32bit x_size, u32bit x_sw)
- {
- if(x_size <= 8 || x_sw <= 8)
{
- handle_small_sqr(z, z_size, x, x_size, x_sw);
- return;
- }
-
- const u32bit N = karatsuba_size(z_size, x_size, x_sw);
+ const u32bit N = karatsuba_size(z_size, x_size, x_sw);
- if(N)
- {
- clear_mem(workspace, 2*N);
- karatsuba_sqr(z, x, N, workspace);
+ if(N)
+ {
+ clear_mem(workspace, 2*N);
+ karatsuba_sqr(z, x, N, workspace);
+ }
+ else
+ bigint_simple_sqr(z, x, x_sw);
}
- else
- bigint_simple_sqr(z, x, x_sw);
}
}