From 3d0ac39eab74c6f74fe41eda9e5f057d1b396f10 Mon Sep 17 00:00:00 2001
From: lloyd <lloyd@randombit.net>
Date: Fri, 24 Sep 2010 21:58:47 +0000
Subject: Move the core MPI functions to src/math/mp, leaving src/math/bigint
 just for the implementation of the BigInt class

---
 src/math/bigint/info.txt                   |  14 +-
 src/math/bigint/monty_generic/info.txt     |   5 -
 src/math/bigint/monty_generic/mp_monty.cpp |  72 ---
 src/math/bigint/mp_amd64/info.txt          |  18 -
 src/math/bigint/mp_amd64/mp_asm.h          |  69 ---
 src/math/bigint/mp_amd64/mp_asmi.h         | 248 --------
 src/math/bigint/mp_asm.cpp                 | 183 ------
 src/math/bigint/mp_asm64/info.txt          |  25 -
 src/math/bigint/mp_asm64/mp_asm.h          | 122 ----
 src/math/bigint/mp_comba.cpp               | 920 -----------------------------
 src/math/bigint/mp_core.h                  | 144 -----
 src/math/bigint/mp_generic/info.txt        |   6 -
 src/math/bigint/mp_generic/mp_asm.h        |  54 --
 src/math/bigint/mp_generic/mp_asmi.h       | 207 -------
 src/math/bigint/mp_ia32/info.txt           |  18 -
 src/math/bigint/mp_ia32/mp_asm.h           |  67 ---
 src/math/bigint/mp_ia32/mp_asmi.h          | 240 --------
 src/math/bigint/mp_ia32_msvc/info.txt      |  16 -
 src/math/bigint/mp_ia32_msvc/mp_asmi.h     | 542 -----------------
 src/math/bigint/mp_karat.cpp               | 340 -----------
 src/math/bigint/mp_misc.cpp                | 102 ----
 src/math/bigint/mp_msvc64/info.txt         |  17 -
 src/math/bigint/mp_msvc64/mp_asm.h         |  61 --
 src/math/bigint/mp_shift.cpp               | 138 -----
 src/math/bigint/mp_types.h                 |  33 --
 src/math/bigint/mulop_generic/info.txt     |   5 -
 src/math/bigint/mulop_generic/mp_mulop.cpp |  77 ---
 src/math/mp/info.txt                       |  23 +
 src/math/mp/monty_generic/info.txt         |   5 +
 src/math/mp/monty_generic/mp_monty.cpp     |  72 +++
 src/math/mp/mp_amd64/info.txt              |  18 +
 src/math/mp/mp_amd64/mp_asm.h              |  69 +++
 src/math/mp/mp_amd64/mp_asmi.h             | 248 ++++++++
 src/math/mp/mp_asm.cpp                     | 183 ++++++
 src/math/mp/mp_asm64/info.txt              |  25 +
 src/math/mp/mp_asm64/mp_asm.h              | 122 ++++
 src/math/mp/mp_comba.cpp                   | 920 +++++++++++++++++++++++++++++
 src/math/mp/mp_core.h                      | 144 +++++
 src/math/mp/mp_generic/info.txt            |   6 +
 src/math/mp/mp_generic/mp_asm.h            |  54 ++
 src/math/mp/mp_generic/mp_asmi.h           | 207 +++++++
 src/math/mp/mp_ia32/info.txt               |  18 +
 src/math/mp/mp_ia32/mp_asm.h               |  67 +++
 src/math/mp/mp_ia32/mp_asmi.h              | 240 ++++++++
 src/math/mp/mp_ia32_msvc/info.txt          |  16 +
 src/math/mp/mp_ia32_msvc/mp_asmi.h         | 542 +++++++++++++++++
 src/math/mp/mp_karat.cpp                   | 340 +++++++++++
 src/math/mp/mp_misc.cpp                    | 102 ++++
 src/math/mp/mp_msvc64/info.txt             |  17 +
 src/math/mp/mp_msvc64/mp_asm.h             |  61 ++
 src/math/mp/mp_shift.cpp                   | 138 +++++
 src/math/mp/mp_types.h                     |  33 ++
 src/math/mp/mulop_generic/info.txt         |   5 +
 src/math/mp/mulop_generic/mp_mulop.cpp     |  77 +++
 54 files changed, 3753 insertions(+), 3742 deletions(-)
 delete mode 100644 src/math/bigint/monty_generic/info.txt
 delete mode 100644 src/math/bigint/monty_generic/mp_monty.cpp
 delete mode 100644 src/math/bigint/mp_amd64/info.txt
 delete mode 100644 src/math/bigint/mp_amd64/mp_asm.h
 delete mode 100644 src/math/bigint/mp_amd64/mp_asmi.h
 delete mode 100644 src/math/bigint/mp_asm.cpp
 delete mode 100644 src/math/bigint/mp_asm64/info.txt
 delete mode 100644 src/math/bigint/mp_asm64/mp_asm.h
 delete mode 100644 src/math/bigint/mp_comba.cpp
 delete mode 100644 src/math/bigint/mp_core.h
 delete mode 100644 src/math/bigint/mp_generic/info.txt
 delete mode 100644 src/math/bigint/mp_generic/mp_asm.h
 delete mode 100644 src/math/bigint/mp_generic/mp_asmi.h
 delete mode 100644 src/math/bigint/mp_ia32/info.txt
 delete mode 100644 src/math/bigint/mp_ia32/mp_asm.h
 delete mode 100644 src/math/bigint/mp_ia32/mp_asmi.h
 delete mode 100644 src/math/bigint/mp_ia32_msvc/info.txt
 delete mode 100644 src/math/bigint/mp_ia32_msvc/mp_asmi.h
 delete mode 100644 src/math/bigint/mp_karat.cpp
 delete mode 100644 src/math/bigint/mp_misc.cpp
 delete mode 100644 src/math/bigint/mp_msvc64/info.txt
 delete mode 100644 src/math/bigint/mp_msvc64/mp_asm.h
 delete mode 100644 src/math/bigint/mp_shift.cpp
 delete mode 100644 src/math/bigint/mp_types.h
 delete mode 100644 src/math/bigint/mulop_generic/info.txt
 delete mode 100644 src/math/bigint/mulop_generic/mp_mulop.cpp
 create mode 100644 src/math/mp/info.txt
 create mode 100644 src/math/mp/monty_generic/info.txt
 create mode 100644 src/math/mp/monty_generic/mp_monty.cpp
 create mode 100644 src/math/mp/mp_amd64/info.txt
 create mode 100644 src/math/mp/mp_amd64/mp_asm.h
 create mode 100644 src/math/mp/mp_amd64/mp_asmi.h
 create mode 100644 src/math/mp/mp_asm.cpp
 create mode 100644 src/math/mp/mp_asm64/info.txt
 create mode 100644 src/math/mp/mp_asm64/mp_asm.h
 create mode 100644 src/math/mp/mp_comba.cpp
 create mode 100644 src/math/mp/mp_core.h
 create mode 100644 src/math/mp/mp_generic/info.txt
 create mode 100644 src/math/mp/mp_generic/mp_asm.h
 create mode 100644 src/math/mp/mp_generic/mp_asmi.h
 create mode 100644 src/math/mp/mp_ia32/info.txt
 create mode 100644 src/math/mp/mp_ia32/mp_asm.h
 create mode 100644 src/math/mp/mp_ia32/mp_asmi.h
 create mode 100644 src/math/mp/mp_ia32_msvc/info.txt
 create mode 100644 src/math/mp/mp_ia32_msvc/mp_asmi.h
 create mode 100644 src/math/mp/mp_karat.cpp
 create mode 100644 src/math/mp/mp_misc.cpp
 create mode 100644 src/math/mp/mp_msvc64/info.txt
 create mode 100644 src/math/mp/mp_msvc64/mp_asm.h
 create mode 100644 src/math/mp/mp_shift.cpp
 create mode 100644 src/math/mp/mp_types.h
 create mode 100644 src/math/mp/mulop_generic/info.txt
 create mode 100644 src/math/mp/mulop_generic/mp_mulop.cpp

(limited to 'src')

diff --git a/src/math/bigint/info.txt b/src/math/bigint/info.txt
index 7892a6edf..6057c708e 100644
--- a/src/math/bigint/info.txt
+++ b/src/math/bigint/info.txt
@@ -5,13 +5,8 @@ define BIGINT
 <header:public>
 bigint.h
 divide.h
-mp_types.h
 </header:public>
 
-<header:internal>
-mp_core.h
-</header:internal>
-
 <source>
 big_code.cpp
 big_io.cpp
@@ -20,18 +15,11 @@ big_ops3.cpp
 big_rand.cpp
 bigint.cpp
 divide.cpp
-mp_asm.cpp
-mp_comba.cpp
-mp_karat.cpp
-mp_misc.cpp
-mp_shift.cpp
 </source>
 
 <requires>
 alloc
+mp
 hex
-mp_amd64|mp_msvc64|mp_asm64|mp_ia32|mp_ia32_msvc|mp_generic
-monty_generic
-mulop_generic
 rng
 </requires>
diff --git a/src/math/bigint/monty_generic/info.txt b/src/math/bigint/monty_generic/info.txt
deleted file mode 100644
index cd05ccdc0..000000000
--- a/src/math/bigint/monty_generic/info.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-load_on dep
-
-<source>
-mp_monty.cpp
-</source>
diff --git a/src/math/bigint/monty_generic/mp_monty.cpp b/src/math/bigint/monty_generic/mp_monty.cpp
deleted file mode 100644
index bce35259a..000000000
--- a/src/math/bigint/monty_generic/mp_monty.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
-* Montgomery Reduction
-* (C) 1999-2010 Jack Lloyd
-*     2006 Luca Piccarreta
-*
-* Distributed under the terms of the Botan license
-*/
-
-#include <botan/internal/mp_core.h>
-#include <botan/internal/mp_asm.h>
-#include <botan/internal/mp_asmi.h>
-#include <botan/mem_ops.h>
-
-namespace Botan {
-
-extern "C" {
-
-/*
-* Montgomery Reduction Algorithm
-*/
-void bigint_monty_redc(word z[], u32bit z_size,
-                       word ws[],
-                       const word x[], u32bit x_size,
-                       word u)
-   {
-   const u32bit blocks_of_8 = x_size - (x_size % 8);
-
-   for(u32bit i = 0; i != x_size; ++i)
-      {
-      word* z_i = z + i;
-
-      const word y = z_i[0] * u;
-
-      /*
-      bigint_linmul3(ws, x, x_size, y);
-      bigint_add2(z_i, z_size - i, ws, x_size+1);
-      */
-      word carry = 0;
-
-      for(u32bit j = 0; j != blocks_of_8; j += 8)
-         carry = word8_madd3(z_i + j, x + j, y, carry);
-
-      for(u32bit j = blocks_of_8; j != x_size; ++j)
-         z_i[j] = word_madd3(x[j], y, z_i[j], &carry);
-
-      word z_sum = z_i[x_size] + carry;
-      carry = (z_sum < z_i[x_size]);
-      z_i[x_size] = z_sum;
-
-      // Note: not constant time
-      for(u32bit j = x_size + 1; carry && j != z_size - i; ++j)
-         {
-         ++z_i[j];
-         carry = !z_i[j];
-         }
-      }
-
-   word borrow = 0;
-   for(u32bit i = 0; i != x_size; ++i)
-      ws[i] = word_sub(z[x_size + i], x[i], &borrow);
-
-   ws[x_size] = word_sub(z[x_size+x_size], 0, &borrow);
-
-   copy_mem(ws + x_size + 1, z + x_size, x_size + 1);
-
-   copy_mem(z, ws + borrow*(x_size+1), x_size + 1);
-   clear_mem(z + x_size + 1, z_size - x_size - 1);
-   }
-
-}
-
-}
diff --git a/src/math/bigint/mp_amd64/info.txt b/src/math/bigint/mp_amd64/info.txt
deleted file mode 100644
index 11cc380e2..000000000
--- a/src/math/bigint/mp_amd64/info.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-load_on dep
-
-mp_bits 64
-
-<header:internal>
-mp_asm.h
-mp_asmi.h
-</header:internal>
-
-<arch>
-amd64
-</arch>
-
-<cc>
-clang
-gcc
-icc
-</cc>
diff --git a/src/math/bigint/mp_amd64/mp_asm.h b/src/math/bigint/mp_amd64/mp_asm.h
deleted file mode 100644
index fa66d04f3..000000000
--- a/src/math/bigint/mp_amd64/mp_asm.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
-* Lowest Level MPI Algorithms
-* (C) 1999-2008 Jack Lloyd
-*     2006 Luca Piccarreta
-*
-* Distributed under the terms of the Botan license
-*/
-
-#ifndef BOTAN_MP_ASM_H__
-#define BOTAN_MP_ASM_H__
-
-#include <botan/mp_types.h>
-
-#if (BOTAN_MP_WORD_BITS != 64)
-   #error The mp_amd64 module requires that BOTAN_MP_WORD_BITS == 64
-#endif
-
-namespace Botan {
-
-extern "C" {
-
-/*
-* Helper Macros for amd64 Assembly
-*/
-#define ASM(x) x "\n\t"
-
-/*
-* Word Multiply
-*/
-inline word word_madd2(word a, word b, word* c)
-   {
-   asm(
-      ASM("mulq %[b]")
-      ASM("addq %[c],%[a]")
-      ASM("adcq $0,%[carry]")
-
-      : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c)
-      : "0"(a), "1"(b), [c]"g"(*c) : "cc");
-
-   return a;
-   }
-
-/*
-* Word Multiply/Add
-*/
-inline word word_madd3(word a, word b, word c, word* d)
-   {
-   asm(
-      ASM("mulq %[b]")
-
-      ASM("addq %[c],%[a]")
-      ASM("adcq $0,%[carry]")
-
-      ASM("addq %[d],%[a]")
-      ASM("adcq $0,%[carry]")
-
-      : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d)
-      : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc");
-
-   return a;
-   }
-
-#undef ASM
-
-}
-
-}
-
-#endif
diff --git a/src/math/bigint/mp_amd64/mp_asmi.h b/src/math/bigint/mp_amd64/mp_asmi.h
deleted file mode 100644
index adf7774ef..000000000
--- a/src/math/bigint/mp_amd64/mp_asmi.h
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
-* Lowest Level MPI Algorithms
-* (C) 1999-2010 Jack Lloyd
-*     2006 Luca Piccarreta
-*
-* Distributed under the terms of the Botan license
-*/
-
-#ifndef BOTAN_MP_ASM_INTERNAL_H__
-#define BOTAN_MP_ASM_INTERNAL_H__
-
-#include <botan/internal/mp_asm.h>
-
-namespace Botan {
-
-extern "C" {
-
-/*
-* Helper Macros for amd64 Assembly
-*/
-#ifndef ASM
-  #define ASM(x) x "\n\t"
-#endif
-
-#define ADDSUB2_OP(OPERATION, INDEX)                     \
-        ASM("movq 8*" #INDEX "(%[y]), %[carry]")         \
-        ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])")   \
-
-#define ADDSUB3_OP(OPERATION, INDEX)                     \
-        ASM("movq 8*" #INDEX "(%[x]), %[carry]")         \
-        ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]")   \
-        ASM("movq %[carry], 8*" #INDEX "(%[z])")         \
-
-#define LINMUL_OP(WRITE_TO, INDEX)                       \
-        ASM("movq 8*" #INDEX "(%[x]),%%rax")             \
-        ASM("mulq %[y]")                                 \
-        ASM("addq %[carry],%%rax")                       \
-        ASM("adcq $0,%%rdx")                             \
-        ASM("movq %%rdx,%[carry]")                       \
-        ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")
-
-#define MULADD_OP(IGNORED, INDEX)                        \
-        ASM("movq 8*" #INDEX "(%[x]),%%rax")             \
-        ASM("mulq %[y]")                                 \
-        ASM("addq %[carry],%%rax")                       \
-        ASM("adcq $0,%%rdx")                             \
-        ASM("addq 8*" #INDEX "(%[z]),%%rax")             \
-        ASM("adcq $0,%%rdx")                             \
-        ASM("movq %%rdx,%[carry]")                       \
-        ASM("movq %%rax, 8*" #INDEX " (%[z])")
-
-#define DO_8_TIMES(MACRO, ARG) \
-        MACRO(ARG, 0) \
-        MACRO(ARG, 1) \
-        MACRO(ARG, 2) \
-        MACRO(ARG, 3) \
-        MACRO(ARG, 4) \
-        MACRO(ARG, 5) \
-        MACRO(ARG, 6) \
-        MACRO(ARG, 7)
-
-#define ADD_OR_SUBTRACT(CORE_CODE)     \
-        ASM("rorq %[carry]")           \
-        CORE_CODE                      \
-        ASM("sbbq %[carry],%[carry]")  \
-        ASM("negq %[carry]")
-
-/*
-* Word Addition
-*/
-inline word word_add(word x, word y, word* carry)
-   {
-   asm(
-      ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]"))
-      : [x]"=r"(x), [carry]"=r"(*carry)
-      : "0"(x), [y]"rm"(y), "1"(*carry)
-      : "cc");
-   return x;
-   }
-
-/*
-* Eight Word Block Addition, Two Argument
-*/
-inline word word8_add2(word x[8], const word y[8], word carry)
-   {
-   asm(
-      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq"))
-      : [carry]"=r"(carry)
-      : [x]"r"(x), [y]"r"(y), "0"(carry)
-      : "cc", "memory");
-   return carry;
-   }
-
-/*
-* Eight Word Block Addition, Three Argument
-*/
-inline word word8_add3(word z[8], const word x[8], const word y[8], word carry)
-   {
-   asm(
-      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq"))
-      : [carry]"=r"(carry)
-      : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
-      : "cc", "memory");
-   return carry;
-   }
-
-/*
-* Word Subtraction
-*/
-inline word word_sub(word x, word y, word* carry)
-   {
-   asm(
-      ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]"))
-      : [x]"=r"(x), [carry]"=r"(*carry)
-      : "0"(x), [y]"rm"(y), "1"(*carry)
-      : "cc");
-   return x;
-   }
-
-/*
-* Eight Word Block Subtraction, Two Argument
-*/
-inline word word8_sub2(word x[8], const word y[8], word carry)
-   {
-   asm(
-      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq"))
-      : [carry]"=r"(carry)
-      : [x]"r"(x), [y]"r"(y), "0"(carry)
-      : "cc", "memory");
-   return carry;
-   }
-
-/*
-* Eight Word Block Subtraction, Two Argument
-*/
-inline word word8_sub2_rev(word x[8], const word y[8], word carry)
-   {
-   asm(
-      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
-      : [carry]"=r"(carry)
-      : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
-      : "cc", "memory");
-   return carry;
-   }
-
-/*
-* Eight Word Block Subtraction, Three Argument
-*/
-inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry)
-   {
-   asm(
-      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
-      : [carry]"=r"(carry)
-      : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
-      : "cc", "memory");
-   return carry;
-   }
-
-/*
-* Eight Word Block Linear Multiplication
-*/
-inline word word8_linmul2(word x[8], word y, word carry)
-   {
-   asm(
-      DO_8_TIMES(LINMUL_OP, "x")
-      : [carry]"=r"(carry)
-      : [x]"r"(x), [y]"rm"(y), "0"(carry)
-      : "cc", "%rax", "%rdx");
-   return carry;
-   }
-
-/*
-* Eight Word Block Linear Multiplication
-*/
-inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
-   {
-   asm(
-      DO_8_TIMES(LINMUL_OP, "z")
-      : [carry]"=r"(carry)
-      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
-      : "cc", "%rax", "%rdx");
-   return carry;
-   }
-
-/*
-* Eight Word Block Multiply/Add
-*/
-inline word word8_madd3(word z[8], const word x[8], word y, word carry)
-   {
-   asm(
-      DO_8_TIMES(MULADD_OP, "")
-      : [carry]"=r"(carry)
-      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
-      : "cc", "%rax", "%rdx");
-   return carry;
-   }
-
-/*
-* Multiply-Add Accumulator
-*/
-inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y)
-   {
-   asm(
-      ASM("mulq %[y]")
-
-      ASM("addq %[x],%[w0]")
-      ASM("adcq %[y],%[w1]")
-      ASM("adcq $0,%[w2]")
-
-      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
-      : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2)
-      : "cc");
-   }
-
-/*
-* Multiply-Add Accumulator
-*/
-inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y)
-   {
-   asm(
-      ASM("mulq %[y]")
-
-      ASM("addq %[x],%[w0]")
-      ASM("adcq %[y],%[w1]")
-      ASM("adcq $0,%[w2]")
-
-      ASM("addq %[x],%[w0]")
-      ASM("adcq %[y],%[w1]")
-      ASM("adcq $0,%[w2]")
-
-      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
-      : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2)
-      : "cc");
-   }
-
-
-#undef ASM
-#undef DO_8_TIMES
-#undef ADD_OR_SUBTRACT
-#undef ADDSUB2_OP
-#undef ADDSUB3_OP
-#undef LINMUL_OP
-#undef MULADD_OP
-
-}
-
-}
-#endif
diff --git a/src/math/bigint/mp_asm.cpp b/src/math/bigint/mp_asm.cpp
deleted file mode 100644
index 4fcdee7a4..000000000
--- a/src/math/bigint/mp_asm.cpp
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
-* Lowest Level MPI Algorithms
-* (C) 1999-2010 Jack Lloyd
-*     2006 Luca Piccarreta
-*
-* Distributed under the terms of the Botan license
-*/
-
-#include <botan/internal/mp_asm.h>
-#include <botan/internal/mp_asmi.h>
-#include <botan/internal/mp_core.h>
-#include <botan/exceptn.h>
-#include <botan/mem_ops.h>
-
-namespace Botan {
-
-extern "C" {
-
-/*
-* Two Operand Addition, No Carry
-*/
-word bigint_add2_nc(word x[], u32bit x_size, const word y[], u32bit y_size)
-   {
-   word carry = 0;
-
-   const u32bit blocks = y_size - (y_size % 8);
-
-   for(u32bit i = 0; i != blocks; i += 8)
-      carry = word8_add2(x + i, y + i, carry);
-
-   for(u32bit i = blocks; i != y_size; ++i)
-      x[i] = word_add(x[i], y[i], &carry);
-
-   for(u32bit i = y_size; i != x_size; ++i)
-      x[i] = word_add(x[i], 0, &carry);
-
-   return carry;
-   }
-
-/*
-* Three Operand Addition, No Carry
-*/
-word bigint_add3_nc(word z[], const word x[], u32bit x_size,
-                              const word y[], u32bit y_size)
-   {
-   if(x_size < y_size)
-      { return bigint_add3_nc(z, y, y_size, x, x_size); }
-
-   word carry = 0;
-
-   const u32bit blocks = y_size - (y_size % 8);
-
-   for(u32bit i = 0; i != blocks; i += 8)
-      carry = word8_add3(z + i, x + i, y + i, carry);
-
-   for(u32bit i = blocks; i != y_size; ++i)
-      z[i] = word_add(x[i], y[i], &carry);
-
-   for(u32bit i = y_size; i != x_size; ++i)
-      z[i] = word_add(x[i], 0, &carry);
-
-   return carry;
-   }
-
-/*
-* Two Operand Addition
-*/
-void bigint_add2(word x[], u32bit x_size, const word y[], u32bit y_size)
-   {
-   x[x_size] += bigint_add2_nc(x, x_size, y, y_size);
-   }
-
-/*
-* Three Operand Addition
-*/
-void bigint_add3(word z[], const word x[], u32bit x_size,
-                           const word y[], u32bit y_size)
-   {
-   z[(x_size > y_size ? x_size : y_size)] +=
-      bigint_add3_nc(z, x, x_size, y, y_size);
-   }
-
-/*
-* Two Operand Subtraction
-*/
-word bigint_sub2(word x[], u32bit x_size, const word y[], u32bit y_size)
-   {
-   word borrow = 0;
-
-   const u32bit blocks = y_size - (y_size % 8);
-
-   for(u32bit i = 0; i != blocks; i += 8)
-      borrow = word8_sub2(x + i, y + i, borrow);
-
-   for(u32bit i = blocks; i != y_size; ++i)
-      x[i] = word_sub(x[i], y[i], &borrow);
-
-   for(u32bit i = y_size; i != x_size; ++i)
-      x[i] = word_sub(x[i], 0, &borrow);
-
-   return borrow;
-   }
-
-/*
-* Two Operand Subtraction x = y - x
-*/
-void bigint_sub2_rev(word x[],  const word y[], u32bit y_size)
-   {
-   word borrow = 0;
-
-   const u32bit blocks = y_size - (y_size % 8);
-
-   for(u32bit i = 0; i != blocks; i += 8)
-      borrow = word8_sub2_rev(x + i, y + i, borrow);
-
-   for(u32bit i = blocks; i != y_size; ++i)
-      x[i] = word_sub(y[i], x[i], &borrow);
-
-   if(borrow)
-      throw Internal_Error("bigint_sub2_rev: x >= y");
-   }
-
-/*
-* Three Operand Subtraction
-*/
-word bigint_sub3(word z[], const word x[], u32bit x_size,
-                           const word y[], u32bit y_size)
-   {
-   word borrow = 0;
-
-   const u32bit blocks = y_size - (y_size % 8);
-
-   for(u32bit i = 0; i != blocks; i += 8)
-      borrow = word8_sub3(z + i, x + i, y + i, borrow);
-
-   for(u32bit i = blocks; i != y_size; ++i)
-      z[i] = word_sub(x[i], y[i], &borrow);
-
-   for(u32bit i = y_size; i != x_size; ++i)
-      z[i] = word_sub(x[i], 0, &borrow);
-
-   return borrow;
-   }
-
-/*
-* Two Operand Linear Multiply
-*/
-void bigint_linmul2(word x[], u32bit x_size, word y)
-   {
-   const u32bit blocks = x_size - (x_size % 8);
-
-   word carry = 0;
-
-   for(u32bit i = 0; i != blocks; i += 8)
-      carry = word8_linmul2(x + i, y, carry);
-
-   for(u32bit i = blocks; i != x_size; ++i)
-      x[i] = word_madd2(x[i], y, &carry);
-
-   x[x_size] = carry;
-   }
-
-/*
-* Three Operand Linear Multiply
-*/
-void bigint_linmul3(word z[], const word x[], u32bit x_size, word y)
-   {
-   const u32bit blocks = x_size - (x_size % 8);
-
-   word carry = 0;
-
-   for(u32bit i = 0; i != blocks; i += 8)
-      carry = word8_linmul3(z + i, x + i, y, carry);
-
-   for(u32bit i = blocks; i != x_size; ++i)
-      z[i] = word_madd2(x[i], y, &carry);
-
-   z[x_size] = carry;
-   }
-
-}
-
-}
diff --git a/src/math/bigint/mp_asm64/info.txt b/src/math/bigint/mp_asm64/info.txt
deleted file mode 100644
index fd0242a7a..000000000
--- a/src/math/bigint/mp_asm64/info.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-mp_bits 64
-
-load_on dep
-
-<header:internal>
-mp_asm.h
-mp_generic:mp_asmi.h
-</header:internal>
-
-<arch>
-#amd64
-alpha
-ia64
-mips64
-ppc64
-sparc64
-</arch>
-
-# The inline asm only works with gcc, but it looks like (at least on
-# UltraSPARC), using 64-bit words and the sythensized multiply is a 5 to 25%
-# win, so it's probably worth using elsewhere.
-<cc>
-gcc
-sunwspro
-</cc>
diff --git a/src/math/bigint/mp_asm64/mp_asm.h b/src/math/bigint/mp_asm64/mp_asm.h
deleted file mode 100644
index b0906095d..000000000
--- a/src/math/bigint/mp_asm64/mp_asm.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
-* MPI Multiply-Add Core
-* (C) 1999-2007 Jack Lloyd
-*
-* Distributed under the terms of the Botan license
-*/
-
-#ifndef BOTAN_MP_MADD_H__
-#define BOTAN_MP_MADD_H__
-
-#include <botan/mp_types.h>
-
-namespace Botan {
-
-#if (BOTAN_MP_WORD_BITS != 64)
-   #error The mp_asm64 module requires that BOTAN_MP_WORD_BITS == 64
-#endif
-
-#if defined(BOTAN_TARGET_ARCH_IS_ALPHA)
-
-#define BOTAN_WORD_MUL(a,b,z1,z0) do {                   \
-   asm("umulh %1,%2,%0" : "=r" (z0) : "r" (a), "r" (b)); \
-   z1 = a * b;                                           \
-} while(0);
-
-#elif defined(BOTAN_TARGET_ARCH_IS_AMD64)
-
-#define BOTAN_WORD_MUL(a,b,z1,z0) do {       \
-   asm("mulq %3" : "=d" (z0), "=a" (z1) :    \
-       "a" (a), "rm" (b) : "cc");            \
-} while(0);
-
-#elif defined(BOTAN_TARGET_ARCH_IS_IA64)
-
-#define BOTAN_WORD_MUL(a,b,z1,z0) do {                     \
-   asm("xmpy.hu %0=%1,%2" : "=f" (z0) : "f" (a), "f" (b)); \
-   z1 = a * b;                                             \
-} while(0);
-
-#elif defined(BOTAN_TARGET_ARCH_IS_PPC64)
-
-#define BOTAN_WORD_MUL(a,b,z1,z0) do {                           \
-   asm("mulhdu %0,%1,%2" : "=r" (z0) : "r" (a), "r" (b) : "cc"); \
-   z1 = a * b;                                                   \
-} while(0);
-
-#elif defined(BOTAN_TARGET_ARCH_IS_MIPS64)
-
-#define BOTAN_WORD_MUL(a,b,z1,z0) do {                            \
-   typedef unsigned int uint128_t __attribute__((mode(TI)));      \
-   uint128_t r = (uint128_t)a * b;                                \
-   z0 = (r >> 64) & 0xFFFFFFFFFFFFFFFF;                           \
-   z1 = (r      ) & 0xFFFFFFFFFFFFFFFF;                           \
-} while(0);
-
-#else
-
-// Do a 64x64->128 multiply using four 64x64->64 multiplies
-// plus some adds and shifts. Last resort for CPUs like UltraSPARC,
-// with 64-bit registers/ALU, but no 64x64->128 multiply.
-inline void bigint_2word_mul(word a, word b, word* z1, word* z0)
-   {
-   const u32bit MP_HWORD_BITS = BOTAN_MP_WORD_BITS / 2;
-   const word MP_HWORD_MASK = ((word)1 << MP_HWORD_BITS) - 1;
-
-   const word a_hi = (a >> MP_HWORD_BITS);
-   const word a_lo = (a & MP_HWORD_MASK);
-   const word b_hi = (b >> MP_HWORD_BITS);
-   const word b_lo = (b & MP_HWORD_MASK);
-
-   word x0 = a_hi * b_hi;
-   word x1 = a_lo * b_hi;
-   word x2 = a_hi * b_lo;
-   word x3 = a_lo * b_lo;
-
-   x2 += x3 >> (MP_HWORD_BITS);
-   x2 += x1;
-   if(x2 < x1)
-      x0 += ((word)1 << MP_HWORD_BITS);
-
-   *z0 = x0 + (x2 >> MP_HWORD_BITS);
-   *z1 = ((x2 & MP_HWORD_MASK) << MP_HWORD_BITS) + (x3 & MP_HWORD_MASK);
-   }
-
-#define BOTAN_WORD_MUL(a,b,z1,z0) bigint_2word_mul(a, b, &z1, &z0)
-
-#endif
-
-/*
-* Word Multiply/Add
-*/
-inline word word_madd2(word a, word b, word* c)
-   {
-   word z0 = 0, z1 = 0;
-
-   BOTAN_WORD_MUL(a, b, z1, z0);
-
-   z1 += *c; if(z1 < *c) z0++;
-
-   *c = z0;
-   return z1;
-   }
-
-/*
-* Word Multiply/Add
-*/
-inline word word_madd3(word a, word b, word c, word* d)
-   {
-   word z0 = 0, z1 = 0;
-
-   BOTAN_WORD_MUL(a, b, z1, z0);
-
-   z1 += c; if(z1 < c) z0++;
-   z1 += *d; if(z1 < *d) z0++;
-
-   *d = z0;
-   return z1;
-   }
-
-}
-
-#endif
diff --git a/src/math/bigint/mp_comba.cpp b/src/math/bigint/mp_comba.cpp
deleted file mode 100644
index 2770d3f0a..000000000
--- a/src/math/bigint/mp_comba.cpp
+++ /dev/null
@@ -1,920 +0,0 @@
-/*
-* Comba Multiplication and Squaring
-* (C) 1999-2007 Jack Lloyd
-*
-* Distributed under the terms of the Botan license
-*/
-
-#include <botan/internal/mp_core.h>
-#include <botan/internal/mp_asmi.h>
-
-namespace Botan {
-
-extern "C" {
-
-/*
-* Comba 4x4 Squaring
-*/
-void bigint_comba_sqr4(word z[8], const word x[4])
-   {
-   word w2 = 0, w1 = 0, w0 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], x[0]);
-   z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[0], x[1]);
-   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[0], x[2]);
-   word3_muladd(&w2, &w1, &w0, x[1], x[1]);
-   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[0], x[3]);
-   word3_muladd_2(&w2, &w1, &w0, x[1], x[2]);
-   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[1], x[3]);
-   word3_muladd(&w2, &w1, &w0, x[2], x[2]);
-   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[2], x[3]);
-   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[3], x[3]);
-   z[6] = w0;
-   z[7] = w1;
-   }
-
-/*
-* Comba 4x4 Multiplication
-*/
-void bigint_comba_mul4(word z[8], const word x[4], const word y[4])
-   {
-   word w2 = 0, w1 = 0, w0 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], y[0]);
-   z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], y[1]);
-   word3_muladd(&w2, &w1, &w0, x[1], y[0]);
-   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], y[2]);
-   word3_muladd(&w2, &w1, &w0, x[1], y[1]);
-   word3_muladd(&w2, &w1, &w0, x[2], y[0]);
-   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], y[3]);
-   word3_muladd(&w2, &w1, &w0, x[1], y[2]);
-   word3_muladd(&w2, &w1, &w0, x[2], y[1]);
-   word3_muladd(&w2, &w1, &w0, x[3], y[0]);
-   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[1], y[3]);
-   word3_muladd(&w2, &w1, &w0, x[2], y[2]);
-   word3_muladd(&w2, &w1, &w0, x[3], y[1]);
-   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[2], y[3]);
-   word3_muladd(&w2, &w1, &w0, x[3], y[2]);
-   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[3], y[3]);
-   z[6] = w0;
-   z[7] = w1;
-   }
-
-/*
-* Comba 6x6 Squaring
-*/
-void bigint_comba_sqr6(word z[12], const word x[6])
-   {
-   word w2 = 0, w1 = 0, w0 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], x[0]);
-   z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[0], x[1]);
-   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[0], x[2]);
-   word3_muladd(&w2, &w1, &w0, x[1], x[1]);
-   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[0], x[3]);
-   word3_muladd_2(&w2, &w1, &w0, x[1], x[2]);
-   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[0], x[4]);
-   word3_muladd_2(&w2, &w1, &w0, x[1], x[3]);
-   word3_muladd(&w2, &w1, &w0, x[2], x[2]);
-   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[0], x[5]);
-   word3_muladd_2(&w2, &w1, &w0, x[1], x[4]);
-   word3_muladd_2(&w2, &w1, &w0, x[2], x[3]);
-   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[1], x[5]);
-   word3_muladd_2(&w2, &w1, &w0, x[2], x[4]);
-   word3_muladd(&w2, &w1, &w0, x[3], x[3]);
-   z[6] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[2], x[5]);
-   word3_muladd_2(&w2, &w1, &w0, x[3], x[4]);
-   z[7] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[3], x[5]);
-   word3_muladd(&w2, &w1, &w0, x[4], x[4]);
-   z[8] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[4], x[5]);
-   z[9] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[5], x[5]);
-   z[10] = w0;
-   z[11] = w1;
-   }
-
-/*
-* Comba 6x6 Multiplication
-*/
-void bigint_comba_mul6(word z[12], const word x[6], const word y[6])
-   {
-   word w2 = 0, w1 = 0, w0 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], y[0]);
-   z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], y[1]);
-   word3_muladd(&w2, &w1, &w0, x[1], y[0]);
-   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], y[2]);
-   word3_muladd(&w2, &w1, &w0, x[1], y[1]);
-   word3_muladd(&w2, &w1, &w0, x[2], y[0]);
-   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], y[3]);
-   word3_muladd(&w2, &w1, &w0, x[1], y[2]);
-   word3_muladd(&w2, &w1, &w0, x[2], y[1]);
-   word3_muladd(&w2, &w1, &w0, x[3], y[0]);
-   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], y[4]);
-   word3_muladd(&w2, &w1, &w0, x[1], y[3]);
-   word3_muladd(&w2, &w1, &w0, x[2], y[2]);
-   word3_muladd(&w2, &w1, &w0, x[3], y[1]);
-   word3_muladd(&w2, &w1, &w0, x[4], y[0]);
-   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], y[5]);
-   word3_muladd(&w2, &w1, &w0, x[1], y[4]);
-   word3_muladd(&w2, &w1, &w0, x[2], y[3]);
-   word3_muladd(&w2, &w1, &w0, x[3], y[2]);
-   word3_muladd(&w2, &w1, &w0, x[4], y[1]);
-   word3_muladd(&w2, &w1, &w0, x[5], y[0]);
-   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[1], y[5]);
-   word3_muladd(&w2, &w1, &w0, x[2], y[4]);
-   word3_muladd(&w2, &w1, &w0, x[3], y[3]);
-   word3_muladd(&w2, &w1, &w0, x[4], y[2]);
-   word3_muladd(&w2, &w1, &w0, x[5], y[1]);
-   z[6] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[2], y[5]);
-   word3_muladd(&w2, &w1, &w0, x[3], y[4]);
-   word3_muladd(&w2, &w1, &w0, x[4], y[3]);
-   word3_muladd(&w2, &w1, &w0, x[5], y[2]);
-   z[7] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[3], y[5]);
-   word3_muladd(&w2, &w1, &w0, x[4], y[4]);
-   word3_muladd(&w2, &w1, &w0, x[5], y[3]);
-   z[8] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[4], y[5]);
-   word3_muladd(&w2, &w1, &w0, x[5], y[4]);
-   z[9] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[5], y[5]);
-   z[10] = w0;
-   z[11] = w1;
-   }
-
-/*
-* Comba 8x8 Squaring
-*/
-void bigint_comba_sqr8(word z[16], const word x[8])
-   {
-   word w2 = 0, w1 = 0, w0 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], x[0]);
-   z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[0], x[1]);
-   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[0], x[2]);
-   word3_muladd(&w2, &w1, &w0, x[1], x[1]);
-   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[0], x[3]);
-   word3_muladd_2(&w2, &w1, &w0, x[1], x[2]);
-   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[0], x[4]);
-   word3_muladd_2(&w2, &w1, &w0, x[1], x[3]);
-   word3_muladd(&w2, &w1, &w0, x[2], x[2]);
-   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[0], x[5]);
-   word3_muladd_2(&w2, &w1, &w0, x[1], x[4]);
-   word3_muladd_2(&w2, &w1, &w0, x[2], x[3]);
-   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[0], x[6]);
-   word3_muladd_2(&w2, &w1, &w0, x[1], x[5]);
-   word3_muladd_2(&w2, &w1, &w0, x[2], x[4]);
-   word3_muladd(&w2, &w1, &w0, x[3], x[3]);
-   z[6] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[0], x[7]);
-   word3_muladd_2(&w2, &w1, &w0, x[1], x[6]);
-   word3_muladd_2(&w2, &w1, &w0, x[2], x[5]);
-   word3_muladd_2(&w2, &w1, &w0, x[3], x[4]);
-   z[7] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[1], x[7]);
-   word3_muladd_2(&w2, &w1, &w0, x[2], x[6]);
-   word3_muladd_2(&w2, &w1, &w0, x[3], x[5]);
-   word3_muladd(&w2, &w1, &w0, x[4], x[4]);
-   z[8] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[2], x[7]);
-   word3_muladd_2(&w2, &w1, &w0, x[3], x[6]);
-   word3_muladd_2(&w2, &w1, &w0, x[4], x[5]);
-   z[9] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[3], x[7]);
-   word3_muladd_2(&w2, &w1, &w0, x[4], x[6]);
-   word3_muladd(&w2, &w1, &w0, x[5], x[5]);
-   z[10] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[4], x[7]);
-   word3_muladd_2(&w2, &w1, &w0, x[5], x[6]);
-   z[11] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[5], x[7]);
-   word3_muladd(&w2, &w1, &w0, x[6], x[6]);
-   z[12] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[6], x[7]);
-   z[13] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[7], x[7]);
-   z[14] = w0;
-   z[15] = w1;
-   }
-
-/*
-* Comba 8x8 Multiplication
-*/
-void bigint_comba_mul8(word z[16], const word x[8], const word y[8])
-   {
-   word w2 = 0, w1 = 0, w0 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], y[0]);
-   z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], y[1]);
-   word3_muladd(&w2, &w1, &w0, x[1], y[0]);
-   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], y[2]);
-   word3_muladd(&w2, &w1, &w0, x[1], y[1]);
-   word3_muladd(&w2, &w1, &w0, x[2], y[0]);
-   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], y[3]);
-   word3_muladd(&w2, &w1, &w0, x[1], y[2]);
-   word3_muladd(&w2, &w1, &w0, x[2], y[1]);
-   word3_muladd(&w2, &w1, &w0, x[3], y[0]);
-   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], y[4]);
-   word3_muladd(&w2, &w1, &w0, x[1], y[3]);
-   word3_muladd(&w2, &w1, &w0, x[2], y[2]);
-   word3_muladd(&w2, &w1, &w0, x[3], y[1]);
-   word3_muladd(&w2, &w1, &w0, x[4], y[0]);
-   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], y[5]);
-   word3_muladd(&w2, &w1, &w0, x[1], y[4]);
-   word3_muladd(&w2, &w1, &w0, x[2], y[3]);
-   word3_muladd(&w2, &w1, &w0, x[3], y[2]);
-   word3_muladd(&w2, &w1, &w0, x[4], y[1]);
-   word3_muladd(&w2, &w1, &w0, x[5], y[0]);
-   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], y[6]);
-   word3_muladd(&w2, &w1, &w0, x[1], y[5]);
-   word3_muladd(&w2, &w1, &w0, x[2], y[4]);
-   word3_muladd(&w2, &w1, &w0, x[3], y[3]);
-   word3_muladd(&w2, &w1, &w0, x[4], y[2]);
-   word3_muladd(&w2, &w1, &w0, x[5], y[1]);
-   word3_muladd(&w2, &w1, &w0, x[6], y[0]);
-   z[6] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[0], y[7]);
-   word3_muladd(&w2, &w1, &w0, x[1], y[6]);
-   word3_muladd(&w2, &w1, &w0, x[2], y[5]);
-   word3_muladd(&w2, &w1, &w0, x[3], y[4]);
-   word3_muladd(&w2, &w1, &w0, x[4], y[3]);
-   word3_muladd(&w2, &w1, &w0, x[5], y[2]);
-   word3_muladd(&w2, &w1, &w0, x[6], y[1]);
-   word3_muladd(&w2, &w1, &w0, x[7], y[0]);
-   z[7] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[1], y[7]);
-   word3_muladd(&w2, &w1, &w0, x[2], y[6]);
-   word3_muladd(&w2, &w1, &w0, x[3], y[5]);
-   word3_muladd(&w2, &w1, &w0, x[4], y[4]);
-   word3_muladd(&w2, &w1, &w0, x[5], y[3]);
-   word3_muladd(&w2, &w1, &w0, x[6], y[2]);
-   word3_muladd(&w2, &w1, &w0, x[7], y[1]);
-   z[8] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[2], y[7]);
-   word3_muladd(&w2, &w1, &w0, x[3], y[6]);
-   word3_muladd(&w2, &w1, &w0, x[4], y[5]);
-   word3_muladd(&w2, &w1, &w0, x[5], y[4]);
-   word3_muladd(&w2, &w1, &w0, x[6], y[3]);
-   word3_muladd(&w2, &w1, &w0, x[7], y[2]);
-   z[9] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[3], y[7]);
-   word3_muladd(&w2, &w1, &w0, x[4], y[6]);
-   word3_muladd(&w2, &w1, &w0, x[5], y[5]);
-   word3_muladd(&w2, &w1, &w0, x[6], y[4]);
-   word3_muladd(&w2, &w1, &w0, x[7], y[3]);
-   z[10] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[4], y[7]);
-   word3_muladd(&w2, &w1, &w0, x[5], y[6]);
-   word3_muladd(&w2, &w1, &w0, x[6], y[5]);
-   word3_muladd(&w2, &w1, &w0, x[7], y[4]);
-   z[11] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[5], y[7]);
-   word3_muladd(&w2, &w1, &w0, x[6], y[6]);
-   word3_muladd(&w2, &w1, &w0, x[7], y[5]);
-   z[12] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[6], y[7]);
-   word3_muladd(&w2, &w1, &w0, x[7], y[6]);
-   z[13] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[7], y[7]);
-   z[14] = w0;
-   z[15] = w1;
-   }
-
-/*
-* Comba 16x16 Squaring
-*/
-void bigint_comba_sqr16(word z[32], const word x[16])
-   {
-   word w2 = 0, w1 = 0, w0 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 0], x[ 0]);
-   z[ 0] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 1]);
-   z[ 1] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 2]);
-   word3_muladd(&w2, &w1, &w0, x[ 1], x[ 1]);
-   z[ 2] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 3]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 2]);
-   z[ 3] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 4]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 3]);
-   word3_muladd(&w2, &w1, &w0, x[ 2], x[ 2]);
-   z[ 4] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 5]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 4]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 3]);
-   z[ 5] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 6]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 5]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 4]);
-   word3_muladd(&w2, &w1, &w0, x[ 3], x[ 3]);
-   z[ 6] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 7]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 6]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 5]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 4]);
-   z[ 7] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 8]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 7]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 6]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 5]);
-   word3_muladd(&w2, &w1, &w0, x[ 4], x[ 4]);
-   z[ 8] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 9]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 8]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 7]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 6]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 5]);
-   z[ 9] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[10]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 9]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 8]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 7]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 6]);
-   word3_muladd(&w2, &w1, &w0, x[ 5], x[ 5]);
-   z[10] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[11]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[10]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 9]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 8]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 7]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 6]);
-   z[11] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[12]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[11]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[10]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 9]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 8]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 7]);
-   word3_muladd(&w2, &w1, &w0, x[ 6], x[ 6]);
-   z[12] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[13]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[12]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[11]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[10]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 9]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 8]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 7]);
-   z[13] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[14]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[13]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[12]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[11]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[10]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 9]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 8]);
-   word3_muladd(&w2, &w1, &w0, x[ 7], x[ 7]);
-   z[14] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[15]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[14]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[13]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[12]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[11]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[10]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 9]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 7], x[ 8]);
-   z[15] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[15]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[14]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[13]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[12]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[11]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 6], x[10]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 7], x[ 9]);
-   word3_muladd(&w2, &w1, &w0, x[ 8], x[ 8]);
-   z[16] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[15]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[14]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[13]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[12]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 6], x[11]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 7], x[10]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 8], x[ 9]);
-   z[17] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[15]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[14]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[13]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 6], x[12]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 7], x[11]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 8], x[10]);
-   word3_muladd(&w2, &w1, &w0, x[ 9], x[ 9]);
-   z[18] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[15]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[14]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 6], x[13]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 7], x[12]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 8], x[11]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 9], x[10]);
-   z[19] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[15]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 6], x[14]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 7], x[13]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 8], x[12]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 9], x[11]);
-   word3_muladd(&w2, &w1, &w0, x[10], x[10]);
-   z[20] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 6], x[15]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 7], x[14]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 8], x[13]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 9], x[12]);
-   word3_muladd_2(&w2, &w1, &w0, x[10], x[11]);
-   z[21] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 7], x[15]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 8], x[14]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 9], x[13]);
-   word3_muladd_2(&w2, &w1, &w0, x[10], x[12]);
-   word3_muladd(&w2, &w1, &w0, x[11], x[11]);
-   z[22] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 8], x[15]);
-   word3_muladd_2(&w2, &w1, &w0, x[ 9], x[14]);
-   word3_muladd_2(&w2, &w1, &w0, x[10], x[13]);
-   word3_muladd_2(&w2, &w1, &w0, x[11], x[12]);
-   z[23] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[ 9], x[15]);
-   word3_muladd_2(&w2, &w1, &w0, x[10], x[14]);
-   word3_muladd_2(&w2, &w1, &w0, x[11], x[13]);
-   word3_muladd(&w2, &w1, &w0, x[12], x[12]);
-   z[24] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[10], x[15]);
-   word3_muladd_2(&w2, &w1, &w0, x[11], x[14]);
-   word3_muladd_2(&w2, &w1, &w0, x[12], x[13]);
-   z[25] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[11], x[15]);
-   word3_muladd_2(&w2, &w1, &w0, x[12], x[14]);
-   word3_muladd(&w2, &w1, &w0, x[13], x[13]);
-   z[26] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[12], x[15]);
-   word3_muladd_2(&w2, &w1, &w0, x[13], x[14]);
-   z[27] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[13], x[15]);
-   word3_muladd(&w2, &w1, &w0, x[14], x[14]);
-   z[28] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd_2(&w2, &w1, &w0, x[14], x[15]);
-   z[29] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[15], x[15]);
-   z[30] = w0;
-   z[31] = w1;
-   }
-
-/*
-* Comba 16x16 Multiplication
-*/
-void bigint_comba_mul16(word z[32], const word x[16], const word y[16])
-   {
-   word w2 = 0, w1 = 0, w0 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 0]);
-   z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 1]);
-   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 0]);
-   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 2]);
-   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 1]);
-   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 0]);
-   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 3]);
-   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 2]);
-   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 1]);
-   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 0]);
-   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 4]);
-   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 3]);
-   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 2]);
-   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 1]);
-   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 0]);
-   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 5]);
-   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 4]);
-   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 3]);
-   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 2]);
-   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 1]);
-   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 0]);
-   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 6]);
-   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 5]);
-   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 4]);
-   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 3]);
-   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 2]);
-   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 1]);
-   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 0]);
-   z[6] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 7]);
-   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 6]);
-   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 5]);
-   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 4]);
-   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 3]);
-   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 2]);
-   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 1]);
-   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 0]);
-   z[7] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 8]);
-   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 7]);
-   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 6]);
-   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 5]);
-   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 4]);
-   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 3]);
-   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 2]);
-   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 1]);
-   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 0]);
-   z[8] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 9]);
-   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 8]);
-   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 7]);
-   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 6]);
-   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 5]);
-   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 4]);
-   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 3]);
-   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 2]);
-   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 1]);
-   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 0]);
-   z[9] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 0], y[10]);
-   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 9]);
-   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 8]);
-   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 7]);
-   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 6]);
-   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 5]);
-   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 4]);
-   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 3]);
-   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 2]);
-   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 1]);
-   word3_muladd(&w2, &w1, &w0, x[10], y[ 0]);
-   z[10] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 0], y[11]);
-   word3_muladd(&w2, &w1, &w0, x[ 1], y[10]);
-   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 9]);
-   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 8]);
-   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 7]);
-   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 6]);
-   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 5]);
-   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 4]);
-   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 3]);
-   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 2]);
-   word3_muladd(&w2, &w1, &w0, x[10], y[ 1]);
-   word3_muladd(&w2, &w1, &w0, x[11], y[ 0]);
-   z[11] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 0], y[12]);
-   word3_muladd(&w2, &w1, &w0, x[ 1], y[11]);
-   word3_muladd(&w2, &w1, &w0, x[ 2], y[10]);
-   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 9]);
-   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 8]);
-   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 7]);
-   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 6]);
-   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 5]);
-   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 4]);
-   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 3]);
-   word3_muladd(&w2, &w1, &w0, x[10], y[ 2]);
-   word3_muladd(&w2, &w1, &w0, x[11], y[ 1]);
-   word3_muladd(&w2, &w1, &w0, x[12], y[ 0]);
-   z[12] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 0], y[13]);
-   word3_muladd(&w2, &w1, &w0, x[ 1], y[12]);
-   word3_muladd(&w2, &w1, &w0, x[ 2], y[11]);
-   word3_muladd(&w2, &w1, &w0, x[ 3], y[10]);
-   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 9]);
-   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 8]);
-   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 7]);
-   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 6]);
-   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 5]);
-   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 4]);
-   word3_muladd(&w2, &w1, &w0, x[10], y[ 3]);
-   word3_muladd(&w2, &w1, &w0, x[11], y[ 2]);
-   word3_muladd(&w2, &w1, &w0, x[12], y[ 1]);
-   word3_muladd(&w2, &w1, &w0, x[13], y[ 0]);
-   z[13] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 0], y[14]);
-   word3_muladd(&w2, &w1, &w0, x[ 1], y[13]);
-   word3_muladd(&w2, &w1, &w0, x[ 2], y[12]);
-   word3_muladd(&w2, &w1, &w0, x[ 3], y[11]);
-   word3_muladd(&w2, &w1, &w0, x[ 4], y[10]);
-   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 9]);
-   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 8]);
-   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 7]);
-   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 6]);
-   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 5]);
-   word3_muladd(&w2, &w1, &w0, x[10], y[ 4]);
-   word3_muladd(&w2, &w1, &w0, x[11], y[ 3]);
-   word3_muladd(&w2, &w1, &w0, x[12], y[ 2]);
-   word3_muladd(&w2, &w1, &w0, x[13], y[ 1]);
-   word3_muladd(&w2, &w1, &w0, x[14], y[ 0]);
-   z[14] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 0], y[15]);
-   word3_muladd(&w2, &w1, &w0, x[ 1], y[14]);
-   word3_muladd(&w2, &w1, &w0, x[ 2], y[13]);
-   word3_muladd(&w2, &w1, &w0, x[ 3], y[12]);
-   word3_muladd(&w2, &w1, &w0, x[ 4], y[11]);
-   word3_muladd(&w2, &w1, &w0, x[ 5], y[10]);
-   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 9]);
-   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 8]);
-   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 7]);
-   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 6]);
-   word3_muladd(&w2, &w1, &w0, x[10], y[ 5]);
-   word3_muladd(&w2, &w1, &w0, x[11], y[ 4]);
-   word3_muladd(&w2, &w1, &w0, x[12], y[ 3]);
-   word3_muladd(&w2, &w1, &w0, x[13], y[ 2]);
-   word3_muladd(&w2, &w1, &w0, x[14], y[ 1]);
-   word3_muladd(&w2, &w1, &w0, x[15], y[ 0]);
-   z[15] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 1], y[15]);
-   word3_muladd(&w2, &w1, &w0, x[ 2], y[14]);
-   word3_muladd(&w2, &w1, &w0, x[ 3], y[13]);
-   word3_muladd(&w2, &w1, &w0, x[ 4], y[12]);
-   word3_muladd(&w2, &w1, &w0, x[ 5], y[11]);
-   word3_muladd(&w2, &w1, &w0, x[ 6], y[10]);
-   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 9]);
-   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 8]);
-   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 7]);
-   word3_muladd(&w2, &w1, &w0, x[10], y[ 6]);
-   word3_muladd(&w2, &w1, &w0, x[11], y[ 5]);
-   word3_muladd(&w2, &w1, &w0, x[12], y[ 4]);
-   word3_muladd(&w2, &w1, &w0, x[13], y[ 3]);
-   word3_muladd(&w2, &w1, &w0, x[14], y[ 2]);
-   word3_muladd(&w2, &w1, &w0, x[15], y[ 1]);
-   z[16] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 2], y[15]);
-   word3_muladd(&w2, &w1, &w0, x[ 3], y[14]);
-   word3_muladd(&w2, &w1, &w0, x[ 4], y[13]);
-   word3_muladd(&w2, &w1, &w0, x[ 5], y[12]);
-   word3_muladd(&w2, &w1, &w0, x[ 6], y[11]);
-   word3_muladd(&w2, &w1, &w0, x[ 7], y[10]);
-   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 9]);
-   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 8]);
-   word3_muladd(&w2, &w1, &w0, x[10], y[ 7]);
-   word3_muladd(&w2, &w1, &w0, x[11], y[ 6]);
-   word3_muladd(&w2, &w1, &w0, x[12], y[ 5]);
-   word3_muladd(&w2, &w1, &w0, x[13], y[ 4]);
-   word3_muladd(&w2, &w1, &w0, x[14], y[ 3]);
-   word3_muladd(&w2, &w1, &w0, x[15], y[ 2]);
-   z[17] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 3], y[15]);
-   word3_muladd(&w2, &w1, &w0, x[ 4], y[14]);
-   word3_muladd(&w2, &w1, &w0, x[ 5], y[13]);
-   word3_muladd(&w2, &w1, &w0, x[ 6], y[12]);
-   word3_muladd(&w2, &w1, &w0, x[ 7], y[11]);
-   word3_muladd(&w2, &w1, &w0, x[ 8], y[10]);
-   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 9]);
-   word3_muladd(&w2, &w1, &w0, x[10], y[ 8]);
-   word3_muladd(&w2, &w1, &w0, x[11], y[ 7]);
-   word3_muladd(&w2, &w1, &w0, x[12], y[ 6]);
-   word3_muladd(&w2, &w1, &w0, x[13], y[ 5]);
-   word3_muladd(&w2, &w1, &w0, x[14], y[ 4]);
-   word3_muladd(&w2, &w1, &w0, x[15], y[ 3]);
-   z[18] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 4], y[15]);
-   word3_muladd(&w2, &w1, &w0, x[ 5], y[14]);
-   word3_muladd(&w2, &w1, &w0, x[ 6], y[13]);
-   word3_muladd(&w2, &w1, &w0, x[ 7], y[12]);
-   word3_muladd(&w2, &w1, &w0, x[ 8], y[11]);
-   word3_muladd(&w2, &w1, &w0, x[ 9], y[10]);
-   word3_muladd(&w2, &w1, &w0, x[10], y[ 9]);
-   word3_muladd(&w2, &w1, &w0, x[11], y[ 8]);
-   word3_muladd(&w2, &w1, &w0, x[12], y[ 7]);
-   word3_muladd(&w2, &w1, &w0, x[13], y[ 6]);
-   word3_muladd(&w2, &w1, &w0, x[14], y[ 5]);
-   word3_muladd(&w2, &w1, &w0, x[15], y[ 4]);
-   z[19] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 5], y[15]);
-   word3_muladd(&w2, &w1, &w0, x[ 6], y[14]);
-   word3_muladd(&w2, &w1, &w0, x[ 7], y[13]);
-   word3_muladd(&w2, &w1, &w0, x[ 8], y[12]);
-   word3_muladd(&w2, &w1, &w0, x[ 9], y[11]);
-   word3_muladd(&w2, &w1, &w0, x[10], y[10]);
-   word3_muladd(&w2, &w1, &w0, x[11], y[ 9]);
-   word3_muladd(&w2, &w1, &w0, x[12], y[ 8]);
-   word3_muladd(&w2, &w1, &w0, x[13], y[ 7]);
-   word3_muladd(&w2, &w1, &w0, x[14], y[ 6]);
-   word3_muladd(&w2, &w1, &w0, x[15], y[ 5]);
-   z[20] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 6], y[15]);
-   word3_muladd(&w2, &w1, &w0, x[ 7], y[14]);
-   word3_muladd(&w2, &w1, &w0, x[ 8], y[13]);
-   word3_muladd(&w2, &w1, &w0, x[ 9], y[12]);
-   word3_muladd(&w2, &w1, &w0, x[10], y[11]);
-   word3_muladd(&w2, &w1, &w0, x[11], y[10]);
-   word3_muladd(&w2, &w1, &w0, x[12], y[ 9]);
-   word3_muladd(&w2, &w1, &w0, x[13], y[ 8]);
-   word3_muladd(&w2, &w1, &w0, x[14], y[ 7]);
-   word3_muladd(&w2, &w1, &w0, x[15], y[ 6]);
-   z[21] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 7], y[15]);
-   word3_muladd(&w2, &w1, &w0, x[ 8], y[14]);
-   word3_muladd(&w2, &w1, &w0, x[ 9], y[13]);
-   word3_muladd(&w2, &w1, &w0, x[10], y[12]);
-   word3_muladd(&w2, &w1, &w0, x[11], y[11]);
-   word3_muladd(&w2, &w1, &w0, x[12], y[10]);
-   word3_muladd(&w2, &w1, &w0, x[13], y[ 9]);
-   word3_muladd(&w2, &w1, &w0, x[14], y[ 8]);
-   word3_muladd(&w2, &w1, &w0, x[15], y[ 7]);
-   z[22] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 8], y[15]);
-   word3_muladd(&w2, &w1, &w0, x[ 9], y[14]);
-   word3_muladd(&w2, &w1, &w0, x[10], y[13]);
-   word3_muladd(&w2, &w1, &w0, x[11], y[12]);
-   word3_muladd(&w2, &w1, &w0, x[12], y[11]);
-   word3_muladd(&w2, &w1, &w0, x[13], y[10]);
-   word3_muladd(&w2, &w1, &w0, x[14], y[ 9]);
-   word3_muladd(&w2, &w1, &w0, x[15], y[ 8]);
-   z[23] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[ 9], y[15]);
-   word3_muladd(&w2, &w1, &w0, x[10], y[14]);
-   word3_muladd(&w2, &w1, &w0, x[11], y[13]);
-   word3_muladd(&w2, &w1, &w0, x[12], y[12]);
-   word3_muladd(&w2, &w1, &w0, x[13], y[11]);
-   word3_muladd(&w2, &w1, &w0, x[14], y[10]);
-   word3_muladd(&w2, &w1, &w0, x[15], y[ 9]);
-   z[24] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[10], y[15]);
-   word3_muladd(&w2, &w1, &w0, x[11], y[14]);
-   word3_muladd(&w2, &w1, &w0, x[12], y[13]);
-   word3_muladd(&w2, &w1, &w0, x[13], y[12]);
-   word3_muladd(&w2, &w1, &w0, x[14], y[11]);
-   word3_muladd(&w2, &w1, &w0, x[15], y[10]);
-   z[25] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[11], y[15]);
-   word3_muladd(&w2, &w1, &w0, x[12], y[14]);
-   word3_muladd(&w2, &w1, &w0, x[13], y[13]);
-   word3_muladd(&w2, &w1, &w0, x[14], y[12]);
-   word3_muladd(&w2, &w1, &w0, x[15], y[11]);
-   z[26] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[12], y[15]);
-   word3_muladd(&w2, &w1, &w0, x[13], y[14]);
-   word3_muladd(&w2, &w1, &w0, x[14], y[13]);
-   word3_muladd(&w2, &w1, &w0, x[15], y[12]);
-   z[27] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[13], y[15]);
-   word3_muladd(&w2, &w1, &w0, x[14], y[14]);
-   word3_muladd(&w2, &w1, &w0, x[15], y[13]);
-   z[28] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[14], y[15]);
-   word3_muladd(&w2, &w1, &w0, x[15], y[14]);
-   z[29] = w0; w0 = w1; w1 = w2; w2 = 0;
-
-   word3_muladd(&w2, &w1, &w0, x[15], y[15]);
-   z[30] = w0;
-   z[31] = w1;
-   }
-
-}
-
-}
diff --git a/src/math/bigint/mp_core.h b/src/math/bigint/mp_core.h
deleted file mode 100644
index 63082795f..000000000
--- a/src/math/bigint/mp_core.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
-* MPI Algorithms
-* (C) 1999-2010 Jack Lloyd
-*
-* Distributed under the terms of the Botan license
-*/
-
-#ifndef BOTAN_MP_CORE_H__
-#define BOTAN_MP_CORE_H__
-
-#include <botan/mp_types.h>
-
-namespace Botan {
-
-/*
-* The size of the word type, in bits
-*/
-const u32bit MP_WORD_BITS = BOTAN_MP_WORD_BITS;
-
-extern "C" {
-
-/*
-* Addition/Subtraction Operations
-*/
-void bigint_add2(word x[], u32bit x_size,
-                 const word y[], u32bit y_size);
-
-void bigint_add3(word z[],
-                 const word x[], u32bit x_size,
-                 const word y[], u32bit y_size);
-
-word bigint_add2_nc(word x[], u32bit x_size, const word y[], u32bit y_size);
-
-word bigint_add3_nc(word z[],
-                    const word x[], u32bit x_size,
-                    const word y[], u32bit y_size);
-
-word bigint_sub2(word x[], u32bit x_size,
-                 const word y[], u32bit y_size);
-
-/**
-* x = y - x; assumes y >= x
-*/
-void bigint_sub2_rev(word x[], const word y[], u32bit y_size);
-
-word bigint_sub3(word z[],
-                 const word x[], u32bit x_size,
-                 const word y[], u32bit y_size);
-
-/*
-* Shift Operations
-*/
-void bigint_shl1(word x[], u32bit x_size,
-                 u32bit word_shift, u32bit bit_shift);
-
-void bigint_shr1(word x[], u32bit x_size,
-                 u32bit word_shift, u32bit bit_shift);
-
-void bigint_shl2(word y[], const word x[], u32bit x_size,
-                 u32bit word_shift, u32bit bit_shift);
-
-void bigint_shr2(word y[], const word x[], u32bit x_size,
-                 u32bit word_shift, u32bit bit_shift);
-
-/*
-* Simple O(N^2) Multiplication and Squaring
-*/
-void bigint_simple_mul(word z[],
-                       const word x[], u32bit x_size,
-                       const word y[], u32bit y_size);
-
-void bigint_simple_sqr(word z[], const word x[], u32bit x_size);
-
-/*
-* Linear Multiply
-*/
-void bigint_linmul2(word x[], u32bit x_size, word y);
-void bigint_linmul3(word z[], const word x[], u32bit x_size, word y);
-
-/*
-* Montgomery Reduction
-* @param z integer to reduce (also output in first x_size+1 words)
-* @param z_size size of z (should be >= 2*x_size+1)
-* @param workspace array of at least 2*(x_size+1) words
-* @param x modulus
-* @param x_size size of x
-* @param u Montgomery value
-*/
-void bigint_monty_redc(word z[], u32bit z_size,
-                       word workspace[],
-                       const word x[], u32bit x_size,
-                       word u);
-
-/*
-* Division operation
-*/
-u32bit bigint_divcore(word q, word y2, word y1,
-                      word x3, word x2, word x1);
-
-/**
-* Compare x and y
-*/
-s32bit bigint_cmp(const word x[], u32bit x_size,
-                  const word y[], u32bit y_size);
-
-/**
-* Compute ((n1<<bits) + n0) / d
-*/
-word bigint_divop(word n1, word n0, word d);
-
-/**
-* Compute ((n1<<bits) + n0) % d
-*/
-word bigint_modop(word n1, word n0, word d);
-
-/*
-* Comba Multiplication / Squaring
-*/
-void bigint_comba_mul4(word z[8], const word x[4], const word y[4]);
-void bigint_comba_mul6(word z[12], const word x[6], const word y[6]);
-void bigint_comba_mul8(word z[16], const word x[8], const word y[8]);
-void bigint_comba_mul16(word z[32], const word x[16], const word y[16]);
-
-void bigint_comba_sqr4(word out[8], const word in[4]);
-void bigint_comba_sqr6(word out[12], const word in[6]);
-void bigint_comba_sqr8(word out[16], const word in[8]);
-void bigint_comba_sqr8(word out[32], const word in[16]);
-void bigint_comba_sqr16(word out[64], const word in[32]);
-
-}
-
-/*
-* High Level Multiplication/Squaring Interfaces
-*/
-void bigint_mul(word z[], u32bit z_size, word workspace[],
-                const word x[], u32bit x_size, u32bit x_sw,
-                const word y[], u32bit y_size, u32bit y_sw);
-
-void bigint_sqr(word z[], u32bit z_size, word workspace[],
-                const word x[], u32bit x_size, u32bit x_sw);
-
-}
-
-#endif
diff --git a/src/math/bigint/mp_generic/info.txt b/src/math/bigint/mp_generic/info.txt
deleted file mode 100644
index ab4d7406a..000000000
--- a/src/math/bigint/mp_generic/info.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-load_on dep
-
-<header:internal>
-mp_asm.h
-mp_asmi.h
-</header:internal>
diff --git a/src/math/bigint/mp_generic/mp_asm.h b/src/math/bigint/mp_generic/mp_asm.h
deleted file mode 100644
index 7c18343ef..000000000
--- a/src/math/bigint/mp_generic/mp_asm.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
-* Lowest Level MPI Algorithms
-* (C) 1999-2008 Jack Lloyd
-*     2006 Luca Piccarreta
-*
-* Distributed under the terms of the Botan license
-*/
-
-#ifndef BOTAN_MP_ASM_H__
-#define BOTAN_MP_ASM_H__
-
-#include <botan/mp_types.h>
-
-#if (BOTAN_MP_WORD_BITS == 8)
-  typedef Botan::u16bit dword;
-#elif (BOTAN_MP_WORD_BITS == 16)
-  typedef Botan::u32bit dword;
-#elif (BOTAN_MP_WORD_BITS == 32)
-  typedef Botan::u64bit dword;
-#elif (BOTAN_MP_WORD_BITS == 64)
-  #error BOTAN_MP_WORD_BITS can be 64 only with assembly support
-#else
-  #error BOTAN_MP_WORD_BITS must be 8, 16, 32, or 64
-#endif
-
-namespace Botan {
-
-extern "C" {
-
-/*
-* Word Multiply/Add
-*/
-inline word word_madd2(word a, word b, word* c)
-   {
-   dword z = (dword)a * b + *c;
-   *c = (word)(z >> BOTAN_MP_WORD_BITS);
-   return (word)z;
-   }
-
-/*
-* Word Multiply/Add
-*/
-inline word word_madd3(word a, word b, word c, word* d)
-   {
-   dword z = (dword)a * b + c + *d;
-   *d = (word)(z >> BOTAN_MP_WORD_BITS);
-   return (word)z;
-   }
-
-}
-
-}
-
-#endif
diff --git a/src/math/bigint/mp_generic/mp_asmi.h b/src/math/bigint/mp_generic/mp_asmi.h
deleted file mode 100644
index 8225f372d..000000000
--- a/src/math/bigint/mp_generic/mp_asmi.h
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
-* Lowest Level MPI Algorithms
-* (C) 1999-2010 Jack Lloyd
-*     2006 Luca Piccarreta
-*
-* Distributed under the terms of the Botan license
-*/
-
-#ifndef BOTAN_MP_ASM_INTERNAL_H__
-#define BOTAN_MP_ASM_INTERNAL_H__
-
-#include <botan/internal/mp_asm.h>
-
-namespace Botan {
-
-extern "C" {
-
-/*
-* Word Addition
-*/
-inline word word_add(word x, word y, word* carry)
-   {
-   word z = x + y;
-   word c1 = (z < x);
-   z += *carry;
-   *carry = c1 | (z < *carry);
-   return z;
-   }
-
-/*
-* Eight Word Block Addition, Two Argument
-*/
-inline word word8_add2(word x[8], const word y[8], word carry)
-   {
-   x[0] = word_add(x[0], y[0], &carry);
-   x[1] = word_add(x[1], y[1], &carry);
-   x[2] = word_add(x[2], y[2], &carry);
-   x[3] = word_add(x[3], y[3], &carry);
-   x[4] = word_add(x[4], y[4], &carry);
-   x[5] = word_add(x[5], y[5], &carry);
-   x[6] = word_add(x[6], y[6], &carry);
-   x[7] = word_add(x[7], y[7], &carry);
-   return carry;
-   }
-
-/*
-* Eight Word Block Addition, Three Argument
-*/
-inline word word8_add3(word z[8], const word x[8],
-                       const word y[8], word carry)
-   {
-   z[0] = word_add(x[0], y[0], &carry);
-   z[1] = word_add(x[1], y[1], &carry);
-   z[2] = word_add(x[2], y[2], &carry);
-   z[3] = word_add(x[3], y[3], &carry);
-   z[4] = word_add(x[4], y[4], &carry);
-   z[5] = word_add(x[5], y[5], &carry);
-   z[6] = word_add(x[6], y[6], &carry);
-   z[7] = word_add(x[7], y[7], &carry);
-   return carry;
-   }
-
-/*
-* Word Subtraction
-*/
-inline word word_sub(word x, word y, word* carry)
-   {
-   word t0 = x - y;
-   word c1 = (t0 > x);
-   word z = t0 - *carry;
-   *carry = c1 | (z > t0);
-   return z;
-   }
-
-/*
-* Eight Word Block Subtraction, Two Argument
-*/
-inline word word8_sub2(word x[8], const word y[8], word carry)
-   {
-   x[0] = word_sub(x[0], y[0], &carry);
-   x[1] = word_sub(x[1], y[1], &carry);
-   x[2] = word_sub(x[2], y[2], &carry);
-   x[3] = word_sub(x[3], y[3], &carry);
-   x[4] = word_sub(x[4], y[4], &carry);
-   x[5] = word_sub(x[5], y[5], &carry);
-   x[6] = word_sub(x[6], y[6], &carry);
-   x[7] = word_sub(x[7], y[7], &carry);
-   return carry;
-   }
-
-/*
-* Eight Word Block Subtraction, Two Argument
-*/
-inline word word8_sub2_rev(word x[8], const word y[8], word carry)
-   {
-   x[0] = word_sub(y[0], x[0], &carry);
-   x[1] = word_sub(y[1], x[1], &carry);
-   x[2] = word_sub(y[2], x[2], &carry);
-   x[3] = word_sub(y[3], x[3], &carry);
-   x[4] = word_sub(y[4], x[4], &carry);
-   x[5] = word_sub(y[5], x[5], &carry);
-   x[6] = word_sub(y[6], x[6], &carry);
-   x[7] = word_sub(y[7], x[7], &carry);
-   return carry;
-   }
-
-/*
-* Eight Word Block Subtraction, Three Argument
-*/
-inline word word8_sub3(word z[8], const word x[8],
-                       const word y[8], word carry)
-   {
-   z[0] = word_sub(x[0], y[0], &carry);
-   z[1] = word_sub(x[1], y[1], &carry);
-   z[2] = word_sub(x[2], y[2], &carry);
-   z[3] = word_sub(x[3], y[3], &carry);
-   z[4] = word_sub(x[4], y[4], &carry);
-   z[5] = word_sub(x[5], y[5], &carry);
-   z[6] = word_sub(x[6], y[6], &carry);
-   z[7] = word_sub(x[7], y[7], &carry);
-   return carry;
-   }
-
-/*
-* Eight Word Block Linear Multiplication
-*/
-inline word word8_linmul2(word x[8], word y, word carry)
-   {
-   x[0] = word_madd2(x[0], y, &carry);
-   x[1] = word_madd2(x[1], y, &carry);
-   x[2] = word_madd2(x[2], y, &carry);
-   x[3] = word_madd2(x[3], y, &carry);
-   x[4] = word_madd2(x[4], y, &carry);
-   x[5] = word_madd2(x[5], y, &carry);
-   x[6] = word_madd2(x[6], y, &carry);
-   x[7] = word_madd2(x[7], y, &carry);
-   return carry;
-   }
-
-/*
-* Eight Word Block Linear Multiplication
-*/
-inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
-   {
-   z[0] = word_madd2(x[0], y, &carry);
-   z[1] = word_madd2(x[1], y, &carry);
-   z[2] = word_madd2(x[2], y, &carry);
-   z[3] = word_madd2(x[3], y, &carry);
-   z[4] = word_madd2(x[4], y, &carry);
-   z[5] = word_madd2(x[5], y, &carry);
-   z[6] = word_madd2(x[6], y, &carry);
-   z[7] = word_madd2(x[7], y, &carry);
-   return carry;
-   }
-
-/*
-* Eight Word Block Multiply/Add
-*/
-inline word word8_madd3(word z[8], const word x[8], word y, word carry)
-   {
-   z[0] = word_madd3(x[0], y, z[0], &carry);
-   z[1] = word_madd3(x[1], y, z[1], &carry);
-   z[2] = word_madd3(x[2], y, z[2], &carry);
-   z[3] = word_madd3(x[3], y, z[3], &carry);
-   z[4] = word_madd3(x[4], y, z[4], &carry);
-   z[5] = word_madd3(x[5], y, z[5], &carry);
-   z[6] = word_madd3(x[6], y, z[6], &carry);
-   z[7] = word_madd3(x[7], y, z[7], &carry);
-   return carry;
-   }
-
-/*
-* Multiply-Add Accumulator
-*/
-inline void word3_muladd(word* w2, word* w1, word* w0, word a, word b)
-   {
-   word carry = *w0;
-   *w0 = word_madd2(a, b, &carry);
-   *w1 += carry;
-   *w2 += (*w1 < carry) ? 1 : 0;
-   }
-
-/*
-* Multiply-Add Accumulator
-*/
-inline void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b)
-   {
-   word carry = 0;
-   a = word_madd2(a, b, &carry);
-   b = carry;
-
-   word top = (b >> (BOTAN_MP_WORD_BITS-1));
-   b <<= 1;
-   b |= (a >> (BOTAN_MP_WORD_BITS-1));
-   a <<= 1;
-
-   carry = 0;
-   *w0 = word_add(*w0, a, &carry);
-   *w1 = word_add(*w1, b, &carry);
-   *w2 = word_add(*w2, top, &carry);
-   }
-
-}
-
-}
-
-#endif
diff --git a/src/math/bigint/mp_ia32/info.txt b/src/math/bigint/mp_ia32/info.txt
deleted file mode 100644
index 1659f74cf..000000000
--- a/src/math/bigint/mp_ia32/info.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-load_on dep
-
-mp_bits 32
-
-<header:internal>
-mp_asm.h
-mp_asmi.h
-</header:internal>
-
-<arch>
-ia32
-</arch>
-
-<cc>
-clang
-gcc
-icc
-</cc>
diff --git a/src/math/bigint/mp_ia32/mp_asm.h b/src/math/bigint/mp_ia32/mp_asm.h
deleted file mode 100644
index 4d3afc992..000000000
--- a/src/math/bigint/mp_ia32/mp_asm.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
-* Lowest Level MPI Algorithms
-* (C) 1999-2008 Jack Lloyd
-*     2006 Luca Piccarreta
-*
-* Distributed under the terms of the Botan license
-*/
-
-#ifndef BOTAN_MP_ASM_H__
-#define BOTAN_MP_ASM_H__
-
-#include <botan/mp_types.h>
-
-#if (BOTAN_MP_WORD_BITS != 32)
-   #error The mp_ia32 module requires that BOTAN_MP_WORD_BITS == 32
-#endif
-
-namespace Botan {
-
-extern "C" {
-
-/*
-* Helper Macros for x86 Assembly
-*/
-#define ASM(x) x "\n\t"
-
-/*
-* Word Multiply
-*/
-inline word word_madd2(word a, word b, word* c)
-   {
-   asm(
-      ASM("mull %[b]")
-      ASM("addl %[c],%[a]")
-      ASM("adcl $0,%[carry]")
-
-      : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c)
-      : "0"(a), "1"(b), [c]"g"(*c) : "cc");
-
-   return a;
-   }
-
-/*
-* Word Multiply/Add
-*/
-inline word word_madd3(word a, word b, word c, word* d)
-   {
-   asm(
-      ASM("mull %[b]")
-
-      ASM("addl %[c],%[a]")
-      ASM("adcl $0,%[carry]")
-
-      ASM("addl %[d],%[a]")
-      ASM("adcl $0,%[carry]")
-
-      : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d)
-      : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc");
-
-   return a;
-   }
-
-}
-
-}
-
-#endif
diff --git a/src/math/bigint/mp_ia32/mp_asmi.h b/src/math/bigint/mp_ia32/mp_asmi.h
deleted file mode 100644
index c7b679e80..000000000
--- a/src/math/bigint/mp_ia32/mp_asmi.h
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
-* Lowest Level MPI Algorithms
-* (C) 1999-2010 Jack Lloyd
-*     2006 Luca Piccarreta
-*
-* Distributed under the terms of the Botan license
-*/
-
-#ifndef BOTAN_MP_ASM_INTERNAL_H__
-#define BOTAN_MP_ASM_INTERNAL_H__
-
-#include <botan/internal/mp_asm.h>
-
-namespace Botan {
-
-extern "C" {
-
-/*
-* Helper Macros for x86 Assembly
-*/
-#ifndef ASM
-  #define ASM(x) x "\n\t"
-#endif
-
-#define ADDSUB2_OP(OPERATION, INDEX)                     \
-        ASM("movl 4*" #INDEX "(%[y]), %[carry]")         \
-        ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])")   \
-
-#define ADDSUB3_OP(OPERATION, INDEX)                     \
-        ASM("movl 4*" #INDEX "(%[x]), %[carry]")         \
-        ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]")   \
-        ASM("movl %[carry], 4*" #INDEX "(%[z])")         \
-
-#define LINMUL_OP(WRITE_TO, INDEX)                       \
-        ASM("movl 4*" #INDEX "(%[x]),%%eax")             \
-        ASM("mull %[y]")                                 \
-        ASM("addl %[carry],%%eax")                       \
-        ASM("adcl $0,%%edx")                             \
-        ASM("movl %%edx,%[carry]")                       \
-        ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])")
-
-#define MULADD_OP(IGNORED, INDEX)                        \
-        ASM("movl 4*" #INDEX "(%[x]),%%eax")             \
-        ASM("mull %[y]")                                 \
-        ASM("addl %[carry],%%eax")                       \
-        ASM("adcl $0,%%edx")                             \
-        ASM("addl 4*" #INDEX "(%[z]),%%eax")             \
-        ASM("adcl $0,%%edx")                             \
-        ASM("movl %%edx,%[carry]")                       \
-        ASM("movl %%eax, 4*" #INDEX " (%[z])")
-
-#define DO_8_TIMES(MACRO, ARG) \
-        MACRO(ARG, 0) \
-        MACRO(ARG, 1) \
-        MACRO(ARG, 2) \
-        MACRO(ARG, 3) \
-        MACRO(ARG, 4) \
-        MACRO(ARG, 5) \
-        MACRO(ARG, 6) \
-        MACRO(ARG, 7)
-
-#define ADD_OR_SUBTRACT(CORE_CODE)     \
-        ASM("rorl %[carry]")           \
-        CORE_CODE                      \
-        ASM("sbbl %[carry],%[carry]")  \
-        ASM("negl %[carry]")
-
-/*
-* Word Addition
-*/
-inline word word_add(word x, word y, word* carry)
-   {
-   asm(
-      ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]"))
-      : [x]"=r"(x), [carry]"=r"(*carry)
-      : "0"(x), [y]"rm"(y), "1"(*carry)
-      : "cc");
-   return x;
-   }
-
-/*
-* Eight Word Block Addition, Two Argument
-*/
-inline word word8_add2(word x[8], const word y[8], word carry)
-   {
-   asm(
-      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl"))
-      : [carry]"=r"(carry)
-      : [x]"r"(x), [y]"r"(y), "0"(carry)
-      : "cc", "memory");
-   return carry;
-   }
-
-/*
-* Eight Word Block Addition, Three Argument
-*/
-inline word word8_add3(word z[8], const word x[8], const word y[8], word carry)
-   {
-   asm(
-      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl"))
-      : [carry]"=r"(carry)
-      : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
-      : "cc", "memory");
-   return carry;
-   }
-
-/*
-* Word Subtraction
-*/
-inline word word_sub(word x, word y, word* carry)
-   {
-   asm(
-      ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]"))
-      : [x]"=r"(x), [carry]"=r"(*carry)
-      : "0"(x), [y]"rm"(y), "1"(*carry)
-      : "cc");
-   return x;
-   }
-
-/*
-* Eight Word Block Subtraction, Two Argument
-*/
-inline word word8_sub2(word x[8], const word y[8], word carry)
-   {
-   asm(
-      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl"))
-      : [carry]"=r"(carry)
-      : [x]"r"(x), [y]"r"(y), "0"(carry)
-      : "cc", "memory");
-   return carry;
-   }
-
-/*
-* Eight Word Block Subtraction, Two Argument
-*/
-inline word word8_sub2_rev(word x[8], const word y[8], word carry)
-   {
-   asm(
-      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
-      : [carry]"=r"(carry)
-      : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
-      : "cc", "memory");
-   return carry;
-   }
-
-/*
-* Eight Word Block Subtraction, Three Argument
-*/
-inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry)
-   {
-   asm(
-      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
-      : [carry]"=r"(carry)
-      : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
-      : "cc", "memory");
-   return carry;
-   }
-
-/*
-* Eight Word Block Linear Multiplication
-*/
-inline word word8_linmul2(word x[8], word y, word carry)
-   {
-   asm(
-      DO_8_TIMES(LINMUL_OP, "x")
-      : [carry]"=r"(carry)
-      : [x]"r"(x), [y]"rm"(y), "0"(carry)
-      : "cc", "%eax", "%edx");
-   return carry;
-   }
-
-/*
-* Eight Word Block Linear Multiplication
-*/
-inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
-   {
-   asm(
-      DO_8_TIMES(LINMUL_OP, "z")
-      : [carry]"=r"(carry)
-      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
-      : "cc", "%eax", "%edx");
-   return carry;
-   }
-
-/*
-* Eight Word Block Multiply/Add
-*/
-inline word word8_madd3(word z[8], const word x[8], word y, word carry)
-   {
-   asm(
-      DO_8_TIMES(MULADD_OP, "")
-      : [carry]"=r"(carry)
-      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
-      : "cc", "%eax", "%edx");
-   return carry;
-   }
-
-/*
-* Multiply-Add Accumulator
-*/
-inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y)
-   {
-   asm(
-      ASM("mull %[y]")
-
-      ASM("addl %[x],%[w0]")
-      ASM("adcl %[y],%[w1]")
-      ASM("adcl $0,%[w2]")
-
-      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
-      : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2)
-      : "cc");
-   }
-
-/*
-* Multiply-Add Accumulator
-*/
-inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y)
-   {
-   asm(
-      ASM("mull %[y]")
-
-      ASM("addl %[x],%[w0]")
-      ASM("adcl %[y],%[w1]")
-      ASM("adcl $0,%[w2]")
-
-      ASM("addl %[x],%[w0]")
-      ASM("adcl %[y],%[w1]")
-      ASM("adcl $0,%[w2]")
-
-      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
-      : [x]"a"(x), [y]"d"(y), "0"(*w0), "1"(*w1), "2"(*w2)
-      : "cc");
-   }
-
-}
-
-}
-
-#endif
diff --git a/src/math/bigint/mp_ia32_msvc/info.txt b/src/math/bigint/mp_ia32_msvc/info.txt
deleted file mode 100644
index 55a42c310..000000000
--- a/src/math/bigint/mp_ia32_msvc/info.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-mp_bits 32
-
-load_on dep
-
-<header:internal>
-mp_generic:mp_asm.h
-mp_asmi.h
-</header:internal>
-
-<arch>
-ia32
-</arch>
-
-<cc>
-msvc
-</cc>
diff --git a/src/math/bigint/mp_ia32_msvc/mp_asmi.h b/src/math/bigint/mp_ia32_msvc/mp_asmi.h
deleted file mode 100644
index aee457d65..000000000
--- a/src/math/bigint/mp_ia32_msvc/mp_asmi.h
+++ /dev/null
@@ -1,542 +0,0 @@
-/*
-* Lowest Level MPI Algorithms
-* (C) 1999-2010 Jack Lloyd
-*     2006 Luca Piccarreta
-*
-* Distributed under the terms of the Botan license
-*/
-
-#ifndef BOTAN_MP_ASM_INTERNAL_H__
-#define BOTAN_MP_ASM_INTERNAL_H__
-
-#include <botan/internal/mp_asm.h>
-
-namespace Botan {
-
-extern "C" {
-
-/*
-* Word Addition
-*/
-inline word word_add(word x, word y, word* carry)
-   {
-   word z = x + y;
-   word c1 = (z < x);
-   z += *carry;
-   *carry = c1 | (z < *carry);
-   return z;
-   }
-
-/*
-* Eight Word Block Addition, Two Argument
-*/
-inline word word8_add2(word x[8], const word y[8], word carry)
-   {
-   __asm {
-      mov edx,[x]
-      mov esi,[y]
-      xor eax,eax
-      sub eax,[carry] //force CF=1 iff *carry==1
-      mov eax,[esi]
-      adc [edx],eax
-      mov eax,[esi+4]
-      adc [edx+4],eax
-      mov eax,[esi+8]
-      adc [edx+8],eax
-      mov eax,[esi+12]
-      adc [edx+12],eax
-      mov eax,[esi+16]
-      adc [edx+16],eax
-      mov eax,[esi+20]
-      adc [edx+20],eax
-      mov eax,[esi+24]
-      adc [edx+24],eax
-      mov eax,[esi+28]
-      adc [edx+28],eax
-      sbb eax,eax
-      neg eax
-      }
-   }
-
-/*
-* Eight Word Block Addition, Three Argument
-*/
-inline word word8_add3(word z[8], const word x[8], const word y[8], word carry)
-   {
-    __asm {
-      mov edi,[x]
-      mov esi,[y]
-      mov ebx,[z]
-      xor eax,eax
-      sub eax,[carry] //force CF=1 iff *carry==1
-      mov eax,[edi]
-      adc eax,[esi]
-      mov [ebx],eax
-
-      mov eax,[edi+4]
-      adc eax,[esi+4]
-      mov [ebx+4],eax
-
-      mov eax,[edi+8]
-      adc eax,[esi+8]
-      mov [ebx+8],eax
-
-      mov eax,[edi+12]
-      adc eax,[esi+12]
-      mov [ebx+12],eax
-
-      mov eax,[edi+16]
-      adc eax,[esi+16]
-      mov [ebx+16],eax
-
-      mov eax,[edi+20]
-      adc eax,[esi+20]
-      mov [ebx+20],eax
-
-      mov eax,[edi+24]
-      adc eax,[esi+24]
-      mov [ebx+24],eax
-
-      mov eax,[edi+28]
-      adc eax,[esi+28]
-      mov [ebx+28],eax
-
-      sbb eax,eax
-      neg eax
-      }
-   }
-
-/*
-* Word Subtraction
-*/
-inline word word_sub(word x, word y, word* carry)
-   {
-   word t0 = x - y;
-   word c1 = (t0 > x);
-   word z = t0 - *carry;
-   *carry = c1 | (z > t0);
-   return z;
-   }
-
-/*
-* Eight Word Block Subtraction, Two Argument
-*/
-inline word word8_sub2(word x[8], const word y[8], word carry)
-   {
-    __asm {
-      mov edi,[x]
-      mov esi,[y]
-      xor eax,eax
-      sub eax,[carry] //force CF=1 iff *carry==1
-      mov eax,[edi]
-      sbb eax,[esi]
-      mov [edi],eax
-      mov eax,[edi+4]
-      sbb eax,[esi+4]
-      mov [edi+4],eax
-      mov eax,[edi+8]
-      sbb eax,[esi+8]
-      mov [edi+8],eax
-      mov eax,[edi+12]
-      sbb eax,[esi+12]
-      mov [edi+12],eax
-      mov eax,[edi+16]
-      sbb eax,[esi+16]
-      mov [edi+16],eax
-      mov eax,[edi+20]
-      sbb eax,[esi+20]
-      mov [edi+20],eax
-      mov eax,[edi+24]
-      sbb eax,[esi+24]
-      mov [edi+24],eax
-      mov eax,[edi+28]
-      sbb eax,[esi+28]
-      mov [edi+28],eax
-      sbb eax,eax
-      neg eax
-      }
-   }
-
-/*
-* Eight Word Block Subtraction, Two Argument
-*/
-inline word word8_sub2_rev(word x[8], const word y[8], word carry)
-   {
-   x[0] = word_sub(y[0], x[0], &carry);
-   x[1] = word_sub(y[1], x[1], &carry);
-   x[2] = word_sub(y[2], x[2], &carry);
-   x[3] = word_sub(y[3], x[3], &carry);
-   x[4] = word_sub(y[4], x[4], &carry);
-   x[5] = word_sub(y[5], x[5], &carry);
-   x[6] = word_sub(y[6], x[6], &carry);
-   x[7] = word_sub(y[7], x[7], &carry);
-   return carry;
-   }
-
-
-/*
-* Eight Word Block Subtraction, Three Argument
-*/
-inline word word8_sub3(word z[8], const word x[8],
-                       const word y[8], word carry)
-   {
-    __asm {
-      mov edi,[x]
-      mov esi,[y]
-      xor eax,eax
-      sub eax,[carry] //force CF=1 iff *carry==1
-      mov ebx,[z]
-      mov eax,[edi]
-      sbb eax,[esi]
-      mov [ebx],eax
-      mov eax,[edi+4]
-      sbb eax,[esi+4]
-      mov [ebx+4],eax
-      mov eax,[edi+8]
-      sbb eax,[esi+8]
-      mov [ebx+8],eax
-      mov eax,[edi+12]
-      sbb eax,[esi+12]
-      mov [ebx+12],eax
-      mov eax,[edi+16]
-      sbb eax,[esi+16]
-      mov [ebx+16],eax
-      mov eax,[edi+20]
-      sbb eax,[esi+20]
-      mov [ebx+20],eax
-      mov eax,[edi+24]
-      sbb eax,[esi+24]
-      mov [ebx+24],eax
-      mov eax,[edi+28]
-      sbb eax,[esi+28]
-      mov [ebx+28],eax
-      sbb eax,eax
-      neg eax
-      }
-   }
-
-/*
-* Eight Word Block Linear Multiplication
-*/
-inline word word8_linmul2(word x[8], word y, word carry)
-   {
-   __asm {
-      mov esi,[x]
-      mov eax,[esi]        //load a
-      mul [y]           //edx(hi):eax(lo)=a*b
-      add eax,[carry]      //sum lo carry
-      adc edx,0          //sum hi carry
-      mov ecx,edx      //store carry
-      mov [esi],eax        //load a
-
-      mov eax,[esi+4]        //load a
-      mul [y]           //edx(hi):eax(lo)=a*b
-      add eax,ecx      //sum lo carry
-      adc edx,0          //sum hi carry
-      mov ecx,edx      //store carry
-      mov [esi+4],eax        //load a
-
-      mov eax,[esi+8]        //load a
-      mul [y]           //edx(hi):eax(lo)=a*b
-      add eax,ecx      //sum lo carry
-      adc edx,0          //sum hi carry
-      mov ecx,edx      //store carry
-      mov [esi+8],eax        //load a
-
-      mov eax,[esi+12]        //load a
-      mul [y]           //edx(hi):eax(lo)=a*b
-      add eax,ecx      //sum lo carry
-      adc edx,0          //sum hi carry
-      mov ecx,edx      //store carry
-      mov [esi+12],eax        //load a
-
-      mov eax,[esi+16]        //load a
-      mul [y]           //edx(hi):eax(lo)=a*b
-      add eax,ecx      //sum lo carry
-      adc edx,0          //sum hi carry
-      mov ecx,edx      //store carry
-      mov [esi+16],eax        //load a
-
-      mov eax,[esi+20]        //load a
-      mul [y]           //edx(hi):eax(lo)=a*b
-      add eax,ecx      //sum lo carry
-      adc edx,0          //sum hi carry
-      mov ecx,edx      //store carry
-      mov [esi+20],eax        //load a
-
-      mov eax,[esi+24]        //load a
-      mul [y]           //edx(hi):eax(lo)=a*b
-      add eax,ecx      //sum lo carry
-      adc edx,0          //sum hi carry
-      mov ecx,edx      //store carry
-      mov [esi+24],eax        //load a
-
-      mov eax,[esi+28]        //load a
-      mul [y]           //edx(hi):eax(lo)=a*b
-      add eax,ecx      //sum lo carry
-      adc edx,0          //sum hi carry
-      mov [esi+28],eax        //load a
-
-      mov eax,edx      //store carry
-      }
-   }
-
-/*
-* Eight Word Block Linear Multiplication
-*/
-inline word word8_muladd(word z[8], const word x[8],
-                         word y, word carry)
-   {
-   __asm {
-      mov esi,[x]
-      mov ebx,[y]
-      mov edi,[z]
-      mov eax,[esi]     //load a
-      mul ebx           //edx(hi):eax(lo)=a*b
-      add eax,[carry]   //sum lo carry
-      adc edx,0         //sum hi carry
-      add eax,[edi]     //sum lo z
-      adc edx,0         //sum hi z
-      mov ecx,edx       //carry for next block = hi z
-      mov [edi],eax     //save lo z
-
-      mov eax,[esi+4]
-      mul ebx
-      add eax,ecx
-      adc edx,0
-      add eax,[edi+4]
-      adc edx,0
-      mov ecx,edx
-      mov [edi+4],eax
-
-      mov eax,[esi+8]
-      mul ebx
-      add eax,ecx
-      adc edx,0
-      add eax,[edi+8]
-      adc edx,0
-      mov ecx,edx
-      mov [edi+8],eax
-
-      mov eax,[esi+12]
-      mul ebx
-      add eax,ecx
-      adc edx,0
-      add eax,[edi+12]
-      adc edx,0
-      mov ecx,edx
-      mov [edi+12],eax
-
-      mov eax,[esi+16]
-      mul ebx
-      add eax,ecx
-      adc edx,0
-      add eax,[edi+16]
-      adc edx,0
-      mov ecx,edx
-      mov [edi+16],eax
-
-      mov eax,[esi+20]
-      mul ebx
-      add eax,ecx
-      adc edx,0
-      add eax,[edi+20]
-      adc edx,0
-      mov ecx,edx
-      mov [edi+20],eax
-
-      mov eax,[esi+24]
-      mul ebx
-      add eax,ecx
-      adc edx,0
-      add eax,[edi+24]
-      adc edx,0
-      mov ecx,edx
-      mov [edi+24],eax
-
-      mov eax,[esi+28]
-      mul ebx
-      add eax,ecx
-      adc edx,0
-      add eax,[edi+28]
-      adc edx,0
-      mov [edi+28],eax
-      mov eax,edx
-      }
-   }
-
-inline word word8_linmul3(word z[4], const word x[4], word y, word carry)
-   {
-   __asm {
-#if 0
-      //it's slower!!!
-      mov edx,[z]
-      mov eax,[x]
-      movd mm7,[y]
-
-      movd mm0,[eax]
-      movd mm1,[eax+4]
-      movd mm2,[eax+8]
-      pmuludq mm0,mm7
-      pmuludq mm1,mm7
-      pmuludq mm2,mm7
-
-      movd mm6,[carry]
-      paddq mm0,mm6
-      movd [edx],mm0
-
-      psrlq mm0,32
-      paddq mm1,mm0
-      movd [edx+4],mm1
-
-      movd mm3,[eax+12]
-      psrlq mm1,32
-      paddq mm2,mm1
-      movd [edx+8],mm2
-
-      pmuludq mm3,mm7
-      movd mm4,[eax+16]
-      psrlq mm2,32
-      paddq mm3,mm2
-      movd [edx+12],mm3
-
-      pmuludq mm4,mm7
-      movd mm5,[eax+20]
-      psrlq mm3,32
-      paddq mm4,mm3
-      movd [edx+16],mm4
-
-      pmuludq mm5,mm7
-      movd mm0,[eax+24]
-      psrlq mm4,32
-      paddq mm5,mm4
-      movd [edx+20],mm5
-
-      pmuludq mm0,mm7
-      movd mm1,[eax+28]
-      psrlq mm5,32
-      paddq mm0,mm5
-      movd [edx+24],mm0
-
-      pmuludq mm1,mm7
-      psrlq mm0,32
-      paddq mm1,mm0
-      movd [edx+28],mm1
-      psrlq mm1,32
-
-      movd eax,mm1
-      emms
-#else
-      mov edi,[z]
-      mov esi,[x]
-      mov eax,[esi]        //load a
-      mul [y]           //edx(hi):eax(lo)=a*b
-      add eax,[carry]    //sum lo carry
-      adc edx,0          //sum hi carry
-      mov ecx,edx      //store carry
-      mov [edi],eax        //load a
-
-      mov eax,[esi+4]        //load a
-      mul [y]           //edx(hi):eax(lo)=a*b
-      add eax,ecx      //sum lo carry
-      adc edx,0          //sum hi carry
-      mov ecx,edx      //store carry
-      mov [edi+4],eax        //load a
-
-      mov eax,[esi+8]        //load a
-      mul [y]           //edx(hi):eax(lo)=a*b
-      add eax,ecx      //sum lo carry
-      adc edx,0          //sum hi carry
-      mov ecx,edx      //store carry
-      mov [edi+8],eax        //load a
-
-      mov eax,[esi+12]        //load a
-      mul [y]           //edx(hi):eax(lo)=a*b
-      add eax,ecx      //sum lo carry
-      adc edx,0          //sum hi carry
-      mov ecx,edx      //store carry
-      mov [edi+12],eax        //load a
-
-      mov eax,[esi+16]        //load a
-      mul [y]           //edx(hi):eax(lo)=a*b
-      add eax,ecx      //sum lo carry
-      adc edx,0          //sum hi carry
-      mov ecx,edx      //store carry
-      mov [edi+16],eax        //load a
-
-      mov eax,[esi+20]        //load a
-      mul [y]           //edx(hi):eax(lo)=a*b
-      add eax,ecx      //sum lo carry
-      adc edx,0          //sum hi carry
-      mov ecx,edx      //store carry
-      mov [edi+20],eax        //load a
-
-      mov eax,[esi+24]        //load a
-      mul [y]           //edx(hi):eax(lo)=a*b
-      add eax,ecx      //sum lo carry
-      adc edx,0          //sum hi carry
-      mov ecx,edx      //store carry
-      mov [edi+24],eax        //load a
-
-      mov eax,[esi+28]        //load a
-      mul [y]           //edx(hi):eax(lo)=a*b
-      add eax,ecx      //sum lo carry
-      adc edx,0          //sum hi carry
-      mov [edi+28],eax        //load a
-      mov eax,edx      //store carry
-#endif
-      }
-   }
-
-/*
-* Eight Word Block Multiply/Add
-*/
-inline word word8_madd3(word z[8], const word x[8], word y, word carry)
-   {
-   z[0] = word_madd3(x[0], y, z[0], &carry);
-   z[1] = word_madd3(x[1], y, z[1], &carry);
-   z[2] = word_madd3(x[2], y, z[2], &carry);
-   z[3] = word_madd3(x[3], y, z[3], &carry);
-   z[4] = word_madd3(x[4], y, z[4], &carry);
-   z[5] = word_madd3(x[5], y, z[5], &carry);
-   z[6] = word_madd3(x[6], y, z[6], &carry);
-   z[7] = word_madd3(x[7], y, z[7], &carry);
-   return carry;
-   }
-
-/*
-* Multiply-Add Accumulator
-*/
-inline void word3_muladd(word* w2, word* w1, word* w0, word a, word b)
-   {
-   word carry = *w0;
-   *w0 = word_madd2(a, b, &carry);
-   *w1 += carry;
-   *w2 += (*w1 < carry) ? 1 : 0;
-   }
-
-/*
-* Multiply-Add Accumulator
-*/
-inline void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b)
-   {
-   word carry = 0;
-   a = word_madd2(a, b, &carry);
-   b = carry;
-
-   word top = (b >> (BOTAN_MP_WORD_BITS-1));
-   b <<= 1;
-   b |= (a >> (BOTAN_MP_WORD_BITS-1));
-   a <<= 1;
-
-   carry = 0;
-   *w0 = word_add(*w0, a, &carry);
-   *w1 = word_add(*w1, b, &carry);
-   *w2 = word_add(*w2, top, &carry);
-   }
-
-}
-
-}
-
-#endif
diff --git a/src/math/bigint/mp_karat.cpp b/src/math/bigint/mp_karat.cpp
deleted file mode 100644
index 8ae346f1e..000000000
--- a/src/math/bigint/mp_karat.cpp
+++ /dev/null
@@ -1,340 +0,0 @@
-/*
-* Karatsuba Multiplication/Squaring
-* (C) 1999-2008 Jack Lloyd
-*
-* Distributed under the terms of the Botan license
-*/
-
-#include <botan/internal/mp_core.h>
-#include <botan/mem_ops.h>
-#include <botan/internal/mp_asmi.h>
-
-namespace Botan {
-
-namespace {
-
-/*
-* Karatsuba Multiplication Operation
-*/
-void karatsuba_mul(word z[], const word x[], const word y[], u32bit N,
-                   word workspace[])
-   {
-   if(N == 6)
-      bigint_comba_mul6(z, x, y);
-   else if(N == 8)
-      bigint_comba_mul8(z, x, y);
-   else if(N == 16)
-      bigint_comba_mul16(z, x, y);
-   else if(N < BOTAN_KARAT_MUL_THRESHOLD || N % 2)
-      bigint_simple_mul(z, x, N, y, N);
-   else
-      {
-      const u32bit N2 = N / 2;
-
-      const word* x0 = x;
-      const word* x1 = x + N2;
-      const word* y0 = y;
-      const word* y1 = y + N2;
-      word* z0 = z;
-      word* z1 = z + N;
-
-      const s32bit cmp0 = bigint_cmp(x0, N2, x1, N2);
-      const s32bit cmp1 = bigint_cmp(y1, N2, y0, N2);
-
-      clear_mem(workspace, 2*N);
-
-      if(cmp0 && cmp1)
-         {
-         if(cmp0 > 0)
-            bigint_sub3(z0, x0, N2, x1, N2);
-         else
-            bigint_sub3(z0, x1, N2, x0, N2);
-
-         if(cmp1 > 0)
-            bigint_sub3(z1, y1, N2, y0, N2);
-         else
-            bigint_sub3(z1, y0, N2, y1, N2);
-
-         karatsuba_mul(workspace, z0, z1, N2, workspace+N);
-         }
-
-      karatsuba_mul(z0, x0, y0, N2, workspace+N);
-      karatsuba_mul(z1, x1, y1, N2, workspace+N);
-
-      const u32bit blocks_of_8 = N - (N % 8);
-
-      word carry = 0;
-
-      for(u32bit j = 0; j != blocks_of_8; j += 8)
-         carry = word8_add3(workspace + N + j, z0 + j, z1 + j, carry);
-
-      for(u32bit j = blocks_of_8; j != N; ++j)
-         workspace[N + j] = word_add(z0[j], z1[j], &carry);
-
-      word carry2 = 0;
-
-      for(u32bit j = 0; j != blocks_of_8; j += 8)
-         carry2 = word8_add2(z + N2 + j, workspace + N + j, carry2);
-
-      for(u32bit j = blocks_of_8; j != N; ++j)
-         z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &carry2);
-
-      z[N + N2] = word_add(z[N + N2], carry2, &carry);
-
-      if(carry)
-         for(u32bit j = 1; j != N2; ++j)
-            if(++z[N + N2 + j])
-               break;
-
-      if((cmp0 == cmp1) || (cmp0 == 0) || (cmp1 == 0))
-         bigint_add2(z + N2, 2*N-N2, workspace, N);
-      else
-         bigint_sub2(z + N2, 2*N-N2, workspace, N);
-      }
-   }
-
-/*
-* Karatsuba Squaring Operation
-*/
-void karatsuba_sqr(word z[], const word x[], u32bit N, word workspace[])
-   {
-   if(N == 6)
-      bigint_comba_sqr6(z, x);
-   else if(N == 8)
-      bigint_comba_sqr8(z, x);
-   else if(N == 16)
-      bigint_comba_sqr16(z, x);
-   else if(N < BOTAN_KARAT_SQR_THRESHOLD || N % 2)
-      bigint_simple_sqr(z, x, N);
-   else
-      {
-      const u32bit N2 = N / 2;
-
-      const word* x0 = x;
-      const word* x1 = x + N2;
-      word* z0 = z;
-      word* z1 = z + N;
-
-      const s32bit cmp = bigint_cmp(x0, N2, x1, N2);
-
-      clear_mem(workspace, 2*N);
-
-      if(cmp)
-         {
-         if(cmp > 0)
-            bigint_sub3(z0, x0, N2, x1, N2);
-         else
-            bigint_sub3(z0, x1, N2, x0, N2);
-
-         karatsuba_sqr(workspace, z0, N2, workspace+N);
-         }
-
-      karatsuba_sqr(z0, x0, N2, workspace+N);
-      karatsuba_sqr(z1, x1, N2, workspace+N);
-
-      const u32bit blocks_of_8 = N - (N % 8);
-
-      word carry = 0;
-
-      for(u32bit j = 0; j != blocks_of_8; j += 8)
-         carry = word8_add3(workspace + N + j, z0 + j, z1 + j, carry);
-
-      for(u32bit j = blocks_of_8; j != N; ++j)
-         workspace[N + j] = word_add(z0[j], z1[j], &carry);
-
-      word carry2 = 0;
-
-      for(u32bit j = 0; j != blocks_of_8; j += 8)
-         carry2 = word8_add2(z + N2 + j, workspace + N + j, carry2);
-
-      for(u32bit j = blocks_of_8; j != N; ++j)
-         z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &carry2);
-
-      z[N + N2] = word_add(z[N + N2], carry2, &carry);
-
-      if(carry)
-         for(u32bit j = 1; j != N2; ++j)
-            if(++z[N + N2 + j])
-               break;
-
-      if(cmp == 0)
-         bigint_add2(z + N2, 2*N-N2, workspace, N);
-      else
-         bigint_sub2(z + N2, 2*N-N2, workspace, N);
-      }
-   }
-
-/*
-* Pick a good size for the Karatsuba multiply
-*/
-u32bit karatsuba_size(u32bit z_size,
-                      u32bit x_size, u32bit x_sw,
-                      u32bit y_size, u32bit y_sw)
-   {
-   if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size)
-      return 0;
-
-   if(((x_size == x_sw) && (x_size % 2)) ||
-      ((y_size == y_sw) && (y_size % 2)))
-      return 0;
-
-   const u32bit start = (x_sw > y_sw) ? x_sw : y_sw;
-   const u32bit end = (x_size < y_size) ? x_size : y_size;
-
-   if(start == end)
-      {
-      if(start % 2)
-         return 0;
-      return start;
-      }
-
-   for(u32bit j = start; j <= end; ++j)
-      {
-      if(j % 2)
-         continue;
-
-      if(2*j > z_size)
-         return 0;
-
-      if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size)
-         {
-         if(j % 4 == 2 &&
-            (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size)
-            return j+2;
-         return j;
-         }
-      }
-
-   return 0;
-   }
-
-/*
-* Pick a good size for the Karatsuba squaring
-*/
-u32bit karatsuba_size(u32bit z_size, u32bit x_size, u32bit x_sw)
-   {
-   if(x_sw == x_size)
-      {
-      if(x_sw % 2)
-         return 0;
-      return x_sw;
-      }
-
-   for(u32bit j = x_sw; j <= x_size; ++j)
-      {
-      if(j % 2)
-         continue;
-
-      if(2*j > z_size)
-         return 0;
-
-      if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size)
-         return j+2;
-      return j;
-      }
-
-   return 0;
-   }
-
-}
-
-/*
-* Multiplication Algorithm Dispatcher
-*/
-void bigint_mul(word z[], u32bit z_size, word workspace[],
-                const word x[], u32bit x_size, u32bit x_sw,
-                const word y[], u32bit y_size, u32bit y_sw)
-   {
-   if(x_sw == 1)
-      {
-      bigint_linmul3(z, y, y_sw, x[0]);
-      }
-   else if(y_sw == 1)
-      {
-      bigint_linmul3(z, x, x_sw, y[0]);
-      }
-   else if(x_sw <= 4 && x_size >= 4 &&
-           y_sw <= 4 && y_size >= 4 && z_size >= 8)
-      {
-      bigint_comba_mul4(z, x, y);
-      }
-   else if(x_sw <= 6 && x_size >= 6 &&
-           y_sw <= 6 && y_size >= 6 && z_size >= 12)
-      {
-      bigint_comba_mul6(z, x, y);
-      }
-   else if(x_sw <= 8 && x_size >= 8 &&
-           y_sw <= 8 && y_size >= 8 && z_size >= 16)
-      {
-      bigint_comba_mul8(z, x, y);
-      }
-   else if(x_sw <= 16 && x_size >= 16 &&
-           y_sw <= 16 && y_size >= 16 && z_size >= 32)
-      {
-      bigint_comba_mul16(z, x, y);
-      }
-   else if(x_sw < BOTAN_KARAT_MUL_THRESHOLD ||
-           y_sw < BOTAN_KARAT_MUL_THRESHOLD ||
-           !workspace)
-      {
-      bigint_simple_mul(z, x, x_sw, y, y_sw);
-      }
-   else
-      {
-      const u32bit N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
-
-      if(N)
-         {
-         clear_mem(workspace, 2*N);
-         karatsuba_mul(z, x, y, N, workspace);
-         }
-      else
-         bigint_simple_mul(z, x, x_sw, y, y_sw);
-      }
-   }
-
-/*
-* Squaring Algorithm Dispatcher
-*/
-void bigint_sqr(word z[], u32bit z_size, word workspace[],
-                const word x[], u32bit x_size, u32bit x_sw)
-   {
-   if(x_sw == 1)
-      {
-      bigint_linmul3(z, x, x_sw, x[0]);
-      }
-   else if(x_sw <= 4 && x_size >= 4 && z_size >= 8)
-      {
-      bigint_comba_sqr4(z, x);
-      }
-   else if(x_sw <= 6 && x_size >= 6 && z_size >= 12)
-      {
-      bigint_comba_sqr6(z, x);
-      }
-   else if(x_sw <= 8 && x_size >= 8 && z_size >= 16)
-      {
-      bigint_comba_sqr8(z, x);
-      }
-   else if(x_sw <= 16 && x_size >= 16 && z_size >= 32)
-      {
-      bigint_comba_sqr16(z, x);
-      }
-   else if(x_size < BOTAN_KARAT_SQR_THRESHOLD || !workspace)
-      {
-      bigint_simple_sqr(z, x, x_sw);
-      }
-   else
-      {
-      const u32bit N = karatsuba_size(z_size, x_size, x_sw);
-
-      if(N)
-         {
-         clear_mem(workspace, 2*N);
-         karatsuba_sqr(z, x, N, workspace);
-         }
-      else
-         bigint_simple_sqr(z, x, x_sw);
-      }
-   }
-
-}
diff --git a/src/math/bigint/mp_misc.cpp b/src/math/bigint/mp_misc.cpp
deleted file mode 100644
index 77b8e6f51..000000000
--- a/src/math/bigint/mp_misc.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
-* MP Misc Functions
-* (C) 1999-2008 Jack Lloyd
-*
-* Distributed under the terms of the Botan license
-*/
-
-#include <botan/internal/mp_core.h>
-#include <botan/internal/mp_asm.h>
-
-namespace Botan {
-
-extern "C" {
-
-/*
-* Core Division Operation
-*/
-u32bit bigint_divcore(word q, word y2, word y1,
-                      word x3, word x2, word x1)
-   {
-   // Compute (y2,y1) * q
-
-   word y3 = 0;
-   y1 = word_madd2(q, y1, &y3);
-   y2 = word_madd2(q, y2, &y3);
-
-   // Return (y3,y2,y1) >? (x3,x2,x1)
-
-   if(y3 > x3) return 1;
-   if(y3 < x3) return 0;
-   if(y2 > x2) return 1;
-   if(y2 < x2) return 0;
-   if(y1 > x1) return 1;
-   if(y1 < x1) return 0;
-   return 0;
-   }
-
-/*
-* Compare two MP integers
-*/
-s32bit bigint_cmp(const word x[], u32bit x_size,
-                  const word y[], u32bit y_size)
-   {
-   if(x_size < y_size) { return (-bigint_cmp(y, y_size, x, x_size)); }
-
-   while(x_size > y_size)
-      {
-      if(x[x_size-1])
-         return 1;
-      x_size--;
-      }
-
-   for(u32bit j = x_size; j > 0; --j)
-      {
-      if(x[j-1] > y[j-1])
-         return 1;
-      if(x[j-1] < y[j-1])
-         return -1;
-      }
-
-   return 0;
-   }
-
-/*
-* Do a 2-word/1-word Division
-*/
-word bigint_divop(word n1, word n0, word d)
-   {
-   word high = n1 % d, quotient = 0;
-
-   for(u32bit j = 0; j != MP_WORD_BITS; ++j)
-      {
-      word high_top_bit = (high & MP_WORD_TOP_BIT);
-
-      high <<= 1;
-      high |= (n0 >> (MP_WORD_BITS-1-j)) & 1;
-      quotient <<= 1;
-
-      if(high_top_bit || high >= d)
-         {
-         high -= d;
-         quotient |= 1;
-         }
-      }
-
-   return quotient;
-   }
-
-/*
-* Do a 2-word/1-word Modulo
-*/
-word bigint_modop(word n1, word n0, word d)
-   {
-   word z = bigint_divop(n1, n0, d);
-   word dummy = 0;
-   z = word_madd2(z, d, &dummy);
-   return (n0-z);
-   }
-
-}
-
-}
diff --git a/src/math/bigint/mp_msvc64/info.txt b/src/math/bigint/mp_msvc64/info.txt
deleted file mode 100644
index 56ae05927..000000000
--- a/src/math/bigint/mp_msvc64/info.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-load_on dep
-
-mp_bits 64
-
-<header:internal>
-mp_asm.h
-mp_generic:mp_asmi.h
-</header:internal>
-
-<arch>
-amd64
-ia64
-</arch>
-
-<cc>
-msvc
-</cc>
diff --git a/src/math/bigint/mp_msvc64/mp_asm.h b/src/math/bigint/mp_msvc64/mp_asm.h
deleted file mode 100644
index 8e4535c35..000000000
--- a/src/math/bigint/mp_msvc64/mp_asm.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
-* Multiply-Add for 64-bit MSVC
-* (C) 2010 Jack Lloyd
-*
-* Distributed under the terms of the Botan license
-*/
-
-#ifndef BOTAN_MP_ASM_H__
-#define BOTAN_MP_ASM_H__
-
-#include <botan/mp_types.h>
-#include <intrin.h>
-
-#if (BOTAN_MP_WORD_BITS != 64)
-   #error The mp_msvc64 module requires that BOTAN_MP_WORD_BITS == 64
-#endif
-
-#pragma intrinsic(_umul128)
-
-namespace Botan {
-
-extern "C" {
-
-/*
-* Word Multiply
-*/
-inline word word_madd2(word a, word b, word* c)
-   {
-   word hi, lo;
-   lo = _umul128(a, b, &hi);
-
-   lo += *c;
-   hi += (lo < *c); // carry?
-
-   *c = hi;
-   return lo;
-   }
-
-/*
-* Word Multiply/Add
-*/
-inline word word_madd3(word a, word b, word c, word* d)
-   {
-   word hi, lo;
-   lo = _umul128(a, b, &hi);
-
-   lo += c;
-   hi += (lo < c); // carry?
-
-   lo += *d;
-   hi += (lo < *d); // carry?
-
-   *d = hi;
-   return lo;
-   }
-
-}
-
-}
-
-#endif
diff --git a/src/math/bigint/mp_shift.cpp b/src/math/bigint/mp_shift.cpp
deleted file mode 100644
index f1d609bfb..000000000
--- a/src/math/bigint/mp_shift.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
-* MP Shift Algorithms
-* (C) 1999-2007 Jack Lloyd
-*
-* Distributed under the terms of the Botan license
-*/
-
-#include <botan/internal/mp_core.h>
-#include <botan/mem_ops.h>
-
-namespace Botan {
-
-extern "C" {
-
-/*
-* Single Operand Left Shift
-*/
-void bigint_shl1(word x[], u32bit x_size, u32bit word_shift, u32bit bit_shift)
-   {
-   if(word_shift)
-      {
-      for(u32bit j = 1; j != x_size + 1; ++j)
-         x[(x_size - j) + word_shift] = x[x_size - j];
-      clear_mem(x, word_shift);
-      }
-
-   if(bit_shift)
-      {
-      word carry = 0;
-      for(u32bit j = word_shift; j != x_size + word_shift + 1; ++j)
-         {
-         word temp = x[j];
-         x[j] = (temp << bit_shift) | carry;
-         carry = (temp >> (MP_WORD_BITS - bit_shift));
-         }
-      }
-   }
-
-/*
-* Single Operand Right Shift
-*/
-void bigint_shr1(word x[], u32bit x_size, u32bit word_shift, u32bit bit_shift)
-   {
-   if(x_size < word_shift)
-      {
-      clear_mem(x, x_size);
-      return;
-      }
-
-   if(word_shift)
-      {
-      copy_mem(x, x + word_shift, x_size - word_shift);
-      clear_mem(x + x_size - word_shift, word_shift);
-      }
-
-   if(bit_shift)
-      {
-      word carry = 0;
-
-      u32bit top = x_size - word_shift;
-
-      while(top >= 4)
-         {
-         word w = x[top-1];
-         x[top-1] = (w >> bit_shift) | carry;
-         carry = (w << (MP_WORD_BITS - bit_shift));
-
-         w = x[top-2];
-         x[top-2] = (w >> bit_shift) | carry;
-         carry = (w << (MP_WORD_BITS - bit_shift));
-
-         w = x[top-3];
-         x[top-3] = (w >> bit_shift) | carry;
-         carry = (w << (MP_WORD_BITS - bit_shift));
-
-         w = x[top-4];
-         x[top-4] = (w >> bit_shift) | carry;
-         carry = (w << (MP_WORD_BITS - bit_shift));
-
-         top -= 4;
-         }
-
-      while(top)
-         {
-         word w = x[top-1];
-         x[top-1] = (w >> bit_shift) | carry;
-         carry = (w << (MP_WORD_BITS - bit_shift));
-
-         top--;
-         }
-      }
-   }
-
-/*
-* Two Operand Left Shift
-*/
-void bigint_shl2(word y[], const word x[], u32bit x_size,
-                 u32bit word_shift, u32bit bit_shift)
-   {
-   for(u32bit j = 0; j != x_size; ++j)
-      y[j + word_shift] = x[j];
-   if(bit_shift)
-      {
-      word carry = 0;
-      for(u32bit j = word_shift; j != x_size + word_shift + 1; ++j)
-         {
-         word w = y[j];
-         y[j] = (w << bit_shift) | carry;
-         carry = (w >> (MP_WORD_BITS - bit_shift));
-         }
-      }
-   }
-
-/*
-* Two Operand Right Shift
-*/
-void bigint_shr2(word y[], const word x[], u32bit x_size,
-                 u32bit word_shift, u32bit bit_shift)
-   {
-   if(x_size < word_shift) return;
-
-   for(u32bit j = 0; j != x_size - word_shift; ++j)
-      y[j] = x[j + word_shift];
-   if(bit_shift)
-      {
-      word carry = 0;
-      for(u32bit j = x_size - word_shift; j > 0; --j)
-         {
-         word w = y[j-1];
-         y[j-1] = (w >> bit_shift) | carry;
-         carry = (w << (MP_WORD_BITS - bit_shift));
-         }
-      }
-   }
-
-}
-
-}
diff --git a/src/math/bigint/mp_types.h b/src/math/bigint/mp_types.h
deleted file mode 100644
index 1648713ed..000000000
--- a/src/math/bigint/mp_types.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
-* Low Level MPI Types
-* (C) 1999-2007 Jack Lloyd
-*
-* Distributed under the terms of the Botan license
-*/
-
-#ifndef BOTAN_MPI_TYPES_H__
-#define BOTAN_MPI_TYPES_H__
-
-#include <botan/types.h>
-
-namespace Botan {
-
-#if (BOTAN_MP_WORD_BITS == 8)
-  typedef byte word;
-#elif (BOTAN_MP_WORD_BITS == 16)
-  typedef u16bit word;
-#elif (BOTAN_MP_WORD_BITS == 32)
-  typedef u32bit word;
-#elif (BOTAN_MP_WORD_BITS == 64)
-  typedef u64bit word;
-#else
-  #error BOTAN_MP_WORD_BITS must be 8, 16, 32, or 64
-#endif
-
-const word MP_WORD_MASK = ~static_cast<word>(0);
-const word MP_WORD_TOP_BIT = static_cast<word>(1) << (8*sizeof(word) - 1);
-const word MP_WORD_MAX = MP_WORD_MASK;
-
-}
-
-#endif
diff --git a/src/math/bigint/mulop_generic/info.txt b/src/math/bigint/mulop_generic/info.txt
deleted file mode 100644
index 548d0f44b..000000000
--- a/src/math/bigint/mulop_generic/info.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-load_on dep
-
-<source>
-mp_mulop.cpp
-</source>
diff --git a/src/math/bigint/mulop_generic/mp_mulop.cpp b/src/math/bigint/mulop_generic/mp_mulop.cpp
deleted file mode 100644
index 33ee2af32..000000000
--- a/src/math/bigint/mulop_generic/mp_mulop.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
-* Simple O(N^2) Multiplication and Squaring
-* (C) 1999-2008 Jack Lloyd
-*
-* Distributed under the terms of the Botan license
-*/
-
-#include <botan/internal/mp_asm.h>
-#include <botan/internal/mp_asmi.h>
-#include <botan/internal/mp_core.h>
-#include <botan/mem_ops.h>
-
-namespace Botan {
-
-extern "C" {
-
-/*
-* Simple O(N^2) Multiplication
-*/
-void bigint_simple_mul(word z[], const word x[], u32bit x_size,
-                                 const word y[], u32bit y_size)
-   {
-   const u32bit x_size_8 = x_size - (x_size % 8);
-
-   clear_mem(z, x_size + y_size);
-
-   for(u32bit i = 0; i != y_size; ++i)
-      {
-      const word y_i = y[i];
-
-      word carry = 0;
-
-      for(u32bit j = 0; j != x_size_8; j += 8)
-         carry = word8_madd3(z + i + j, x + j, y_i, carry);
-
-      for(u32bit j = x_size_8; j != x_size; ++j)
-         z[i+j] = word_madd3(x[j], y_i, z[i+j], &carry);
-
-      z[x_size+i] = carry;
-      }
-   }
-
-/*
-* Simple O(N^2) Squaring
-
-This is exactly the same algorithm as bigint_simple_mul,
-however because C/C++ compilers suck at alias analysis it
-is good to have the version where the compiler knows
-that x == y
-
-There is an O(n^1.5) squaring algorithm specified in Handbook of
-Applied Cryptography, chapter 14
-*/
-void bigint_simple_sqr(word z[], const word x[], u32bit x_size)
-   {
-   const u32bit x_size_8 = x_size - (x_size % 8);
-
-   clear_mem(z, 2*x_size);
-
-   for(u32bit i = 0; i != x_size; ++i)
-      {
-      const word x_i = x[i];
-      word carry = 0;
-
-      for(u32bit j = 0; j != x_size_8; j += 8)
-         carry = word8_madd3(z + i + j, x + j, x_i, carry);
-
-      for(u32bit j = x_size_8; j != x_size; ++j)
-         z[i+j] = word_madd3(x[j], x_i, z[i+j], &carry);
-
-      z[x_size+i] = carry;
-      }
-   }
-
-}
-
-}
diff --git a/src/math/mp/info.txt b/src/math/mp/info.txt
new file mode 100644
index 000000000..a3c994d8b
--- /dev/null
+++ b/src/math/mp/info.txt
@@ -0,0 +1,23 @@
+define BIGINT_MP
+
+<source>
+mp_asm.cpp
+mp_comba.cpp
+mp_karat.cpp
+mp_misc.cpp
+mp_shift.cpp
+</source>
+
+<header:public>
+mp_types.h
+</header:public>
+
+<header:internal>
+mp_core.h
+</header:internal>
+
+<requires>
+mp_amd64|mp_msvc64|mp_asm64|mp_ia32|mp_ia32_msvc|mp_generic
+monty_generic
+mulop_generic
+</requires>
diff --git a/src/math/mp/monty_generic/info.txt b/src/math/mp/monty_generic/info.txt
new file mode 100644
index 000000000..cd05ccdc0
--- /dev/null
+++ b/src/math/mp/monty_generic/info.txt
@@ -0,0 +1,5 @@
+load_on dep
+
+<source>
+mp_monty.cpp
+</source>
diff --git a/src/math/mp/monty_generic/mp_monty.cpp b/src/math/mp/monty_generic/mp_monty.cpp
new file mode 100644
index 000000000..bce35259a
--- /dev/null
+++ b/src/math/mp/monty_generic/mp_monty.cpp
@@ -0,0 +1,72 @@
+/*
+* Montgomery Reduction
+* (C) 1999-2010 Jack Lloyd
+*     2006 Luca Piccarreta
+*
+* Distributed under the terms of the Botan license
+*/
+
+#include <botan/internal/mp_core.h>
+#include <botan/internal/mp_asm.h>
+#include <botan/internal/mp_asmi.h>
+#include <botan/mem_ops.h>
+
+namespace Botan {
+
+extern "C" {
+
+/*
+* Montgomery Reduction: z needs >= 2*x_size+1 words, ws >= 2*(x_size+1) words
+*/
+void bigint_monty_redc(word z[], u32bit z_size,
+                       word ws[],
+                       const word x[], u32bit x_size,
+                       word u)
+   {
+   const u32bit blocks_of_8 = x_size - (x_size % 8);
+
+   for(u32bit i = 0; i != x_size; ++i)
+      {
+      word* z_i = z + i;
+
+      const word y = z_i[0] * u;
+
+      /* z_i += x * y, done in one pass instead of the two calls:
+      bigint_linmul3(ws, x, x_size, y);
+      bigint_add2(z_i, z_size - i, ws, x_size+1);
+      */
+      word carry = 0;
+
+      for(u32bit j = 0; j != blocks_of_8; j += 8)
+         carry = word8_madd3(z_i + j, x + j, y, carry);
+
+      for(u32bit j = blocks_of_8; j != x_size; ++j)
+         z_i[j] = word_madd3(x[j], y, z_i[j], &carry);
+
+      word z_sum = z_i[x_size] + carry;
+      carry = (z_sum < z_i[x_size]);
+      z_i[x_size] = z_sum;
+
+      // Ripple the carry upward; note: not constant time (stops early)
+      for(u32bit j = x_size + 1; carry && j != z_size - i; ++j)
+         {
+         ++z_i[j];
+         carry = !z_i[j];
+         }
+      }
+
+   word borrow = 0;
+   for(u32bit i = 0; i != x_size; ++i)
+      ws[i] = word_sub(z[x_size + i], x[i], &borrow);
+
+   ws[x_size] = word_sub(z[x_size+x_size], 0, &borrow);
+
+   copy_mem(ws + x_size + 1, z + x_size, x_size + 1);
+
+   copy_mem(z, ws + borrow*(x_size+1), x_size + 1);
+   clear_mem(z + x_size + 1, z_size - x_size - 1);
+   }
+
+}
+
+}
diff --git a/src/math/mp/mp_amd64/info.txt b/src/math/mp/mp_amd64/info.txt
new file mode 100644
index 000000000..11cc380e2
--- /dev/null
+++ b/src/math/mp/mp_amd64/info.txt
@@ -0,0 +1,18 @@
+load_on dep
+
+mp_bits 64
+
+<header:internal>
+mp_asm.h
+mp_asmi.h
+</header:internal>
+
+<arch>
+amd64
+</arch>
+
+<cc>
+clang
+gcc
+icc
+</cc>
diff --git a/src/math/mp/mp_amd64/mp_asm.h b/src/math/mp/mp_amd64/mp_asm.h
new file mode 100644
index 000000000..fa66d04f3
--- /dev/null
+++ b/src/math/mp/mp_amd64/mp_asm.h
@@ -0,0 +1,69 @@
+/*
+* Lowest Level MPI Algorithms
+* (C) 1999-2008 Jack Lloyd
+*     2006 Luca Piccarreta
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_MP_ASM_H__
+#define BOTAN_MP_ASM_H__
+
+#include <botan/mp_types.h>
+
+#if (BOTAN_MP_WORD_BITS != 64)
+   #error The mp_amd64 module requires that BOTAN_MP_WORD_BITS == 64
+#endif
+
+namespace Botan {
+
+extern "C" {
+
+/*
+* Helper Macros for amd64 Assembly
+*/
+#define ASM(x) x "\n\t"
+
+/*
+* Word Multiply: returns low word of a*b + *c, stores the high word in *c
+*/
+inline word word_madd2(word a, word b, word* c)
+   {
+   asm(
+      ASM("mulq %[b]")
+      ASM("addq %[c],%[a]")
+      ASM("adcq $0,%[carry]")
+
+      : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c)
+      : "0"(a), "1"(b), [c]"g"(*c) : "cc");
+
+   return a;
+   }
+
+/*
+* Word Multiply/Add: returns low word of a*b + c + *d, high word goes to *d
+*/
+inline word word_madd3(word a, word b, word c, word* d)
+   {
+   asm(
+      ASM("mulq %[b]")
+
+      ASM("addq %[c],%[a]")
+      ASM("adcq $0,%[carry]")
+
+      ASM("addq %[d],%[a]")
+      ASM("adcq $0,%[carry]")
+
+      : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d)
+      : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc");
+
+   return a;
+   }
+
+#undef ASM
+
+}
+
+}
+
+#endif
diff --git a/src/math/mp/mp_amd64/mp_asmi.h b/src/math/mp/mp_amd64/mp_asmi.h
new file mode 100644
index 000000000..adf7774ef
--- /dev/null
+++ b/src/math/mp/mp_amd64/mp_asmi.h
@@ -0,0 +1,248 @@
+/*
+* Lowest Level MPI Algorithms
+* (C) 1999-2010 Jack Lloyd
+*     2006 Luca Piccarreta
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_MP_ASM_INTERNAL_H__
+#define BOTAN_MP_ASM_INTERNAL_H__
+
+#include <botan/internal/mp_asm.h>
+
+namespace Botan {
+
+extern "C" {
+
+/*
+* Helper Macros for amd64 Assembly
+*/
+#ifndef ASM
+  #define ASM(x) x "\n\t"
+#endif
+
+#define ADDSUB2_OP(OPERATION, INDEX)                     \
+        ASM("movq 8*" #INDEX "(%[y]), %[carry]")         \
+        ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])")   \
+
+#define ADDSUB3_OP(OPERATION, INDEX)                     \
+        ASM("movq 8*" #INDEX "(%[x]), %[carry]")         \
+        ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]")   \
+        ASM("movq %[carry], 8*" #INDEX "(%[z])")         \
+
+#define LINMUL_OP(WRITE_TO, INDEX)                       \
+        ASM("movq 8*" #INDEX "(%[x]),%%rax")             \
+        ASM("mulq %[y]")                                 \
+        ASM("addq %[carry],%%rax")                       \
+        ASM("adcq $0,%%rdx")                             \
+        ASM("movq %%rdx,%[carry]")                       \
+        ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")
+
+#define MULADD_OP(IGNORED, INDEX)                        \
+        ASM("movq 8*" #INDEX "(%[x]),%%rax")             \
+        ASM("mulq %[y]")                                 \
+        ASM("addq %[carry],%%rax")                       \
+        ASM("adcq $0,%%rdx")                             \
+        ASM("addq 8*" #INDEX "(%[z]),%%rax")             \
+        ASM("adcq $0,%%rdx")                             \
+        ASM("movq %%rdx,%[carry]")                       \
+        ASM("movq %%rax, 8*" #INDEX " (%[z])")
+
+#define DO_8_TIMES(MACRO, ARG) \
+        MACRO(ARG, 0) \
+        MACRO(ARG, 1) \
+        MACRO(ARG, 2) \
+        MACRO(ARG, 3) \
+        MACRO(ARG, 4) \
+        MACRO(ARG, 5) \
+        MACRO(ARG, 6) \
+        MACRO(ARG, 7)
+
+#define ADD_OR_SUBTRACT(CORE_CODE)     \
+        ASM("rorq %[carry]")           \
+        CORE_CODE                      \
+        ASM("sbbq %[carry],%[carry]")  \
+        ASM("negq %[carry]")
+
+/* Word Addition: returns x + y + carry-in and sets *carry to the carry
+* out (0 or 1); only bit 0 of the incoming *carry is consumed (rorq -> CF)
+*/
+inline word word_add(word x, word y, word* carry)
+   {
+   asm(
+      ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]"))
+      : [x]"=r"(x), [carry]"=r"(*carry)
+      : "0"(x), [y]"rm"(y), "1"(*carry)
+      : "cc");
+   return x;
+   }
+
+/* Eight Word Block Addition, Two Argument: x[0..7] += y[0..7] + carry;
+* returns the carry out. %[carry] doubles as the scratch register for y[i].
+*/
+inline word word8_add2(word x[8], const word y[8], word carry)
+   {
+   asm(
+      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq"))
+      : [carry]"=r"(carry)
+      : [x]"r"(x), [y]"r"(y), "0"(carry)
+      : "cc", "memory");
+   return carry;
+   }
+
+/* Eight Word Block Addition, Three Argument:
+* z[0..7] = x[0..7] + y[0..7] + carry; returns the carry out
+*/
+inline word word8_add3(word z[8], const word x[8], const word y[8], word carry)
+   {
+   asm(
+      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq"))
+      : [carry]"=r"(carry)
+      : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
+      : "cc", "memory");
+   return carry;
+   }
+
+/* Word Subtraction: returns x - y - borrow-in and sets *carry to the
+* borrow out (0 or 1); only bit 0 of the incoming *carry is consumed
+*/
+inline word word_sub(word x, word y, word* carry)
+   {
+   asm(
+      ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]"))
+      : [x]"=r"(x), [carry]"=r"(*carry)
+      : "0"(x), [y]"rm"(y), "1"(*carry)
+      : "cc");
+   return x;
+   }
+
+/* Eight Word Block Subtraction, Two Argument:
+* x[0..7] = x[0..7] - y[0..7] - carry; returns the borrow out
+*/
+inline word word8_sub2(word x[8], const word y[8], word carry)
+   {
+   asm(
+      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq"))
+      : [carry]"=r"(carry)
+      : [x]"r"(x), [y]"r"(y), "0"(carry)
+      : "cc", "memory");
+   return carry;
+   }
+
+/* Eight Word Block Subtraction, Two Argument, Reversed:
+* x[0..7] = y[0..7] - x[0..7] - carry (operands are swapped in the asm inputs)
+*/
+inline word word8_sub2_rev(word x[8], const word y[8], word carry)
+   {
+   asm(
+      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
+      : [carry]"=r"(carry)
+      : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
+      : "cc", "memory");
+   return carry;
+   }
+
+/* Eight Word Block Subtraction, Three Argument:
+* z[0..7] = x[0..7] - y[0..7] - carry; returns the borrow out
+*/
+inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry)
+   {
+   asm(
+      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
+      : [carry]"=r"(carry)
+      : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
+      : "cc", "memory");
+   return carry;
+   }
+
+/* Eight Word Block Linear Multiplication: x[0..7] = x[0..7] * y + carry;
+* returns the carry-out word. The asm reads and stores x[], hence "memory".
+*/
+inline word word8_linmul2(word x[8], word y, word carry)
+   {
+   asm(
+      DO_8_TIMES(LINMUL_OP, "x")
+      : [carry]"=r"(carry)
+      : [x]"r"(x), [y]"rm"(y), "0"(carry)
+      : "cc", "memory", "%rax", "%rdx");
+   return carry;
+   }
+
+/* Eight Word Block Linear Multiplication: z[0..7] = x[0..7] * y + carry;
+* returns the carry-out word. The asm stores into z[], hence "memory".
+*/
+inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
+   {
+   asm(
+      DO_8_TIMES(LINMUL_OP, "z")
+      : [carry]"=r"(carry)
+      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
+      : "cc", "memory", "%rax", "%rdx");
+   return carry;
+   }
+
+/* Eight Word Block Multiply/Add: z[0..7] += x[0..7] * y + carry; returns
+* the carry-out word. The asm reads and stores z[], hence "memory".
+*/
+inline word word8_madd3(word z[8], const word x[8], word y, word carry)
+   {
+   asm(
+      DO_8_TIMES(MULADD_OP, "")
+      : [carry]"=r"(carry)
+      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
+      : "cc", "memory", "%rax", "%rdx");
+   return carry;
+   }
+
+/* Multiply-Add Accumulator: (w2,w1,w0) += x * y. mulq overwrites rax/rdx,
+* so x and y are declared as outputs too rather than input-only operands.
+*/
+inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y)
+   {
+   asm(
+      ASM("mulq %[y]")
+
+      ASM("addq %[x],%[w0]")
+      ASM("adcq %[y],%[w1]")
+      ASM("adcq $0,%[w2]")
+
+      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2), [x]"=a"(x), [y]"=d"(y)
+      : "0"(*w0), "1"(*w1), "2"(*w2), "3"(x), "4"(y)
+      : "cc");
+   }
+
+/* Multiply-Add Accumulator: (w2,w1,w0) += 2 * x * y. mulq overwrites
+* rax/rdx, so x and y are declared as outputs, not input-only operands.
+*/
+inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y)
+   {
+   asm(
+      ASM("mulq %[y]")
+
+      ASM("addq %[x],%[w0]")
+      ASM("adcq %[y],%[w1]")
+      ASM("adcq $0,%[w2]")
+
+      ASM("addq %[x],%[w0]")
+      ASM("adcq %[y],%[w1]")
+      ASM("adcq $0,%[w2]")
+
+      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2), [x]"=a"(x), [y]"=d"(y)
+      : "0"(*w0), "1"(*w1), "2"(*w2), "3"(x), "4"(y)
+      : "cc");
+   }
+
+
+#undef ASM
+#undef DO_8_TIMES
+#undef ADD_OR_SUBTRACT
+#undef ADDSUB2_OP
+#undef ADDSUB3_OP
+#undef LINMUL_OP
+#undef MULADD_OP
+
+}
+
+}
+#endif
diff --git a/src/math/mp/mp_asm.cpp b/src/math/mp/mp_asm.cpp
new file mode 100644
index 000000000..4fcdee7a4
--- /dev/null
+++ b/src/math/mp/mp_asm.cpp
@@ -0,0 +1,183 @@
+/*
+* Lowest Level MPI Algorithms
+* (C) 1999-2010 Jack Lloyd
+*     2006 Luca Piccarreta
+*
+* Distributed under the terms of the Botan license
+*/
+
+#include <botan/internal/mp_asm.h>
+#include <botan/internal/mp_asmi.h>
+#include <botan/internal/mp_core.h>
+#include <botan/exceptn.h>
+#include <botan/mem_ops.h>
+
+namespace Botan {
+
+extern "C" {
+
+/* Two Operand Addition, No Carry: x[0..x_size) += y[0..y_size); the carry
+* out is returned, not stored. Loops test with !=, so x_size >= y_size.
+*/
+word bigint_add2_nc(word x[], u32bit x_size, const word y[], u32bit y_size)
+   {
+   word carry = 0;
+
+   const u32bit blocks = y_size - (y_size % 8);
+
+   for(u32bit i = 0; i != blocks; i += 8)
+      carry = word8_add2(x + i, y + i, carry);
+
+   for(u32bit i = blocks; i != y_size; ++i)
+      x[i] = word_add(x[i], y[i], &carry);
+
+   for(u32bit i = y_size; i != x_size; ++i)
+      x[i] = word_add(x[i], 0, &carry);
+
+   return carry;
+   }
+
+/* Three Operand Addition, No Carry: z = x + y over max(x_size, y_size)
+* words (operands are swapped if x is shorter); returns the carry out
+*/
+word bigint_add3_nc(word z[], const word x[], u32bit x_size,
+                              const word y[], u32bit y_size)
+   {
+   if(x_size < y_size)
+      { return bigint_add3_nc(z, y, y_size, x, x_size); }
+
+   word carry = 0;
+
+   const u32bit blocks = y_size - (y_size % 8);
+
+   for(u32bit i = 0; i != blocks; i += 8)
+      carry = word8_add3(z + i, x + i, y + i, carry);
+
+   for(u32bit i = blocks; i != y_size; ++i)
+      z[i] = word_add(x[i], y[i], &carry);
+
+   for(u32bit i = y_size; i != x_size; ++i)
+      z[i] = word_add(x[i], 0, &carry);
+
+   return carry;
+   }
+
+/* Two Operand Addition: x += y; the carry out is added into x[x_size],
+* so x must provide at least x_size+1 words
+*/
+void bigint_add2(word x[], u32bit x_size, const word y[], u32bit y_size)
+   {
+   x[x_size] += bigint_add2_nc(x, x_size, y, y_size);
+   }
+
+/* Three Operand Addition: z = x + y; the carry is added (+=) into
+* z[max(x_size, y_size)], so that word must already hold a defined value
+*/
+void bigint_add3(word z[], const word x[], u32bit x_size,
+                           const word y[], u32bit y_size)
+   {
+   z[(x_size > y_size ? x_size : y_size)] +=
+      bigint_add3_nc(z, x, x_size, y, y_size);
+   }
+
+/* Two Operand Subtraction: x[0..x_size) -= y[0..y_size); returns the final
+* borrow (nonzero if y > x). Loops test with !=, so x_size >= y_size.
+*/
+word bigint_sub2(word x[], u32bit x_size, const word y[], u32bit y_size)
+   {
+   word borrow = 0;
+
+   const u32bit blocks = y_size - (y_size % 8);
+
+   for(u32bit i = 0; i != blocks; i += 8)
+      borrow = word8_sub2(x + i, y + i, borrow);
+
+   for(u32bit i = blocks; i != y_size; ++i)
+      x[i] = word_sub(x[i], y[i], &borrow);
+
+   for(u32bit i = y_size; i != x_size; ++i)
+      x[i] = word_sub(x[i], 0, &borrow);
+
+   return borrow;
+   }
+
+/* Two Operand Subtraction x = y - x, over y_size words of both arrays;
+* throws Internal_Error if a borrow remains (i.e. x was larger than y)
+*/
+void bigint_sub2_rev(word x[],  const word y[], u32bit y_size)
+   {
+   word borrow = 0;
+
+   const u32bit blocks = y_size - (y_size % 8);
+
+   for(u32bit i = 0; i != blocks; i += 8)
+      borrow = word8_sub2_rev(x + i, y + i, borrow);
+
+   for(u32bit i = blocks; i != y_size; ++i)
+      x[i] = word_sub(y[i], x[i], &borrow);
+
+   if(borrow)
+      throw Internal_Error("bigint_sub2_rev: x >= y");
+   }
+
+/* Three Operand Subtraction: z[0..x_size) = x - y; returns the final
+* borrow. Loops test with !=, so x_size >= y_size is required.
+*/
+word bigint_sub3(word z[], const word x[], u32bit x_size,
+                           const word y[], u32bit y_size)
+   {
+   word borrow = 0;
+
+   const u32bit blocks = y_size - (y_size % 8);
+
+   for(u32bit i = 0; i != blocks; i += 8)
+      borrow = word8_sub3(z + i, x + i, y + i, borrow);
+
+   for(u32bit i = blocks; i != y_size; ++i)
+      z[i] = word_sub(x[i], y[i], &borrow);
+
+   for(u32bit i = y_size; i != x_size; ++i)
+      z[i] = word_sub(x[i], 0, &borrow);
+
+   return borrow;
+   }
+
+/* Two Operand Linear Multiply: x *= y (single word); the final carry is
+* written to x[x_size], so x must provide at least x_size+1 words
+*/
+void bigint_linmul2(word x[], u32bit x_size, word y)
+   {
+   const u32bit blocks = x_size - (x_size % 8);
+
+   word carry = 0;
+
+   for(u32bit i = 0; i != blocks; i += 8)
+      carry = word8_linmul2(x + i, y, carry);
+
+   for(u32bit i = blocks; i != x_size; ++i)
+      x[i] = word_madd2(x[i], y, &carry);
+
+   x[x_size] = carry;
+   }
+
+/* Three Operand Linear Multiply: z = x * y (single word); the final carry
+* is written to z[x_size], so z must provide at least x_size+1 words
+*/
+void bigint_linmul3(word z[], const word x[], u32bit x_size, word y)
+   {
+   const u32bit blocks = x_size - (x_size % 8);
+
+   word carry = 0;
+
+   for(u32bit i = 0; i != blocks; i += 8)
+      carry = word8_linmul3(z + i, x + i, y, carry);
+
+   for(u32bit i = blocks; i != x_size; ++i)
+      z[i] = word_madd2(x[i], y, &carry);
+
+   z[x_size] = carry;
+   }
+
+}
+
+}
diff --git a/src/math/mp/mp_asm64/info.txt b/src/math/mp/mp_asm64/info.txt
new file mode 100644
index 000000000..fd0242a7a
--- /dev/null
+++ b/src/math/mp/mp_asm64/info.txt
@@ -0,0 +1,25 @@
+mp_bits 64
+
+load_on dep
+
+<header:internal>
+mp_asm.h
+mp_generic:mp_asmi.h
+</header:internal>
+
+<arch>
+#amd64
+alpha
+ia64
+mips64
+ppc64
+sparc64
+</arch>
+
+# The inline asm only works with gcc, but it looks like (at least on
+# UltraSPARC), using 64-bit words and the synthesized multiply is a 5 to 25%
+# win, so it's probably worth using elsewhere.
+<cc>
+gcc
+sunwspro
+</cc>
diff --git a/src/math/mp/mp_asm64/mp_asm.h b/src/math/mp/mp_asm64/mp_asm.h
new file mode 100644
index 000000000..b0906095d
--- /dev/null
+++ b/src/math/mp/mp_asm64/mp_asm.h
@@ -0,0 +1,122 @@
+/*
+* MPI Multiply-Add Core
+* (C) 1999-2007 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_MP_MADD_H__
+#define BOTAN_MP_MADD_H__
+
+#include <botan/mp_types.h>
+
+namespace Botan {
+
+#if (BOTAN_MP_WORD_BITS != 64)
+   #error The mp_asm64 module requires that BOTAN_MP_WORD_BITS == 64
+#endif
+
+#if defined(BOTAN_TARGET_ARCH_IS_ALPHA)
+// Alpha: z0 = high word (umulh), z1 = low word (wrapping multiply). No ';' after while(0): the caller supplies it.
+#define BOTAN_WORD_MUL(a,b,z1,z0) do {                   \
+   asm("umulh %1,%2,%0" : "=r" (z0) : "r" (a), "r" (b)); \
+   (z1) = (a) * (b); /* a and b are expanded a second time here */ \
+} while(0)
+
+#elif defined(BOTAN_TARGET_ARCH_IS_AMD64)
+// AMD64: mulq returns the high word in rdx (z0) and the low word in rax (z1). No ';' after while(0): the caller supplies it.
+#define BOTAN_WORD_MUL(a,b,z1,z0) do {       \
+   asm("mulq %3" : "=d" (z0), "=a" (z1) :    \
+       "a" (a), "rm" (b) : "cc");            \
+} while(0)
+
+#elif defined(BOTAN_TARGET_ARCH_IS_IA64)
+// IA64: z0 = high word (xmpy.hu), z1 = low word (wrapping multiply). No ';' after while(0): the caller supplies it.
+#define BOTAN_WORD_MUL(a,b,z1,z0) do {                     \
+   asm("xmpy.hu %0=%1,%2" : "=f" (z0) : "f" (a), "f" (b)); \
+   (z1) = (a) * (b); /* a and b are expanded a second time here */ \
+} while(0)
+
+#elif defined(BOTAN_TARGET_ARCH_IS_PPC64)
+// PPC64: z0 = high word (mulhdu), z1 = low word (wrapping multiply). No ';' after while(0): the caller supplies it.
+#define BOTAN_WORD_MUL(a,b,z1,z0) do {                           \
+   asm("mulhdu %0,%1,%2" : "=r" (z0) : "r" (a), "r" (b) : "cc"); \
+   (z1) = (a) * (b); /* a and b are expanded a second time here */ \
+} while(0)
+
+#elif defined(BOTAN_TARGET_ARCH_IS_MIPS64)
+// MIPS64: let the compiler do the 128-bit multiply. No ';' after while(0): the caller supplies it.
+#define BOTAN_WORD_MUL(a,b,z1,z0) do {                            \
+   typedef unsigned int uint128_t __attribute__((mode(TI)));      \
+   uint128_t r = (uint128_t)(a) * (b); /* full double-width product */ \
+   (z0) = (r >> 64) & 0xFFFFFFFFFFFFFFFF; /* high word */         \
+   (z1) = (r      ) & 0xFFFFFFFFFFFFFFFF; /* low word */          \
+} while(0)
+
+#else
+
+// Portable 64x64->128 multiply assembled from four half-word products
+// plus shifts and adds. Fallback for CPUs such as UltraSPARC that have
+// 64-bit registers/ALU but no widening 64x64->128 multiply.
+inline void bigint_2word_mul(word a, word b, word* z1, word* z0)
+   {
+   const u32bit half_bits = BOTAN_MP_WORD_BITS / 2;
+   const word half_base = (word)1 << half_bits;
+   const word low_mask = half_base - 1;
+
+   const word a_top = a >> half_bits, a_bot = a & low_mask;
+   const word b_top = b >> half_bits, b_bot = b & low_mask;
+
+   const word bot_bot = a_bot * b_bot;
+   const word bot_top = a_bot * b_top;
+   word top_top = a_top * b_top;
+
+   // Middle column: both cross products plus the upper half of the low product
+   word middle = a_top * b_bot + (bot_bot >> half_bits);
+   middle += bot_top;
+   if(middle < bot_top) // sum wrapped: that carry is worth half_base in the high word
+      top_top += half_base;
+
+   // z0 receives the high word, z1 the low word
+   *z0 = top_top + (middle >> half_bits);
+   *z1 = ((middle & low_mask) << half_bits) + (bot_bot & low_mask);
+   }
+
+#define BOTAN_WORD_MUL(a,b,z1,z0) bigint_2word_mul(a, b, &z1, &z0)
+
+#endif
+
+/*
+* Word Multiply/Add: returns the low word of a*b + *c and stores the high word in *c
+*/
+inline word word_madd2(word a, word b, word* c)
+   {
+   word high = 0, low = 0;
+   BOTAN_WORD_MUL(a, b, low, high);
+
+   low += *c;
+   if(low < *c) // the addition wrapped: carry into the high word
+      ++high;
+   *c = high;
+   return low;
+   }
+
+/*
+* Word Multiply/Add, two addends: returns the low word of a*b + c + *d, high word goes to *d
+*/
+inline word word_madd3(word a, word b, word c, word* d)
+   {
+   word high = 0, low = 0;
+   BOTAN_WORD_MUL(a, b, low, high);
+
+   low += c;
+   if(low < c) ++high; // first addition wrapped
+   low += *d;
+   if(low < *d) ++high; // second addition wrapped
+   *d = high;
+   return low;
+   }
+
+}
+
+#endif
diff --git a/src/math/mp/mp_comba.cpp b/src/math/mp/mp_comba.cpp
new file mode 100644
index 000000000..2770d3f0a
--- /dev/null
+++ b/src/math/mp/mp_comba.cpp
@@ -0,0 +1,920 @@
+/*
+* Comba Multiplication and Squaring
+* (C) 1999-2007 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#include <botan/internal/mp_core.h>
+#include <botan/internal/mp_asmi.h>
+
+namespace Botan {
+
+extern "C" {
+
+/*
+* Comba 4x4 Squaring: fills z[0..7] from x[0..3] one column at a time (column k uses every x[i], x[j] with i + j == k)
+*/
+void bigint_comba_sqr4(word z[8], const word x[4])
+   {
+   word w2 = 0, w1 = 0, w0 = 0; // three-word accumulator handed to the word3_muladd helpers
+
+   word3_muladd(&w2, &w1, &w0, x[0], x[0]);
+   z[0] = w0; w0 = w1; w1 = w2; w2 = 0; // store the finished column, slide the accumulator down one word
+
+   word3_muladd_2(&w2, &w1, &w0, x[0], x[1]); // off-diagonal pair (i < j): the _2 helper presumably adds the product twice - confirm in mp_asmi.h
+   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[0], x[2]);
+   word3_muladd(&w2, &w1, &w0, x[1], x[1]); // diagonal term (i == j) goes through the plain helper
+   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[0], x[3]);
+   word3_muladd_2(&w2, &w1, &w0, x[1], x[2]);
+   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[1], x[3]);
+   word3_muladd(&w2, &w1, &w0, x[2], x[2]);
+   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[2], x[3]);
+   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[3], x[3]);
+   z[6] = w0;
+   z[7] = w1; // top output word is what remains in w1; w2 is not stored
+   }
+
+/*
+* Comba 4x4 Multiplication: fills z[0..7] from x[0..3] and y[0..3] one column at a time (column k uses every x[i], y[j] with i + j == k)
+*/
+void bigint_comba_mul4(word z[8], const word x[4], const word y[4])
+   {
+   word w2 = 0, w1 = 0, w0 = 0; // three-word accumulator handed to word3_muladd
+
+   word3_muladd(&w2, &w1, &w0, x[0], y[0]);
+   z[0] = w0; w0 = w1; w1 = w2; w2 = 0; // store the finished column, slide the accumulator down one word
+
+   word3_muladd(&w2, &w1, &w0, x[0], y[1]);
+   word3_muladd(&w2, &w1, &w0, x[1], y[0]);
+   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[0], y[2]);
+   word3_muladd(&w2, &w1, &w0, x[1], y[1]);
+   word3_muladd(&w2, &w1, &w0, x[2], y[0]);
+   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[0], y[3]);
+   word3_muladd(&w2, &w1, &w0, x[1], y[2]);
+   word3_muladd(&w2, &w1, &w0, x[2], y[1]);
+   word3_muladd(&w2, &w1, &w0, x[3], y[0]);
+   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[1], y[3]);
+   word3_muladd(&w2, &w1, &w0, x[2], y[2]);
+   word3_muladd(&w2, &w1, &w0, x[3], y[1]);
+   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[2], y[3]);
+   word3_muladd(&w2, &w1, &w0, x[3], y[2]);
+   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[3], y[3]);
+   z[6] = w0;
+   z[7] = w1; // top output word is what remains in w1; w2 is not stored
+   }
+
+/*
+* Comba 6x6 Squaring: fills z[0..11] from x[0..5] one column at a time (column k uses every x[i], x[j] with i + j == k)
+*/
+void bigint_comba_sqr6(word z[12], const word x[6])
+   {
+   word w2 = 0, w1 = 0, w0 = 0; // three-word accumulator handed to the word3_muladd helpers
+
+   word3_muladd(&w2, &w1, &w0, x[0], x[0]);
+   z[0] = w0; w0 = w1; w1 = w2; w2 = 0; // store the finished column, slide the accumulator down one word
+
+   word3_muladd_2(&w2, &w1, &w0, x[0], x[1]); // off-diagonal pair (i < j): the _2 helper presumably adds the product twice - confirm in mp_asmi.h
+   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[0], x[2]);
+   word3_muladd(&w2, &w1, &w0, x[1], x[1]); // diagonal term (i == j) goes through the plain helper
+   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[0], x[3]);
+   word3_muladd_2(&w2, &w1, &w0, x[1], x[2]);
+   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[0], x[4]);
+   word3_muladd_2(&w2, &w1, &w0, x[1], x[3]);
+   word3_muladd(&w2, &w1, &w0, x[2], x[2]);
+   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[0], x[5]);
+   word3_muladd_2(&w2, &w1, &w0, x[1], x[4]);
+   word3_muladd_2(&w2, &w1, &w0, x[2], x[3]);
+   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[1], x[5]);
+   word3_muladd_2(&w2, &w1, &w0, x[2], x[4]);
+   word3_muladd(&w2, &w1, &w0, x[3], x[3]);
+   z[6] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[2], x[5]);
+   word3_muladd_2(&w2, &w1, &w0, x[3], x[4]);
+   z[7] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[3], x[5]);
+   word3_muladd(&w2, &w1, &w0, x[4], x[4]);
+   z[8] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[4], x[5]);
+   z[9] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[5], x[5]);
+   z[10] = w0;
+   z[11] = w1; // top output word is what remains in w1; w2 is not stored
+   }
+
+/*
+* Comba 6x6 Multiplication: fills z[0..11] from x[0..5] and y[0..5] one column at a time (column k uses every x[i], y[j] with i + j == k)
+*/
+void bigint_comba_mul6(word z[12], const word x[6], const word y[6])
+   {
+   word w2 = 0, w1 = 0, w0 = 0; // three-word accumulator handed to word3_muladd
+
+   word3_muladd(&w2, &w1, &w0, x[0], y[0]);
+   z[0] = w0; w0 = w1; w1 = w2; w2 = 0; // store the finished column, slide the accumulator down one word
+
+   word3_muladd(&w2, &w1, &w0, x[0], y[1]);
+   word3_muladd(&w2, &w1, &w0, x[1], y[0]);
+   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[0], y[2]);
+   word3_muladd(&w2, &w1, &w0, x[1], y[1]);
+   word3_muladd(&w2, &w1, &w0, x[2], y[0]);
+   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[0], y[3]);
+   word3_muladd(&w2, &w1, &w0, x[1], y[2]);
+   word3_muladd(&w2, &w1, &w0, x[2], y[1]);
+   word3_muladd(&w2, &w1, &w0, x[3], y[0]);
+   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[0], y[4]);
+   word3_muladd(&w2, &w1, &w0, x[1], y[3]);
+   word3_muladd(&w2, &w1, &w0, x[2], y[2]);
+   word3_muladd(&w2, &w1, &w0, x[3], y[1]);
+   word3_muladd(&w2, &w1, &w0, x[4], y[0]);
+   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[0], y[5]);
+   word3_muladd(&w2, &w1, &w0, x[1], y[4]);
+   word3_muladd(&w2, &w1, &w0, x[2], y[3]);
+   word3_muladd(&w2, &w1, &w0, x[3], y[2]);
+   word3_muladd(&w2, &w1, &w0, x[4], y[1]);
+   word3_muladd(&w2, &w1, &w0, x[5], y[0]);
+   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[1], y[5]);
+   word3_muladd(&w2, &w1, &w0, x[2], y[4]);
+   word3_muladd(&w2, &w1, &w0, x[3], y[3]);
+   word3_muladd(&w2, &w1, &w0, x[4], y[2]);
+   word3_muladd(&w2, &w1, &w0, x[5], y[1]);
+   z[6] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[2], y[5]);
+   word3_muladd(&w2, &w1, &w0, x[3], y[4]);
+   word3_muladd(&w2, &w1, &w0, x[4], y[3]);
+   word3_muladd(&w2, &w1, &w0, x[5], y[2]);
+   z[7] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[3], y[5]);
+   word3_muladd(&w2, &w1, &w0, x[4], y[4]);
+   word3_muladd(&w2, &w1, &w0, x[5], y[3]);
+   z[8] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[4], y[5]);
+   word3_muladd(&w2, &w1, &w0, x[5], y[4]);
+   z[9] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[5], y[5]);
+   z[10] = w0;
+   z[11] = w1; // top output word is what remains in w1; w2 is not stored
+   }
+
+/*
+* Comba 8x8 Squaring: fills z[0..15] from x[0..7] one column at a time (column k uses every x[i], x[j] with i + j == k)
+*/
+void bigint_comba_sqr8(word z[16], const word x[8])
+   {
+   word w2 = 0, w1 = 0, w0 = 0; // three-word accumulator handed to the word3_muladd helpers
+
+   word3_muladd(&w2, &w1, &w0, x[0], x[0]);
+   z[0] = w0; w0 = w1; w1 = w2; w2 = 0; // store the finished column, slide the accumulator down one word
+
+   word3_muladd_2(&w2, &w1, &w0, x[0], x[1]); // off-diagonal pair (i < j): the _2 helper presumably adds the product twice - confirm in mp_asmi.h
+   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[0], x[2]);
+   word3_muladd(&w2, &w1, &w0, x[1], x[1]); // diagonal term (i == j) goes through the plain helper
+   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[0], x[3]);
+   word3_muladd_2(&w2, &w1, &w0, x[1], x[2]);
+   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[0], x[4]);
+   word3_muladd_2(&w2, &w1, &w0, x[1], x[3]);
+   word3_muladd(&w2, &w1, &w0, x[2], x[2]);
+   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[0], x[5]);
+   word3_muladd_2(&w2, &w1, &w0, x[1], x[4]);
+   word3_muladd_2(&w2, &w1, &w0, x[2], x[3]);
+   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[0], x[6]);
+   word3_muladd_2(&w2, &w1, &w0, x[1], x[5]);
+   word3_muladd_2(&w2, &w1, &w0, x[2], x[4]);
+   word3_muladd(&w2, &w1, &w0, x[3], x[3]);
+   z[6] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[0], x[7]);
+   word3_muladd_2(&w2, &w1, &w0, x[1], x[6]);
+   word3_muladd_2(&w2, &w1, &w0, x[2], x[5]);
+   word3_muladd_2(&w2, &w1, &w0, x[3], x[4]);
+   z[7] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[1], x[7]);
+   word3_muladd_2(&w2, &w1, &w0, x[2], x[6]);
+   word3_muladd_2(&w2, &w1, &w0, x[3], x[5]);
+   word3_muladd(&w2, &w1, &w0, x[4], x[4]);
+   z[8] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[2], x[7]);
+   word3_muladd_2(&w2, &w1, &w0, x[3], x[6]);
+   word3_muladd_2(&w2, &w1, &w0, x[4], x[5]);
+   z[9] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[3], x[7]);
+   word3_muladd_2(&w2, &w1, &w0, x[4], x[6]);
+   word3_muladd(&w2, &w1, &w0, x[5], x[5]);
+   z[10] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[4], x[7]);
+   word3_muladd_2(&w2, &w1, &w0, x[5], x[6]);
+   z[11] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[5], x[7]);
+   word3_muladd(&w2, &w1, &w0, x[6], x[6]);
+   z[12] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[6], x[7]);
+   z[13] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[7], x[7]);
+   z[14] = w0;
+   z[15] = w1; // top output word is what remains in w1; w2 is not stored
+   }
+
+/*
+* Comba 8x8 Multiplication: fills z[0..15] from x[0..7] and y[0..7] one column at a time (column k uses every x[i], y[j] with i + j == k)
+*/
+void bigint_comba_mul8(word z[16], const word x[8], const word y[8])
+   {
+   word w2 = 0, w1 = 0, w0 = 0; // three-word accumulator handed to word3_muladd
+
+   word3_muladd(&w2, &w1, &w0, x[0], y[0]);
+   z[0] = w0; w0 = w1; w1 = w2; w2 = 0; // store the finished column, slide the accumulator down one word
+
+   word3_muladd(&w2, &w1, &w0, x[0], y[1]);
+   word3_muladd(&w2, &w1, &w0, x[1], y[0]);
+   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[0], y[2]);
+   word3_muladd(&w2, &w1, &w0, x[1], y[1]);
+   word3_muladd(&w2, &w1, &w0, x[2], y[0]);
+   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[0], y[3]);
+   word3_muladd(&w2, &w1, &w0, x[1], y[2]);
+   word3_muladd(&w2, &w1, &w0, x[2], y[1]);
+   word3_muladd(&w2, &w1, &w0, x[3], y[0]);
+   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[0], y[4]);
+   word3_muladd(&w2, &w1, &w0, x[1], y[3]);
+   word3_muladd(&w2, &w1, &w0, x[2], y[2]);
+   word3_muladd(&w2, &w1, &w0, x[3], y[1]);
+   word3_muladd(&w2, &w1, &w0, x[4], y[0]);
+   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[0], y[5]);
+   word3_muladd(&w2, &w1, &w0, x[1], y[4]);
+   word3_muladd(&w2, &w1, &w0, x[2], y[3]);
+   word3_muladd(&w2, &w1, &w0, x[3], y[2]);
+   word3_muladd(&w2, &w1, &w0, x[4], y[1]);
+   word3_muladd(&w2, &w1, &w0, x[5], y[0]);
+   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[0], y[6]);
+   word3_muladd(&w2, &w1, &w0, x[1], y[5]);
+   word3_muladd(&w2, &w1, &w0, x[2], y[4]);
+   word3_muladd(&w2, &w1, &w0, x[3], y[3]);
+   word3_muladd(&w2, &w1, &w0, x[4], y[2]);
+   word3_muladd(&w2, &w1, &w0, x[5], y[1]);
+   word3_muladd(&w2, &w1, &w0, x[6], y[0]);
+   z[6] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[0], y[7]);
+   word3_muladd(&w2, &w1, &w0, x[1], y[6]);
+   word3_muladd(&w2, &w1, &w0, x[2], y[5]);
+   word3_muladd(&w2, &w1, &w0, x[3], y[4]);
+   word3_muladd(&w2, &w1, &w0, x[4], y[3]);
+   word3_muladd(&w2, &w1, &w0, x[5], y[2]);
+   word3_muladd(&w2, &w1, &w0, x[6], y[1]);
+   word3_muladd(&w2, &w1, &w0, x[7], y[0]);
+   z[7] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[1], y[7]);
+   word3_muladd(&w2, &w1, &w0, x[2], y[6]);
+   word3_muladd(&w2, &w1, &w0, x[3], y[5]);
+   word3_muladd(&w2, &w1, &w0, x[4], y[4]);
+   word3_muladd(&w2, &w1, &w0, x[5], y[3]);
+   word3_muladd(&w2, &w1, &w0, x[6], y[2]);
+   word3_muladd(&w2, &w1, &w0, x[7], y[1]);
+   z[8] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[2], y[7]);
+   word3_muladd(&w2, &w1, &w0, x[3], y[6]);
+   word3_muladd(&w2, &w1, &w0, x[4], y[5]);
+   word3_muladd(&w2, &w1, &w0, x[5], y[4]);
+   word3_muladd(&w2, &w1, &w0, x[6], y[3]);
+   word3_muladd(&w2, &w1, &w0, x[7], y[2]);
+   z[9] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[3], y[7]);
+   word3_muladd(&w2, &w1, &w0, x[4], y[6]);
+   word3_muladd(&w2, &w1, &w0, x[5], y[5]);
+   word3_muladd(&w2, &w1, &w0, x[6], y[4]);
+   word3_muladd(&w2, &w1, &w0, x[7], y[3]);
+   z[10] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[4], y[7]);
+   word3_muladd(&w2, &w1, &w0, x[5], y[6]);
+   word3_muladd(&w2, &w1, &w0, x[6], y[5]);
+   word3_muladd(&w2, &w1, &w0, x[7], y[4]);
+   z[11] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[5], y[7]);
+   word3_muladd(&w2, &w1, &w0, x[6], y[6]);
+   word3_muladd(&w2, &w1, &w0, x[7], y[5]);
+   z[12] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[6], y[7]);
+   word3_muladd(&w2, &w1, &w0, x[7], y[6]);
+   z[13] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[7], y[7]);
+   z[14] = w0;
+   z[15] = w1; // top output word is what remains in w1; w2 is not stored
+   }
+
+/*
+* Comba 16x16 Squaring: fills z[0..31] from x[0..15] one column at a time (column k uses every x[i], x[j] with i + j == k)
+*/
+void bigint_comba_sqr16(word z[32], const word x[16])
+   {
+   word w2 = 0, w1 = 0, w0 = 0; // three-word accumulator handed to the word3_muladd helpers
+
+   word3_muladd(&w2, &w1, &w0, x[ 0], x[ 0]);
+   z[ 0] = w0; w0 = w1; w1 = w2; w2 = 0; // store the finished column, slide the accumulator down one word
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 1]); // off-diagonal pair (i < j): the _2 helper presumably adds the product twice - confirm in mp_asmi.h
+   z[ 1] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 2]);
+   word3_muladd(&w2, &w1, &w0, x[ 1], x[ 1]); // diagonal term (i == j) goes through the plain helper
+   z[ 2] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 3]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 2]);
+   z[ 3] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 4]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 3]);
+   word3_muladd(&w2, &w1, &w0, x[ 2], x[ 2]);
+   z[ 4] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 5]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 4]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 3]);
+   z[ 5] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 6]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 5]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 4]);
+   word3_muladd(&w2, &w1, &w0, x[ 3], x[ 3]);
+   z[ 6] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 7]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 6]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 5]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 4]);
+   z[ 7] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 8]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 7]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 6]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 5]);
+   word3_muladd(&w2, &w1, &w0, x[ 4], x[ 4]);
+   z[ 8] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 9]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 8]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 7]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 6]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 5]);
+   z[ 9] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[10]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 9]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 8]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 7]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 6]);
+   word3_muladd(&w2, &w1, &w0, x[ 5], x[ 5]);
+   z[10] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[11]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[10]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 9]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 8]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 7]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 6]);
+   z[11] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[12]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[11]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[10]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 9]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 8]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 7]);
+   word3_muladd(&w2, &w1, &w0, x[ 6], x[ 6]);
+   z[12] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[13]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[12]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[11]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[10]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 9]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 8]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 7]);
+   z[13] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[14]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[13]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[12]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[11]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[10]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 9]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 8]);
+   word3_muladd(&w2, &w1, &w0, x[ 7], x[ 7]);
+   z[14] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 0], x[15]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[14]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[13]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[12]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[11]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[10]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 9]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 7], x[ 8]);
+   z[15] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 1], x[15]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[14]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[13]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[12]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[11]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 6], x[10]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 7], x[ 9]);
+   word3_muladd(&w2, &w1, &w0, x[ 8], x[ 8]);
+   z[16] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 2], x[15]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[14]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[13]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[12]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 6], x[11]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 7], x[10]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 8], x[ 9]);
+   z[17] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 3], x[15]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[14]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[13]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 6], x[12]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 7], x[11]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 8], x[10]);
+   word3_muladd(&w2, &w1, &w0, x[ 9], x[ 9]);
+   z[18] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 4], x[15]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[14]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 6], x[13]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 7], x[12]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 8], x[11]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 9], x[10]);
+   z[19] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 5], x[15]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 6], x[14]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 7], x[13]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 8], x[12]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 9], x[11]);
+   word3_muladd(&w2, &w1, &w0, x[10], x[10]);
+   z[20] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 6], x[15]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 7], x[14]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 8], x[13]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 9], x[12]);
+   word3_muladd_2(&w2, &w1, &w0, x[10], x[11]);
+   z[21] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 7], x[15]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 8], x[14]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 9], x[13]);
+   word3_muladd_2(&w2, &w1, &w0, x[10], x[12]);
+   word3_muladd(&w2, &w1, &w0, x[11], x[11]);
+   z[22] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 8], x[15]);
+   word3_muladd_2(&w2, &w1, &w0, x[ 9], x[14]);
+   word3_muladd_2(&w2, &w1, &w0, x[10], x[13]);
+   word3_muladd_2(&w2, &w1, &w0, x[11], x[12]);
+   z[23] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[ 9], x[15]);
+   word3_muladd_2(&w2, &w1, &w0, x[10], x[14]);
+   word3_muladd_2(&w2, &w1, &w0, x[11], x[13]);
+   word3_muladd(&w2, &w1, &w0, x[12], x[12]);
+   z[24] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[10], x[15]);
+   word3_muladd_2(&w2, &w1, &w0, x[11], x[14]);
+   word3_muladd_2(&w2, &w1, &w0, x[12], x[13]);
+   z[25] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[11], x[15]);
+   word3_muladd_2(&w2, &w1, &w0, x[12], x[14]);
+   word3_muladd(&w2, &w1, &w0, x[13], x[13]);
+   z[26] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[12], x[15]);
+   word3_muladd_2(&w2, &w1, &w0, x[13], x[14]);
+   z[27] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[13], x[15]);
+   word3_muladd(&w2, &w1, &w0, x[14], x[14]);
+   z[28] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd_2(&w2, &w1, &w0, x[14], x[15]);
+   z[29] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[15], x[15]);
+   z[30] = w0;
+   z[31] = w1; // top output word is what remains in w1; w2 is not stored
+   }
+
+/*
+* Comba 16x16 Multiplication
+*/
+void bigint_comba_mul16(word z[32], const word x[16], const word y[16])
+   {
+   word w2 = 0, w1 = 0, w0 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 0]);
+   z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 1]);
+   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 0]);
+   z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 2]);
+   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 1]);
+   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 0]);
+   z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 3]);
+   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 2]);
+   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 1]);
+   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 0]);
+   z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 4]);
+   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 3]);
+   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 2]);
+   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 1]);
+   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 0]);
+   z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 5]);
+   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 4]);
+   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 3]);
+   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 2]);
+   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 1]);
+   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 0]);
+   z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 6]);
+   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 5]);
+   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 4]);
+   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 3]);
+   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 2]);
+   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 1]);
+   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 0]);
+   z[6] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 7]);
+   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 6]);
+   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 5]);
+   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 4]);
+   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 3]);
+   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 2]);
+   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 1]);
+   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 0]);
+   z[7] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 8]);
+   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 7]);
+   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 6]);
+   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 5]);
+   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 4]);
+   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 3]);
+   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 2]);
+   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 1]);
+   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 0]);
+   z[8] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 0], y[ 9]);
+   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 8]);
+   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 7]);
+   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 6]);
+   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 5]);
+   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 4]);
+   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 3]);
+   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 2]);
+   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 1]);
+   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 0]);
+   z[9] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 0], y[10]);
+   word3_muladd(&w2, &w1, &w0, x[ 1], y[ 9]);
+   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 8]);
+   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 7]);
+   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 6]);
+   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 5]);
+   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 4]);
+   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 3]);
+   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 2]);
+   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 1]);
+   word3_muladd(&w2, &w1, &w0, x[10], y[ 0]);
+   z[10] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 0], y[11]);
+   word3_muladd(&w2, &w1, &w0, x[ 1], y[10]);
+   word3_muladd(&w2, &w1, &w0, x[ 2], y[ 9]);
+   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 8]);
+   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 7]);
+   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 6]);
+   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 5]);
+   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 4]);
+   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 3]);
+   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 2]);
+   word3_muladd(&w2, &w1, &w0, x[10], y[ 1]);
+   word3_muladd(&w2, &w1, &w0, x[11], y[ 0]);
+   z[11] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 0], y[12]);
+   word3_muladd(&w2, &w1, &w0, x[ 1], y[11]);
+   word3_muladd(&w2, &w1, &w0, x[ 2], y[10]);
+   word3_muladd(&w2, &w1, &w0, x[ 3], y[ 9]);
+   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 8]);
+   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 7]);
+   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 6]);
+   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 5]);
+   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 4]);
+   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 3]);
+   word3_muladd(&w2, &w1, &w0, x[10], y[ 2]);
+   word3_muladd(&w2, &w1, &w0, x[11], y[ 1]);
+   word3_muladd(&w2, &w1, &w0, x[12], y[ 0]);
+   z[12] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 0], y[13]);
+   word3_muladd(&w2, &w1, &w0, x[ 1], y[12]);
+   word3_muladd(&w2, &w1, &w0, x[ 2], y[11]);
+   word3_muladd(&w2, &w1, &w0, x[ 3], y[10]);
+   word3_muladd(&w2, &w1, &w0, x[ 4], y[ 9]);
+   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 8]);
+   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 7]);
+   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 6]);
+   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 5]);
+   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 4]);
+   word3_muladd(&w2, &w1, &w0, x[10], y[ 3]);
+   word3_muladd(&w2, &w1, &w0, x[11], y[ 2]);
+   word3_muladd(&w2, &w1, &w0, x[12], y[ 1]);
+   word3_muladd(&w2, &w1, &w0, x[13], y[ 0]);
+   z[13] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 0], y[14]);
+   word3_muladd(&w2, &w1, &w0, x[ 1], y[13]);
+   word3_muladd(&w2, &w1, &w0, x[ 2], y[12]);
+   word3_muladd(&w2, &w1, &w0, x[ 3], y[11]);
+   word3_muladd(&w2, &w1, &w0, x[ 4], y[10]);
+   word3_muladd(&w2, &w1, &w0, x[ 5], y[ 9]);
+   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 8]);
+   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 7]);
+   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 6]);
+   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 5]);
+   word3_muladd(&w2, &w1, &w0, x[10], y[ 4]);
+   word3_muladd(&w2, &w1, &w0, x[11], y[ 3]);
+   word3_muladd(&w2, &w1, &w0, x[12], y[ 2]);
+   word3_muladd(&w2, &w1, &w0, x[13], y[ 1]);
+   word3_muladd(&w2, &w1, &w0, x[14], y[ 0]);
+   z[14] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 0], y[15]);
+   word3_muladd(&w2, &w1, &w0, x[ 1], y[14]);
+   word3_muladd(&w2, &w1, &w0, x[ 2], y[13]);
+   word3_muladd(&w2, &w1, &w0, x[ 3], y[12]);
+   word3_muladd(&w2, &w1, &w0, x[ 4], y[11]);
+   word3_muladd(&w2, &w1, &w0, x[ 5], y[10]);
+   word3_muladd(&w2, &w1, &w0, x[ 6], y[ 9]);
+   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 8]);
+   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 7]);
+   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 6]);
+   word3_muladd(&w2, &w1, &w0, x[10], y[ 5]);
+   word3_muladd(&w2, &w1, &w0, x[11], y[ 4]);
+   word3_muladd(&w2, &w1, &w0, x[12], y[ 3]);
+   word3_muladd(&w2, &w1, &w0, x[13], y[ 2]);
+   word3_muladd(&w2, &w1, &w0, x[14], y[ 1]);
+   word3_muladd(&w2, &w1, &w0, x[15], y[ 0]);
+   z[15] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 1], y[15]);
+   word3_muladd(&w2, &w1, &w0, x[ 2], y[14]);
+   word3_muladd(&w2, &w1, &w0, x[ 3], y[13]);
+   word3_muladd(&w2, &w1, &w0, x[ 4], y[12]);
+   word3_muladd(&w2, &w1, &w0, x[ 5], y[11]);
+   word3_muladd(&w2, &w1, &w0, x[ 6], y[10]);
+   word3_muladd(&w2, &w1, &w0, x[ 7], y[ 9]);
+   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 8]);
+   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 7]);
+   word3_muladd(&w2, &w1, &w0, x[10], y[ 6]);
+   word3_muladd(&w2, &w1, &w0, x[11], y[ 5]);
+   word3_muladd(&w2, &w1, &w0, x[12], y[ 4]);
+   word3_muladd(&w2, &w1, &w0, x[13], y[ 3]);
+   word3_muladd(&w2, &w1, &w0, x[14], y[ 2]);
+   word3_muladd(&w2, &w1, &w0, x[15], y[ 1]);
+   z[16] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 2], y[15]);
+   word3_muladd(&w2, &w1, &w0, x[ 3], y[14]);
+   word3_muladd(&w2, &w1, &w0, x[ 4], y[13]);
+   word3_muladd(&w2, &w1, &w0, x[ 5], y[12]);
+   word3_muladd(&w2, &w1, &w0, x[ 6], y[11]);
+   word3_muladd(&w2, &w1, &w0, x[ 7], y[10]);
+   word3_muladd(&w2, &w1, &w0, x[ 8], y[ 9]);
+   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 8]);
+   word3_muladd(&w2, &w1, &w0, x[10], y[ 7]);
+   word3_muladd(&w2, &w1, &w0, x[11], y[ 6]);
+   word3_muladd(&w2, &w1, &w0, x[12], y[ 5]);
+   word3_muladd(&w2, &w1, &w0, x[13], y[ 4]);
+   word3_muladd(&w2, &w1, &w0, x[14], y[ 3]);
+   word3_muladd(&w2, &w1, &w0, x[15], y[ 2]);
+   z[17] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 3], y[15]);
+   word3_muladd(&w2, &w1, &w0, x[ 4], y[14]);
+   word3_muladd(&w2, &w1, &w0, x[ 5], y[13]);
+   word3_muladd(&w2, &w1, &w0, x[ 6], y[12]);
+   word3_muladd(&w2, &w1, &w0, x[ 7], y[11]);
+   word3_muladd(&w2, &w1, &w0, x[ 8], y[10]);
+   word3_muladd(&w2, &w1, &w0, x[ 9], y[ 9]);
+   word3_muladd(&w2, &w1, &w0, x[10], y[ 8]);
+   word3_muladd(&w2, &w1, &w0, x[11], y[ 7]);
+   word3_muladd(&w2, &w1, &w0, x[12], y[ 6]);
+   word3_muladd(&w2, &w1, &w0, x[13], y[ 5]);
+   word3_muladd(&w2, &w1, &w0, x[14], y[ 4]);
+   word3_muladd(&w2, &w1, &w0, x[15], y[ 3]);
+   z[18] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 4], y[15]);
+   word3_muladd(&w2, &w1, &w0, x[ 5], y[14]);
+   word3_muladd(&w2, &w1, &w0, x[ 6], y[13]);
+   word3_muladd(&w2, &w1, &w0, x[ 7], y[12]);
+   word3_muladd(&w2, &w1, &w0, x[ 8], y[11]);
+   word3_muladd(&w2, &w1, &w0, x[ 9], y[10]);
+   word3_muladd(&w2, &w1, &w0, x[10], y[ 9]);
+   word3_muladd(&w2, &w1, &w0, x[11], y[ 8]);
+   word3_muladd(&w2, &w1, &w0, x[12], y[ 7]);
+   word3_muladd(&w2, &w1, &w0, x[13], y[ 6]);
+   word3_muladd(&w2, &w1, &w0, x[14], y[ 5]);
+   word3_muladd(&w2, &w1, &w0, x[15], y[ 4]);
+   z[19] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 5], y[15]);
+   word3_muladd(&w2, &w1, &w0, x[ 6], y[14]);
+   word3_muladd(&w2, &w1, &w0, x[ 7], y[13]);
+   word3_muladd(&w2, &w1, &w0, x[ 8], y[12]);
+   word3_muladd(&w2, &w1, &w0, x[ 9], y[11]);
+   word3_muladd(&w2, &w1, &w0, x[10], y[10]);
+   word3_muladd(&w2, &w1, &w0, x[11], y[ 9]);
+   word3_muladd(&w2, &w1, &w0, x[12], y[ 8]);
+   word3_muladd(&w2, &w1, &w0, x[13], y[ 7]);
+   word3_muladd(&w2, &w1, &w0, x[14], y[ 6]);
+   word3_muladd(&w2, &w1, &w0, x[15], y[ 5]);
+   z[20] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 6], y[15]);
+   word3_muladd(&w2, &w1, &w0, x[ 7], y[14]);
+   word3_muladd(&w2, &w1, &w0, x[ 8], y[13]);
+   word3_muladd(&w2, &w1, &w0, x[ 9], y[12]);
+   word3_muladd(&w2, &w1, &w0, x[10], y[11]);
+   word3_muladd(&w2, &w1, &w0, x[11], y[10]);
+   word3_muladd(&w2, &w1, &w0, x[12], y[ 9]);
+   word3_muladd(&w2, &w1, &w0, x[13], y[ 8]);
+   word3_muladd(&w2, &w1, &w0, x[14], y[ 7]);
+   word3_muladd(&w2, &w1, &w0, x[15], y[ 6]);
+   z[21] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 7], y[15]);
+   word3_muladd(&w2, &w1, &w0, x[ 8], y[14]);
+   word3_muladd(&w2, &w1, &w0, x[ 9], y[13]);
+   word3_muladd(&w2, &w1, &w0, x[10], y[12]);
+   word3_muladd(&w2, &w1, &w0, x[11], y[11]);
+   word3_muladd(&w2, &w1, &w0, x[12], y[10]);
+   word3_muladd(&w2, &w1, &w0, x[13], y[ 9]);
+   word3_muladd(&w2, &w1, &w0, x[14], y[ 8]);
+   word3_muladd(&w2, &w1, &w0, x[15], y[ 7]);
+   z[22] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 8], y[15]);
+   word3_muladd(&w2, &w1, &w0, x[ 9], y[14]);
+   word3_muladd(&w2, &w1, &w0, x[10], y[13]);
+   word3_muladd(&w2, &w1, &w0, x[11], y[12]);
+   word3_muladd(&w2, &w1, &w0, x[12], y[11]);
+   word3_muladd(&w2, &w1, &w0, x[13], y[10]);
+   word3_muladd(&w2, &w1, &w0, x[14], y[ 9]);
+   word3_muladd(&w2, &w1, &w0, x[15], y[ 8]);
+   z[23] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[ 9], y[15]);
+   word3_muladd(&w2, &w1, &w0, x[10], y[14]);
+   word3_muladd(&w2, &w1, &w0, x[11], y[13]);
+   word3_muladd(&w2, &w1, &w0, x[12], y[12]);
+   word3_muladd(&w2, &w1, &w0, x[13], y[11]);
+   word3_muladd(&w2, &w1, &w0, x[14], y[10]);
+   word3_muladd(&w2, &w1, &w0, x[15], y[ 9]);
+   z[24] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[10], y[15]);
+   word3_muladd(&w2, &w1, &w0, x[11], y[14]);
+   word3_muladd(&w2, &w1, &w0, x[12], y[13]);
+   word3_muladd(&w2, &w1, &w0, x[13], y[12]);
+   word3_muladd(&w2, &w1, &w0, x[14], y[11]);
+   word3_muladd(&w2, &w1, &w0, x[15], y[10]);
+   z[25] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[11], y[15]);
+   word3_muladd(&w2, &w1, &w0, x[12], y[14]);
+   word3_muladd(&w2, &w1, &w0, x[13], y[13]);
+   word3_muladd(&w2, &w1, &w0, x[14], y[12]);
+   word3_muladd(&w2, &w1, &w0, x[15], y[11]);
+   z[26] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[12], y[15]);
+   word3_muladd(&w2, &w1, &w0, x[13], y[14]);
+   word3_muladd(&w2, &w1, &w0, x[14], y[13]);
+   word3_muladd(&w2, &w1, &w0, x[15], y[12]);
+   z[27] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[13], y[15]);
+   word3_muladd(&w2, &w1, &w0, x[14], y[14]);
+   word3_muladd(&w2, &w1, &w0, x[15], y[13]);
+   z[28] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[14], y[15]);
+   word3_muladd(&w2, &w1, &w0, x[15], y[14]);
+   z[29] = w0; w0 = w1; w1 = w2; w2 = 0;
+
+   word3_muladd(&w2, &w1, &w0, x[15], y[15]);
+   z[30] = w0;
+   z[31] = w1;
+   }
+
+}
+
+}
diff --git a/src/math/mp/mp_core.h b/src/math/mp/mp_core.h
new file mode 100644
index 000000000..63082795f
--- /dev/null
+++ b/src/math/mp/mp_core.h
@@ -0,0 +1,144 @@
+/*
+* MPI Algorithms
+* (C) 1999-2010 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_MP_CORE_H__
+#define BOTAN_MP_CORE_H__
+
+#include <botan/mp_types.h>
+
+namespace Botan {
+
+/*
+* The size of the word type, in bits
+*/
+const u32bit MP_WORD_BITS = BOTAN_MP_WORD_BITS;
+
+extern "C" {
+
+/*
+* Addition/Subtraction Operations
+*/
+void bigint_add2(word x[], u32bit x_size,
+                 const word y[], u32bit y_size);
+
+void bigint_add3(word z[],
+                 const word x[], u32bit x_size,
+                 const word y[], u32bit y_size);
+
+word bigint_add2_nc(word x[], u32bit x_size, const word y[], u32bit y_size);
+
+word bigint_add3_nc(word z[],
+                    const word x[], u32bit x_size,
+                    const word y[], u32bit y_size);
+
+word bigint_sub2(word x[], u32bit x_size,
+                 const word y[], u32bit y_size);
+
+/**
+* x = y - x; assumes y >= x
+*/
+void bigint_sub2_rev(word x[], const word y[], u32bit y_size);
+
+word bigint_sub3(word z[],
+                 const word x[], u32bit x_size,
+                 const word y[], u32bit y_size);
+
+/*
+* Shift Operations
+*/
+void bigint_shl1(word x[], u32bit x_size,
+                 u32bit word_shift, u32bit bit_shift);
+
+void bigint_shr1(word x[], u32bit x_size,
+                 u32bit word_shift, u32bit bit_shift);
+
+void bigint_shl2(word y[], const word x[], u32bit x_size,
+                 u32bit word_shift, u32bit bit_shift);
+
+void bigint_shr2(word y[], const word x[], u32bit x_size,
+                 u32bit word_shift, u32bit bit_shift);
+
+/*
+* Simple O(N^2) Multiplication and Squaring
+*/
+void bigint_simple_mul(word z[],
+                       const word x[], u32bit x_size,
+                       const word y[], u32bit y_size);
+
+void bigint_simple_sqr(word z[], const word x[], u32bit x_size);
+
+/*
+* Linear Multiply
+*/
+void bigint_linmul2(word x[], u32bit x_size, word y);
+void bigint_linmul3(word z[], const word x[], u32bit x_size, word y);
+
+/**
+* Montgomery Reduction
+* @param z integer to reduce (also output in first x_size+1 words)
+* @param z_size size of z (should be >= 2*x_size+1)
+* @param workspace array of at least 2*(x_size+1) words
+* @param x modulus
+* @param x_size size of x
+* @param u Montgomery value
+*/
+void bigint_monty_redc(word z[], u32bit z_size,
+                       word workspace[],
+                       const word x[], u32bit x_size,
+                       word u);
+
+/*
+* Division operation
+*/
+u32bit bigint_divcore(word q, word y2, word y1,
+                      word x3, word x2, word x1);
+
+/**
+* Compare x and y
+*/
+s32bit bigint_cmp(const word x[], u32bit x_size,
+                  const word y[], u32bit y_size);
+
+/**
+* Compute ((n1<<bits) + n0) / d
+*/
+word bigint_divop(word n1, word n0, word d);
+
+/**
+* Compute ((n1<<bits) + n0) % d
+*/
+word bigint_modop(word n1, word n0, word d);
+
+/*
+* Comba Multiplication / Squaring
+* Each routine is declared with N-word operands and a 2*N-word result
+*/
+void bigint_comba_mul4(word z[8], const word x[4], const word y[4]);
+void bigint_comba_mul6(word z[12], const word x[6], const word y[6]);
+void bigint_comba_mul8(word z[16], const word x[8], const word y[8]);
+void bigint_comba_mul16(word z[32], const word x[16], const word y[16]);
+
+void bigint_comba_sqr4(word out[8], const word in[4]);
+void bigint_comba_sqr6(word out[12], const word in[6]);
+void bigint_comba_sqr8(word out[16], const word in[8]);
+void bigint_comba_sqr16(word out[32], const word in[16]);
+
+}
+
+/*
+* High Level Multiplication/Squaring Interfaces
+*/
+void bigint_mul(word z[], u32bit z_size, word workspace[],
+                const word x[], u32bit x_size, u32bit x_sw,
+                const word y[], u32bit y_size, u32bit y_sw);
+
+void bigint_sqr(word z[], u32bit z_size, word workspace[],
+                const word x[], u32bit x_size, u32bit x_sw);
+
+}
+
+#endif
diff --git a/src/math/mp/mp_generic/info.txt b/src/math/mp/mp_generic/info.txt
new file mode 100644
index 000000000..ab4d7406a
--- /dev/null
+++ b/src/math/mp/mp_generic/info.txt
@@ -0,0 +1,6 @@
+load_on dep
+
+<header:internal>
+mp_asm.h
+mp_asmi.h
+</header:internal>
diff --git a/src/math/mp/mp_generic/mp_asm.h b/src/math/mp/mp_generic/mp_asm.h
new file mode 100644
index 000000000..7c18343ef
--- /dev/null
+++ b/src/math/mp/mp_generic/mp_asm.h
@@ -0,0 +1,54 @@
+/*
+* Lowest Level MPI Algorithms
+* (C) 1999-2008 Jack Lloyd
+*     2006 Luca Piccarreta
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_MP_ASM_H__
+#define BOTAN_MP_ASM_H__
+
+#include <botan/mp_types.h>
+
+#if (BOTAN_MP_WORD_BITS == 8) // dword: wider type in which word_madd2/word_madd3 form a*b + addends
+  typedef Botan::u16bit dword;
+#elif (BOTAN_MP_WORD_BITS == 16)
+  typedef Botan::u32bit dword;
+#elif (BOTAN_MP_WORD_BITS == 32)
+  typedef Botan::u64bit dword;
+#elif (BOTAN_MP_WORD_BITS == 64)
+  #error BOTAN_MP_WORD_BITS can be 64 only with assembly support
+#else
+  #error BOTAN_MP_WORD_BITS must be 8, 16, 32, or 64
+#endif
+
+namespace Botan {
+
+extern "C" {
+
+/*
+* Multiply-accumulate: forms a*b + *c; the low word is returned, the high word is left in *c
+*/
+inline word word_madd2(word a, word b, word* c)
+   {
+   const dword product = static_cast<dword>(a) * b + *c;
+   *c = static_cast<word>(product >> BOTAN_MP_WORD_BITS);
+   return static_cast<word>(product);
+   }
+
+/*
+* Multiply-accumulate: forms a*b + c + *d; the low word is returned, the high word is left in *d
+*/
+inline word word_madd3(word a, word b, word c, word* d)
+   {
+   const dword total = static_cast<dword>(a) * b + c + *d;
+   *d = static_cast<word>(total >> BOTAN_MP_WORD_BITS);
+   return static_cast<word>(total);
+   }
+
+}
+
+}
+
+#endif
diff --git a/src/math/mp/mp_generic/mp_asmi.h b/src/math/mp/mp_generic/mp_asmi.h
new file mode 100644
index 000000000..8225f372d
--- /dev/null
+++ b/src/math/mp/mp_generic/mp_asmi.h
@@ -0,0 +1,207 @@
+/*
+* Lowest Level MPI Algorithms
+* (C) 1999-2010 Jack Lloyd
+*     2006 Luca Piccarreta
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_MP_ASM_INTERNAL_H__
+#define BOTAN_MP_ASM_INTERNAL_H__
+
+#include <botan/internal/mp_asm.h>
+
+namespace Botan {
+
+extern "C" {
+
+/*
+* Add with carry: returns x + y + *carry (wrapping); *carry becomes the carry out
+*/
+inline word word_add(word x, word y, word* carry)
+   {
+   const word carry_in = *carry;
+   const word partial = x + y;             // wrapped iff partial < x
+   const word sum = partial + carry_in;    // wrapped iff sum < carry_in
+   *carry = (partial < x) | (sum < carry_in);
+   return sum;
+   }
+
+/*
+* Eight-word block addition, two operands: x[i] += y[i] for i = 0..7,
+* with the carry out of each word fed into the next.
+*
+* @param x eight-word accumulator, overwritten with the sums
+* @param y eight-word addend
+* @param carry carry into word 0
+* @return carry out of word 7
+*/
+inline word word8_add2(word x[8], const word y[8], word carry)
+   {
+   for(int i = 0; i != 8; ++i)
+      x[i] = word_add(x[i], y[i], &carry);
+   return carry;
+   }
+
+/*
+* Eight-word block addition, three operands: z[i] = x[i] + y[i] with carry, i = 0..7
+*
+* @param z eight-word destination
+* @param x first eight-word addend
+* @param y second eight-word addend
+* @param carry carry into word 0
+* @return carry out of word 7
+*/
+inline word word8_add3(word z[8], const word x[8],
+                       const word y[8], word carry)
+   {
+   for(int i = 0; i != 8; ++i)
+      z[i] = word_add(x[i], y[i], &carry);
+   return carry;
+   }
+
+/*
+* Subtract with borrow: returns x - y - *carry (wrapping); *carry becomes the borrow out
+*/
+inline word word_sub(word x, word y, word* carry)
+   {
+   const word diff = x - y;                // wrapped iff diff > x
+   const word borrow_in = *carry;
+   const word result = diff - borrow_in;   // wrapped iff result > diff
+   *carry = (diff > x) | (result > diff);
+   return result;
+   }
+
+/*
+* Eight-word block subtraction, two operands: x[i] -= y[i] for i = 0..7,
+* with the borrow out of each word fed into the next.
+*
+* @param x eight-word minuend, overwritten with the differences
+* @param y eight-word subtrahend
+* @param carry borrow into word 0
+* @return borrow out of word 7
+*/
+inline word word8_sub2(word x[8], const word y[8], word carry)
+   {
+   for(int i = 0; i != 8; ++i)
+      x[i] = word_sub(x[i], y[i], &carry);
+   return carry;
+   }
+
+/*
+* Eight-word block reversed subtraction: x[i] = y[i] - x[i] for i = 0..7,
+* with the borrow out of each word fed into the next.
+*
+* @param x eight-word subtrahend, overwritten with the differences
+* @param y eight-word minuend
+* @param carry borrow into word 0
+* @return borrow out of word 7
+*/
+inline word word8_sub2_rev(word x[8], const word y[8], word carry)
+   {
+   for(int i = 0; i != 8; ++i)
+      x[i] = word_sub(y[i], x[i], &carry);
+   return carry;
+   }
+
+/*
+* Eight-word block subtraction, three operands: z[i] = x[i] - y[i] with borrow, i = 0..7
+*
+* @param z eight-word destination
+* @param x eight-word minuend
+* @param y eight-word subtrahend
+* @param carry borrow into word 0
+* @return borrow out of word 7
+*/
+inline word word8_sub3(word z[8], const word x[8],
+                       const word y[8], word carry)
+   {
+   for(int i = 0; i != 8; ++i)
+      z[i] = word_sub(x[i], y[i], &carry);
+   return carry;
+   }
+
+/*
+* Eight-word block multiplied in place by a single word: each x[i] becomes
+* the low word of x[i]*y + carry, and carry becomes the matching high word.
+*
+* @param x eight-word block, overwritten with the product words
+* @param y single-word multiplier
+* @param carry word added into word 0
+* @return high word left over after word 7
+*/
+inline word word8_linmul2(word x[8], word y, word carry)
+   {
+   for(int i = 0; i != 8; ++i)
+      x[i] = word_madd2(x[i], y, &carry);
+   return carry;
+   }
+
+/*
+* Eight-word block times a single word: z[i] = low word of x[i]*y + carry, i = 0..7
+*
+* @param z eight-word destination
+* @param x eight-word multiplicand
+* @param y single-word multiplier
+* @param carry word added into word 0
+* @return high word left over after word 7
+*/
+inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
+   {
+   for(int i = 0; i != 8; ++i)
+      z[i] = word_madd2(x[i], y, &carry);
+   return carry;
+   }
+
+/*
+* Eight-word block multiply/add: z[i] = low word of x[i]*y + z[i] + carry, i = 0..7
+*
+* @param z eight-word accumulator, updated in place
+* @param x eight-word multiplicand
+* @param y single-word multiplier
+* @param carry word added into word 0
+* @return high word left over after word 7
+*/
+inline word word8_madd3(word z[8], const word x[8], word y, word carry)
+   {
+   for(int i = 0; i != 8; ++i)
+      z[i] = word_madd3(x[i], y, z[i], &carry);
+   return carry;
+   }
+
+/*
+* Three-word accumulator update: (w2:w1:w0) += a * b
+*/
+inline void word3_muladd(word* w2, word* w1, word* w0, word a, word b)
+   {
+   word hi = *w0;                    // word_madd2 adds this in and replaces it with the high word
+   *w0 = word_madd2(a, b, &hi);
+   *w1 += hi;
+   if(*w1 < hi) { ++*w2; }           // middle word wrapped: propagate into the top word
+   }
+
+/*
+* Three-word accumulator update with a doubled product: (w2:w1:w0) += 2 * a * b
+*/
+inline void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b)
+   {
+   word hi = 0;
+   const word lo = word_madd2(a, b, &hi);   // hi:lo = a * b
+
+   // Shift the two-word product left by one bit; the bit shifted
+   // out of the high word becomes a third word (0 or 1)
+   const word d0 = lo << 1;
+   const word d1 = (hi << 1) | (lo >> (BOTAN_MP_WORD_BITS-1));
+   const word d2 = hi >> (BOTAN_MP_WORD_BITS-1);
+
+   word carry = 0;
+   *w0 = word_add(*w0, d0, &carry);
+   *w1 = word_add(*w1, d1, &carry);
+   *w2 = word_add(*w2, d2, &carry);
+   }
+
+}
+
+}
+
+#endif
diff --git a/src/math/mp/mp_ia32/info.txt b/src/math/mp/mp_ia32/info.txt
new file mode 100644
index 000000000..1659f74cf
--- /dev/null
+++ b/src/math/mp/mp_ia32/info.txt
@@ -0,0 +1,18 @@
+load_on dep
+
+mp_bits 32
+
+<header:internal>
+mp_asm.h
+mp_asmi.h
+</header:internal>
+
+<arch>
+ia32
+</arch>
+
+<cc>
+clang
+gcc
+icc
+</cc>
diff --git a/src/math/mp/mp_ia32/mp_asm.h b/src/math/mp/mp_ia32/mp_asm.h
new file mode 100644
index 000000000..4d3afc992
--- /dev/null
+++ b/src/math/mp/mp_ia32/mp_asm.h
@@ -0,0 +1,67 @@
+/*
+* Lowest Level MPI Algorithms
+* (C) 1999-2008 Jack Lloyd
+*     2006 Luca Piccarreta
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_MP_ASM_H__
+#define BOTAN_MP_ASM_H__
+
+#include <botan/mp_types.h>
+
+#if (BOTAN_MP_WORD_BITS != 32)
+   #error The mp_ia32 module requires that BOTAN_MP_WORD_BITS == 32
+#endif
+
+namespace Botan {
+
+extern "C" {
+
+/*
+* Helper macro for x86 assembly: appends "\n\t" so each ASM() piece ends its own line
+*/
+#define ASM(x) x "\n\t"
+
+/*
+* Word Multiply/Add: returns the low word of a*b + *c and leaves the high word in *c
+*/
+inline word word_madd2(word a, word b, word* c)
+   {
+   asm(
+      ASM("mull %[b]")              // edx:eax = eax * b (a is bound to eax, the new *c to edx)
+      ASM("addl %[c],%[a]")         // add the incoming *c to the low word
+      ASM("adcl $0,%[carry]")       // fold the resulting carry into the high word
+
+      : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c)
+      : "0"(a), "1"(b), [c]"g"(*c) : "cc");
+
+   return a;
+   }
+
+/*
+* Word Multiply/Add: returns the low word of a*b + c + *d and leaves the high word in *d
+*/
+inline word word_madd3(word a, word b, word c, word* d)
+   {
+   asm(
+      ASM("mull %[b]")              // edx:eax = eax * b (a is bound to eax, the new *d to edx)
+
+      ASM("addl %[c],%[a]")         // low word += c
+      ASM("adcl $0,%[carry]")       // high word += carry from that addition
+
+      ASM("addl %[d],%[a]")         // low word += incoming *d
+      ASM("adcl $0,%[carry]")       // high word += carry from that addition
+
+      : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d)
+      : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc");
+
+   return a;
+   }
+
+}
+
+}
+
+#endif
diff --git a/src/math/mp/mp_ia32/mp_asmi.h b/src/math/mp/mp_ia32/mp_asmi.h
new file mode 100644
index 000000000..c7b679e80
--- /dev/null
+++ b/src/math/mp/mp_ia32/mp_asmi.h
@@ -0,0 +1,240 @@
+/*
+* Lowest Level MPI Algorithms
+* (C) 1999-2010 Jack Lloyd
+*     2006 Luca Piccarreta
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_MP_ASM_INTERNAL_H__
+#define BOTAN_MP_ASM_INTERNAL_H__
+
+#include <botan/internal/mp_asm.h>
+
+namespace Botan {
+
+extern "C" {
+
+/* Helper macros for x86 assembly. ASM(x) appends "\n\t" to one instruction string. In the
+*  macros below INDEX selects word INDEX of a block (byte offset 4*INDEX) and %[carry] is
+*  the asm operand carried from word to word (ADDSUB2_OP/ADDSUB3_OP also use it as scratch). */
+#ifndef ASM
+  #define ASM(x) x "\n\t"
+#endif
+/* ADDSUB2_OP: x[i] = x[i] OPERATION y[i].  ADDSUB3_OP: z[i] = x[i] OPERATION y[i].  LINMUL_OP: WRITE_TO[i] = low(x[i]*y + carry), carry = high. */
+#define ADDSUB2_OP(OPERATION, INDEX)                     \
+        ASM("movl 4*" #INDEX "(%[y]), %[carry]")         \
+        ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])")   \
+
+#define ADDSUB3_OP(OPERATION, INDEX)                     \
+        ASM("movl 4*" #INDEX "(%[x]), %[carry]")         \
+        ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]")   \
+        ASM("movl %[carry], 4*" #INDEX "(%[z])")         \
+
+#define LINMUL_OP(WRITE_TO, INDEX)                       \
+        ASM("movl 4*" #INDEX "(%[x]),%%eax")             \
+        ASM("mull %[y]")                                 \
+        ASM("addl %[carry],%%eax")                       \
+        ASM("adcl $0,%%edx")                             \
+        ASM("movl %%edx,%[carry]")                       \
+        ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])")
+/* MULADD_OP: z[i] = low(x[i]*y + carry + z[i]), carry = high word; the first macro argument is unused */
+#define MULADD_OP(IGNORED, INDEX)                        \
+        ASM("movl 4*" #INDEX "(%[x]),%%eax")             \
+        ASM("mull %[y]")                                 \
+        ASM("addl %[carry],%%eax")                       \
+        ASM("adcl $0,%%edx")                             \
+        ASM("addl 4*" #INDEX "(%[z]),%%eax")             \
+        ASM("adcl $0,%%edx")                             \
+        ASM("movl %%edx,%[carry]")                       \
+        ASM("movl %%eax, 4*" #INDEX " (%[z])")
+/* DO_8_TIMES: expands MACRO(ARG, i) for i = 0..7 */
+#define DO_8_TIMES(MACRO, ARG) \
+        MACRO(ARG, 0) \
+        MACRO(ARG, 1) \
+        MACRO(ARG, 2) \
+        MACRO(ARG, 3) \
+        MACRO(ARG, 4) \
+        MACRO(ARG, 5) \
+        MACRO(ARG, 6) \
+        MACRO(ARG, 7)
+/* ADD_OR_SUBTRACT: rorl moves bit 0 of %[carry] into CF, CORE_CODE runs, then sbbl/negl turn CF back into 0 or 1 */
+#define ADD_OR_SUBTRACT(CORE_CODE)     \
+        ASM("rorl %[carry]")           \
+        CORE_CODE                      \
+        ASM("sbbl %[carry],%[carry]")  \
+        ASM("negl %[carry]")
+
+/*
+* Word Addition: returns x + y + carry-in via adcl; *carry is rewritten with the 0/1 carry out
+*/
+inline word word_add(word x, word y, word* carry)
+   {
+   asm(
+      ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]"))   // wrapper moves *carry into CF and back out
+      : [x]"=r"(x), [carry]"=r"(*carry)
+      : "0"(x), [y]"rm"(y), "1"(*carry)
+      : "cc");
+   return x;
+   }
+
+/*
+* Eight Word Block Addition, Two Argument: x[i] += y[i] (adcl) for i = 0..7; returns the 0/1 carry out
+*/
+inline word word8_add2(word x[8], const word y[8], word carry)
+   {
+   asm(
+      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl"))
+      : [carry]"=r"(carry)
+      : [x]"r"(x), [y]"r"(y), "0"(carry)
+      : "cc", "memory");   // "memory": the asm stores into x[]
+   return carry;
+   }
+
+/*
+* Eight Word Block Addition, Three Argument: z[i] = x[i] + y[i] (adcl) for i = 0..7; returns the 0/1 carry out
+*/
+inline word word8_add3(word z[8], const word x[8], const word y[8], word carry)
+   {
+   asm(
+      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl"))
+      : [carry]"=r"(carry)
+      : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
+      : "cc", "memory");   // "memory": the asm stores into z[]
+   return carry;
+   }
+
+/*
+* Word Subtraction: returns x - y - borrow-in via sbbl; *carry is rewritten with the 0/1 borrow out
+*/
+inline word word_sub(word x, word y, word* carry)
+   {
+   asm(
+      ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]"))   // wrapper moves *carry into CF and back out
+      : [x]"=r"(x), [carry]"=r"(*carry)
+      : "0"(x), [y]"rm"(y), "1"(*carry)
+      : "cc");
+   return x;
+   }
+
+/*
+* Eight Word Block Subtraction, Two Argument: x[i] -= y[i] (sbbl) for i = 0..7; returns the 0/1 borrow out
+*/
+inline word word8_sub2(word x[8], const word y[8], word carry)
+   {
+   asm(
+      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl"))
+      : [carry]"=r"(carry)
+      : [x]"r"(x), [y]"r"(y), "0"(carry)
+      : "cc", "memory");   // "memory": the asm stores into x[]
+   return carry;
+   }
+
+/*
+* Eight Word Block Reversed Subtraction: x[i] = y[i] - x[i] (sbbl) for i = 0..7; returns the 0/1 borrow out
+*/
+inline word word8_sub2_rev(word x[8], const word y[8], word carry)
+   {
+   asm(
+      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
+      : [carry]"=r"(carry)
+      : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)   // operands swapped: asm x = y, asm y = x, result into x
+      : "cc", "memory");
+   return carry;
+   }
+
+/*
+* Eight Word Block Subtraction, Three Argument: z[i] = x[i] - y[i] (sbbl) for i = 0..7; returns the 0/1 borrow out
+*/
+inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry)
+   {
+   asm(
+      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
+      : [carry]"=r"(carry)
+      : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
+      : "cc", "memory");   // "memory": the asm stores into z[]
+   return carry;
+   }
+
+/*
+* Eight Word Block Linear Multiplication: x[i] = low(x[i]*y + carry), carry = high word; returns the last carry
+*/
+inline word word8_linmul2(word x[8], word y, word carry)
+   {
+   asm(
+      DO_8_TIMES(LINMUL_OP, "x")
+      : [carry]"=r"(carry)
+      : [x]"r"(x), [y]"rm"(y), "0"(carry)
+      : "cc", "%eax", "%edx", "memory");   // "memory": x[] is read and written behind the compiler's back
+   return carry;
+   }
+
+/*
+* Eight Word Block Linear Multiplication: z[i] = low(x[i]*y + carry), carry = high word; returns the last carry
+*/
+inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
+   {
+   asm(
+      DO_8_TIMES(LINMUL_OP, "z")
+      : [carry]"=r"(carry)
+      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
+      : "cc", "%eax", "%edx", "memory");   // "memory": x[] is read and z[] written behind the compiler's back
+   return carry;
+   }
+
+/*
+* Eight Word Block Multiply/Add: z[i] = low(x[i]*y + carry + z[i]), carry = high word; returns the last carry
+*/
+inline word word8_madd3(word z[8], const word x[8], word y, word carry)
+   {
+   asm(
+      DO_8_TIMES(MULADD_OP, "")
+      : [carry]"=r"(carry)
+      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
+      : "cc", "%eax", "%edx", "memory");   // "memory": x[] is read and z[] updated behind the compiler's back
+   return carry;
+   }
+
+/*
+* Multiply-Add Accumulator: (w2:w1:w0) += x * y
+*/
+inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y)
+   {
+   asm(
+      ASM("mull %[y]")              // edx:eax = x * y, so eax/edx must be declared as outputs
+
+      ASM("addl %[x],%[w0]")        // w0 += low word of the product
+      ASM("adcl %[y],%[w1]")        // w1 += high word + carry
+      ASM("adcl $0,%[w2]")          // w2 += carry
+
+      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2), [x]"=a"(x), [y]"=d"(y)
+      : "0"(*w0), "1"(*w1), "2"(*w2), "3"(x), "4"(y)
+      : "cc");
+   }
+
+/*
+* Multiply-Add Accumulator: (w2:w1:w0) += 2 * x * y (the product is added twice)
+*/
+inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y)
+   {
+   asm(
+      ASM("mull %[y]")              // edx:eax = x * y, so eax/edx must be declared as outputs
+
+      ASM("addl %[x],%[w0]")        // first addition of the product
+      ASM("adcl %[y],%[w1]")
+      ASM("adcl $0,%[w2]")
+
+      ASM("addl %[x],%[w0]")        // second addition of the same product
+      ASM("adcl %[y],%[w1]")
+      ASM("adcl $0,%[w2]")
+
+      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2), [x]"=a"(x), [y]"=d"(y)
+      : "0"(*w0), "1"(*w1), "2"(*w2), "3"(x), "4"(y)
+      : "cc");
+   }
+
+}
+
+}
+
+#endif
diff --git a/src/math/mp/mp_ia32_msvc/info.txt b/src/math/mp/mp_ia32_msvc/info.txt
new file mode 100644
index 000000000..55a42c310
--- /dev/null
+++ b/src/math/mp/mp_ia32_msvc/info.txt
@@ -0,0 +1,16 @@
+mp_bits 32
+
+load_on dep
+
+<header:internal>
+mp_generic:mp_asm.h
+mp_asmi.h
+</header:internal>
+
+<arch>
+ia32
+</arch>
+
+<cc>
+msvc
+</cc>
diff --git a/src/math/mp/mp_ia32_msvc/mp_asmi.h b/src/math/mp/mp_ia32_msvc/mp_asmi.h
new file mode 100644
index 000000000..aee457d65
--- /dev/null
+++ b/src/math/mp/mp_ia32_msvc/mp_asmi.h
@@ -0,0 +1,542 @@
+/*
+* Lowest Level MPI Algorithms
+* (C) 1999-2010 Jack Lloyd
+*     2006 Luca Piccarreta
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_MP_ASM_INTERNAL_H__
+#define BOTAN_MP_ASM_INTERNAL_H__
+
+#include <botan/internal/mp_asm.h>
+
+namespace Botan {
+
+extern "C" {
+
+/*
+* Add with carry: returns x + y + *carry (wrapping); *carry becomes the carry out
+*/
+inline word word_add(word x, word y, word* carry)
+   {
+   const word carry_in = *carry;
+   const word partial = x + y;             // wrapped iff partial < x
+   const word sum = partial + carry_in;    // wrapped iff sum < carry_in
+   *carry = (partial < x) | (sum < carry_in);
+   return sum;
+   }
+
+/* Eight Word Block Addition, Two Argument: x[i] += y[i] with carry for i = 0..7.
+*  There is no return statement; the 0/1 carry out is left in eax by the final sbb/neg.
+*  NOTE(review): relies on the compiler returning eax from an __asm-only body - confirm for MSVC. */
+inline word word8_add2(word x[8], const word y[8], word carry)
+   {
+   __asm {
+      mov edx,[x]
+      mov esi,[y]
+      xor eax,eax
+      sub eax,[carry] // 0 - carry: sets CF iff carry != 0
+      mov eax,[esi]
+      adc [edx],eax
+      mov eax,[esi+4]
+      adc [edx+4],eax
+      mov eax,[esi+8]
+      adc [edx+8],eax
+      mov eax,[esi+12]
+      adc [edx+12],eax
+      mov eax,[esi+16]
+      adc [edx+16],eax
+      mov eax,[esi+20]
+      adc [edx+20],eax
+      mov eax,[esi+24]
+      adc [edx+24],eax
+      mov eax,[esi+28]
+      adc [edx+28],eax
+      sbb eax,eax     // eax = 0 - CF (0 or all ones)
+      neg eax         // eax = 0 or 1: carry out
+      }
+   }
+
+/* Eight Word Block Addition, Three Argument: z[i] = x[i] + y[i] with carry for i = 0..7.
+*  There is no return statement; the 0/1 carry out is left in eax by the final sbb/neg.
+*  NOTE(review): relies on the compiler returning eax from an __asm-only body - confirm for MSVC. */
+inline word word8_add3(word z[8], const word x[8], const word y[8], word carry)
+   {
+    __asm {
+      mov edi,[x]
+      mov esi,[y]
+      mov ebx,[z]
+      xor eax,eax
+      sub eax,[carry] // 0 - carry: sets CF iff carry != 0
+      mov eax,[edi]
+      adc eax,[esi]
+      mov [ebx],eax
+
+      mov eax,[edi+4]
+      adc eax,[esi+4]
+      mov [ebx+4],eax
+
+      mov eax,[edi+8]
+      adc eax,[esi+8]
+      mov [ebx+8],eax
+
+      mov eax,[edi+12]
+      adc eax,[esi+12]
+      mov [ebx+12],eax
+
+      mov eax,[edi+16]
+      adc eax,[esi+16]
+      mov [ebx+16],eax
+
+      mov eax,[edi+20]
+      adc eax,[esi+20]
+      mov [ebx+20],eax
+
+      mov eax,[edi+24]
+      adc eax,[esi+24]
+      mov [ebx+24],eax
+
+      mov eax,[edi+28]
+      adc eax,[esi+28]
+      mov [ebx+28],eax
+
+      sbb eax,eax     // eax = 0 - CF (0 or all ones)
+      neg eax         // eax = 0 or 1: carry out
+      }
+   }
+
+/*
+* Subtract with borrow: returns x - y - *carry (wrapping); *carry becomes the borrow out
+*/
+inline word word_sub(word x, word y, word* carry)
+   {
+   const word diff = x - y;                // wrapped iff diff > x
+   const word borrow_in = *carry;
+   const word result = diff - borrow_in;   // wrapped iff result > diff
+   *carry = (diff > x) | (result > diff);
+   return result;
+   }
+
+/* Eight Word Block Subtraction, Two Argument: x[i] -= y[i] with borrow for i = 0..7.
+*  There is no return statement; the 0/1 borrow out is left in eax by the final sbb/neg.
+*  NOTE(review): relies on the compiler returning eax from an __asm-only body - confirm for MSVC. */
+inline word word8_sub2(word x[8], const word y[8], word carry)
+   {
+    __asm {
+      mov edi,[x]
+      mov esi,[y]
+      xor eax,eax
+      sub eax,[carry] // 0 - carry: sets CF iff carry != 0
+      mov eax,[edi]
+      sbb eax,[esi]
+      mov [edi],eax
+      mov eax,[edi+4]
+      sbb eax,[esi+4]
+      mov [edi+4],eax
+      mov eax,[edi+8]
+      sbb eax,[esi+8]
+      mov [edi+8],eax
+      mov eax,[edi+12]
+      sbb eax,[esi+12]
+      mov [edi+12],eax
+      mov eax,[edi+16]
+      sbb eax,[esi+16]
+      mov [edi+16],eax
+      mov eax,[edi+20]
+      sbb eax,[esi+20]
+      mov [edi+20],eax
+      mov eax,[edi+24]
+      sbb eax,[esi+24]
+      mov [edi+24],eax
+      mov eax,[edi+28]
+      sbb eax,[esi+28]
+      mov [edi+28],eax
+      sbb eax,eax     // eax = 0 - CF (0 or all ones)
+      neg eax         // eax = 0 or 1: borrow out
+      }
+   }
+
+/*
+* Eight-word block reversed subtraction: x[i] = y[i] - x[i] for i = 0..7,
+* with the borrow out of each word fed into the next.
+*
+* @param x eight-word subtrahend, overwritten with the differences
+* @param y eight-word minuend
+* @param carry borrow into word 0
+* @return borrow out of word 7
+*/
+inline word word8_sub2_rev(word x[8], const word y[8], word carry)
+   {
+   for(int i = 0; i != 8; ++i)
+      x[i] = word_sub(y[i], x[i], &carry);
+   return carry;
+   }
+
+
+/* Eight Word Block Subtraction, Three Argument: z[i] = x[i] - y[i] with borrow for i = 0..7.
+*  There is no return statement; the 0/1 borrow out is left in eax by the final sbb/neg.
+*  NOTE(review): relies on the compiler returning eax from an __asm-only body - confirm for MSVC. */
+inline word word8_sub3(word z[8], const word x[8],
+                       const word y[8], word carry)
+   {
+    __asm {
+      mov edi,[x]
+      mov esi,[y]
+      xor eax,eax
+      sub eax,[carry] // 0 - carry: sets CF iff carry != 0
+      mov ebx,[z]     // mov leaves the flags alone, so CF survives
+      mov eax,[edi]
+      sbb eax,[esi]
+      mov [ebx],eax
+      mov eax,[edi+4]
+      sbb eax,[esi+4]
+      mov [ebx+4],eax
+      mov eax,[edi+8]
+      sbb eax,[esi+8]
+      mov [ebx+8],eax
+      mov eax,[edi+12]
+      sbb eax,[esi+12]
+      mov [ebx+12],eax
+      mov eax,[edi+16]
+      sbb eax,[esi+16]
+      mov [ebx+16],eax
+      mov eax,[edi+20]
+      sbb eax,[esi+20]
+      mov [ebx+20],eax
+      mov eax,[edi+24]
+      sbb eax,[esi+24]
+      mov [ebx+24],eax
+      mov eax,[edi+28]
+      sbb eax,[esi+28]
+      mov [ebx+28],eax
+      sbb eax,eax     // eax = 0 - CF (0 or all ones)
+      neg eax         // eax = 0 or 1: borrow out
+      }
+   }
+
+/* Eight Word Block Linear Multiplication: x[i] = low(x[i]*y + carry), carry = high word, i = 0..7.
+*  There is no return statement; the final high word is moved into eax as the result.
+*  NOTE(review): relies on the compiler returning eax from an __asm-only body - confirm for MSVC. */
+inline word word8_linmul2(word x[8], word y, word carry)
+   {
+   __asm {
+      mov esi,[x]
+      mov eax,[esi]        //load a
+      mul [y]           //edx(hi):eax(lo)=a*b
+      add eax,[carry]      //sum lo carry
+      adc edx,0          //sum hi carry
+      mov ecx,edx      //store carry
+      mov [esi],eax        //store low word
+
+      mov eax,[esi+4]        //load a
+      mul [y]           //edx(hi):eax(lo)=a*b
+      add eax,ecx      //sum lo carry
+      adc edx,0          //sum hi carry
+      mov ecx,edx      //store carry
+      mov [esi+4],eax        //store low word
+
+      mov eax,[esi+8]        //load a
+      mul [y]           //edx(hi):eax(lo)=a*b
+      add eax,ecx      //sum lo carry
+      adc edx,0          //sum hi carry
+      mov ecx,edx      //store carry
+      mov [esi+8],eax        //store low word
+
+      mov eax,[esi+12]        //load a
+      mul [y]           //edx(hi):eax(lo)=a*b
+      add eax,ecx      //sum lo carry
+      adc edx,0          //sum hi carry
+      mov ecx,edx      //store carry
+      mov [esi+12],eax        //store low word
+
+      mov eax,[esi+16]        //load a
+      mul [y]           //edx(hi):eax(lo)=a*b
+      add eax,ecx      //sum lo carry
+      adc edx,0          //sum hi carry
+      mov ecx,edx      //store carry
+      mov [esi+16],eax        //store low word
+
+      mov eax,[esi+20]        //load a
+      mul [y]           //edx(hi):eax(lo)=a*b
+      add eax,ecx      //sum lo carry
+      adc edx,0          //sum hi carry
+      mov ecx,edx      //store carry
+      mov [esi+20],eax        //store low word
+
+      mov eax,[esi+24]        //load a
+      mul [y]           //edx(hi):eax(lo)=a*b
+      add eax,ecx      //sum lo carry
+      adc edx,0          //sum hi carry
+      mov ecx,edx      //store carry
+      mov [esi+24],eax        //store low word
+
+      mov eax,[esi+28]        //load a
+      mul [y]           //edx(hi):eax(lo)=a*b
+      add eax,ecx      //sum lo carry
+      adc edx,0          //sum hi carry
+      mov [esi+28],eax        //store low word
+
+      mov eax,edx      //final carry becomes the result in eax
+      }
+   }
+
+/*
+* Eight Word Block Multiply-Add: z[i] = low word of (x[i]*y + carry + z[i]), i = 0..7
+*/
+inline word word8_muladd(word z[8], const word x[8],
+                         word y, word carry)
+   {
+   __asm {
+      mov esi,[x]       //esi = source words
+      mov ebx,[y]       //ebx = multiplier, reused for all eight words
+      mov edi,[z]       //edi = accumulator words
+      mov eax,[esi]     //load x[0]
+      mul ebx           //edx(hi):eax(lo)=x[0]*y
+      add eax,[carry]   //lo += incoming carry argument
+      adc edx,0         //fold the carry flag of that add into hi
+      add eax,[edi]     //lo += z[0]
+      adc edx,0         //fold the carry flag of that add into hi
+      mov ecx,edx       //ecx = carry into the next word
+      mov [edi],eax     //store lo to z[0]
+
+      mov eax,[esi+4]   //word 1: same sequence, carry-in taken from ecx
+      mul ebx
+      add eax,ecx
+      adc edx,0
+      add eax,[edi+4]
+      adc edx,0
+      mov ecx,edx
+      mov [edi+4],eax
+
+      mov eax,[esi+8]   //word 2
+      mul ebx
+      add eax,ecx
+      adc edx,0
+      add eax,[edi+8]
+      adc edx,0
+      mov ecx,edx
+      mov [edi+8],eax
+
+      mov eax,[esi+12]  //word 3
+      mul ebx
+      add eax,ecx
+      adc edx,0
+      add eax,[edi+12]
+      adc edx,0
+      mov ecx,edx
+      mov [edi+12],eax
+
+      mov eax,[esi+16]  //word 4
+      mul ebx
+      add eax,ecx
+      adc edx,0
+      add eax,[edi+16]
+      adc edx,0
+      mov ecx,edx
+      mov [edi+16],eax
+
+      mov eax,[esi+20]  //word 5
+      mul ebx
+      add eax,ecx
+      adc edx,0
+      add eax,[edi+20]
+      adc edx,0
+      mov ecx,edx
+      mov [edi+20],eax
+
+      mov eax,[esi+24]  //word 6
+      mul ebx
+      add eax,ecx
+      adc edx,0
+      add eax,[edi+24]
+      adc edx,0
+      mov ecx,edx
+      mov [edi+24],eax
+
+      mov eax,[esi+28]  //word 7
+      mul ebx
+      add eax,ecx
+      adc edx,0
+      add eax,[edi+28]
+      adc edx,0
+      mov [edi+28],eax
+      mov eax,edx       //final carry left in eax (no return statement; NOTE(review): relies on the compiler returning eax)
+      }
+   }
+
+inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
+   { // z[i] = low word of (x[i]*y + carry-in) for i = 0..7; all eight words of x are read and of z written
+   __asm {
+#if 0
+      //MMX/pmuludq variant, compiled out; the original author notes it is slower
+      mov edx,[z]
+      mov eax,[x]
+      movd mm7,[y]
+
+      movd mm0,[eax]
+      movd mm1,[eax+4]
+      movd mm2,[eax+8]
+      pmuludq mm0,mm7
+      pmuludq mm1,mm7
+      pmuludq mm2,mm7
+
+      movd mm6,[carry]
+      paddq mm0,mm6
+      movd [edx],mm0
+
+      psrlq mm0,32
+      paddq mm1,mm0
+      movd [edx+4],mm1
+
+      movd mm3,[eax+12]
+      psrlq mm1,32
+      paddq mm2,mm1
+      movd [edx+8],mm2
+
+      pmuludq mm3,mm7
+      movd mm4,[eax+16]
+      psrlq mm2,32
+      paddq mm3,mm2
+      movd [edx+12],mm3
+
+      pmuludq mm4,mm7
+      movd mm5,[eax+20]
+      psrlq mm3,32
+      paddq mm4,mm3
+      movd [edx+16],mm4
+
+      pmuludq mm5,mm7
+      movd mm0,[eax+24]
+      psrlq mm4,32
+      paddq mm5,mm4
+      movd [edx+20],mm5
+
+      pmuludq mm0,mm7
+      movd mm1,[eax+28]
+      psrlq mm5,32
+      paddq mm0,mm5
+      movd [edx+24],mm0
+
+      pmuludq mm1,mm7
+      psrlq mm0,32
+      paddq mm1,mm0
+      movd [edx+28],mm1
+      psrlq mm1,32
+
+      movd eax,mm1
+      emms
+#else
+      mov edi,[z]          //edi = destination words
+      mov esi,[x]          //esi = source words
+      mov eax,[esi]        //load x[0]
+      mul [y]           //edx(hi):eax(lo)=x[0]*y
+      add eax,[carry]    //lo += incoming carry argument
+      adc edx,0          //fold the carry flag of that add into hi
+      mov ecx,edx      //ecx = carry into the next word
+      mov [edi],eax        //store lo to z[0]
+
+      mov eax,[esi+4]        //load x[1]
+      mul [y]           //edx(hi):eax(lo)=x[1]*y
+      add eax,ecx      //lo += carry from the previous word
+      adc edx,0          //fold the carry flag into hi
+      mov ecx,edx      //ecx = carry into the next word
+      mov [edi+4],eax        //store lo to z[1]
+
+      mov eax,[esi+8]        //load x[2]
+      mul [y]           //edx(hi):eax(lo)=x[2]*y
+      add eax,ecx      //lo += carry from the previous word
+      adc edx,0          //fold the carry flag into hi
+      mov ecx,edx      //ecx = carry into the next word
+      mov [edi+8],eax        //store lo to z[2]
+
+      mov eax,[esi+12]        //load x[3]
+      mul [y]           //edx(hi):eax(lo)=x[3]*y
+      add eax,ecx      //lo += carry from the previous word
+      adc edx,0          //fold the carry flag into hi
+      mov ecx,edx      //ecx = carry into the next word
+      mov [edi+12],eax        //store lo to z[3]
+
+      mov eax,[esi+16]        //load x[4]
+      mul [y]           //edx(hi):eax(lo)=x[4]*y
+      add eax,ecx      //lo += carry from the previous word
+      adc edx,0          //fold the carry flag into hi
+      mov ecx,edx      //ecx = carry into the next word
+      mov [edi+16],eax        //store lo to z[4]
+
+      mov eax,[esi+20]        //load x[5]
+      mul [y]           //edx(hi):eax(lo)=x[5]*y
+      add eax,ecx      //lo += carry from the previous word
+      adc edx,0          //fold the carry flag into hi
+      mov ecx,edx      //ecx = carry into the next word
+      mov [edi+20],eax        //store lo to z[5]
+
+      mov eax,[esi+24]        //load x[6]
+      mul [y]           //edx(hi):eax(lo)=x[6]*y
+      add eax,ecx      //lo += carry from the previous word
+      adc edx,0          //fold the carry flag into hi
+      mov ecx,edx      //ecx = carry into the next word
+      mov [edi+24],eax        //store lo to z[6]
+
+      mov eax,[esi+28]        //load x[7]
+      mul [y]           //edx(hi):eax(lo)=x[7]*y
+      add eax,ecx      //lo += carry from the previous word
+      adc edx,0          //fold the carry flag into hi
+      mov [edi+28],eax        //store lo to z[7]
+      mov eax,edx      //final carry left in eax (no return statement; NOTE(review): relies on the compiler returning eax)
+#endif
+      }
+   }
+
+/*
+* Eight Word Block Multiply/Add
+*
+* Runs word_madd3(x[i], y, z[i], &carry) for i = 0..7 in ascending order,
+* storing each result in z[i]; carry is threaded through every call and
+* its final value is returned.
+*/
+inline word word8_madd3(word z[8], const word x[8], word y, word carry)
+   {
+   // Same eight calls as a hand-unrolled sequence, expressed as a loop
+   for(int i = 0; i != 8; ++i)
+      z[i] = word_madd3(x[i], y, z[i], &carry);
+
+   return carry;
+   }
+
+/*
+* Multiply-Add Accumulator: folds a*b into the three words (w2,w1,w0)
+*/
+inline void word3_muladd(word* w2, word* w1, word* w0, word a, word b)
+   {
+   word upper = *w0;                 // handed to word_madd2 as its in/out carry word
+   *w0 = word_madd2(a, b, &upper);   // presumably low word out, high word left in upper
+   *w1 += upper;
+   if(*w1 < upper) *w2 += 1;         // *w1 wrapped around, so bump *w2
+   }
+
+/*
+* Doubled Multiply-Add Accumulator: folds 2*(a*b) into (w2,w1,w0)
+*/
+inline void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b)
+   {
+   // Two-word product: low word returned, high word left in prod_hi
+   word prod_hi = 0;
+   const word prod_lo = word_madd2(a, b, &prod_hi);
+
+   // Shift the product left by one bit, spilling into a third word
+   const word dbl_top = prod_hi >> (BOTAN_MP_WORD_BITS-1);
+   const word dbl_mid = (prod_hi << 1) | (prod_lo >> (BOTAN_MP_WORD_BITS-1));
+   const word dbl_low = prod_lo << 1;
+
+   word carry = 0;
+   *w0 = word_add(*w0, dbl_low, &carry);
+   *w1 = word_add(*w1, dbl_mid, &carry);
+   *w2 = word_add(*w2, dbl_top, &carry);
+   }
+
+}
+
+}
+
+#endif
diff --git a/src/math/mp/mp_karat.cpp b/src/math/mp/mp_karat.cpp
new file mode 100644
index 000000000..8ae346f1e
--- /dev/null
+++ b/src/math/mp/mp_karat.cpp
@@ -0,0 +1,340 @@
+/*
+* Karatsuba Multiplication/Squaring
+* (C) 1999-2008 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#include <botan/internal/mp_core.h>
+#include <botan/mem_ops.h>
+#include <botan/internal/mp_asmi.h>
+
+namespace Botan {
+
+namespace {
+
+/*
+* Karatsuba Multiplication Operation: product of the N-word x and y goes to z
+*/
+void karatsuba_mul(word z[], const word x[], const word y[], u32bit N,
+                   word workspace[])
+   {
+   if(N == 6)
+      bigint_comba_mul6(z, x, y);
+   else if(N == 8)
+      bigint_comba_mul8(z, x, y);
+   else if(N == 16)
+      bigint_comba_mul16(z, x, y);
+   else if(N < BOTAN_KARAT_MUL_THRESHOLD || N % 2)
+      bigint_simple_mul(z, x, N, y, N);
+   else
+      {
+      const u32bit N2 = N / 2;
+      // Low (x0, y0) and high (x1, y1) halves of N2 words; z0/z1 are the two N-word halves of z
+      const word* x0 = x;
+      const word* x1 = x + N2;
+      const word* y0 = y;
+      const word* y1 = y + N2;
+      word* z0 = z;
+      word* z1 = z + N;
+      // cmp0 orders x0 against x1, cmp1 orders y1 against y0 (note the swapped operand order)
+      const s32bit cmp0 = bigint_cmp(x0, N2, x1, N2);
+      const s32bit cmp1 = bigint_cmp(y1, N2, y0, N2);
+
+      clear_mem(workspace, 2*N);
+      // Only if both pairs differ: larger minus smaller of each pair is parked in z0/z1
+      if(cmp0 && cmp1)
+         {
+         if(cmp0 > 0)
+            bigint_sub3(z0, x0, N2, x1, N2);
+         else
+            bigint_sub3(z0, x1, N2, x0, N2);
+
+         if(cmp1 > 0)
+            bigint_sub3(z1, y1, N2, y0, N2);
+         else
+            bigint_sub3(z1, y0, N2, y1, N2);
+         // workspace[0..N) = product of the two differences; must run before z0/z1 are overwritten below
+         karatsuba_mul(workspace, z0, z1, N2, workspace+N);
+         }
+      // z0 = x0*y0 and z1 = x1*y1, each N words, with workspace+N as scratch
+      karatsuba_mul(z0, x0, y0, N2, workspace+N);
+      karatsuba_mul(z1, x1, y1, N2, workspace+N);
+
+      const u32bit blocks_of_8 = N - (N % 8);
+
+      word carry = 0;
+      // workspace[N..2N) = z0 + z1, eight words at a time then the remainder; carry-out kept in carry
+      for(u32bit j = 0; j != blocks_of_8; j += 8)
+         carry = word8_add3(workspace + N + j, z0 + j, z1 + j, carry);
+
+      for(u32bit j = blocks_of_8; j != N; ++j)
+         workspace[N + j] = word_add(z0[j], z1[j], &carry);
+      // z[N2..N2+N) += workspace[N..2N); carry-out kept in carry2
+      word carry2 = 0;
+
+      for(u32bit j = 0; j != blocks_of_8; j += 8)
+         carry2 = word8_add2(z + N2 + j, workspace + N + j, carry2);
+
+      for(u32bit j = blocks_of_8; j != N; ++j)
+         z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &carry2);
+      // Both carry-outs are folded into z[N+N2]; a further carry ripples upward until a word does not wrap to zero
+      z[N + N2] = word_add(z[N + N2], carry2, &carry);
+
+      if(carry)
+         for(u32bit j = 1; j != N2; ++j)
+            if(++z[N + N2 + j])
+               break;
+      // workspace[0..N) (all zero if the difference step was skipped) is added when the signs agree or one is zero, else subtracted
+      if((cmp0 == cmp1) || (cmp0 == 0) || (cmp1 == 0))
+         bigint_add2(z + N2, 2*N-N2, workspace, N);
+      else
+         bigint_sub2(z + N2, 2*N-N2, workspace, N);
+      }
+   }
+
+/*
+* Karatsuba Squaring Operation: square of the N-word x goes to z
+*/
+void karatsuba_sqr(word z[], const word x[], u32bit N, word workspace[])
+   {
+   if(N == 6)
+      bigint_comba_sqr6(z, x);
+   else if(N == 8)
+      bigint_comba_sqr8(z, x);
+   else if(N == 16)
+      bigint_comba_sqr16(z, x);
+   else if(N < BOTAN_KARAT_SQR_THRESHOLD || N % 2)
+      bigint_simple_sqr(z, x, N);
+   else
+      {
+      const u32bit N2 = N / 2;
+      // Low (x0) and high (x1) halves of N2 words; z0/z1 are the two N-word halves of z
+      const word* x0 = x;
+      const word* x1 = x + N2;
+      word* z0 = z;
+      word* z1 = z + N;
+
+      const s32bit cmp = bigint_cmp(x0, N2, x1, N2);
+
+      clear_mem(workspace, 2*N);
+      // Only if the halves differ: larger minus smaller is parked in z0, then squared into workspace[0..N)
+      if(cmp)
+         {
+         if(cmp > 0)
+            bigint_sub3(z0, x0, N2, x1, N2);
+         else
+            bigint_sub3(z0, x1, N2, x0, N2);
+
+         karatsuba_sqr(workspace, z0, N2, workspace+N);
+         }
+      // z0 = x0 squared and z1 = x1 squared, each N words, with workspace+N as scratch
+      karatsuba_sqr(z0, x0, N2, workspace+N);
+      karatsuba_sqr(z1, x1, N2, workspace+N);
+
+      const u32bit blocks_of_8 = N - (N % 8);
+
+      word carry = 0;
+      // workspace[N..2N) = z0 + z1, eight words at a time then the remainder; carry-out kept in carry
+      for(u32bit j = 0; j != blocks_of_8; j += 8)
+         carry = word8_add3(workspace + N + j, z0 + j, z1 + j, carry);
+
+      for(u32bit j = blocks_of_8; j != N; ++j)
+         workspace[N + j] = word_add(z0[j], z1[j], &carry);
+      // z[N2..N2+N) += workspace[N..2N); carry-out kept in carry2
+      word carry2 = 0;
+
+      for(u32bit j = 0; j != blocks_of_8; j += 8)
+         carry2 = word8_add2(z + N2 + j, workspace + N + j, carry2);
+
+      for(u32bit j = blocks_of_8; j != N; ++j)
+         z[N2 + j] = word_add(z[N2 + j], workspace[N + j], &carry2);
+      // Both carry-outs are folded into z[N+N2]; a further carry ripples upward until a word does not wrap to zero
+      z[N + N2] = word_add(z[N + N2], carry2, &carry);
+
+      if(carry)
+         for(u32bit j = 1; j != N2; ++j)
+            if(++z[N + N2 + j])
+               break;
+      // With equal halves workspace[0..N) is still all zero and is added; otherwise the squared difference is subtracted
+      if(cmp == 0)
+         bigint_add2(z + N2, 2*N-N2, workspace, N);
+      else
+         bigint_sub2(z + N2, 2*N-N2, workspace, N);
+      }
+   }
+
+/*
+* Pick a good size for the Karatsuba multiply (returns 0 if none is usable)
+*/
+u32bit karatsuba_size(u32bit z_size,
+                      u32bit x_size, u32bit x_sw,
+                      u32bit y_size, u32bit y_sw)
+   {
+   if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size)
+      return 0;
+   // An operand whose buffer is exactly its significant length must have even length
+   if(((x_size == x_sw) && (x_size % 2)) ||
+      ((y_size == y_sw) && (y_size % 2)))
+      return 0;
+   // Candidates run from the longer significant length up to the shorter buffer
+   const u32bit start = (x_sw > y_sw) ? x_sw : y_sw;
+   const u32bit end = (x_size < y_size) ? x_size : y_size;
+   // Single candidate: it must be even and, as in the loop below, 2*N words must fit in z
+   if(start == end)
+      {
+      if(start % 2 || 2*start > z_size)
+         return 0;
+      return start;
+      }
+   // Otherwise take the first even candidate, giving up once 2*N exceeds z_size
+   for(u32bit j = start; j <= end; ++j)
+      {
+      if(j % 2)
+         continue;
+
+      if(2*j > z_size)
+         return 0;
+      // Step up to j+2 (a multiple of 4) when that also fits both buffers and z
+      if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size)
+         {
+         if(j % 4 == 2 &&
+            (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size)
+            return j+2;
+         return j;
+         }
+      }
+
+   return 0;
+   }
+
+/*
+* Pick a good size for the Karatsuba squaring (returns 0 if none is usable)
+*/
+u32bit karatsuba_size(u32bit z_size, u32bit x_size, u32bit x_sw)
+   {
+   if(x_sw == x_size)
+      {
+      if(x_sw % 2 || 2*x_sw > z_size) // odd, or 2*N words would not fit in z
+         return 0;
+      return x_sw;
+      }
+   // Otherwise take the first even candidate in [x_sw, x_size], giving up once 2*N exceeds z_size
+   for(u32bit j = x_sw; j <= x_size; ++j)
+      {
+      if(j % 2)
+         continue;
+
+      if(2*j > z_size)
+         return 0;
+      // Step up to j+2 (a multiple of 4) when that also fits the buffer and z
+      if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size)
+         return j+2;
+      return j;
+      }
+
+   return 0;
+   }
+
+}
+
+/*
+* Multiplication Algorithm Dispatcher
+*/
+void bigint_mul(word z[], u32bit z_size, word workspace[],
+                const word x[], u32bit x_size, u32bit x_sw,
+                const word y[], u32bit y_size, u32bit y_sw)
+   {
+   // A single significant word on either side goes to bigint_linmul3
+   if(x_sw == 1)
+      {
+      bigint_linmul3(z, y, y_sw, x[0]);
+      return;
+      }
+
+   if(y_sw == 1)
+      {
+      bigint_linmul3(z, x, x_sw, y[0]);
+      return;
+      }
+
+   // The fixed-size kernels only depend on the longer significant length
+   // and on the shorter of the two buffers
+   const u32bit max_sw = (x_sw > y_sw) ? x_sw : y_sw;
+   const u32bit min_size = (x_size < y_size) ? x_size : y_size;
+
+   // Fixed-size Comba kernels for operands that fit in 4, 6, 8 or 16 words
+   if(max_sw <= 4 && min_size >= 4 && z_size >= 8)
+      bigint_comba_mul4(z, x, y);
+   else if(max_sw <= 6 && min_size >= 6 && z_size >= 12)
+      bigint_comba_mul6(z, x, y);
+   else if(max_sw <= 8 && min_size >= 8 && z_size >= 16)
+      bigint_comba_mul8(z, x, y);
+   else if(max_sw <= 16 && min_size >= 16 && z_size >= 32)
+      bigint_comba_mul16(z, x, y);
+   else
+      {
+      // Karatsuba needs scratch space, both operands at or above the
+      // threshold, and a usable block size; N stays 0 otherwise
+      const bool karatsuba_ok = workspace &&
+                                x_sw >= BOTAN_KARAT_MUL_THRESHOLD &&
+                                y_sw >= BOTAN_KARAT_MUL_THRESHOLD;
+
+      const u32bit N = karatsuba_ok ?
+         karatsuba_size(z_size, x_size, x_sw, y_size, y_sw) : 0;
+
+      if(N == 0)
+         bigint_simple_mul(z, x, x_sw, y, y_sw);
+      else
+         {
+         clear_mem(workspace, 2*N);
+         karatsuba_mul(z, x, y, N, workspace);
+         }
+      }
+   }
+
+/*
+* Squaring Algorithm Dispatcher
+*
+* Note: the Karatsuba threshold is tested against x_size, not x_sw
+*/
+void bigint_sqr(word z[], u32bit z_size, word workspace[],
+                const word x[], u32bit x_size, u32bit x_sw)
+   {
+   // A single significant word goes to bigint_linmul3 with x[0] as the factor
+   if(x_sw == 1)
+      {
+      bigint_linmul3(z, x, x_sw, x[0]);
+      return;
+      }
+
+   // Fixed-size Comba kernels for operands that fit in 4, 6, 8 or 16 words
+   if(x_sw <= 4 && x_size >= 4 && z_size >= 8)
+      bigint_comba_sqr4(z, x);
+   else if(x_sw <= 6 && x_size >= 6 && z_size >= 12)
+      bigint_comba_sqr6(z, x);
+   else if(x_sw <= 8 && x_size >= 8 && z_size >= 16)
+      bigint_comba_sqr8(z, x);
+   else if(x_sw <= 16 && x_size >= 16 && z_size >= 32)
+      bigint_comba_sqr16(z, x);
+   else
+      {
+      // Karatsuba needs scratch space, a buffer at or above the threshold
+      // and a usable block size; N stays 0 otherwise
+      const bool karatsuba_ok = workspace &&
+                                x_size >= BOTAN_KARAT_SQR_THRESHOLD;
+
+      const u32bit N = karatsuba_ok ?
+         karatsuba_size(z_size, x_size, x_sw) : 0;
+
+      if(N == 0)
+         bigint_simple_sqr(z, x, x_sw);
+      else
+         {
+         clear_mem(workspace, 2*N);
+         karatsuba_sqr(z, x, N, workspace);
+         }
+      }
+   }
+
+}
diff --git a/src/math/mp/mp_misc.cpp b/src/math/mp/mp_misc.cpp
new file mode 100644
index 000000000..77b8e6f51
--- /dev/null
+++ b/src/math/mp/mp_misc.cpp
@@ -0,0 +1,102 @@
+/*
+* MP Misc Functions
+* (C) 1999-2008 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#include <botan/internal/mp_core.h>
+#include <botan/internal/mp_asm.h>
+
+namespace Botan {
+
+extern "C" {
+
+/*
+* Core Division Operation
+*
+* Returns 1 if the three-word value q * (y2,y1) is strictly greater than
+* (x3,x2,x1), else 0; each tuple is written most significant word first
+*/
+u32bit bigint_divcore(word q, word y2, word y1,
+                      word x3, word x2, word x1)
+   {
+   // Low word first, so its high part is carried into the second multiply
+   word prod_hi = 0;
+   const word prod_lo = word_madd2(q, y1, &prod_hi);
+   const word prod_mid = word_madd2(q, y2, &prod_hi);
+
+   // The most significant differing word decides the comparison
+   if(prod_hi != x3)
+      return (prod_hi > x3) ? 1 : 0;
+   if(prod_mid != x2)
+      return (prod_mid > x2) ? 1 : 0;
+
+   return (prod_lo > x1) ? 1 : 0;
+   }
+
+/*
+* Compare two MP integers
+*
+* Returns 1 if x > y, -1 if x < y and 0 if they are equal; the operands may
+* have different lengths (words beyond the shorter one count as zero)
+*/
+s32bit bigint_cmp(const word x[], u32bit x_size,
+                  const word y[], u32bit y_size)
+   {
+   // Make x the longer operand by swapping and flipping the sign
+   if(x_size < y_size)
+      return (-bigint_cmp(y, y_size, x, x_size));
+
+   // Any non-zero word of x above y's length makes x the larger value
+   for(u32bit i = x_size; i > y_size; --i)
+      if(x[i-1])
+         return 1;
+
+   // Same length from here on: scan from the most significant word down
+   for(u32bit i = y_size; i > 0; --i)
+      if(x[i-1] != y[i-1])
+         return (x[i-1] > y[i-1]) ? 1 : -1;
+
+   return 0;
+   }
+
+/*
+* Do a 2-word/1-word Division (bit-at-a-time, n1 is first reduced mod d)
+*/
+word bigint_divop(word n1, word n0, word d)
+   {
+   word remainder = n1 % d;
+   word quotient = 0;
+
+   // Bring down the bits of n0 from the most significant to the least
+   for(u32bit bit = MP_WORD_BITS; bit != 0; --bit)
+      {
+      // The bit about to be shifted out of the remainder
+      const bool overflow = (remainder & MP_WORD_TOP_BIT) != 0;
+
+      remainder = (remainder << 1) | ((n0 >> (bit-1)) & 1);
+      quotient <<= 1;
+      if(overflow || remainder >= d)
+         {
+         remainder -= d;
+         quotient |= 1;
+         }
+      }
+   return quotient;
+   }
+
+/*
+* Do a 2-word/1-word Modulo: n0 minus the low word of quotient*d
+*/
+word bigint_modop(word n1, word n0, word d)
+   {
+   const word quotient = bigint_divop(n1, n0, d);
+   word discarded_hi = 0; // high word of the product is not needed
+   const word low_product = word_madd2(quotient, d, &discarded_hi);
+   return (n0 - low_product);
+   }
+
+}
+
+}
diff --git a/src/math/mp/mp_msvc64/info.txt b/src/math/mp/mp_msvc64/info.txt
new file mode 100644
index 000000000..56ae05927
--- /dev/null
+++ b/src/math/mp/mp_msvc64/info.txt
@@ -0,0 +1,17 @@
+load_on dep
+
+mp_bits 64
+
+<header:internal>
+mp_asm.h
+mp_generic:mp_asmi.h
+</header:internal>
+
+<arch>
+amd64
+ia64
+</arch>
+
+<cc>
+msvc
+</cc>
diff --git a/src/math/mp/mp_msvc64/mp_asm.h b/src/math/mp/mp_msvc64/mp_asm.h
new file mode 100644
index 000000000..8e4535c35
--- /dev/null
+++ b/src/math/mp/mp_msvc64/mp_asm.h
@@ -0,0 +1,61 @@
+/*
+* Multiply-Add for 64-bit MSVC
+* (C) 2010 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_MP_ASM_H__
+#define BOTAN_MP_ASM_H__
+
+#include <botan/mp_types.h>
+#include <intrin.h>
+
+#if (BOTAN_MP_WORD_BITS != 64)
+   #error The mp_msvc64 module requires that BOTAN_MP_WORD_BITS == 64
+#endif
+
+#pragma intrinsic(_umul128)
+
+namespace Botan {
+
+extern "C" {
+
+/*
+* Word Multiply: returns the low word of a*b + *c, leaves the high word in *c
+*/
+inline word word_madd2(word a, word b, word* c)
+   {
+   const word addend = *c;
+
+   word high = 0;
+   const word low = _umul128(a, b, &high) + addend;
+
+   // An unsigned sum smaller than its addend means the addition wrapped
+   *c = high + (low < addend);
+   return low;
+   }
+
+/*
+* Word Multiply/Add: returns the low word of a*b + c + *d, high word in *d
+*/
+inline word word_madd3(word a, word b, word c, word* d)
+   {
+   const word carry_in = *d;
+   word high = 0;
+   word low = _umul128(a, b, &high);
+
+   // After each addition, a sum smaller than the addend means it wrapped
+   low += c;
+   high += (low < c);
+   low += carry_in;
+   high += (low < carry_in);
+   *d = high;
+   return low;
+   }
+
+}
+
+}
+
+#endif
diff --git a/src/math/mp/mp_shift.cpp b/src/math/mp/mp_shift.cpp
new file mode 100644
index 000000000..f1d609bfb
--- /dev/null
+++ b/src/math/mp/mp_shift.cpp
@@ -0,0 +1,138 @@
+/*
+* MP Shift Algorithms
+* (C) 1999-2007 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#include <botan/internal/mp_core.h>
+#include <botan/mem_ops.h>
+
+namespace Botan {
+
+extern "C" {
+
+/*
+* Single Operand Left Shift (in place; writes up to x[x_size + word_shift])
+*/
+void bigint_shl1(word x[], u32bit x_size, u32bit word_shift, u32bit bit_shift)
+   {
+   if(word_shift)
+      {
+      for(u32bit i = x_size; i != 0; --i) // top word first
+         x[(i - 1) + word_shift] = x[i - 1];
+      clear_mem(x, word_shift);
+      }
+   if(bit_shift)
+      {
+      word* shifted = x + word_shift;
+      word spill = 0; // bits pushed out of the word below
+      for(u32bit i = 0; i != x_size + 1; ++i)
+         {
+         const word w = shifted[i];
+         shifted[i] = (w << bit_shift) | spill;
+         spill = (w >> (MP_WORD_BITS - bit_shift));
+         }
+      }
+   }
+
+/*
+* Single Operand Right Shift
+*/
+void bigint_shr1(word x[], u32bit x_size, u32bit word_shift, u32bit bit_shift)
+   {
+   if(x_size < word_shift)
+      {
+      clear_mem(x, x_size);
+      return;
+      }
+
+   const u32bit kept = x_size - word_shift;
+
+   if(word_shift)
+      {
+      copy_mem(x, x + word_shift, kept);
+      clear_mem(x + kept, word_shift);
+      }
+
+   if(bit_shift == 0)
+      return;
+
+   const u32bit up_shift = MP_WORD_BITS - bit_shift;
+   u32bit remaining = kept;
+   word spill = 0;
+
+   // Top word first, four per pass; spill carries bits down to the next word
+   while(remaining >= 4)
+      {
+      word* quad = x + (remaining - 4);
+      const word w3 = quad[3];
+      quad[3] = (w3 >> bit_shift) | spill;
+      spill = (w3 << up_shift);
+
+      const word w2 = quad[2];
+      quad[2] = (w2 >> bit_shift) | spill;
+      spill = (w2 << up_shift);
+
+      const word w1 = quad[1];
+      quad[1] = (w1 >> bit_shift) | spill;
+      spill = (w1 << up_shift);
+
+      const word w0 = quad[0];
+      quad[0] = (w0 >> bit_shift) | spill;
+      spill = (w0 << up_shift);
+      remaining -= 4;
+      }
+   for(; remaining != 0; --remaining)
+      {
+      const word w = x[remaining-1];
+      x[remaining-1] = (w >> bit_shift) | spill;
+      spill = (w << up_shift);
+      }
+   }
+
+/*
+* Two Operand Left Shift
+*/
+void bigint_shl2(word y[], const word x[], u32bit x_size,
+                 u32bit word_shift, u32bit bit_shift)
+   {
+   word* dest = y + word_shift;
+   for(u32bit i = 0; i != x_size; ++i)
+      dest[i] = x[i];
+   if(bit_shift == 0)
+      return;
+   word spill = 0; // bits pushed out of the word below
+   for(u32bit i = 0; i != x_size + 1; ++i)
+      {
+      const word w = dest[i];
+      dest[i] = (w << bit_shift) | spill;
+      spill = (w >> (MP_WORD_BITS - bit_shift));
+      }
+   }
+
+/*
+* Two Operand Right Shift
+*/
+void bigint_shr2(word y[], const word x[], u32bit x_size,
+                 u32bit word_shift, u32bit bit_shift)
+   {
+   if(x_size < word_shift) return;
+   const u32bit kept = x_size - word_shift;
+   const word* src = x + word_shift;
+   for(u32bit i = 0; i != kept; ++i)
+      y[i] = src[i];
+   if(bit_shift == 0)
+      return;
+   word spill = 0; // bits pushed out of the word above
+   for(u32bit i = kept; i != 0; --i)
+      {
+      const word w = y[i-1];
+      y[i-1] = (w >> bit_shift) | spill;
+      spill = (w << (MP_WORD_BITS - bit_shift));
+      }
+   }
+
+}
+
+}
diff --git a/src/math/mp/mp_types.h b/src/math/mp/mp_types.h
new file mode 100644
index 000000000..1648713ed
--- /dev/null
+++ b/src/math/mp/mp_types.h
@@ -0,0 +1,33 @@
+/*
+* Low Level MPI Types
+* (C) 1999-2007 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#ifndef BOTAN_MPI_TYPES_H__
+#define BOTAN_MPI_TYPES_H__
+
+#include <botan/types.h>
+
+namespace Botan {
+
+#if (BOTAN_MP_WORD_BITS == 8)
+  typedef byte word;   // chosen when the build is configured for 8-bit words
+#elif (BOTAN_MP_WORD_BITS == 16)
+  typedef u16bit word; // chosen when the build is configured for 16-bit words
+#elif (BOTAN_MP_WORD_BITS == 32)
+  typedef u32bit word; // chosen when the build is configured for 32-bit words
+#elif (BOTAN_MP_WORD_BITS == 64)
+  typedef u64bit word; // chosen when the build is configured for 64-bit words
+#else
+  #error BOTAN_MP_WORD_BITS must be 8, 16, 32, or 64
+#endif
+// Word-wide constants derived from whichever typedef was selected above
+const word MP_WORD_MASK = ~static_cast<word>(0); // every bit of a word set
+const word MP_WORD_TOP_BIT = static_cast<word>(1) << (8*sizeof(word) - 1); // 1 shifted to bit position 8*sizeof(word)-1
+const word MP_WORD_MAX = MP_WORD_MASK; // same value as MP_WORD_MASK
+
+}
+
+#endif
diff --git a/src/math/mp/mulop_generic/info.txt b/src/math/mp/mulop_generic/info.txt
new file mode 100644
index 000000000..548d0f44b
--- /dev/null
+++ b/src/math/mp/mulop_generic/info.txt
@@ -0,0 +1,5 @@
+load_on dep
+
+<source>
+mp_mulop.cpp
+</source>
diff --git a/src/math/mp/mulop_generic/mp_mulop.cpp b/src/math/mp/mulop_generic/mp_mulop.cpp
new file mode 100644
index 000000000..33ee2af32
--- /dev/null
+++ b/src/math/mp/mulop_generic/mp_mulop.cpp
@@ -0,0 +1,77 @@
+/*
+* Simple O(N^2) Multiplication and Squaring
+* (C) 1999-2008 Jack Lloyd
+*
+* Distributed under the terms of the Botan license
+*/
+
+#include <botan/internal/mp_asm.h>
+#include <botan/internal/mp_asmi.h>
+#include <botan/internal/mp_core.h>
+#include <botan/mem_ops.h>
+
+namespace Botan {
+
+extern "C" {
+
+/*
+* Simple O(N^2) Multiplication
+*/
+void bigint_simple_mul(word z[], const word x[], u32bit x_size,
+                                 const word y[], u32bit y_size)
+   {
+   // Portion of x that can be handled eight words at a time
+   const u32bit blocked = x_size - (x_size % 8);
+
+   clear_mem(z, x_size + y_size);
+
+   for(u32bit row = 0; row != y_size; ++row)
+      {
+      word* z_row = z + row;
+      const word multiplier = y[row];
+      word carry = 0;
+      u32bit col = 0;
+
+      for(; col != blocked; col += 8)
+         carry = word8_madd3(z_row + col, x + col, multiplier, carry);
+      for(; col != x_size; ++col)
+         z_row[col] = word_madd3(x[col], multiplier, z_row[col], &carry);
+      z_row[x_size] = carry;
+      }
+   }
+
+/*
+* Simple O(N^2) Squaring
+
+This is exactly the same algorithm as bigint_simple_mul,
+however because C/C++ compilers suck at alias analysis it
+is good to have the version where the compiler knows
+that x == y
+
+The Handbook of Applied Cryptography, chapter 14 (Algorithm 14.16),
+gives a squaring method needing about half as many word multiplies
+*/
+void bigint_simple_sqr(word z[], const word x[], u32bit x_size)
+   {
+   // Portion of x that can be handled eight words at a time
+   const u32bit blocked = x_size - (x_size % 8);
+
+   clear_mem(z, 2*x_size);
+
+   for(u32bit row = 0; row != x_size; ++row)
+      {
+      word* z_row = z + row;
+      const word multiplier = x[row];
+      word carry = 0;
+      u32bit col = 0;
+      for(; col != blocked; col += 8)
+         carry = word8_madd3(z_row + col, x + col, multiplier, carry);
+      for(; col != x_size; ++col)
+         z_row[col] = word_madd3(x[col], multiplier, z_row[col], &carry);
+      z_row[x_size] = carry;
+      }
+   }
+
+}
+
+}
-- 
cgit v1.2.3