about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author lloyd <[email protected]> 2010-09-24 21:48:34 +0000
committer lloyd <[email protected]> 2010-09-24 21:48:34 +0000
commit ad279eccb6ae305e631a0bb9943957b02922a858 (patch)
tree e4bbec1ffc49d61b4f84dbf833c582e3561a8801 /src
parent 27126e7bf1ac37a9aebd7a03db86669759962289 (diff)
Delete obsolete asm versions
Diffstat (limited to 'src')
-rw-r--r-- src/math/bigint/monty_amd64/info.txt 30
-rw-r--r-- src/math/bigint/monty_amd64/mp_monty.S 399
-rw-r--r-- src/math/bigint/mulop_amd64/info.txt 30
-rw-r--r-- src/math/bigint/mulop_amd64/mp_mulop.cpp 96
-rw-r--r-- src/math/bigint/mulop_amd64/mp_mulop_amd64.S 130
-rw-r--r-- src/math/bigint/mulop_ia32/info.txt 32
-rw-r--r-- src/math/bigint/mulop_ia32/mp_mulop.S 64
7 files changed, 0 insertions, 781 deletions
diff --git a/src/math/bigint/monty_amd64/info.txt b/src/math/bigint/monty_amd64/info.txt
deleted file mode 100644
index bb16dbe8f..000000000
--- a/src/math/bigint/monty_amd64/info.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-mp_bits 64
-
-load_on never
-
-<source>
-mp_monty.S
-</source>
-
-<arch>
-amd64
-</arch>
-
-<cc>
-gcc
-icc
-</cc>
-
-# ELF systems
-<os>
-linux
-freebsd
-dragonfly
-netbsd
-openbsd
-solaris
-</os>
-
-<requires>
-asm_amd64
-</requires>
diff --git a/src/math/bigint/monty_amd64/mp_monty.S b/src/math/bigint/monty_amd64/mp_monty.S
deleted file mode 100644
index 9eab9f766..000000000
--- a/src/math/bigint/monty_amd64/mp_monty.S
+++ /dev/null
@@ -1,399 +0,0 @@
-/*
-* Montgomery Reduction in x86-64 assembler
-* (C) 2008 Jack Lloyd
-*
-* Distributed under the terms of the Botan license
-*/
-
-#include <botan/internal/asm_macr_amd64.h>
-
-START_LISTING(mp_monty.S)
-
-START_FUNCTION(bigint_monty_redc)
- pushq %r15 #
- pushq %r14 #
- pushq %r13 #
- pushq %r12 #
- pushq %rbp #
- pushq %rbx #
-
- movq %rdi, %r14 # z
- movq %rdx, %r12 # x
- movl %esi, %ebp # z_size
-
- xorl %esi, %esi # j.76
- movq %r8, -16(%rsp) # u, u
- movl %ecx, %ebx # x_size, x_size
- movl %ecx, %r8d # x_size, blocks_of_8
- andl $-8, %r8d #, blocks_of_8
- testl %ecx, %ecx # x_size
- je .L3 #,
- mov %ecx, %eax # x_size, pretmp.71
- leal 1(%rbx), %r15d #, k.73
- salq $3, %rax #,
- xorl %r13d, %r13d # j
- movq %rax, -8(%rsp) #, pretmp.21
- .p2align 4,,10
- .p2align 3
-.L11:
- mov %r13d, %eax # j, j
- movq -16(%rsp), %rdi # u, y
- leaq (%r14,%rax,8), %r11 #, z_j
- xorl %r9d, %r9d # i
- imulq (%r11), %rdi #* z_j, y
- xorl %r10d, %r10d # carry
- testl %r8d, %r8d # blocks_of_8
- je .L7 #,
- .p2align 4,,10
- .p2align 3
-.LOOP_MUL_ADD:
- mov %r9d, %ecx # i, i
- addl $8, %r9d #, i
- salq $3, %rcx #, D.2315
- leaq (%r11,%rcx), %rsi #, tmp130
- leaq (%r12,%rcx), %rcx #, tmp131
-
- movq 8*0(%rcx), %rax
- mulq %rdi # y
- addq %r10, %rax # carry
- adcq $0,%rdx
- addq 8*0(%rsi), %rax
- adcq $0,%rdx
- movq %rdx,%r10 # carry
- movq %rax, 8*0 (%rsi)
-
- movq 8*1(%rcx), %rax
- mulq %rdi # y
- addq %r10, %rax # carry
- adcq $0,%rdx
- addq 8*1(%rsi), %rax
- adcq $0,%rdx
- movq %rdx,%r10 # carry
- movq %rax, 8*1 (%rsi)
-
- movq 8*2(%rcx), %rax
- mulq %rdi # y
- addq %r10, %rax # carry
- adcq $0,%rdx
- addq 8*2(%rsi), %rax
- adcq $0,%rdx
- movq %rdx,%r10 # carry
- movq %rax, 8*2 (%rsi)
-
- movq 8*3(%rcx), %rax
- mulq %rdi # y
- addq %r10, %rax # carry
- adcq $0,%rdx
- addq 8*3(%rsi), %rax
- adcq $0,%rdx
- movq %rdx,%r10 # carry
- movq %rax, 8*3 (%rsi)
-
- movq 8*4(%rcx), %rax
- mulq %rdi # y
- addq %r10, %rax # carry
- adcq $0,%rdx
- addq 8*4(%rsi), %rax
- adcq $0,%rdx
- movq %rdx,%r10 # carry
- movq %rax, 8*4 (%rsi)
-
- movq 8*5(%rcx), %rax
- mulq %rdi # y
- addq %r10, %rax # carry
- adcq $0,%rdx
- addq 8*5(%rsi), %rax
- adcq $0,%rdx
- movq %rdx,%r10 # carry
- movq %rax, 8*5 (%rsi)
-
- movq 8*6(%rcx), %rax
- mulq %rdi # y
- addq %r10, %rax # carry
- adcq $0,%rdx
- addq 8*6(%rsi), %rax
- adcq $0,%rdx
- movq %rdx,%r10 # carry
- movq %rax, 8*6 (%rsi)
-
- movq 8*7(%rcx), %rax
- mulq %rdi # y
- addq %r10, %rax # carry
- adcq $0,%rdx
- addq 8*7(%rsi), %rax
- adcq $0,%rdx
- movq %rdx,%r10 # carry
- movq %rax, 8*7 (%rsi)
-
- cmpl %r9d, %r8d # i, blocks_of_8
- jne .LOOP_MUL_ADD #,
- cmpl %r8d, %ebx # blocks_of_8, x_size
- je .L8 #,
-.L7:
- movl %r8d, %esi # blocks_of_8, i
- .p2align 4,,10
- .p2align 3
-.L5:
- mov %esi, %eax # i, i
- movq %rdi, %rcx # y, b
- leaq (%r11, %rax,8), %r9 #, D.2325
- incl %esi # i
- movq (%r12, %rax,8), %rax #* x, tmp133
-
- mulq %rcx # b
- addq (%r9), %rax #* D.2325, a
- adcq $0,%rdx #
- addq %r10, %rax # carry, a
- adcq $0,%rdx #
-
- cmpl %esi, %ebx # i, x_size
- movq %rdx, %r10 #, carry
- movq %rax, (%r9) # a,* D.2325
- jne .L5 #,
-.L8:
- movq -8(%rsp), %rdx # pretmp.21,
- leaq (%r11,%rdx), %rax #, D.2332
- movq (%rax), %rcx #* D.2332, D.2333
- leaq (%r10,%rcx), %rdx #, z_sum
- movq %rdx, (%rax) # z_sum,* D.2332
- cmpq %rdx, %rcx # z_sum, D.2333
- jbe .L9 #,
- cmpl %ebp, %r15d # z_size, k.73
- je .L9 #,
- movl %r15d, %ecx # k.73, k
- jmp .L10 #
- .p2align 4,,10
- .p2align 3
-.L31:
- incl %ecx # k
- cmpl %ecx, %ebp # k, z_size
- .p2align 4,,4
- .p2align 3
- je .L9 #,
-.L10:
- mov %ecx, %edx # k, k
- leaq (%r11,%rdx,8), %rdx #, D.2342
- movq (%rdx), %rax #* D.2342, tmp136
- incq %rax # D.2344
- movq %rax, (%rdx) # D.2344,* D.2342
- testq %rax, %rax # D.2344
- je .L31 #,
-.L9:
- incl %r13d # j
- decl %ebp # z_size
- cmpl %r13d, %ebx # j, x_size
- jne .L11 #,
- movl %ebx, %esi # x_size, j.76
-.L3:
- leal (%rbx,%rbx), %eax #, tmp137
- mov %eax, %eax
- leaq (%r14, %rax,8), %rdi #, D.2349
- cmpq $0, (%rdi) #,* D.2349
- jne .L12 #,
- testl %ebx, %ebx # x_size
- je .L12 #,
- leal -1(%rbx), %ecx #, j
- leal (%rsi,%rcx), %edx #, tmp141
- mov %ecx, %eax # j, j
- movq (%r14,%rdx,8), %rbp #* z,
- cmpq %rbp, (%r12, %rax,8) #,* x
- jb .L12 #,
- ja .L_EXIT #,
- leal -2(%rsi,%rbx), %edx #, ivtmp.45
- jmp .L14 #
- .p2align 4,,10
- .p2align 3
-.L15:
- mov %edx, %eax # ivtmp.45, ivtmp.45
- decl %ecx # j
- movq (%r14, %rax,8), %rsi #* z, D.2360
- mov %ecx, %eax # j, j
- movq (%r12, %rax,8), %rax #* x, temp.68
- cmpq %rax, %rsi
- ja .L12 #,
- decl %edx # ivtmp.45
- cmpq %rax, %rsi
- jb .L_EXIT #,
-.L14:
- testl %ecx, %ecx # j
- jne .L15 #,
-.L12:
- xorl %ecx, %ecx # j
- xorl %r10d, %r10d # carry
- mov %ebx, %esi # x_size, pretmp.19
- testl %r8d, %r8d # blocks_of_8
- je .L17 #,
- .p2align 4,,10
- .p2align 3
-.L22:
- mov %ecx, %edx # j, D.2375
- addl $8, %ecx #, j
- leaq (%rdx,%rsi), %rax #, tmp146
- leaq (%r12,%rdx,8), %rdx #, tmp150
- leaq (%r14, %rax,8), %rax #, tmp148
-
- rorq %r10 # carry
-
- movq 8*0(%rdx), %r10
- sbbq %r10, 8*0(%rax)
-
- movq 8*1(%rdx), %r10
- sbbq %r10, 8*1(%rax)
-
- movq 8*2(%rdx), %r10
- sbbq %r10, 8*2(%rax)
-
- movq 8*3(%rdx), %r10
- sbbq %r10, 8*3(%rax)
-
- movq 8*4(%rdx), %r10
- sbbq %r10, 8*4(%rax)
-
- movq 8*5(%rdx), %r10
- sbbq %r10, 8*5(%rax)
-
- movq 8*6(%rdx), %r10
- sbbq %r10, 8*6(%rax)
-
- movq 8*7(%rdx), %r10
- sbbq %r10, 8*7(%rax)
-
- sbbq %r10,%r10 # carry
- negq %r10 # carry
-
- cmpl %ecx, %r8d # j, blocks_of_8
- jne .L22 #,
-.L17:
- cmpl %r8d, %ebx # blocks_of_8, x_size
- je .L19 #,
- leal (%r8,%rbx), %r9d #, ivtmp.33
- movl %r8d, %esi # blocks_of_8, j
- .p2align 4,,10
- .p2align 3
-.L20:
- mov %r9d, %eax # ivtmp.33, ivtmp.33
- mov %esi, %ecx # j, j
- leaq (%r14, %rax,8), %rax #, D.2387
- incl %esi # j
- movq (%rax), %rdx #* D.2387, tmp153
- incl %r9d # ivtmp.33
-
- rorq %r10 # carry
- sbbq (%r12,%rcx,8),%rdx #* x, x
- sbbq %r10,%r10 # carry
- negq %r10 # carry
-
- cmpl %esi, %ebx # j, x_size
- movq %rdx, (%rax) # x,* D.2387
- jne .L20 #,
-.L19:
- testq %r10, %r10 # carry
- je .L_EXIT #,
- decq (%rdi) #* D.2349
-.L_EXIT:
- popq %rbx #
- popq %rbp #
- popq %r12 #
- popq %r13 #
- popq %r14 #
- popq %r15 #
-END_FUNCTION(bigint_monty_redc)
-
-
-#if 0
- #define Z_ARR ARG_1 // rdi
-#define Z_SIZE ARG_2_32 // esi
-// X_ARR is ARG_3 == rdx, moved b/c needed for multiply
-#define X_SIZE ARG_4_32 // ecx
-#define U ARG_5 // r8
-
-/*
- We need all arguments for a while (we can reuse U eventually)
- So only temp registers are
- TEMP_1 %r10
- TEMP_2 %r11
- TEMP_3 = ARG_6 = %r9
- void return, so also
- R0 %rax (aka TEMP_9)
- is free (but needed for multiply)
-
- Can push:
- %rbx (base pointer, callee saved)
- %rbp (frame pointer, callee saved)
- %r12-%r15 (callee saved)
-
- Can push base/frame pointers since this is a leaf function
- and does not reference any data.
-*/
-
- push %r12
- push %r13
- push %r14
- push %r15
-
-#define LOOP_CTR_I %r12
-#define LOOP_CTR_J %r13
-
-#define CARRY TEMP_1
-#define Z_WORD TEMP_2
-#define X_ARR TEMP_3
-#define MUL_LO %rax
-#define MUL_HI %rdx
-
- ASSIGN(X_ARR, ARG_3)
-
- /*
- ZEROIZE(CARRY)
-
- ASSIGN(LOOP_CTR, X_SIZE)
-
- JUMP_IF_ZERO(LOOP_CTR, .L_MULADD_DONE)
- JUMP_IF_LT(LOOP_CTR, 8, .LOOP_MULADD1)
-
-#define MULADD_OP(N) \
- ASSIGN(MUL_LO, ARRAY8(X_ARR, N)) ; \
- ASSIGN(Z_WORD, ARRAY8(Z_ARR, N)) ; \
- MUL(Y) ; \
- ADD(Z_WORD, CARRY) ; \
- ASSIGN(CARRY, MUL_HI) ; \
- ADD_LAST_CARRY(CARRY) ; \
- ADD(Z_WORD, MUL_LO) ; \
- ADD_LAST_CARRY(CARRY) ; \
- ASSIGN(ARRAY8(Z_ARR, N), Z_WORD)
-
-ALIGN
-.LOOP_MULADD8:
- MULADD_OP(0)
- MULADD_OP(1)
- MULADD_OP(2)
- MULADD_OP(3)
- MULADD_OP(4)
- MULADD_OP(5)
- MULADD_OP(6)
- MULADD_OP(7)
-
- SUB_IMM(LOOP_CTR, 8)
- ADD_IMM(Z_ARR, 64)
- ADD_IMM(X_ARR, 64)
- cmp IMM(8), LOOP_CTR
- jge .LOOP_MULADD8
-
- JUMP_IF_ZERO(LOOP_CTR, .L_MULADD_DONE)
-
-ALIGN
-.LOOP_MULADD1:
- MULADD_OP(0)
-
- SUB_IMM(LOOP_CTR, 1)
- ADD_IMM(Z_ARR, 8)
- ADD_IMM(X_ARR, 8)
-
- cmp IMM(0), LOOP_CTR
- jne .LOOP_MULADD1
-*/
-
- pop %r15
- pop %r14
- pop %r13
- pop %r12
-#endif
diff --git a/src/math/bigint/mulop_amd64/info.txt b/src/math/bigint/mulop_amd64/info.txt
deleted file mode 100644
index 4a82ff5be..000000000
--- a/src/math/bigint/mulop_amd64/info.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-mp_bits 64
-
-load_on never
-
-<source>
-mp_mulop_amd64.S
-</source>
-
-<arch>
-amd64
-</arch>
-
-<cc>
-gcc
-icc
-</cc>
-
-# ELF systems
-<os>
-linux
-freebsd
-dragonfly
-netbsd
-openbsd
-solaris
-</os>
-
-<requires>
-asm_amd64
-</requires>
diff --git a/src/math/bigint/mulop_amd64/mp_mulop.cpp b/src/math/bigint/mulop_amd64/mp_mulop.cpp
deleted file mode 100644
index e2f38a104..000000000
--- a/src/math/bigint/mulop_amd64/mp_mulop.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
-* Simple O(N^2) Multiplication and Squaring
-* (C) 1999-2008 Jack Lloyd
-*
-* Distributed under the terms of the Botan license
-*/
-
-#include <botan/internal/mp_asm.h>
-#include <botan/internal/mp_asmi.h>
-#include <botan/internal/mp_core.h>
-#include <botan/mem_ops.h>
-
-namespace Botan {
-
-extern "C" {
-
-/*
-* Simple O(N^2) Multiplication
-*/
-void bigint_simple_mul(word z[], const word x[], u32bit x_size,
- const word y[], u32bit y_size)
- {
- const u32bit blocks = x_size - (x_size % 8);
-
- clear_mem(z, x_size + y_size);
-
- for(u32bit i = 0; i != y_size; ++i)
- {
- word carry = 0;
-
- for(u32bit j = 0; j != blocks; j += 8)
- carry = word8_madd3(z + i + j, x + j, y[i], carry);
-
- for(u32bit j = blocks; j != x_size; ++j)
- z[i+j] = word_madd3(x[j], y[i], z[i+j], &carry);
-
- z[x_size+i] = carry;
- }
- }
-
-inline word word_sqr(word x,
-
-/*
-* Simple O(N^2) Squaring
-
-This is exactly the same algorithm as bigint_simple_mul,
-however because C/C++ compilers suck at alias analysis it
-is good to have the version where the compiler knows
-that x == y
-*/
-void bigint_simple_sqr(word z[], const word x[], u32bit x_size)
- {
- clear_mem(z, 2*x_size);
-
- for(u32bit i = 0; i != x_size; ++i)
- {
- const word x_i = x[i];
-
- word carry = z[2*i];
- z[2*i] = word_madd2(x_i, x_i, z[2*i], &carry);
-
- for(u32bit j = i; j != x_size; ++j)
- {
- // z[i+j] = z[i+j] + 2 * x[j] * x_i + carry;
-
- /*
- load z[i+j] into register
- load x[j] into %hi
- mulq %[x_i] -> x[i] * x[j] -> %lo:%hi
- shlq %lo, $1
-
- // put carry bit (cf) from %lo into %temp
- xorl %temp
- adcq $0, %temp
-
- // high bit of lo now in cf
- shl %hi, $1
- // add in lowest bit from %lo
- orl %temp, %hi
-
- addq %[c], %[lo]
- adcq $0, %[hi]
- addq %[z_ij], %[lo]
- adcq $0, %[hi]
-
- */
-
- }
-
- z[x_size+i] = carry;
- }
- }
-
-}
-
-}
diff --git a/src/math/bigint/mulop_amd64/mp_mulop_amd64.S b/src/math/bigint/mulop_amd64/mp_mulop_amd64.S
deleted file mode 100644
index d0bb3325f..000000000
--- a/src/math/bigint/mulop_amd64/mp_mulop_amd64.S
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
-* Simple O(N^2) Multiplication and Squaring
-* (C) 1999-2008 Jack Lloyd
-*
-* Distributed under the terms of the Botan license
-*/
-
-#include <botan/internal/asm_macr_amd64.h>
-
-START_LISTING(mp_mulop.S)
-
-#if 0
-void bigint_simple_sqr(word z[], const word x[], u32bit x_size)
- {
- const u32bit blocks = x_size - (x_size % 8);
-
- clear_mem(z, 2*x_size);
-
- for(u32bit i = 0; i != x_size; ++i)
- {
- word carry = 0;
-
- /*
- for(u32bit j = 0; j != blocks; j += 8)
- carry = word8_madd3(z + i + j, x + j, x[i], carry);
-
- for(u32bit j = blocks; j != x_size; ++j)
- z[i+j] = word_madd3(x[j], x[i], z[i+j], &carry);
- */
-
-
- for(u32bit j = 0; j != x_size; ++j)
- z[i+j] = word_madd3(x[j], x[i], z[i+j], &carry);
-
- for(u32bit j = 0; j != x_size; ++j)
- {
- dword z = (dword)a * b + c + *d;
- *d = (word)(z >> BOTAN_MP_WORD_BITS);
- return (word)z;
- }
-
-
-
- z[i+j] = word_madd3(x[j], x[i], z[i+j], &carry);
-
- }
-
-
-
- z[x_size+i] = carry;
- }
- }
-
-#endif
-
-START_FUNCTION(bigint_simple_sqr)
-
-#define Z_ARR ARG_1
-#define X_ARR ARG_2
-//#define X_SIZE ARG_3_32
-
-#define CARRY TEMP_1
-#define Z_WORD TEMP_2
-#define LOOP_I TEMP_3
-#define LOOP_J TEMP_4
-#define X_SIZE TEMP_5
-#define MUL_LO %rax
-// arg 3, xsize
-#define MUL_HI %rdx
-
-// need arg3 == rdx for multiply
- ASSIGN(X_SIZE, ARG3_32)
-
- ZEROIZE(CARRY)
-
- ZEROIZE(LOOP_I)
-
-.LOOP_ZEROIZE_Z:
-
- cmp LOOP_I, X_SIZE
-
-
-
-
- JUMP_IF_ZERO(LOOP_CTR, .L_MULADD_DONE)
- JUMP_IF_LT(LOOP_CTR, 8, .LOOP_MULADD1)
-
-#define MULADD_OP(N) \
- ASSIGN(MUL_LO, ARRAY8(X_ARR, N)) ; \
- ASSIGN(Z_WORD, ARRAY8(Z_ARR, N)) ; \
- MUL(Y) ; \
- ADD(Z_WORD, CARRY) ; \
- ASSIGN(CARRY, MUL_HI) ; \
- ADD_LAST_CARRY(CARRY) ; \
- ADD(Z_WORD, MUL_LO) ; \
- ADD_LAST_CARRY(CARRY) ; \
- ASSIGN(ARRAY8(Z_ARR, N), Z_WORD)
-
-.LOOP_MULADD8:
- MULADD_OP(0)
- MULADD_OP(1)
- MULADD_OP(2)
- MULADD_OP(3)
- MULADD_OP(4)
- MULADD_OP(5)
- MULADD_OP(6)
- MULADD_OP(7)
-
- SUB_IMM(LOOP_CTR, 8)
- ADD_IMM(Z_ARR, 64)
- ADD_IMM(X_ARR, 64)
- cmp IMM(8), LOOP_CTR
- jge .LOOP_MULADD8
-
- JUMP_IF_ZERO(LOOP_CTR, .L_MULADD_DONE)
-
-ALIGN
-.LOOP_MULADD1:
- MULADD_OP(0)
-
- SUB_IMM(LOOP_CTR, 1)
- ADD_IMM(Z_ARR, 8)
- ADD_IMM(X_ARR, 8)
-
- cmp IMM(0), LOOP_CTR
- jne .LOOP_MULADD1
-
-.L_MULADD_DONE:
- RETURN_VALUE_IS(CARRY)
-END_FUNCTION(bigint_simple_square)
diff --git a/src/math/bigint/mulop_ia32/info.txt b/src/math/bigint/mulop_ia32/info.txt
deleted file mode 100644
index 7afa2ca2f..000000000
--- a/src/math/bigint/mulop_ia32/info.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-mp_bits 32
-
-# Out of date, still implements bigint_mul_add_words
-
-load_on request
-
-<source>
-mp_mulop.S
-</source>
-
-<arch>
-ia32
-</arch>
-
-<cc>
-gcc
-icc
-</cc>
-
-# ELF systems
-<os>
-linux
-freebsd
-dragonfly
-netbsd
-openbsd
-solaris
-</os>
-
-<requires>
-asm_ia32
-</requires>
diff --git a/src/math/bigint/mulop_ia32/mp_mulop.S b/src/math/bigint/mulop_ia32/mp_mulop.S
deleted file mode 100644
index 34cd58112..000000000
--- a/src/math/bigint/mulop_ia32/mp_mulop.S
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
-* Multiply/Add Algorithm in IA-32 assembler
-* (C) 1999-2007 Jack Lloyd
-*
-* Distributed under the terms of the Botan license
-*/
-
-#include <botan/internal/asm_macr_ia32.h>
-
-START_LISTING(mp_muladd.S)
-
-START_FUNCTION(bigint_mul_add_words)
- SPILL_REGS()
-#define PUSHED 4
-
-#define LOOP_CTR ESI
- ASSIGN(LOOP_CTR, ARG(3)) /* x_size */
- ZEROIZE(EDI)
-
- ASSIGN(ECX, ARG(1)) /* z[] */
- ASSIGN(EBX, ARG(2)) /* x[] */
- ASSIGN(EBP, ARG(4)) /* y */
-
-#define MULADD_OP(N) \
- ASSIGN(EAX, ARRAY4(EBX, N)) ; \
- MUL(EBP) ; \
- ADD_W_CARRY(EAX, EDX, EDI) ; \
- ASSIGN(EDI, EDX) ; \
- ADD_W_CARRY(ARRAY4(ECX, N), EDI, EAX) ;
-
- JUMP_IF_ZERO(LOOP_CTR, .MUL_ADD_DONE)
- JUMP_IF_LT(LOOP_CTR, 8, .MULADD1_LOOP)
-
-START_LOOP(.MULADD8)
- MULADD_OP(0)
- MULADD_OP(1)
- MULADD_OP(2)
- MULADD_OP(3)
- MULADD_OP(4)
- MULADD_OP(5)
- MULADD_OP(6)
- MULADD_OP(7)
-
- SUB_IMM(LOOP_CTR, 8)
- ADD_IMM(EBX, 32)
- ADD_IMM(ECX, 32)
-LOOP_UNTIL_LT(LOOP_CTR, 8, .MULADD8)
-
- JUMP_IF_ZERO(LOOP_CTR, .MUL_ADD_DONE)
-
-START_LOOP(.MULADD1)
- MULADD_OP(0)
-
- SUB_IMM(LOOP_CTR, 1)
- ADD_IMM(EBX, 4)
- ADD_IMM(ECX, 4)
-LOOP_UNTIL_EQ(LOOP_CTR, 0, .MULADD1)
-
-.MUL_ADD_DONE:
-
- ASSIGN(EAX, EDI)
-#undef PUSHED
- RESTORE_REGS()
-END_FUNCTION(bigint_mul_add_words)