1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
|
/*
* Simple O(N^2) Multiplication and Squaring
* (C) 1999-2008 Jack Lloyd
*
* Distributed under the terms of the Botan license
*/
#include <botan/asm_macr.h>
START_LISTING(mp_mulop.S)
#if 0
/*
* Reference C version of bigint_simple_sqr. It is removed by the
* preprocessor (#if 0) and is kept only as documentation of what the
* assembly routine in this file is meant to compute.
*
* z must have room for 2*x_size words. It is cleared first, then for every
* i the products x[j] * x[i] are accumulated into z[i+j] with a running
* carry word, and the final carry of row i is stored in z[x_size+i].
*/
void bigint_simple_sqr(word z[], const word x[], u32bit x_size)
   {
   clear_mem(z, 2*x_size);

   for(u32bit i = 0; i != x_size; ++i)
      {
      word carry = 0;

      /*
      * Equivalent variant that handles eight words per step:
      *
      *   const u32bit blocks = x_size - (x_size % 8);
      *
      *   for(u32bit j = 0; j != blocks; j += 8)
      *      carry = word8_madd3(z + i + j, x + j, x[i], carry);
      *
      *   for(u32bit j = blocks; j != x_size; ++j)
      *      z[i+j] = word_madd3(x[j], x[i], z[i+j], &carry);
      */

      // Same as z[i+j] = word_madd3(x[j], x[i], z[i+j], &carry),
      // with the multiply-add written out in full
      for(u32bit j = 0; j != x_size; ++j)
         {
         dword t = (dword)x[j] * x[i] + z[i+j] + carry;
         carry = (word)(t >> BOTAN_MP_WORD_BITS);
         z[i+j] = (word)t;
         }

      z[x_size+i] = carry;
      }
   }
#endif
/*
* void bigint_simple_sqr(word z[], const word x[], u32bit x_size)
*
* Schoolbook squaring on 64-bit words:
*
*    clear z[0 .. 2*x_size)
*    for i in [0, x_size):
*       carry = 0
*       for j in [0, x_size):
*          (carry, z[i+j]) = x[j] * x[i] + z[i+j] + carry
*       z[x_size+i] = carry
*
* Arguments: ARG_1 = z (room for 2*x_size words), ARG_2 = x,
* third argument = x_size as a 32-bit count. Nothing is returned.
* z and x are expected not to overlap.
*
* NOTE(review): assumes TEMP_1 .. TEMP_5 from asm_macr.h are caller-saved
* scratch registers distinct from ARG_1, ARG_2, %rax and %rdx, and that
* ARG_3_32 names the low half of %rdx. Confirm against asm_macr.h.
*/
START_FUNCTION(bigint_simple_sqr)

#define Z_ARR    ARG_1
#define X_ARR    ARG_2
#define CARRY    TEMP_1
#define Z_WORD   TEMP_2
#define LOOP_I   TEMP_3
#define LOOP_J   TEMP_4
#define X_SIZE   TEMP_5

// mul always writes its 128-bit product to %rdx:%rax
#define MUL_LO   %rax
#define MUL_HI   %rdx

   // x_size arrives in the register that mul overwrites (arg 3 == %rdx),
   // so copy it out first. The 32-bit self-move clears bits 63..32, which
   // are not guaranteed to be zero for a 32-bit argument.
   ASSIGN(ARG_3_32, ARG_3_32)
   ASSIGN(X_SIZE, MUL_HI)

   // Nothing to clear or multiply when x_size == 0
   JUMP_IF_ZERO(X_SIZE, .L_SQR_DONE)

   // clear_mem(z, 2*x_size)
   ZEROIZE(Z_WORD)
   ZEROIZE(LOOP_I)
   lea (X_SIZE,X_SIZE), LOOP_J              // LOOP_J = 2*x_size
.L_SQR_ZEROIZE_Z:
   mov Z_WORD, (Z_ARR,LOOP_I,8)             // z[LOOP_I] = 0
   ADD_IMM(LOOP_I, 1)
   cmp LOOP_J, LOOP_I
   jne .L_SQR_ZEROIZE_Z

   // Outer loop over i. Z_ARR is advanced one word per row, so inside
   // the loop it always points at z[i].
   ZEROIZE(LOOP_I)
.L_SQR_OUTER:
   ZEROIZE(CARRY)
   ZEROIZE(LOOP_J)

.L_SQR_INNER:
   mov (X_ARR,LOOP_J,8), MUL_LO             // MUL_LO = x[j]
   mov (Z_ARR,LOOP_J,8), Z_WORD             // Z_WORD = z[i+j]
   mulq (X_ARR,LOOP_I,8)                    // MUL_HI:MUL_LO = x[j] * x[i]

   // Z_WORD += carry + low word; the new carry is the high word plus the
   // two carry-outs. The high word is at most 2^64 - 2 and the whole sum
   // fits in 128 bits, so CARRY cannot wrap. The mov between the add and
   // the adc leaves the flags untouched.
   ADD(Z_WORD, CARRY)
   ASSIGN(CARRY, MUL_HI)
   ADD_LAST_CARRY(CARRY)
   ADD(Z_WORD, MUL_LO)
   ADD_LAST_CARRY(CARRY)

   mov Z_WORD, (Z_ARR,LOOP_J,8)             // z[i+j] = Z_WORD

   ADD_IMM(LOOP_J, 1)
   cmp X_SIZE, LOOP_J
   jne .L_SQR_INNER

   mov CARRY, (Z_ARR,X_SIZE,8)              // z[x_size+i] = carry

   ADD_IMM(Z_ARR, 8)                        // Z_ARR = &z[i+1]
   ADD_IMM(LOOP_I, 1)
   cmp X_SIZE, LOOP_I
   jne .L_SQR_OUTER

.L_SQR_DONE:
END_FUNCTION(bigint_simple_sqr)
|