1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
/*************************************************
* Simple O(N^2) Multiplication and Squaring *
* (C) 1999-2008 Jack Lloyd *
*************************************************/
#include <botan/asm_macr.h>
START_LISTING(mp_mulop.S)
#if 0
/*
* Reference C version of the routine implemented in assembly in this file.
* It is compiled out and kept only as a guide.
*
* Computes z = x * x with the schoolbook method:
*   - z (2*x_size words) is cleared first;
*   - for every word x[i], the row x[0 .. x_size) * x[i] is accumulated into
*     z starting at word i;
*   - the carry left over from that row is stored in z[x_size+i].
*
* z      output array, 2*x_size words
* x      input array, x_size words
* x_size number of words in x
*
* word_madd3(a, b, c, &d) evaluates the double-width value a*b + c + *d,
* stores its high word in *d and returns its low word:
*
*    dword t = (dword)a * b + c + *d;
*    *d = (word)(t >> BOTAN_MP_WORD_BITS);
*    return (word)t;
*/
void bigint_simple_sqr(word z[], const word x[], u32bit x_size)
{
   clear_mem(z, 2*x_size);

   for(u32bit i = 0; i != x_size; ++i)
   {
      word carry = 0;

      /*
      Variant that handles eight words per step and then the remainder:

      const u32bit blocks = x_size - (x_size % 8);
      for(u32bit j = 0; j != blocks; j += 8)
         carry = word8_madd3(z + i + j, x + j, x[i], carry);
      for(u32bit j = blocks; j != x_size; ++j)
         z[i+j] = word_madd3(x[j], x[i], z[i+j], &carry);
      */
      for(u32bit j = 0; j != x_size; ++j)
         z[i+j] = word_madd3(x[j], x[i], z[i+j], &carry);

      /* The carry belongs to row i, so it is stored while i is in scope. */
      z[x_size+i] = carry;
   }
}
#endif
// bigint_simple_sqr: assembly routine intended as the counterpart of the
// disabled C reference of the same name earlier in this file. Every
// upper-case helper used here (START_FUNCTION, ASSIGN, ZEROIZE, MUL, ADD,
// ADD_LAST_CARRY, ARRAY8, IMM, JUMP_IF_*, ...) is a macro from
// botan/asm_macr.h, which is not part of this listing; remarks about what
// they expand to are assumptions to be confirmed against that header.
//
// NOTE(review): the routine looks unfinished. Within this listing:
//   - LOOP_CTR and Y are used but never defined;
//   - LOOP_I is cleared but never advanced, and LOOP_J is never used;
//   - the label .LOOP_ZEROIZE_Z is never jumped to, and no visible
//     instruction clears the output array;
//   - there is a single pass of multiply-accumulate, no outer loop per row;
//   - the name passed to END_FUNCTION differs from the one passed to
//     START_FUNCTION.
START_FUNCTION(bigint_simple_sqr)
// Argument aliases: first argument is the output array, second the input.
#define Z_ARR ARG_1
#define X_ARR ARG_2
// Earlier mapping of the size argument, replaced by the TEMP_5 copy below.
//#define X_SIZE ARG_3_32
// Scratch registers.
#define CARRY TEMP_1
#define Z_WORD TEMP_2
#define LOOP_I TEMP_3
#define LOOP_J TEMP_4
#define X_SIZE TEMP_5
// Registers named for the low and high halves of the product.
#define MUL_LO %rax
// Per the original note: argument 3 (x_size) arrives in the register that
// is also used as MUL_HI.
#define MUL_HI %rdx
// That register is needed by the multiply, so the size is copied into
// X_SIZE first.
// NOTE(review): ARG3_32 is spelled without the underscore used by ARG_3_32
// in the commented-out define above - confirm which name the header provides
// and that its width matches TEMP_5.
ASSIGN(X_SIZE, ARG3_32)
ZEROIZE(CARRY)
ZEROIZE(LOOP_I)
// NOTE(review): despite the label name nothing here zeroes Z_ARR. The macros
// that follow test LOOP_CTR, so the result of this compare does not appear
// to be consumed by any visible jump.
.LOOP_ZEROIZE_Z:
cmp LOOP_I, X_SIZE
// Nothing to do for a zero count; fewer than 8 words go straight to the
// one-word-at-a-time loop.
JUMP_IF_ZERO(LOOP_CTR, .L_MULADD_DONE)
JUMP_IF_LT(LOOP_CTR, 8, .LOOP_MULADD1)
// MULADD_OP(N): one multiply-accumulate step on word N relative to the
// current X_ARR / Z_ARR positions. Reading the macro names literally:
//   load X_ARR[N] into MUL_LO and Z_ARR[N] into Z_WORD, multiply by Y,
//   add the incoming CARRY into Z_WORD, make MUL_HI the new CARRY and fold
//   in the carry of that add, add MUL_LO into Z_WORD and fold in the carry
//   again, then store Z_WORD back to Z_ARR[N].
// ARRAY8(base, N) presumably addresses the N-th 8-byte word; this matches
// the pointers being advanced by 64 after eight steps and by 8 after one.
// NOTE(review): the first ADD_LAST_CARRY only sees the carry of the
// preceding ADD if ASSIGN expands to a flag-preserving move - confirm.
#define MULADD_OP(N) \
ASSIGN(MUL_LO, ARRAY8(X_ARR, N)) ; \
ASSIGN(Z_WORD, ARRAY8(Z_ARR, N)) ; \
MUL(Y) ; \
ADD(Z_WORD, CARRY) ; \
ASSIGN(CARRY, MUL_HI) ; \
ADD_LAST_CARRY(CARRY) ; \
ADD(Z_WORD, MUL_LO) ; \
ADD_LAST_CARRY(CARRY) ; \
ASSIGN(ARRAY8(Z_ARR, N), Z_WORD)
// Unrolled loop: eight words per iteration.
.LOOP_MULADD8:
MULADD_OP(0)
MULADD_OP(1)
MULADD_OP(2)
MULADD_OP(3)
MULADD_OP(4)
MULADD_OP(5)
MULADD_OP(6)
MULADD_OP(7)
// Eight words consumed: lower the counter and advance both pointers by 64.
SUB_IMM(LOOP_CTR, 8)
ADD_IMM(Z_ARR, 64)
ADD_IMM(X_ARR, 64)
// Repeat while at least eight words remain (AT&T operand order, assuming
// IMM(8) yields an immediate operand).
cmp IMM(8), LOOP_CTR
jge .LOOP_MULADD8
// Done if the count was a multiple of eight; otherwise finish word by word.
JUMP_IF_ZERO(LOOP_CTR, .L_MULADD_DONE)
ALIGN
// Remainder loop: one word per iteration until the counter reaches zero.
.LOOP_MULADD1:
MULADD_OP(0)
SUB_IMM(LOOP_CTR, 1)
ADD_IMM(Z_ARR, 8)
ADD_IMM(X_ARR, 8)
cmp IMM(0), LOOP_CTR
jne .LOOP_MULADD1
.L_MULADD_DONE:
// NOTE(review): the C reference returns void and stores the carry into the
// output array instead; confirm that returning CARRY is intended here.
RETURN_VALUE_IS(CARRY)
// NOTE(review): name differs from START_FUNCTION(bigint_simple_sqr).
END_FUNCTION(bigint_simple_square)
|