aboutsummaryrefslogtreecommitdiffstats
path: root/src/math/bigint/mulop_amd64/mp_mulop.cpp
blob: d1aa51489092ea34bd20a892ac997b035dcbd9f4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
/*************************************************
* Simple O(N^2) Multiplication and Squaring      *
* (C) 1999-2008 Jack Lloyd                       *
*************************************************/

#include <botan/mp_asm.h>
#include <botan/mp_asmi.h>
#include <botan/mp_core.h>
#include <botan/mem_ops.h>

namespace Botan {

extern "C" {

/*************************************************
* Schoolbook O(N^2) Multiplication               *
*                                                *
* Forms the product of x (x_size words) and y    *
* (y_size words) in z. z is cleared and written  *
* over x_size + y_size words, so it must provide *
* at least that much room.                       *
*************************************************/
void bigint_simple_mul(word z[], const word x[], u32bit x_size,
                                 const word y[], u32bit y_size)
   {
   // Split x into a prefix whose length is a multiple of 8 (fed to
   // word8_madd3, eight words per call) and a short word-by-word tail.
   const u32bit tail_len = x_size % 8;
   const u32bit unrolled_len = x_size - tail_len;

   clear_mem(z, x_size + y_size);

   for(u32bit row = 0; row < y_size; ++row)
      {
      // Partial products for y[row] are accumulated starting at z[row]
      word* const z_row = z + row;
      word carry = 0;

      u32bit col = 0;

      while(col < unrolled_len)
         {
         carry = word8_madd3(z_row + col, x + col, y[row], carry);
         col += 8;
         }

      while(col < x_size)
         {
         z_row[col] = word_madd3(x[col], y[row], z_row[col], &carry);
         ++col;
         }

      // The carry out of this row lands one word past the row's span
      z_row[x_size] = carry;
      }
   }

inline word word_sqr(word x,

/*************************************************
* Simple O(N^2) Squaring

This is exactly the same algorithm as bigint_simple_mul,
however because C/C++ compilers suck at alias analysis it
is good to have the version where the compiler knows
that x == y
*************************************************/
void bigint_simple_sqr(word z[], const word x[], u32bit x_size)
   {
   clear_mem(z, 2*x_size);

   for(u32bit i = 0; i != x_size; ++i)
      {
      const word x_i = x[i];

      word carry = z[2*i];
      z[2*i] = word_madd2(x_i, x_i, z[2*i], &carry);

      for(u32bit j = i; j != x_size; ++j)
         {
         // z[i+j] = z[i+j] + 2 * x[j] * x_i + carry;

         /*
         load z[i+j] into register
         load x[j] into %hi
         mulq %[x_i] -> x[i] * x[j] -> %lo:%hi
         shlq %lo, $1

         // put carry bit (cf) from %lo into %temp
         xorl %temp
         adcq $0, %temp

         // high bit of lo now in cf
         shl %hi, $1
         // add in lowest bid from %lo
         orl %temp, %hi

         addq %[c], %[lo]
         adcq $0, %[hi]
         addq %[z_ij], %[lo]
         adcq $0, %[hi]

         */

         }

      z[x_size+i] = carry;
      }
   }

}

}