Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Modules/_decimal/libmpdec/convolute.c
12 views
1
/*
2
* Copyright (c) 2008-2020 Stefan Krah. All rights reserved.
3
*
4
* Redistribution and use in source and binary forms, with or without
5
* modification, are permitted provided that the following conditions
6
* are met:
7
*
8
* 1. Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
*
11
* 2. Redistributions in binary form must reproduce the above copyright
12
* notice, this list of conditions and the following disclaimer in the
13
* documentation and/or other materials provided with the distribution.
14
*
15
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25
* SUCH DAMAGE.
26
*/
27
28
29
#include "mpdecimal.h"
30
#include "bits.h"
31
#include "constants.h"
32
#include "convolute.h"
33
#include "fnt.h"
34
#include "fourstep.h"
35
#include "numbertheory.h"
36
#include "sixstep.h"
37
#include "umodarith.h"
38
39
40
/* Bignum: Fast convolution using the Number Theoretic Transform. Used for
41
the multiplication of very large coefficients. */
42
43
44
/* Convolute the data in c1 and c2. Result is in c1. */
45
int
46
fnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum)
47
{
48
int (*fnt)(mpd_uint_t *, mpd_size_t, int);
49
int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);
50
#ifdef PPRO
51
double dmod;
52
uint32_t dinvmod[3];
53
#endif
54
mpd_uint_t n_inv, umod;
55
mpd_size_t i;
56
57
58
SETMODULUS(modnum);
59
n_inv = POWMOD(n, (umod-2));
60
61
if (ispower2(n)) {
62
if (n > SIX_STEP_THRESHOLD) {
63
fnt = six_step_fnt;
64
inv_fnt = inv_six_step_fnt;
65
}
66
else {
67
fnt = std_fnt;
68
inv_fnt = std_inv_fnt;
69
}
70
}
71
else {
72
fnt = four_step_fnt;
73
inv_fnt = inv_four_step_fnt;
74
}
75
76
if (!fnt(c1, n, modnum)) {
77
return 0;
78
}
79
if (!fnt(c2, n, modnum)) {
80
return 0;
81
}
82
for (i = 0; i < n-1; i += 2) {
83
mpd_uint_t x0 = c1[i];
84
mpd_uint_t y0 = c2[i];
85
mpd_uint_t x1 = c1[i+1];
86
mpd_uint_t y1 = c2[i+1];
87
MULMOD2(&x0, y0, &x1, y1);
88
c1[i] = x0;
89
c1[i+1] = x1;
90
}
91
92
if (!inv_fnt(c1, n, modnum)) {
93
return 0;
94
}
95
for (i = 0; i < n-3; i += 4) {
96
mpd_uint_t x0 = c1[i];
97
mpd_uint_t x1 = c1[i+1];
98
mpd_uint_t x2 = c1[i+2];
99
mpd_uint_t x3 = c1[i+3];
100
MULMOD2C(&x0, &x1, n_inv);
101
MULMOD2C(&x2, &x3, n_inv);
102
c1[i] = x0;
103
c1[i+1] = x1;
104
c1[i+2] = x2;
105
c1[i+3] = x3;
106
}
107
108
return 1;
109
}
110
111
/* Autoconvolute the data in c1. Result is in c1. */
112
int
113
fnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum)
114
{
115
int (*fnt)(mpd_uint_t *, mpd_size_t, int);
116
int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);
117
#ifdef PPRO
118
double dmod;
119
uint32_t dinvmod[3];
120
#endif
121
mpd_uint_t n_inv, umod;
122
mpd_size_t i;
123
124
125
SETMODULUS(modnum);
126
n_inv = POWMOD(n, (umod-2));
127
128
if (ispower2(n)) {
129
if (n > SIX_STEP_THRESHOLD) {
130
fnt = six_step_fnt;
131
inv_fnt = inv_six_step_fnt;
132
}
133
else {
134
fnt = std_fnt;
135
inv_fnt = std_inv_fnt;
136
}
137
}
138
else {
139
fnt = four_step_fnt;
140
inv_fnt = inv_four_step_fnt;
141
}
142
143
if (!fnt(c1, n, modnum)) {
144
return 0;
145
}
146
for (i = 0; i < n-1; i += 2) {
147
mpd_uint_t x0 = c1[i];
148
mpd_uint_t x1 = c1[i+1];
149
MULMOD2(&x0, x0, &x1, x1);
150
c1[i] = x0;
151
c1[i+1] = x1;
152
}
153
154
if (!inv_fnt(c1, n, modnum)) {
155
return 0;
156
}
157
for (i = 0; i < n-3; i += 4) {
158
mpd_uint_t x0 = c1[i];
159
mpd_uint_t x1 = c1[i+1];
160
mpd_uint_t x2 = c1[i+2];
161
mpd_uint_t x3 = c1[i+3];
162
MULMOD2C(&x0, &x1, n_inv);
163
MULMOD2C(&x2, &x3, n_inv);
164
c1[i] = x0;
165
c1[i+1] = x1;
166
c1[i+2] = x2;
167
c1[i+3] = x3;
168
}
169
170
return 1;
171
}
172
173