CoCalc -- convolute.c

GitHub Repository: allendowney/cpython
Path: blob/main/Modules/_decimal/libmpdec/convolute.c
¹² views
1
/*
2
 * Copyright (c) 2008-2020 Stefan Krah. All rights reserved.
3
 *
4
 * Redistribution and use in source and binary forms, with or without
5
 * modification, are permitted provided that the following conditions
6
 * are met:
7
 *
8
 * 1. Redistributions of source code must retain the above copyright
9
 *    notice, this list of conditions and the following disclaimer.
10
 *
11
 * 2. Redistributions in binary form must reproduce the above copyright
12
 *    notice, this list of conditions and the following disclaimer in the
13
 *    documentation and/or other materials provided with the distribution.
14
 *
15
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25
 * SUCH DAMAGE.
26
 */
27

28

29
#include "mpdecimal.h"
30
#include "bits.h"
31
#include "constants.h"
32
#include "convolute.h"
33
#include "fnt.h"
34
#include "fourstep.h"
35
#include "numbertheory.h"
36
#include "sixstep.h"
37
#include "umodarith.h"
38

39

40
/* Bignum: Fast convolution using the Number Theoretic Transform. Used for
41
   the multiplication of very large coefficients. */
42

43

44
/* Convolute the data in c1 and c2. Result is in c1. */
45
int
46
fnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum)
47
{
48
    int (*fnt)(mpd_uint_t *, mpd_size_t, int);
49
    int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);
50
#ifdef PPRO
51
    double dmod;
52
    uint32_t dinvmod[3];
53
#endif
54
    mpd_uint_t n_inv, umod;
55
    mpd_size_t i;
56

57

58
    SETMODULUS(modnum);
59
    n_inv = POWMOD(n, (umod-2));
60

61
    if (ispower2(n)) {
62
        if (n > SIX_STEP_THRESHOLD) {
63
            fnt = six_step_fnt;
64
            inv_fnt = inv_six_step_fnt;
65
        }
66
        else {
67
            fnt = std_fnt;
68
            inv_fnt = std_inv_fnt;
69
        }
70
    }
71
    else {
72
        fnt = four_step_fnt;
73
        inv_fnt = inv_four_step_fnt;
74
    }
75

76
    if (!fnt(c1, n, modnum)) {
77
        return 0;
78
    }
79
    if (!fnt(c2, n, modnum)) {
80
        return 0;
81
    }
82
    for (i = 0; i < n-1; i += 2) {
83
        mpd_uint_t x0 = c1[i];
84
        mpd_uint_t y0 = c2[i];
85
        mpd_uint_t x1 = c1[i+1];
86
        mpd_uint_t y1 = c2[i+1];
87
        MULMOD2(&x0, y0, &x1, y1);
88
        c1[i] = x0;
89
        c1[i+1] = x1;
90
    }
91

92
    if (!inv_fnt(c1, n, modnum)) {
93
        return 0;
94
    }
95
    for (i = 0; i < n-3; i += 4) {
96
        mpd_uint_t x0 = c1[i];
97
        mpd_uint_t x1 = c1[i+1];
98
        mpd_uint_t x2 = c1[i+2];
99
        mpd_uint_t x3 = c1[i+3];
100
        MULMOD2C(&x0, &x1, n_inv);
101
        MULMOD2C(&x2, &x3, n_inv);
102
        c1[i] = x0;
103
        c1[i+1] = x1;
104
        c1[i+2] = x2;
105
        c1[i+3] = x3;
106
    }
107

108
    return 1;
109
}
110

111
/* Autoconvolute the data in c1. Result is in c1. */
112
int
113
fnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum)
114
{
115
    int (*fnt)(mpd_uint_t *, mpd_size_t, int);
116
    int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);
117
#ifdef PPRO
118
    double dmod;
119
    uint32_t dinvmod[3];
120
#endif
121
    mpd_uint_t n_inv, umod;
122
    mpd_size_t i;
123

124

125
    SETMODULUS(modnum);
126
    n_inv = POWMOD(n, (umod-2));
127

128
    if (ispower2(n)) {
129
        if (n > SIX_STEP_THRESHOLD) {
130
            fnt = six_step_fnt;
131
            inv_fnt = inv_six_step_fnt;
132
        }
133
        else {
134
            fnt = std_fnt;
135
            inv_fnt = std_inv_fnt;
136
        }
137
    }
138
    else {
139
        fnt = four_step_fnt;
140
        inv_fnt = inv_four_step_fnt;
141
    }
142

143
    if (!fnt(c1, n, modnum)) {
144
        return 0;
145
    }
146
    for (i = 0; i < n-1; i += 2) {
147
        mpd_uint_t x0 = c1[i];
148
        mpd_uint_t x1 = c1[i+1];
149
        MULMOD2(&x0, x0, &x1, x1);
150
        c1[i] = x0;
151
        c1[i+1] = x1;
152
    }
153

154
    if (!inv_fnt(c1, n, modnum)) {
155
        return 0;
156
    }
157
    for (i = 0; i < n-3; i += 4) {
158
        mpd_uint_t x0 = c1[i];
159
        mpd_uint_t x1 = c1[i+1];
160
        mpd_uint_t x2 = c1[i+2];
161
        mpd_uint_t x3 = c1[i+3];
162
        MULMOD2C(&x0, &x1, n_inv);
163
        MULMOD2C(&x2, &x3, n_inv);
164
        c1[i] = x0;
165
        c1[i+1] = x1;
166
        c1[i+2] = x2;
167
        c1[i+3] = x3;
168
    }
169

170
    return 1;
171
}
172

173
Product

Resources

Company