CoCalc -- difradix2.c

GitHub Repository: allendowney/cpython
Path: blob/main/Modules/_decimal/libmpdec/difradix2.c
¹² views
1
/*
2
 * Copyright (c) 2008-2020 Stefan Krah. All rights reserved.
3
 *
4
 * Redistribution and use in source and binary forms, with or without
5
 * modification, are permitted provided that the following conditions
6
 * are met:
7
 *
8
 * 1. Redistributions of source code must retain the above copyright
9
 *    notice, this list of conditions and the following disclaimer.
10
 *
11
 * 2. Redistributions in binary form must reproduce the above copyright
12
 *    notice, this list of conditions and the following disclaimer in the
13
 *    documentation and/or other materials provided with the distribution.
14
 *
15
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25
 * SUCH DAMAGE.
26
 */
27

28

29
#include "mpdecimal.h"
30

31
#include <assert.h>
32

33
#include "bits.h"
34
#include "constants.h"
35
#include "difradix2.h"
36
#include "numbertheory.h"
37
#include "umodarith.h"
38

39

40
/* Bignum: The actual transform routine (decimation in frequency). */
41

42

43
/*
44
 * Generate index pairs (x, bitreverse(x)) and carry out the permutation.
45
 * n must be a power of two.
46
 * Algorithm due to Brent/Lehmann, see Joerg Arndt, "Matters Computational",
47
 * Chapter 1.14.4. [http://www.jjj.de/fxt/]
48
 */
49
static inline void
50
bitreverse_permute(mpd_uint_t a[], mpd_size_t n)
51
{
52
    mpd_size_t x = 0;
53
    mpd_size_t r = 0;
54
    mpd_uint_t t;
55

56
    do { /* Invariant: r = bitreverse(x) */
57
        if (r > x) {
58
            t = a[x];
59
            a[x] = a[r];
60
            a[r] = t;
61
        }
62
        /* Flip trailing consecutive 1 bits and the first zero bit
63
         * that absorbs a possible carry. */
64
        x += 1;
65
        /* Mirror the operation on r: Flip n_trailing_zeros(x)+1
66
           high bits of r. */
67
        r ^= (n - (n >> (mpd_bsf(x)+1)));
68
        /* The loop invariant is preserved. */
69
    } while (x < n);
70
}
71

72

73
/* Fast Number Theoretic Transform, decimation in frequency. */
74
void
75
fnt_dif2(mpd_uint_t a[], mpd_size_t n, struct fnt_params *tparams)
76
{
77
    mpd_uint_t *wtable = tparams->wtable;
78
    mpd_uint_t umod;
79
#ifdef PPRO
80
    double dmod;
81
    uint32_t dinvmod[3];
82
#endif
83
    mpd_uint_t u0, u1, v0, v1;
84
    mpd_uint_t w, w0, w1, wstep;
85
    mpd_size_t m, mhalf;
86
    mpd_size_t j, r;
87

88

89
    assert(ispower2(n));
90
    assert(n >= 4);
91

92
    SETMODULUS(tparams->modnum);
93

94
    /* m == n */
95
    mhalf = n / 2;
96
    for (j = 0; j < mhalf; j += 2) {
97

98
        w0 = wtable[j];
99
        w1 = wtable[j+1];
100

101
        u0 = a[j];
102
        v0 = a[j+mhalf];
103

104
        u1 = a[j+1];
105
        v1 = a[j+1+mhalf];
106

107
        a[j] = addmod(u0, v0, umod);
108
        v0 = submod(u0, v0, umod);
109

110
        a[j+1] = addmod(u1, v1, umod);
111
        v1 = submod(u1, v1, umod);
112

113
        MULMOD2(&v0, w0, &v1, w1);
114

115
        a[j+mhalf] = v0;
116
        a[j+1+mhalf] = v1;
117

118
    }
119

120
    wstep = 2;
121
    for (m = n/2; m >= 2; m>>=1, wstep<<=1) {
122

123
        mhalf = m / 2;
124

125
        /* j == 0 */
126
        for (r = 0; r < n; r += 2*m) {
127

128
            u0 = a[r];
129
            v0 = a[r+mhalf];
130

131
            u1 = a[m+r];
132
            v1 = a[m+r+mhalf];
133

134
            a[r] = addmod(u0, v0, umod);
135
            v0 = submod(u0, v0, umod);
136

137
            a[m+r] = addmod(u1, v1, umod);
138
            v1 = submod(u1, v1, umod);
139

140
            a[r+mhalf] = v0;
141
            a[m+r+mhalf] = v1;
142
        }
143

144
        for (j = 1; j < mhalf; j++) {
145

146
            w = wtable[j*wstep];
147

148
            for (r = 0; r < n; r += 2*m) {
149

150
                u0 = a[r+j];
151
                v0 = a[r+j+mhalf];
152

153
                u1 = a[m+r+j];
154
                v1 = a[m+r+j+mhalf];
155

156
                a[r+j] = addmod(u0, v0, umod);
157
                v0 = submod(u0, v0, umod);
158

159
                a[m+r+j] = addmod(u1, v1, umod);
160
                v1 = submod(u1, v1, umod);
161

162
                MULMOD2C(&v0, &v1, w);
163

164
                a[r+j+mhalf] = v0;
165
                a[m+r+j+mhalf] = v1;
166
            }
167

168
        }
169

170
    }
171

172
    bitreverse_permute(a, n);
173
}
174

175
Product

Resources

Company