Path: blob/main/Modules/_decimal/libmpdec/difradix2.c
12 views
/*1* Copyright (c) 2008-2020 Stefan Krah. All rights reserved.2*3* Redistribution and use in source and binary forms, with or without4* modification, are permitted provided that the following conditions5* are met:6*7* 1. Redistributions of source code must retain the above copyright8* notice, this list of conditions and the following disclaimer.9*10* 2. Redistributions in binary form must reproduce the above copyright11* notice, this list of conditions and the following disclaimer in the12* documentation and/or other materials provided with the distribution.13*14* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND15* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE16* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE17* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE18* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL19* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS20* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)21* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT22* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY23* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF24* SUCH DAMAGE.25*/262728#include "mpdecimal.h"2930#include <assert.h>3132#include "bits.h"33#include "constants.h"34#include "difradix2.h"35#include "numbertheory.h"36#include "umodarith.h"373839/* Bignum: The actual transform routine (decimation in frequency). */404142/*43* Generate index pairs (x, bitreverse(x)) and carry out the permutation.44* n must be a power of two.45* Algorithm due to Brent/Lehmann, see Joerg Arndt, "Matters Computational",46* Chapter 1.14.4. [http://www.jjj.de/fxt/]47*/48static inline void49bitreverse_permute(mpd_uint_t a[], mpd_size_t n)50{51mpd_size_t x = 0;52mpd_size_t r = 0;53mpd_uint_t t;5455do { /* Invariant: r = bitreverse(x) */56if (r > x) {57t = a[x];58a[x] = a[r];59a[r] = t;60}61/* Flip trailing consecutive 1 bits and the first zero bit62* that absorbs a possible carry. */63x += 1;64/* Mirror the operation on r: Flip n_trailing_zeros(x)+165high bits of r. */66r ^= (n - (n >> (mpd_bsf(x)+1)));67/* The loop invariant is preserved. */68} while (x < n);69}707172/* Fast Number Theoretic Transform, decimation in frequency. */73void74fnt_dif2(mpd_uint_t a[], mpd_size_t n, struct fnt_params *tparams)75{76mpd_uint_t *wtable = tparams->wtable;77mpd_uint_t umod;78#ifdef PPRO79double dmod;80uint32_t dinvmod[3];81#endif82mpd_uint_t u0, u1, v0, v1;83mpd_uint_t w, w0, w1, wstep;84mpd_size_t m, mhalf;85mpd_size_t j, r;868788assert(ispower2(n));89assert(n >= 4);9091SETMODULUS(tparams->modnum);9293/* m == n */94mhalf = n / 2;95for (j = 0; j < mhalf; j += 2) {9697w0 = wtable[j];98w1 = wtable[j+1];99100u0 = a[j];101v0 = a[j+mhalf];102103u1 = a[j+1];104v1 = a[j+1+mhalf];105106a[j] = addmod(u0, v0, umod);107v0 = submod(u0, v0, umod);108109a[j+1] = addmod(u1, v1, umod);110v1 = submod(u1, v1, umod);111112MULMOD2(&v0, w0, &v1, w1);113114a[j+mhalf] = v0;115a[j+1+mhalf] = v1;116117}118119wstep = 2;120for (m = n/2; m >= 2; m>>=1, wstep<<=1) {121122mhalf = m / 2;123124/* j == 0 */125for (r = 0; r < n; r += 2*m) {126127u0 = a[r];128v0 = a[r+mhalf];129130u1 = a[m+r];131v1 = a[m+r+mhalf];132133a[r] = addmod(u0, v0, umod);134v0 = submod(u0, v0, umod);135136a[m+r] = addmod(u1, v1, umod);137v1 = submod(u1, v1, umod);138139a[r+mhalf] = v0;140a[m+r+mhalf] = v1;141}142143for (j = 1; j < mhalf; j++) {144145w = wtable[j*wstep];146147for (r = 0; r < n; r += 2*m) {148149u0 = a[r+j];150v0 = a[r+j+mhalf];151152u1 = a[m+r+j];153v1 = a[m+r+j+mhalf];154155a[r+j] = addmod(u0, v0, umod);156v0 = submod(u0, v0, umod);157158a[m+r+j] = addmod(u1, v1, umod);159v1 = submod(u1, v1, umod);160161MULMOD2C(&v0, &v1, w);162163a[r+j+mhalf] = v0;164a[m+r+j+mhalf] = v1;165}166167}168169}170171bitreverse_permute(a, n);172}173174175