/*1* Copyright (c) 2008-2020 Stefan Krah. All rights reserved.2*3* Redistribution and use in source and binary forms, with or without4* modification, are permitted provided that the following conditions5* are met:6*7* 1. Redistributions of source code must retain the above copyright8* notice, this list of conditions and the following disclaimer.9*10* 2. Redistributions in binary form must reproduce the above copyright11* notice, this list of conditions and the following disclaimer in the12* documentation and/or other materials provided with the distribution.13*14* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND15* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE16* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE17* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE18* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL19* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS20* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)21* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT22* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY23* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF24* SUCH DAMAGE.25*/262728#include "mpdecimal.h"2930#include <assert.h>31#include <stdio.h>3233#include "bits.h"34#include "constants.h"35#include "difradix2.h"36#include "numbertheory.h"37#include "sixstep.h"38#include "transpose.h"39#include "umodarith.h"404142/* Bignum: Cache efficient Matrix Fourier Transform for arrays of the43form 2**n (See literature/six-step.txt). */444546/* forward transform with sign = -1 */47int48six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)49{50struct fnt_params *tparams;51mpd_size_t log2n, C, R;52mpd_uint_t kernel;53mpd_uint_t umod;54#ifdef PPRO55double dmod;56uint32_t dinvmod[3];57#endif58mpd_uint_t *x, w0, w1, wstep;59mpd_size_t i, k;606162assert(ispower2(n));63assert(n >= 16);64assert(n <= MPD_MAXTRANSFORM_2N);6566log2n = mpd_bsr(n);67C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */68R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */697071/* Transpose the matrix. */72if (!transpose_pow2(a, R, C)) {73return 0;74}7576/* Length R transform on the rows. */77if ((tparams = _mpd_init_fnt_params(R, -1, modnum)) == NULL) {78return 0;79}80for (x = a; x < a+n; x += R) {81fnt_dif2(x, R, tparams);82}8384/* Transpose the matrix. */85if (!transpose_pow2(a, C, R)) {86mpd_free(tparams);87return 0;88}8990/* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */91SETMODULUS(modnum);92kernel = _mpd_getkernel(n, -1, modnum);93for (i = 1; i < R; i++) {94w0 = 1; /* r**(i*0): initial value for k=0 */95w1 = POWMOD(kernel, i); /* r**(i*1): initial value for k=1 */96wstep = MULMOD(w1, w1); /* r**(2*i) */97for (k = 0; k < C; k += 2) {98mpd_uint_t x0 = a[i*C+k];99mpd_uint_t x1 = a[i*C+k+1];100MULMOD2(&x0, w0, &x1, w1);101MULMOD2C(&w0, &w1, wstep); /* r**(i*(k+2)) = r**(i*k) * r**(2*i) */102a[i*C+k] = x0;103a[i*C+k+1] = x1;104}105}106107/* Length C transform on the rows. */108if (C != R) {109mpd_free(tparams);110if ((tparams = _mpd_init_fnt_params(C, -1, modnum)) == NULL) {111return 0;112}113}114for (x = a; x < a+n; x += C) {115fnt_dif2(x, C, tparams);116}117mpd_free(tparams);118119#if 0120/* An unordered transform is sufficient for convolution. */121/* Transpose the matrix. */122if (!transpose_pow2(a, R, C)) {123return 0;124}125#endif126127return 1;128}129130131/* reverse transform, sign = 1 */132int133inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)134{135struct fnt_params *tparams;136mpd_size_t log2n, C, R;137mpd_uint_t kernel;138mpd_uint_t umod;139#ifdef PPRO140double dmod;141uint32_t dinvmod[3];142#endif143mpd_uint_t *x, w0, w1, wstep;144mpd_size_t i, k;145146147assert(ispower2(n));148assert(n >= 16);149assert(n <= MPD_MAXTRANSFORM_2N);150151log2n = mpd_bsr(n);152C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */153R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */154155156#if 0157/* An unordered transform is sufficient for convolution. */158/* Transpose the matrix, producing an R*C matrix. */159if (!transpose_pow2(a, C, R)) {160return 0;161}162#endif163164/* Length C transform on the rows. */165if ((tparams = _mpd_init_fnt_params(C, 1, modnum)) == NULL) {166return 0;167}168for (x = a; x < a+n; x += C) {169fnt_dif2(x, C, tparams);170}171172/* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */173SETMODULUS(modnum);174kernel = _mpd_getkernel(n, 1, modnum);175for (i = 1; i < R; i++) {176w0 = 1;177w1 = POWMOD(kernel, i);178wstep = MULMOD(w1, w1);179for (k = 0; k < C; k += 2) {180mpd_uint_t x0 = a[i*C+k];181mpd_uint_t x1 = a[i*C+k+1];182MULMOD2(&x0, w0, &x1, w1);183MULMOD2C(&w0, &w1, wstep);184a[i*C+k] = x0;185a[i*C+k+1] = x1;186}187}188189/* Transpose the matrix. */190if (!transpose_pow2(a, R, C)) {191mpd_free(tparams);192return 0;193}194195/* Length R transform on the rows. */196if (R != C) {197mpd_free(tparams);198if ((tparams = _mpd_init_fnt_params(R, 1, modnum)) == NULL) {199return 0;200}201}202for (x = a; x < a+n; x += R) {203fnt_dif2(x, R, tparams);204}205mpd_free(tparams);206207/* Transpose the matrix. */208if (!transpose_pow2(a, C, R)) {209return 0;210}211212return 1;213}214215216