Path: blob/main/Modules/_decimal/libmpdec/convolute.c
12 views
/*1* Copyright (c) 2008-2020 Stefan Krah. All rights reserved.2*3* Redistribution and use in source and binary forms, with or without4* modification, are permitted provided that the following conditions5* are met:6*7* 1. Redistributions of source code must retain the above copyright8* notice, this list of conditions and the following disclaimer.9*10* 2. Redistributions in binary form must reproduce the above copyright11* notice, this list of conditions and the following disclaimer in the12* documentation and/or other materials provided with the distribution.13*14* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND15* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE16* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE17* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE18* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL19* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS20* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)21* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT22* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY23* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF24* SUCH DAMAGE.25*/262728#include "mpdecimal.h"29#include "bits.h"30#include "constants.h"31#include "convolute.h"32#include "fnt.h"33#include "fourstep.h"34#include "numbertheory.h"35#include "sixstep.h"36#include "umodarith.h"373839/* Bignum: Fast convolution using the Number Theoretic Transform. Used for40the multiplication of very large coefficients. */414243/* Convolute the data in c1 and c2. Result is in c1. */44int45fnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum)46{47int (*fnt)(mpd_uint_t *, mpd_size_t, int);48int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);49#ifdef PPRO50double dmod;51uint32_t dinvmod[3];52#endif53mpd_uint_t n_inv, umod;54mpd_size_t i;555657SETMODULUS(modnum);58n_inv = POWMOD(n, (umod-2));5960if (ispower2(n)) {61if (n > SIX_STEP_THRESHOLD) {62fnt = six_step_fnt;63inv_fnt = inv_six_step_fnt;64}65else {66fnt = std_fnt;67inv_fnt = std_inv_fnt;68}69}70else {71fnt = four_step_fnt;72inv_fnt = inv_four_step_fnt;73}7475if (!fnt(c1, n, modnum)) {76return 0;77}78if (!fnt(c2, n, modnum)) {79return 0;80}81for (i = 0; i < n-1; i += 2) {82mpd_uint_t x0 = c1[i];83mpd_uint_t y0 = c2[i];84mpd_uint_t x1 = c1[i+1];85mpd_uint_t y1 = c2[i+1];86MULMOD2(&x0, y0, &x1, y1);87c1[i] = x0;88c1[i+1] = x1;89}9091if (!inv_fnt(c1, n, modnum)) {92return 0;93}94for (i = 0; i < n-3; i += 4) {95mpd_uint_t x0 = c1[i];96mpd_uint_t x1 = c1[i+1];97mpd_uint_t x2 = c1[i+2];98mpd_uint_t x3 = c1[i+3];99MULMOD2C(&x0, &x1, n_inv);100MULMOD2C(&x2, &x3, n_inv);101c1[i] = x0;102c1[i+1] = x1;103c1[i+2] = x2;104c1[i+3] = x3;105}106107return 1;108}109110/* Autoconvolute the data in c1. Result is in c1. */111int112fnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum)113{114int (*fnt)(mpd_uint_t *, mpd_size_t, int);115int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);116#ifdef PPRO117double dmod;118uint32_t dinvmod[3];119#endif120mpd_uint_t n_inv, umod;121mpd_size_t i;122123124SETMODULUS(modnum);125n_inv = POWMOD(n, (umod-2));126127if (ispower2(n)) {128if (n > SIX_STEP_THRESHOLD) {129fnt = six_step_fnt;130inv_fnt = inv_six_step_fnt;131}132else {133fnt = std_fnt;134inv_fnt = std_inv_fnt;135}136}137else {138fnt = four_step_fnt;139inv_fnt = inv_four_step_fnt;140}141142if (!fnt(c1, n, modnum)) {143return 0;144}145for (i = 0; i < n-1; i += 2) {146mpd_uint_t x0 = c1[i];147mpd_uint_t x1 = c1[i+1];148MULMOD2(&x0, x0, &x1, x1);149c1[i] = x0;150c1[i+1] = x1;151}152153if (!inv_fnt(c1, n, modnum)) {154return 0;155}156for (i = 0; i < n-3; i += 4) {157mpd_uint_t x0 = c1[i];158mpd_uint_t x1 = c1[i+1];159mpd_uint_t x2 = c1[i+2];160mpd_uint_t x3 = c1[i+3];161MULMOD2C(&x0, &x1, n_inv);162MULMOD2C(&x2, &x3, n_inv);163c1[i] = x0;164c1[i+1] = x1;165c1[i+2] = x2;166c1[i+3] = x3;167}168169return 1;170}171172173