CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/ext/at3_standalone/fft.cpp
Views: 1401
/*1* FFT/IFFT transforms2* Copyright (c) 2008 Loren Merritt3* Copyright (c) 2002 Fabrice Bellard4* Partly based on libdjbfft by D. J. Bernstein5*6* This file is part of FFmpeg.7*8* FFmpeg is free software; you can redistribute it and/or9* modify it under the terms of the GNU Lesser General Public10* License as published by the Free Software Foundation; either11* version 2.1 of the License, or (at your option) any later version.12*13* FFmpeg is distributed in the hope that it will be useful,14* but WITHOUT ANY WARRANTY; without even the implied warranty of15* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU16* Lesser General Public License for more details.17*18* You should have received a copy of the GNU Lesser General Public19* License along with FFmpeg; if not, write to the Free Software20* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA21*/2223/**24* @file25* FFT/IFFT transforms.26*/2728#include <stdlib.h>29#include <string.h>3031#define _USE_MATH_DEFINES32#include <math.h>3334#include "mem.h"35#include "fft.h"3637#define sqrthalf (float)M_SQRT1_23839void imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input);40void imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input);4142/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */43COSTABLE(16);44COSTABLE(32);45COSTABLE(64);46COSTABLE(128);47COSTABLE(256);48COSTABLE(512);49COSTABLE(1024);5051static FFTSample * const av_cos_tabs[] = {52NULL, NULL, NULL, NULL,53av_cos_16,54av_cos_32,55av_cos_64,56av_cos_128,57av_cos_256,58av_cos_512,59av_cos_1024,60};6162void fft_calc(FFTContext *s, FFTComplex *z);6364static int split_radix_permutation(int i, int n, int inverse)65{66int m;67if(n <= 2) return i&1;68m = n >> 1;69if(!(i&m)) return split_radix_permutation(i, m, inverse)*2;70m >>= 1;71if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1;72else return split_radix_permutation(i, m, inverse)*4 - 1;73}7475void ff_init_ff_cos_tabs(int index)76{77int i;78int m = 1<<index;79double freq = 2*M_PI/m;80FFTSample *tab = av_cos_tabs[index];81for(i=0; i<=m/4; i++)82tab[i] = cos(i*freq);83for(i=1; i<m/4; i++)84tab[m/2-i] = tab[i];85}8687static const int avx_tab[] = {880, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 1589};9091static int is_second_half_of_fft32(int i, int n)92{93if (n <= 32)94return i >= 16;95else if (i < n/2)96return is_second_half_of_fft32(i, n/2);97else if (i < 3*n/4)98return is_second_half_of_fft32(i - n/2, n/4);99else100return is_second_half_of_fft32(i - 3*n/4, n/4);101}102103int ff_fft_init(FFTContext *s, int nbits, int inverse)104{105int i, j, n;106107if (nbits < 2 || nbits > 16)108goto fail;109s->nbits = nbits;110n = 1 << nbits;111112s->revtab = (uint16_t *)av_malloc(n * sizeof(uint16_t));113if (!s->revtab)114goto fail;115s->tmp_buf = (FFTComplex *)av_malloc(n * sizeof(FFTComplex));116if (!s->tmp_buf)117goto fail;118s->inverse = inverse;119120for(j=4; j<=nbits; j++) {121ff_init_ff_cos_tabs(j);122}123124for(i=0; i<n; i++) {125j = i;126int index = -split_radix_permutation(i, n, s->inverse) & (n - 1);127s->revtab[index] = j;128}129130return 0;131fail:132av_freep(&s->revtab);133av_freep(&s->tmp_buf);134return -1;135}136137void ff_fft_end(FFTContext *s)138{139av_freep(&s->revtab);140av_freep(&s->tmp_buf);141}142143#define BF(x, y, a, b) do { \144x = a - b; \145y = a + b; \146} while (0)147148#define BUTTERFLIES(a0,a1,a2,a3) {\149BF(t3, t5, t5, t1);\150BF(a2.re, a0.re, a0.re, t5);\151BF(a3.im, a1.im, a1.im, t3);\152BF(t4, t6, t2, t6);\153BF(a3.re, a1.re, a1.re, t4);\154BF(a2.im, a0.im, a0.im, t6);\155}156157// force loading all the inputs before storing any.158// this is slightly slower for small data, but avoids store->load aliasing159// for addresses separated by large powers of 2.160#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\161FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\162BF(t3, t5, t5, t1);\163BF(a2.re, a0.re, r0, t5);\164BF(a3.im, a1.im, i1, t3);\165BF(t4, t6, t2, t6);\166BF(a3.re, a1.re, r1, t4);\167BF(a2.im, a0.im, i0, t6);\168}169170#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\171CMUL(t1, t2, a2.re, a2.im, wre, -wim);\172CMUL(t5, t6, a3.re, a3.im, wre, wim);\173BUTTERFLIES(a0,a1,a2,a3)\174}175176#define TRANSFORM_ZERO(a0,a1,a2,a3) {\177t1 = a2.re;\178t2 = a2.im;\179t5 = a3.re;\180t6 = a3.im;\181BUTTERFLIES(a0,a1,a2,a3)\182}183184/* z[0...8n-1], w[1...2n-1] */185#define PASS(name)\186static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\187{\188FFTDouble t1, t2, t3, t4, t5, t6;\189int o1 = 2*n;\190int o2 = 4*n;\191int o3 = 6*n;\192const FFTSample *wim = wre+o1;\193n--;\194\195TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\196TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\197do {\198z += 2;\199wre += 2;\200wim -= 2;\201TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\202TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\203} while(--n);\204}205206PASS(pass)207#undef BUTTERFLIES208#define BUTTERFLIES BUTTERFLIES_BIG209PASS(pass_big)210211#define DECL_FFT(n,n2,n4)\212static void fft##n(FFTComplex *z)\213{\214fft##n2(z);\215fft##n4(z+n4*2);\216fft##n4(z+n4*3);\217pass(z,av_cos_##n,n4/2);\218}219220static void fft4(FFTComplex *z)221{222FFTDouble t1, t2, t3, t4, t5, t6, t7, t8;223224BF(t3, t1, z[0].re, z[1].re);225BF(t8, t6, z[3].re, z[2].re);226BF(z[2].re, z[0].re, t1, t6);227BF(t4, t2, z[0].im, z[1].im);228BF(t7, t5, z[2].im, z[3].im);229BF(z[3].im, z[1].im, t4, t8);230BF(z[3].re, z[1].re, t3, t7);231BF(z[2].im, z[0].im, t2, t5);232}233234static void fft8(FFTComplex *z)235{236FFTDouble t1, t2, t3, t4, t5, t6;237238fft4(z);239240BF(t1, z[5].re, z[4].re, -z[5].re);241BF(t2, z[5].im, z[4].im, -z[5].im);242BF(t5, z[7].re, z[6].re, -z[7].re);243BF(t6, z[7].im, z[6].im, -z[7].im);244245BUTTERFLIES(z[0],z[2],z[4],z[6]);246TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);247}248249static void fft16(FFTComplex *z)250{251FFTDouble t1, t2, t3, t4, t5, t6;252FFTSample cos_16_1 = av_cos_16[1];253FFTSample cos_16_3 = av_cos_16[3];254255fft8(z);256fft4(z+8);257fft4(z+12);258259TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);260TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);261TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);262TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);263}264DECL_FFT(32,16,8)265DECL_FFT(64,32,16)266DECL_FFT(128,64,32)267DECL_FFT(256,128,64)268DECL_FFT(512,256,128)269#define pass pass_big270DECL_FFT(1024,512,256)271272static void (* const fft_dispatch[])(FFTComplex*) = {273fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,274};275276void fft_calc(FFTContext *s, FFTComplex *z) {277fft_dispatch[s->nbits-2](z);278}279280#include <stdlib.h>281#include <string.h>282283#include "fft.h"284#include "mem.h"285286/**287* init MDCT or IMDCT computation.288*/289int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale)290{291int n, n4, i;292double alpha, theta;293int tstep;294295memset(s, 0, sizeof(*s));296n = 1 << nbits;297s->mdct_bits = nbits;298s->mdct_size = n;299n4 = n >> 2;300s->mdct_permutation = FF_MDCT_PERM_NONE;301302if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0)303goto fail;304305s->tcos = (FFTSample *)av_malloc_array(n / 2, sizeof(FFTSample));306if (!s->tcos)307goto fail;308309switch (s->mdct_permutation) {310case FF_MDCT_PERM_NONE:311s->tsin = s->tcos + n4;312tstep = 1;313break;314case FF_MDCT_PERM_INTERLEAVE:315s->tsin = s->tcos + 1;316tstep = 2;317break;318default:319goto fail;320}321322theta = 1.0 / 8.0 + (scale < 0 ? n4 : 0);323scale = sqrt(fabs(scale));324for (i = 0; i < n4; i++) {325alpha = 2 * M_PI * (i + theta) / n;326s->tcos[i * tstep] = -cos(alpha) * scale;327s->tsin[i * tstep] = -sin(alpha) * scale;328}329return 0;330fail:331ff_mdct_end(s);332return -1;333}334335/**336* Compute the middle half of the inverse MDCT of size N = 2^nbits,337* thus excluding the parts that can be derived by symmetry338* @param output N/2 samples339* @param input N/2 samples340*/341void imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input)342{343int k, n8, n4, n2, n, j;344const uint16_t *revtab = s->revtab;345const FFTSample *tcos = s->tcos;346const FFTSample *tsin = s->tsin;347const FFTSample *in1, *in2;348FFTComplex *z = (FFTComplex *)output;349350n = 1 << s->mdct_bits;351n2 = n >> 1;352n4 = n >> 2;353n8 = n >> 3;354355/* pre rotation */356in1 = input;357in2 = input + n2 - 1;358for (k = 0; k < n4; k++) {359j = revtab[k];360CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]);361in1 += 2;362in2 -= 2;363}364fft_calc(s, z);365366/* post rotation + reordering */367for (k = 0; k < n8; k++) {368FFTSample r0, i0, r1, i1;369CMUL(r0, i1, z[n8 - k - 1].im, z[n8 - k - 1].re, tsin[n8 - k - 1], tcos[n8 - k - 1]);370CMUL(r1, i0, z[n8 + k].im, z[n8 + k].re, tsin[n8 + k], tcos[n8 + k]);371z[n8 - k - 1].re = r0;372z[n8 - k - 1].im = i0;373z[n8 + k].re = r1;374z[n8 + k].im = i1;375}376}377378/**379* Compute inverse MDCT of size N = 2^nbits380* @param output N samples381* @param input N/2 samples382*/383void imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)384{385int k;386int n = 1 << s->mdct_bits;387int n2 = n >> 1;388int n4 = n >> 2;389390imdct_half(s, output + n4, input);391392for (k = 0; k < n4; k++) {393output[k] = -output[n2 - k - 1];394output[n - k - 1] = output[n2 + k];395}396}397398void ff_mdct_end(FFTContext *s)399{400av_freep(&s->tcos);401ff_fft_end(s);402}403404405