Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
alexbevi
GitHub Repository: alexbevi/BizHawk
Path: blob/master/libsnes/bsnes/nall/dsp/resample/lib/sinc.hpp
2 views
1
// If these types are changed to anything other than "float", you should comment out the SSE detection directives below
2
// so that the SSE code is not used.
3
4
typedef float resample_coeff_t; // note: sizeof(resample_coeff_t) must be == to a power of 2, and not larger than 16
5
typedef float resample_samp_t;
6
7
#include <string>
8
#include <vector>
9
10
// ...but don't comment this single RESAMPLE_SSEREGPARM define out when disabling SSE.
11
#define RESAMPLE_SSEREGPARM
12
13
#if defined(__SSE__)
14
#define SINCRESAMPLE_USE_SSE 1
15
#ifndef __x86_64__
16
#undef RESAMPLE_SSEREGPARM
17
#define RESAMPLE_SSEREGPARM __attribute__((sseregparm))
18
#endif
19
#else
20
// TODO: altivec here
21
#endif
22
23
namespace ResampleUtility
24
{
25
inline void kaiser_window(double* io, int count, double beta);
26
inline void gen_sinc(double* out, int size, double cutoff, double kaiser);
27
inline void gen_sinc_os(double* out, int size, double cutoff, double kaiser);
28
inline void normalize(double* io, int size, double gain = 1.0);
29
30
inline void* make_aligned(void* ptr, unsigned boundary); // boundary must be a power of 2
31
}
32
33
class SincResampleHR
34
{
35
private:
36
37
inline void Init(unsigned ratio_arg, double desired_bandwidth, double beta, double d);
38
39
inline void write(resample_samp_t sample) RESAMPLE_SSEREGPARM;
40
inline resample_samp_t read(void) RESAMPLE_SSEREGPARM;
41
inline bool output_avail(void);
42
43
private:
44
45
inline resample_samp_t mac(const resample_samp_t *wave, const resample_coeff_t *coeff, unsigned count);
46
47
unsigned ratio;
48
unsigned num_convolutions;
49
50
resample_coeff_t *coeffs;
51
std::vector<unsigned char> coeffs_mem;
52
53
// second half of ringbuffer should be copy of first half.
54
resample_samp_t *rb;
55
std::vector<unsigned char> rb_mem;
56
57
signed rb_readpos;
58
signed rb_writepos;
59
signed rb_in;
60
signed rb_eff_size;
61
62
friend class SincResample;
63
};
64
65
class SincResample
66
{
67
public:
68
69
enum
70
{
71
QUALITY_LOW = 0,
72
QUALITY_MEDIUM = 2,
73
QUALITY_HIGH = 4
74
};
75
76
inline SincResample(double input_rate, double output_rate, double desired_bandwidth, unsigned quality = QUALITY_HIGH);
77
78
inline void write(resample_samp_t sample) RESAMPLE_SSEREGPARM;
79
inline resample_samp_t read(void) RESAMPLE_SSEREGPARM;
80
inline bool output_avail(void);
81
82
private:
83
84
inline void Init(double input_rate, double output_rate, double desired_bandwidth, double beta, double d, unsigned pn_nume, unsigned phases_min);
85
86
inline resample_samp_t mac(const resample_samp_t *wave, const resample_coeff_t *coeffs_a, const resample_coeff_t *coeffs_b, const double ffract, unsigned count) RESAMPLE_SSEREGPARM;
87
88
unsigned num_convolutions;
89
unsigned num_phases;
90
91
unsigned step_int;
92
double step_fract;
93
94
double input_pos_fract;
95
96
97
std::vector<resample_coeff_t *> coeffs; // Pointers into coeff_mem.
98
std::vector<unsigned char> coeff_mem;
99
100
101
std::vector<resample_samp_t> rb; // second half should be copy of first half.
102
signed rb_readpos;
103
signed rb_writepos;
104
signed rb_in;
105
106
bool hr_used;
107
SincResampleHR hr;
108
};
109
110
111
//
112
// Code:
113
//
114
//#include "resample.hpp"
115
116
#if 0
117
namespace bit
118
{
119
inline unsigned round(unsigned x) {
120
if((x & (x - 1)) == 0) return x;
121
while(x & (x - 1)) x &= x - 1;
122
return x << 1;
123
}
124
}
125
#endif
126
127
void SincResampleHR::Init(unsigned ratio_arg, double desired_bandwidth, double beta, double d)
128
{
129
const unsigned align_boundary = 16;
130
std::vector<double> coeffs_tmp;
131
double cutoff; // 1.0 = f/2
132
133
ratio = ratio_arg;
134
135
//num_convolutions = ((unsigned)ceil(d / ((1.0 - desired_bandwidth) / ratio)) + 1) &~ 1; // round up to be even
136
num_convolutions = ((unsigned)ceil(d / ((1.0 - desired_bandwidth) / ratio)) | 1);
137
138
cutoff = (1.0 / ratio) - (d / num_convolutions);
139
140
//printf("%d %d %.20f\n", ratio, num_convolutions, cutoff);
141
assert(num_convolutions > ratio);
142
143
144
// Generate windowed sinc of POWER
145
coeffs_tmp.resize(num_convolutions);
146
//ResampleUtility::gen_sinc(&coeffs_tmp[0], num_convolutions, cutoff, beta);
147
ResampleUtility::gen_sinc_os(&coeffs_tmp[0], num_convolutions, cutoff, beta);
148
ResampleUtility::normalize(&coeffs_tmp[0], num_convolutions);
149
150
// Copy from coeffs_tmp to coeffs~
151
// We multiply many coefficients at a time in the mac loop, so make sure the last few that don't really
152
// exist are allocated, zero'd mem.
153
154
coeffs_mem.resize(((num_convolutions + 7) &~ 7) * sizeof(resample_coeff_t) + (align_boundary - 1));
155
coeffs = (resample_coeff_t *)ResampleUtility::make_aligned(&coeffs_mem[0], align_boundary);
156
157
158
for(unsigned i = 0; i < num_convolutions; i++)
159
coeffs[i] = coeffs_tmp[i];
160
161
rb_eff_size = nall::bit::round(num_convolutions * 2) >> 1;
162
rb_readpos = 0;
163
rb_writepos = 0;
164
rb_in = 0;
165
166
rb_mem.resize(rb_eff_size * 2 * sizeof(resample_samp_t) + (align_boundary - 1));
167
rb = (resample_samp_t *)ResampleUtility::make_aligned(&rb_mem[0], align_boundary);
168
}
169
170
171
inline bool SincResampleHR::output_avail(void)
172
{
173
return(rb_in >= (signed)num_convolutions);
174
}
175
176
inline void SincResampleHR::write(resample_samp_t sample)
177
{
178
assert(!output_avail());
179
180
rb[rb_writepos] = sample;
181
rb[rb_writepos + rb_eff_size] = sample;
182
rb_writepos = (rb_writepos + 1) & (rb_eff_size - 1);
183
rb_in++;
184
}
185
186
resample_samp_t SincResampleHR::mac(const resample_samp_t *wave, const resample_coeff_t *coeff, unsigned count)
187
{
188
#if SINCRESAMPLE_USE_SSE
189
__m128 accum_veca[2] = { _mm_set1_ps(0), _mm_set1_ps(0) };
190
191
resample_samp_t accum;
192
193
for(unsigned c = 0; c < count; c += 8)
194
{
195
for(unsigned i = 0; i < 2; i++)
196
{
197
__m128 co[2];
198
__m128 w[2];
199
200
co[i] = _mm_load_ps(&coeff[c + i * 4]);
201
w[i] = _mm_load_ps(&wave[c + i * 4]);
202
203
w[i] = _mm_mul_ps(w[i], co[i]);
204
205
accum_veca[i] = _mm_add_ps(w[i], accum_veca[i]);
206
}
207
}
208
209
__m128 accum_vec = _mm_add_ps(accum_veca[0], accum_veca[1]); //_mm_add_ps(_mm_add_ps(accum_veca[0], accum_veca[1]), _mm_add_ps(accum_veca[2], accum_veca[3]));
210
211
accum_vec = _mm_add_ps(accum_vec, _mm_shuffle_ps(accum_vec, accum_vec, (3 << 0) | (2 << 2) | (1 << 4) | (0 << 6)));
212
accum_vec = _mm_add_ps(accum_vec, _mm_shuffle_ps(accum_vec, accum_vec, (1 << 0) | (0 << 2) | (1 << 4) | (0 << 6)));
213
214
_mm_store_ss(&accum, accum_vec);
215
216
return accum;
217
#else
218
resample_samp_t accum[4] = { 0, 0, 0, 0 };
219
220
for(unsigned c = 0; c < count; c+= 4)
221
{
222
accum[0] += wave[c + 0] * coeff[c + 0];
223
accum[1] += wave[c + 1] * coeff[c + 1];
224
accum[2] += wave[c + 2] * coeff[c + 2];
225
accum[3] += wave[c + 3] * coeff[c + 3];
226
}
227
228
return (accum[0] + accum[1]) + (accum[2] + accum[3]); // don't mess with parentheses(assuming compiler doesn't already, which it may...
229
230
#endif
231
}
232
233
234
resample_samp_t SincResampleHR::read(void)
235
{
236
assert(output_avail());
237
resample_samp_t ret;
238
239
ret = mac(&rb[rb_readpos], &coeffs[0], num_convolutions);
240
241
rb_readpos = (rb_readpos + ratio) & (rb_eff_size - 1);
242
rb_in -= ratio;
243
244
return ret;
245
}
246
247
248
SincResample::SincResample(double input_rate, double output_rate, double desired_bandwidth, unsigned quality)
249
{
250
const struct
251
{
252
double beta;
253
double d;
254
unsigned pn_nume;
255
unsigned phases_min;
256
} qtab[5] =
257
{
258
{ 5.658, 3.62, 4096, 4 },
259
{ 6.764, 4.32, 8192, 4 },
260
{ 7.865, 5.0, 16384, 8 },
261
{ 8.960, 5.7, 32768, 16 },
262
{ 10.056, 6.4, 65536, 32 }
263
};
264
265
// Sanity checks
266
assert(ceil(input_rate) > 0);
267
assert(ceil(output_rate) > 0);
268
assert(ceil(input_rate / output_rate) <= 1024);
269
assert(ceil(output_rate / input_rate) <= 1024);
270
271
// The simplistic number-of-phases calculation code doesn't work well enough for when desired_bandwidth is close to 1.0 and when
272
// upsampling.
273
assert(desired_bandwidth >= 0.25 && desired_bandwidth < 0.96);
274
assert(quality >= 0 && quality <= 4);
275
276
hr_used = false;
277
278
#if 1
279
// Round down to the nearest multiple of 4(so wave buffer remains aligned)
280
// It also adjusts the effective intermediate sampling rate up slightly, so that the upper frequencies below f/2
281
// aren't overly attenuated so much. In the future, we might want to do an FFT or something to choose the intermediate rate more accurately
282
// to virtually eliminate over-attenuation.
283
unsigned ioratio_rd = (unsigned)floor(input_rate / (output_rate * (1.0 + (1.0 - desired_bandwidth) / 2) )) & ~3;
284
285
if(ioratio_rd >= 8)
286
{
287
hr.Init(ioratio_rd, desired_bandwidth, qtab[quality].beta, qtab[quality].d); //10.056, 6.4);
288
hr_used = true;
289
290
input_rate /= ioratio_rd;
291
}
292
#endif
293
294
Init(input_rate, output_rate, desired_bandwidth, qtab[quality].beta, qtab[quality].d, qtab[quality].pn_nume, qtab[quality].phases_min);
295
}
296
297
void SincResample::Init(double input_rate, double output_rate, double desired_bandwidth, double beta, double d, unsigned pn_nume, unsigned phases_min)
298
{
299
const unsigned max_mult_atatime = 8; // multiply "granularity". must be power of 2.
300
const unsigned max_mult_minus1 = (max_mult_atatime - 1);
301
const unsigned conv_alignment_bytes = 16; // must be power of 2
302
const double input_to_output_ratio = input_rate / output_rate;
303
const double output_to_input_ratio = output_rate / input_rate;
304
double cutoff; // 1.0 = input_rate / 2
305
std::vector<double> coeff_init_buffer;
306
307
// Round up num_convolutions to be even.
308
if(output_rate > input_rate)
309
num_convolutions = ((unsigned)ceil(d / (1.0 - desired_bandwidth)) + 1) & ~1;
310
else
311
num_convolutions = ((unsigned)ceil(d / (output_to_input_ratio * (1.0 - desired_bandwidth))) + 1) & ~1;
312
313
if(output_rate > input_rate) // Upsampling
314
cutoff = desired_bandwidth;
315
else // Downsampling
316
cutoff = output_to_input_ratio * desired_bandwidth;
317
318
// Round up to be even.
319
num_phases = (std::max<unsigned>(pn_nume / num_convolutions, phases_min) + 1) &~1;
320
321
// Adjust cutoff to account for the multiple phases.
322
cutoff = cutoff / num_phases;
323
324
assert((num_convolutions & 1) == 0);
325
assert((num_phases & 1) == 0);
326
327
// fprintf(stderr, "num_convolutions=%u, num_phases=%u, total expected coeff byte size=%lu\n", num_convolutions, num_phases,
328
// (long)((num_phases + 2) * ((num_convolutions + max_mult_minus1) & ~max_mult_minus1) * sizeof(float) + conv_alignment_bytes));
329
330
coeff_init_buffer.resize(num_phases * num_convolutions);
331
332
coeffs.resize(num_phases + 1 + 1);
333
334
coeff_mem.resize((num_phases + 1 + 1) * ((num_convolutions + max_mult_minus1) &~ max_mult_minus1) * sizeof(resample_coeff_t) + conv_alignment_bytes);
335
336
// Assign aligned pointers into coeff_mem
337
{
338
resample_coeff_t *base_ptr = (resample_coeff_t *)ResampleUtility::make_aligned(&coeff_mem[0], conv_alignment_bytes);
339
340
for(unsigned phase = 0; phase < (num_phases + 1 + 1); phase++)
341
{
342
coeffs[phase] = base_ptr + (((num_convolutions + max_mult_minus1) & ~max_mult_minus1) * phase);
343
}
344
}
345
346
ResampleUtility::gen_sinc(&coeff_init_buffer[0], num_phases * num_convolutions, cutoff, beta);
347
ResampleUtility::normalize(&coeff_init_buffer[0], num_phases * num_convolutions, num_phases);
348
349
// Reorder coefficients to allow for more efficient convolution.
350
for(int phase = -1; phase < ((int)num_phases + 1); phase++)
351
{
352
for(int conv = 0; conv < (int)num_convolutions; conv++)
353
{
354
double coeff;
355
356
if(phase == -1 && conv == 0)
357
coeff = 0;
358
else if(phase == (int)num_phases && conv == ((int)num_convolutions - 1))
359
coeff = 0;
360
else
361
coeff = coeff_init_buffer[conv * num_phases + phase];
362
363
coeffs[phase + 1][conv] = coeff;
364
}
365
}
366
367
// Free a bit of mem
368
coeff_init_buffer.resize(0);
369
370
step_int = floor(input_to_output_ratio);
371
step_fract = input_to_output_ratio - step_int;
372
373
input_pos_fract = 0;
374
375
// Do NOT use rb.size() later in the code, since it'll include the padding.
376
// We should only need one "max_mult_minus1" here, not two, since it won't matter if it over-reads(due to doing "max_mult_atatime" multiplications at a time
377
// rather than just 1, in which case this over-read wouldn't happen), from the first half into the duplicated half,
378
// since those corresponding coefficients will be zero anyway; this is just to handle the case of reading off the end of the duplicated half to
379
// prevent illegal memory accesses.
380
rb.resize(num_convolutions * 2 + max_mult_minus1);
381
382
rb_readpos = 0;
383
rb_writepos = 0;
384
rb_in = 0;
385
}
386
387
resample_samp_t SincResample::mac(const resample_samp_t *wave, const resample_coeff_t *coeffs_a, const resample_coeff_t *coeffs_b, const double ffract, unsigned count)
388
{
389
resample_samp_t accum = 0;
390
#if SINCRESAMPLE_USE_SSE
391
__m128 accum_vec_a[2] = { _mm_set1_ps(0), _mm_set1_ps(0) };
392
__m128 accum_vec_b[2] = { _mm_set1_ps(0), _mm_set1_ps(0) };
393
394
for(unsigned c = 0; c < count; c += 8) //8) //4)
395
{
396
__m128 coeff_a[2];
397
__m128 coeff_b[2];
398
__m128 w[2];
399
__m128 result_a[2], result_b[2];
400
401
for(unsigned i = 0; i < 2; i++)
402
{
403
coeff_a[i] = _mm_load_ps(&coeffs_a[c + (i * 4)]);
404
coeff_b[i] = _mm_load_ps(&coeffs_b[c + (i * 4)]);
405
w[i] = _mm_loadu_ps(&wave[c + (i * 4)]);
406
407
result_a[i] = _mm_mul_ps(coeff_a[i], w[i]);
408
result_b[i] = _mm_mul_ps(coeff_b[i], w[i]);
409
410
accum_vec_a[i] = _mm_add_ps(result_a[i], accum_vec_a[i]);
411
accum_vec_b[i] = _mm_add_ps(result_b[i], accum_vec_b[i]);
412
}
413
}
414
415
__m128 accum_vec, av_a, av_b;
416
__m128 mult_a_vec = _mm_set1_ps(1.0 - ffract);
417
__m128 mult_b_vec = _mm_set1_ps(ffract);
418
419
av_a = _mm_mul_ps(mult_a_vec, /*accum_vec_a[0]);*/ _mm_add_ps(accum_vec_a[0], accum_vec_a[1]));
420
av_b = _mm_mul_ps(mult_b_vec, /*accum_vec_b[0]);*/ _mm_add_ps(accum_vec_b[0], accum_vec_b[1]));
421
422
accum_vec = _mm_add_ps(av_a, av_b);
423
424
accum_vec = _mm_add_ps(accum_vec, _mm_shuffle_ps(accum_vec, accum_vec, (3 << 0) | (2 << 2) | (1 << 4) | (0 << 6)));
425
accum_vec = _mm_add_ps(accum_vec, _mm_shuffle_ps(accum_vec, accum_vec, (1 << 0) | (0 << 2) | (1 << 4) | (0 << 6)));
426
427
_mm_store_ss(&accum, accum_vec);
428
#else
429
resample_coeff_t mult_a = 1.0 - ffract;
430
resample_coeff_t mult_b = ffract;
431
432
for(unsigned c = 0; c < count; c += 4)
433
{
434
accum += wave[c + 0] * (coeffs_a[c + 0] * mult_a + coeffs_b[c + 0] * mult_b);
435
accum += wave[c + 1] * (coeffs_a[c + 1] * mult_a + coeffs_b[c + 1] * mult_b);
436
accum += wave[c + 2] * (coeffs_a[c + 2] * mult_a + coeffs_b[c + 2] * mult_b);
437
accum += wave[c + 3] * (coeffs_a[c + 3] * mult_a + coeffs_b[c + 3] * mult_b);
438
}
439
#endif
440
441
return accum;
442
}
443
444
inline bool SincResample::output_avail(void)
445
{
446
return(rb_in >= (int)num_convolutions);
447
}
448
449
resample_samp_t SincResample::read(void)
450
{
451
assert(output_avail());
452
double phase = input_pos_fract * num_phases - 0.5;
453
signed phase_int = (signed)floor(phase);
454
double phase_fract = phase - phase_int;
455
unsigned phase_a = num_phases - 1 - phase_int;
456
unsigned phase_b = phase_a - 1;
457
resample_samp_t ret;
458
459
ret = mac(&rb[rb_readpos], &coeffs[phase_a + 1][0], &coeffs[phase_b + 1][0], phase_fract, num_convolutions);
460
461
unsigned int_increment = step_int;
462
463
input_pos_fract += step_fract;
464
int_increment += floor(input_pos_fract);
465
input_pos_fract -= floor(input_pos_fract);
466
467
rb_readpos = (rb_readpos + int_increment) % num_convolutions;
468
rb_in -= int_increment;
469
470
return ret;
471
}
472
473
inline void SincResample::write(resample_samp_t sample)
474
{
475
assert(!output_avail());
476
477
if(hr_used)
478
{
479
hr.write(sample);
480
481
if(hr.output_avail())
482
{
483
sample = hr.read();
484
}
485
else
486
{
487
return;
488
}
489
}
490
491
rb[rb_writepos + 0 * num_convolutions] = sample;
492
rb[rb_writepos + 1 * num_convolutions] = sample;
493
rb_writepos = (rb_writepos + 1) % num_convolutions;
494
rb_in++;
495
}
496
497
void ResampleUtility::kaiser_window( double* io, int count, double beta)
498
{
499
int const accuracy = 24; //16; //12;
500
501
double* end = io + count;
502
503
double beta2 = beta * beta * (double) -0.25;
504
double to_fract = beta2 / ((double) count * count);
505
double i = 0;
506
double rescale = 0; // Doesn't need an initializer, to shut up gcc
507
508
for ( ; io < end; ++io, i += 1 )
509
{
510
double x = i * i * to_fract - beta2;
511
double u = x;
512
double k = x + 1;
513
514
double n = 2;
515
do
516
{
517
u *= x / (n * n);
518
n += 1;
519
k += u;
520
}
521
while ( k <= u * (1 << accuracy) );
522
523
if ( !i )
524
rescale = 1 / k; // otherwise values get large
525
526
*io *= k * rescale;
527
}
528
}
529
530
void ResampleUtility::gen_sinc(double* out, int size, double cutoff, double kaiser)
531
{
532
assert( size % 2 == 0 ); // size must be even
533
534
int const half_size = size / 2;
535
double* const mid = &out [half_size];
536
537
// Generate right half of sinc
538
for ( int i = 0; i < half_size; i++ )
539
{
540
double angle = (i * 2 + 1) * (M_PI / 2);
541
mid [i] = sin( angle * cutoff ) / angle;
542
}
543
544
kaiser_window( mid, half_size, kaiser );
545
546
// Mirror for left half
547
for ( int i = 0; i < half_size; i++ )
548
out [i] = mid [half_size - 1 - i];
549
}
550
551
void ResampleUtility::gen_sinc_os(double* out, int size, double cutoff, double kaiser)
552
{
553
assert( size % 2 == 1); // size must be odd
554
555
for(int i = 0; i < size; i++)
556
{
557
if(i == (size / 2))
558
out[i] = 2 * M_PI * (cutoff / 2); //0.078478; //1.0; //sin(2 * M_PI * (cutoff / 2) * (i - size / 2)) / (i - (size / 2));
559
else
560
out[i] = sin(2 * M_PI * (cutoff / 2) * (i - size / 2)) / (i - (size / 2));
561
562
// out[i] *= 0.3635819 - 0.4891775 * cos(2 * M_PI * i / (size - 1)) + 0.1365995 * cos(4 * M_PI * i / (size - 1)) - 0.0106411 * cos(6 * M_PI * i / (size - 1));
563
//0.42 - 0.5 * cos(2 * M_PI * i / (size - 1)) + 0.08 * cos(4 * M_PI * i / (size - 1));
564
565
// printf("%d %f\n", i, out[i]);
566
}
567
568
kaiser_window(&out[size / 2], size / 2 + 1, kaiser);
569
570
// Mirror for left half
571
for ( int i = 0; i < size / 2; i++ )
572
out [i] = out [size - 1 - i];
573
574
}
575
576
void ResampleUtility::normalize(double* io, int size, double gain)
577
{
578
double sum = 0;
579
for ( int i = 0; i < size; i++ )
580
sum += io [i];
581
582
double scale = gain / sum;
583
for ( int i = 0; i < size; i++ )
584
io [i] *= scale;
585
}
586
587
void* ResampleUtility::make_aligned(void* ptr, unsigned boundary)
588
{
589
unsigned char* null_ptr = (unsigned char *)NULL;
590
unsigned char* uc_ptr = (unsigned char *)ptr;
591
592
uc_ptr += (boundary - ((uc_ptr - null_ptr) & (boundary - 1))) & (boundary - 1);
593
594
//while((uc_ptr - null_ptr) & (boundary - 1))
595
// uc_ptr++;
596
597
//printf("%16llx %16llx\n", (unsigned long long)ptr, (unsigned long long)uc_ptr);
598
599
assert((uc_ptr - (unsigned char *)ptr) < boundary && (uc_ptr >= (unsigned char *)ptr));
600
601
return uc_ptr;
602
}
603
604