Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
stenzek
GitHub Repository: stenzek/duckstation
Path: blob/master/src/util/core_audio_stream.cpp
7197 views
1
// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin <stenzek@gmail.com>
2
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
3
4
#include "core_audio_stream.h"
5
#include "translation.h"
6
7
#include "common/align.h"
8
#include "common/assert.h"
9
#include "common/error.h"
10
#include "common/gsvector.h"
11
#include "common/log.h"
12
#include "common/settings_interface.h"
13
#include "common/timer.h"
14
15
#include "soundtouch/SoundTouch.h"
16
#include "soundtouch/SoundTouchDLL.h"
17
18
#include <algorithm>
19
#include <cmath>
20
#include <cstring>
21
#include <limits>
22
23
// Selects the "AudioStream" log channel for this file; presumably consumed by the *_LOG macros used below
// (the macro itself is defined elsewhere).
LOG_CHANNEL(AudioStream);
24
25
// Compile-time switch: when true, UpdateStretchTempo() logs time-stretch statistics roughly once per second.
static constexpr bool LOG_TIMESTRETCH_STATS = false;
26
27
void AudioStreamParameters::Load(const SettingsInterface& si, const char* section)
28
{
29
stretch_mode =
30
CoreAudioStream::ParseStretchMode(
31
si.GetStringValue(section, "StretchMode", CoreAudioStream::GetStretchModeName(DEFAULT_STRETCH_MODE)).c_str())
32
.value_or(DEFAULT_STRETCH_MODE);
33
output_latency_ms = static_cast<u16>(std::min<u32>(
34
si.GetUIntValue(section, "OutputLatencyMS", DEFAULT_OUTPUT_LATENCY_MS), std::numeric_limits<u16>::max()));
35
output_latency_minimal = si.GetBoolValue(section, "OutputLatencyMinimal", DEFAULT_OUTPUT_LATENCY_MINIMAL);
36
buffer_ms = static_cast<u16>(
37
std::min<u32>(si.GetUIntValue(section, "BufferMS", DEFAULT_BUFFER_MS), std::numeric_limits<u16>::max()));
38
39
stretch_sequence_length_ms =
40
static_cast<u16>(std::min<u32>(si.GetUIntValue(section, "StretchSequenceLengthMS", DEFAULT_STRETCH_SEQUENCE_LENGTH),
41
std::numeric_limits<u16>::max()));
42
stretch_seekwindow_ms = static_cast<u16>(std::min<u32>(
43
si.GetUIntValue(section, "StretchSeekWindowMS", DEFAULT_STRETCH_SEEKWINDOW), std::numeric_limits<u16>::max()));
44
stretch_overlap_ms = static_cast<u16>(std::min<u32>(
45
si.GetUIntValue(section, "StretchOverlapMS", DEFAULT_STRETCH_OVERLAP), std::numeric_limits<u16>::max()));
46
stretch_use_quickseek = si.GetBoolValue(section, "StretchUseQuickSeek", DEFAULT_STRETCH_USE_QUICKSEEK);
47
stretch_use_aa_filter = si.GetBoolValue(section, "StretchUseAAFilter", DEFAULT_STRETCH_USE_AA_FILTER);
48
}
49
50
// Writes every field into `section` of `si`, using the same key names that Load() reads.
void AudioStreamParameters::Save(SettingsInterface& si, const char* section) const
{
  // General stream configuration.
  const char* const mode_name = CoreAudioStream::GetStretchModeName(stretch_mode);
  si.SetStringValue(section, "StretchMode", mode_name);
  si.SetUIntValue(section, "BufferMS", buffer_ms);
  si.SetUIntValue(section, "OutputLatencyMS", output_latency_ms);
  si.SetBoolValue(section, "OutputLatencyMinimal", output_latency_minimal);

  // Stretcher tuning.
  si.SetUIntValue(section, "StretchSequenceLengthMS", stretch_sequence_length_ms);
  si.SetUIntValue(section, "StretchSeekWindowMS", stretch_seekwindow_ms);
  si.SetUIntValue(section, "StretchOverlapMS", stretch_overlap_ms);
  si.SetBoolValue(section, "StretchUseQuickSeek", stretch_use_quickseek);
  si.SetBoolValue(section, "StretchUseAAFilter", stretch_use_aa_filter);
}
63
64
// Removes every audio stream key from `section` of `si`.
void AudioStreamParameters::Clear(SettingsInterface& si, const char* section)
{
  // "ExpansionMode" is not written by Save(); presumably a key left over from older versions.
  static constexpr const char* KEYS_TO_DELETE[] = {
    "StretchMode",
    "ExpansionMode",
    "BufferMS",
    "OutputLatencyMS",
    "OutputLatencyMinimal",
    "StretchSequenceLengthMS",
    "StretchSeekWindowMS",
    "StretchOverlapMS",
    "StretchUseQuickSeek",
    "StretchUseAAFilter",
  };

  for (const char* key : KEYS_TO_DELETE)
    si.DeleteValue(section, key);
}
78
79
bool AudioStreamParameters::operator!=(const AudioStreamParameters& rhs) const
80
{
81
return (std::memcmp(this, &rhs, sizeof(*this)) != 0);
82
}
83
84
bool AudioStreamParameters::operator==(const AudioStreamParameters& rhs) const
85
{
86
return (std::memcmp(this, &rhs, sizeof(*this)) == 0);
87
}
88
89
// Compiler-generated default construction; no stream, buffer or stretcher is set up until Initialize().
CoreAudioStream::CoreAudioStream() = default;
90
91
// Tears down the output stream, the stretcher and all buffers via Destroy().
CoreAudioStream::~CoreAudioStream()
{
  this->Destroy();
}
95
96
bool CoreAudioStream::Initialize(AudioBackend backend, u32 sample_rate, const AudioStreamParameters& params,
97
std::string_view driver_name, std::string_view device_name,
98
Error* error /* = nullptr */)
99
{
100
Destroy();
101
102
m_sample_rate = sample_rate;
103
m_volume = 100;
104
m_parameters = params;
105
m_filling = false;
106
m_paused = false;
107
108
AllocateBuffer();
109
StretchAllocate();
110
111
const u32 output_latency_frames =
112
GetBufferSizeForMS(sample_rate, (params.output_latency_ms != 0) ? params.output_latency_ms : params.buffer_ms);
113
if (backend != AudioBackend::Null)
114
{
115
if (!(m_stream =
116
AudioStream::CreateStream(backend, sample_rate, NUM_CHANNELS, output_latency_frames,
117
params.output_latency_minimal, driver_name, device_name, this, true, error)))
118
{
119
Destroy();
120
return false;
121
}
122
}
123
else
124
{
125
// no point stretching with no output
126
m_parameters = AudioStreamParameters();
127
m_parameters.stretch_mode = AudioStretchMode::Off;
128
m_parameters.buffer_ms = params.buffer_ms;
129
130
// always paused to avoid output
131
m_paused = true;
132
}
133
134
return true;
135
}
136
137
void CoreAudioStream::UpdateParameters(const AudioStreamParameters& params)
138
{
139
constexpr auto copy_stretch_params = [](AudioStreamParameters& dest, const AudioStreamParameters& src) {
140
dest.stretch_mode = src.stretch_mode;
141
dest.stretch_sequence_length_ms = src.stretch_sequence_length_ms;
142
dest.stretch_seekwindow_ms = src.stretch_seekwindow_ms;
143
dest.stretch_overlap_ms = src.stretch_overlap_ms;
144
dest.stretch_use_quickseek = src.stretch_use_quickseek;
145
dest.stretch_use_aa_filter = src.stretch_use_aa_filter;
146
};
147
148
if (params.buffer_ms != m_parameters.buffer_ms)
149
{
150
Error error;
151
152
// have to pause the stream to change buffer size
153
if (m_stream && !m_paused)
154
{
155
if (!m_stream->Stop(&error))
156
{
157
ERROR_LOG("Failed to stop audio stream for buffer size change: {}", error.GetDescription());
158
return;
159
}
160
}
161
162
StretchDestroy();
163
DestroyBuffer();
164
165
m_parameters.buffer_ms = params.buffer_ms;
166
copy_stretch_params(m_parameters, params);
167
168
AllocateBuffer();
169
StretchAllocate();
170
171
if (m_stream && !m_paused)
172
{
173
if (!m_stream->Start(&error))
174
{
175
ERROR_LOG("Failed to start audio stream after buffer size change: {}", error.GetDescription());
176
m_paused = true;
177
}
178
}
179
180
return;
181
}
182
183
if (params.stretch_mode != m_parameters.stretch_mode)
184
{
185
StretchDestroy();
186
copy_stretch_params(m_parameters, params);
187
StretchAllocate();
188
}
189
else
190
{
191
// easier case: just changing stretch settings
192
StretchUpdateParameters(params);
193
}
194
}
195
196
void CoreAudioStream::Destroy()
197
{
198
m_stream.reset();
199
StretchDestroy();
200
DestroyBuffer();
201
m_sample_rate = 0;
202
m_parameters = AudioStreamParameters();
203
m_volume = 0;
204
m_filling = false;
205
m_paused = true;
206
}
207
208
// Rounds `size` (in frames) up to the next multiple of CHUNK_SIZE.
u32 CoreAudioStream::GetAlignedBufferSize(u32 size)
{
  // AlignUpPow2 is used, so the chunk size must be a power of two.
  static_assert(Common::IsPow2(CHUNK_SIZE));

  const u32 aligned_size = Common::AlignUpPow2(size, CHUNK_SIZE);
  return aligned_size;
}
213
214
// Converts a duration in milliseconds to a chunk-aligned number of frames at `sample_rate`.
u32 CoreAudioStream::GetBufferSizeForMS(u32 sample_rate, u32 ms)
{
  // Multiply in 64 bits: ms * sample_rate can exceed the u32 range for long durations, which would
  // silently wrap and produce a far smaller buffer than requested.
  const u64 frames = (static_cast<u64>(ms) * sample_rate) / 1000u;
  return GetAlignedBufferSize(static_cast<u32>(frames));
}
218
219
// Converts a frame count to milliseconds at `sample_rate`, after rounding the frame count up to a
// multiple of the chunk size. Returns 0 when `sample_rate` is 0.
u32 CoreAudioStream::GetMSForBufferSize(u32 sample_rate, u32 buffer_size)
{
  // Guard against dividing by zero, e.g. when the stream has not been initialized.
  if (sample_rate == 0)
    return 0;

  // Multiply in 64 bits: frames * 1000 overflows u32 once the buffer exceeds ~4.29 million frames.
  const u64 aligned_frames = GetAlignedBufferSize(buffer_size);
  return static_cast<u32>((aligned_frames * 1000u) / sample_rate);
}
224
225
// Names used for each stretch mode in settings, indexed by the AudioStretchMode value.
static constexpr std::array s_stretch_mode_names = {
  "None",
  "Resample",
  "TimeStretch",
};
230
static constexpr const std::array s_stretch_mode_display_names = {
231
TRANSLATE_DISAMBIG_NOOP("Settings", "Off (Noisy)", "AudioStretchMode"),
232
TRANSLATE_DISAMBIG_NOOP("Settings", "Resampling (Pitch Shift)", "AudioStretchMode"),
233
TRANSLATE_DISAMBIG_NOOP("Settings", "Time Stretch (Tempo Change, Best Sound)", "AudioStretchMode"),
234
};
235
236
// Returns the settings name for `mode`, or an empty string when `mode` is out of range.
const char* CoreAudioStream::GetStretchModeName(AudioStretchMode mode)
{
  const size_t index = static_cast<size_t>(mode);
  if (index >= s_stretch_mode_names.size())
    return "";

  return s_stretch_mode_names[index];
}
241
242
// Returns the translated display name for `mode`, or an empty string when `mode` is out of range.
const char* CoreAudioStream::GetStretchModeDisplayName(AudioStretchMode mode)
{
  const size_t index = static_cast<size_t>(mode);
  if (index >= s_stretch_mode_display_names.size())
    return "";

  return Host::TranslateToCString("Settings", s_stretch_mode_display_names[index], "AudioStretchMode");
}
249
250
std::optional<AudioStretchMode> CoreAudioStream::ParseStretchMode(const char* name)
251
{
252
for (size_t i = 0; i < static_cast<u8>(AudioStretchMode::Count); i++)
253
{
254
if (std::strcmp(name, s_stretch_mode_names[i]) == 0)
255
return static_cast<AudioStretchMode>(i);
256
}
257
258
return std::nullopt;
259
}
260
261
// Returns how many frames currently sit between the read and write positions of the ring buffer.
// Both positions are loaded with relaxed ordering, so the result is only a snapshot.
u32 CoreAudioStream::GetBufferedFramesRelaxed() const
{
  const u32 read_pos = m_rpos.load(std::memory_order_relaxed);
  const u32 write_pos = m_wpos.load(std::memory_order_relaxed);

  // Adding the buffer size before subtracting keeps the difference non-negative when the write
  // position has wrapped around behind the read position.
  const u32 distance = write_pos + m_buffer_size - read_pos;
  return distance % m_buffer_size;
}
267
268
// Fills `samples` with `num_frames` frames taken from the ring buffer.
//  - While refilling after an underrun (m_filling), silence is output until enough frames are buffered.
//  - If fewer frames are available than requested, the available ones are spread across the whole
//    output; with nothing available the output is zeroed.
//  - Finally the output volume is applied unless it is exactly 100.
void CoreAudioStream::ReadFrames(SampleType* samples, u32 num_frames)
{
  const u32 buffered_frames = GetBufferedFramesRelaxed();
  u32 copy_frames = num_frames;
  u32 missing_frames = 0;

  if (m_filling)
  {
    // Threshold that must be buffered before playback resumes: a fraction of the ring buffer,
    // rounded up to whole chunks.
    const u32 divisor = (m_parameters.stretch_mode != AudioStretchMode::TimeStretch) ? 32 : 400;
    const u32 refill_threshold = GetAlignedBufferSize(m_buffer_size / divisor);

    if (buffered_frames >= refill_threshold)
    {
      m_filling = false;
      VERBOSE_LOG("Underrun compensation done ({} frames buffered)", refill_threshold);
    }
    else
    {
      missing_frames = num_frames;
      copy_frames = 0;
    }
  }

  if (buffered_frames < copy_frames)
  {
    // Underrun: take what is there and start refilling.
    missing_frames = copy_frames - buffered_frames;
    copy_frames = buffered_frames;
    m_filling = true;

    if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
      StretchUnderrun();
  }

  if (copy_frames > 0)
  {
    u32 read_pos = m_rpos.load(std::memory_order_acquire);

    // First part: from the read position towards the physical end of the buffer.
    const u32 first_part = std::min<u32>(m_buffer_size - read_pos, copy_frames);
    if (first_part > 0)
    {
      std::memcpy(samples, &m_buffer[read_pos * NUM_CHANNELS], first_part * NUM_CHANNELS * sizeof(SampleType));
      read_pos += first_part;
      if (read_pos == m_buffer_size)
        read_pos = 0;
    }

    // Second part: whatever remains, continuing from the start of the buffer.
    const u32 second_part = copy_frames - first_part;
    if (second_part > 0)
    {
      std::memcpy(&samples[first_part * NUM_CHANNELS], &m_buffer[0], second_part * NUM_CHANNELS * sizeof(SampleType));
      read_pos = second_part;
    }

    m_rpos.store(read_pos, std::memory_order_release);
  }

  if (missing_frames > 0)
  {
    if (copy_frames == 0)
    {
      // Nothing was read at all, so the output is plain silence.
      std::memset(samples, 0, missing_frames * NUM_CHANNELS * sizeof(s16));
    }
    else
    {
      // Very crude resampler: spread the frames that were read evenly across the whole output.
      // This aliases, but avoids the pop that inserting silence would cause.
      const float ratio = static_cast<float>(copy_frames) / static_cast<float>(num_frames);
      const u32 step = static_cast<u32>(65536.0f * ratio); // 16.16 fixed-point source advance per output frame

      // Work from a stack copy of the input, since the output overwrites it in place.
      SampleType* source = static_cast<SampleType*>(alloca(copy_frames * NUM_CHANNELS * sizeof(SampleType)));
      std::memcpy(source, samples, copy_frames * NUM_CHANNELS * sizeof(SampleType));

      SampleType* dest = samples;
      const u32 frame_bytes = sizeof(SampleType) * NUM_CHANNELS;
      u32 fraction = 0;
      for (u32 remaining = num_frames; remaining > 0; remaining--)
      {
        std::memcpy(dest, source, frame_bytes);
        dest += NUM_CHANNELS;

        // Advance the source by the integer part of the accumulated position, keep the fraction.
        fraction += step;
        source += (fraction >> 16) * NUM_CHANNELS;
        fraction &= 0xFFFFu;
      }

      VERBOSE_LOG("Audio buffer underflow, resampled {} frames to {}", copy_frames, num_frames);
    }
  }

  if (m_volume != 100)
  {
    const u32 total_samples = num_frames * NUM_CHANNELS;
    const u32 vector_samples = Common::AlignDownPow2(total_samples, 8);
    u32 tail_samples = total_samples - vector_samples;

    const float volume_mult = static_cast<float>(m_volume) / 100.0f;
    const GSVector4 volume_multv = GSVector4(volume_mult);

    // Eight samples at a time: widen s16 -> s32 -> float, scale, convert back and pack to s16.
    const SampleType* const vector_end = samples + vector_samples;
    for (; samples != vector_end; samples += 8)
    {
      const GSVector4i packed = GSVector4i::load<false>(samples); // [0, 1, 2, 3, 4, 5, 6, 7]
      GSVector4i lo = packed.upl16(packed);                       // [0, 0, 1, 1, 2, 2, 3, 3]
      GSVector4i hi = packed.uph16(packed);                       // [4, 4, 5, 5, 6, 6, 7, 7]
      lo = lo.sra32<16>();                                        // [0, 1, 2, 3]
      hi = hi.sra32<16>();                                        // [4, 5, 6, 7]

      const GSVector4 scaled_lo = GSVector4(lo) * volume_multv; // [f0, f1, f2, f3]
      const GSVector4 scaled_hi = GSVector4(hi) * volume_multv; // [f4, f5, f6, f7]

      lo = GSVector4i(scaled_lo);                                // [0, 1, 2, 3]
      hi = GSVector4i(scaled_hi);                                // [4, 5, 6, 7]
      GSVector4i::store<false>(samples, lo.ps32(hi));            // [0, 1, 2, 3, 4, 5, 6, 7]
    }

    // Leftover samples that do not fill a whole vector.
    for (; tail_samples > 0; tail_samples--)
    {
      *samples = static_cast<s16>(std::clamp(static_cast<float>(*samples) * volume_mult, -32768.0f, 32767.0f));
      samples++;
    }
  }
}
397
398
// Copies `num_frames` frames from `data` into the ring buffer at the write position, wrapping at
// the end of the buffer. When there is not enough free space the chunk is dropped, except in
// TimeStretch mode where StretchOverrun() is invoked and the write proceeds.
void CoreAudioStream::InternalWriteFrames(s16* data, u32 num_frames)
{
  const u32 free_frames = m_buffer_size - GetBufferedFramesRelaxed();
  if (free_frames <= num_frames)
  {
    if (m_parameters.stretch_mode != AudioStretchMode::TimeStretch)
    {
      DEBUG_LOG("Buffer overrun, chunk dropped");
      return;
    }

    StretchOverrun();
  }

  u32 write_pos = m_wpos.load(std::memory_order_acquire);
  const u32 frames_until_end = m_buffer_size - write_pos;

  if (frames_until_end > num_frames)
  {
    // Fits without reaching the end of the buffer.
    std::memcpy(&m_buffer[write_pos * NUM_CHANNELS], data, num_frames * NUM_CHANNELS * sizeof(SampleType));
    write_pos += num_frames;
  }
  else
  {
    // Reaches (or passes) the end: fill up to the end, then continue from the start.
    // `wrapped_frames` is zero when the chunk ends exactly at the end of the buffer.
    const u32 wrapped_frames = num_frames - frames_until_end;

    std::memcpy(&m_buffer[write_pos * NUM_CHANNELS], data, frames_until_end * NUM_CHANNELS * sizeof(SampleType));
    if (wrapped_frames > 0)
    {
      std::memcpy(&m_buffer[0], data + frames_until_end * NUM_CHANNELS,
                  wrapped_frames * NUM_CHANNELS * sizeof(SampleType));
    }

    write_pos = wrapped_frames;
  }

  m_wpos.store(write_pos, std::memory_order_release);
}
439
440
void CoreAudioStream::AllocateBuffer()
441
{
442
// Stretcher can produce a large amount of samples from few samples when running slow, so allocate a larger buffer.
443
// In most cases it's not going to be used, but better to have a larger buffer and not need it than overrun.
444
const u32 multiplier = (m_parameters.stretch_mode == AudioStretchMode::TimeStretch) ?
445
16 :
446
((m_parameters.stretch_mode == AudioStretchMode::Off) ? 1 : 2);
447
m_buffer_size = GetAlignedBufferSize(((m_parameters.buffer_ms * multiplier) * m_sample_rate) / 1000);
448
m_target_buffer_size = GetAlignedBufferSize((m_sample_rate * m_parameters.buffer_ms) / 1000u);
449
450
m_buffer = Common::make_unique_aligned_for_overwrite<s16[]>(VECTOR_ALIGNMENT, m_buffer_size * NUM_CHANNELS);
451
m_staging_buffer = Common::make_unique_aligned_for_overwrite<s16[]>(VECTOR_ALIGNMENT, CHUNK_SIZE * NUM_CHANNELS);
452
m_float_buffer = Common::make_unique_aligned_for_overwrite<float[]>(VECTOR_ALIGNMENT, CHUNK_SIZE * NUM_CHANNELS);
453
454
DEV_LOG("Allocated buffer of {} frames for buffer of {} ms [stretch {}, target size {}].", m_buffer_size,
455
m_parameters.buffer_ms, GetStretchModeName(m_parameters.stretch_mode), m_target_buffer_size);
456
}
457
458
void CoreAudioStream::DestroyBuffer()
459
{
460
m_staging_buffer.reset();
461
m_float_buffer.reset();
462
m_buffer.reset();
463
m_buffer_size = 0;
464
m_wpos.store(0, std::memory_order_release);
465
m_rpos.store(0, std::memory_order_release);
466
}
467
468
void CoreAudioStream::EmptyBuffer()
469
{
470
if (IsStretchEnabled())
471
{
472
soundtouch_clear(m_soundtouch);
473
if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
474
soundtouch_setTempo(m_soundtouch, m_nominal_rate);
475
}
476
477
m_wpos.store(m_rpos.load(std::memory_order_acquire), std::memory_order_release);
478
}
479
480
void CoreAudioStream::SetNominalRate(float tempo)
481
{
482
m_nominal_rate = tempo;
483
if (m_parameters.stretch_mode == AudioStretchMode::Resample)
484
soundtouch_setRate(m_soundtouch, tempo);
485
else if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch && !m_stretch_inactive)
486
soundtouch_setTempo(m_soundtouch, tempo);
487
}
488
489
// Switches to a different stretch mode, rebuilding the buffers and the stretcher.
// Does nothing when `mode` is already active.
void CoreAudioStream::SetStretchMode(AudioStretchMode mode)
{
  if (m_parameters.stretch_mode == mode)
    return;

  // The buffers can only be replaced while the stream is paused, so pause it for the duration
  // if it was running.
  const bool was_running = !m_paused;
  if (was_running)
    SetPaused(true);

  DestroyBuffer();
  StretchDestroy();
  m_parameters.stretch_mode = mode;

  AllocateBuffer();
  if (mode != AudioStretchMode::Off)
    StretchAllocate();

  if (was_running)
    SetPaused(false);
}
510
511
void CoreAudioStream::SetPaused(bool paused)
512
{
513
// force state to always be paused if we're a null output
514
if (m_paused == paused || !m_stream)
515
return;
516
517
Error error;
518
if (!(paused ? m_stream->Stop(&error) : m_stream->Start(&error)))
519
ERROR_LOG("Failed to {} stream: {}", paused ? "pause" : "restart", error.GetDescription());
520
else
521
m_paused = paused;
522
}
523
524
// Stores the output volume. ReadFrames() scales samples by volume / 100 and skips scaling at
// exactly 100; EndWrite() discards input while the volume is 0.
void CoreAudioStream::SetOutputVolume(u32 volume)
{
  this->m_volume = volume;
}
528
529
// Hands out the unused tail of the staging buffer: `*buffer_ptr` receives where to write and
// `*num_frames` how many frames still fit before the chunk is complete.
void CoreAudioStream::BeginWrite(SampleType** buffer_ptr, u32* num_frames)
{
  // TODO: Write directly to buffer when not using stretching.
  const u32 staged_frames = m_staging_buffer_pos / NUM_CHANNELS;

  *buffer_ptr = m_staging_buffer.get() + m_staging_buffer_pos;
  *num_frames = CHUNK_SIZE - staged_frames;
}
535
536
static void S16ChunkToFloat(const s16* src, float* dst, u32 num_samples)
537
{
538
constexpr GSVector4 S16_TO_FLOAT_V = GSVector4::cxpr(1.0f / 32767.0f);
539
540
const u32 iterations = (num_samples + 7) / 8;
541
for (u32 i = 0; i < iterations; i++)
542
{
543
const GSVector4i sv = GSVector4i::load<true>(src);
544
src += 8;
545
546
GSVector4i iv1 = sv.upl16(sv); // [0, 0, 1, 1, 2, 2, 3, 3]
547
GSVector4i iv2 = sv.uph16(sv); // [4, 4, 5, 5, 6, 6, 7, 7]
548
iv1 = iv1.sra32<16>(); // [0, 1, 2, 3]
549
iv2 = iv2.sra32<16>(); // [4, 5, 6, 7]
550
GSVector4 fv1 = GSVector4(iv1); // [f0, f1, f2, f3]
551
GSVector4 fv2 = GSVector4(iv2); // [f4, f5, f6, f7]
552
fv1 = fv1 * S16_TO_FLOAT_V;
553
fv2 = fv2 * S16_TO_FLOAT_V;
554
555
GSVector4::store<true>(dst + 0, fv1);
556
GSVector4::store<true>(dst + 4, fv2);
557
dst += 8;
558
}
559
}
560
561
// Converts floats to signed 16-bit samples by scaling with 32767 and packing (ps32), eight
// samples per iteration. `num_samples` is rounded up to a multiple of eight, so both buffers must
// have room for that; aligned loads/stores are used (load<true>/store<true>).
static void FloatChunkToS16(s16* dst, const float* src, u32 num_samples)
{
  const GSVector4 FLOAT_TO_S16_V = GSVector4::cxpr(32767.0f);

  for (u32 blocks_left = (num_samples + 7) / 8; blocks_left > 0; blocks_left--)
  {
    const GSVector4 scaled_lo = GSVector4::load<true>(src + 0) * FLOAT_TO_S16_V;
    const GSVector4 scaled_hi = GSVector4::load<true>(src + 4) * FLOAT_TO_S16_V;
    src += 8;

    const GSVector4i lo = GSVector4i(scaled_lo);
    const GSVector4i hi = GSVector4i(scaled_hi);

    // Pack the two 4 x 32-bit vectors into one 8 x 16-bit vector.
    GSVector4i::store<true>(dst, lo.ps32(hi));
    dst += 8;
  }
}
582
583
// Commits `num_frames` frames previously written through BeginWrite(). Once the staging buffer
// holds a full chunk it is pushed on: straight into the ring buffer when stretching is disabled,
// otherwise through the stretcher. Input is dropped while muted (volume 0) or paused.
void CoreAudioStream::EndWrite(u32 num_frames)
{
  const bool muted = (m_volume == 0);
  if (muted || m_paused)
    return;

  m_staging_buffer_pos += num_frames * NUM_CHANNELS;
  DebugAssert(m_staging_buffer_pos <= (CHUNK_SIZE * NUM_CHANNELS));

  // Wait until a whole chunk has been staged.
  const bool chunk_complete = ((m_staging_buffer_pos / NUM_CHANNELS) >= CHUNK_SIZE);
  if (!chunk_complete)
    return;

  m_staging_buffer_pos = 0;

  if (IsStretchEnabled())
  {
    // The stretcher consumes floats, so convert the chunk first.
    S16ChunkToFloat(m_staging_buffer.get(), m_float_buffer.get(), CHUNK_SIZE * NUM_CHANNELS);
    StretchWriteBlock(m_float_buffer.get());
  }
  else
  {
    InternalWriteFrames(m_staging_buffer.get(), CHUNK_SIZE);
  }
}
605
606
// Time stretching algorithm based on PCSX2 implementation.
607
608
template<class T>
609
ALWAYS_INLINE static bool IsInRange(const T& val, const T& min, const T& max)
610
{
611
return (min <= val && val <= max);
612
}
613
614
void CoreAudioStream::StretchAllocate()
615
{
616
if (m_parameters.stretch_mode == AudioStretchMode::Off)
617
return;
618
619
m_soundtouch = soundtouch_createInstance();
620
soundtouch_setSampleRate(m_soundtouch, m_sample_rate);
621
soundtouch_setChannels(m_soundtouch, NUM_CHANNELS);
622
623
soundtouch_setSetting(m_soundtouch, SETTING_USE_QUICKSEEK, m_parameters.stretch_use_quickseek);
624
soundtouch_setSetting(m_soundtouch, SETTING_USE_AA_FILTER, m_parameters.stretch_use_aa_filter);
625
626
soundtouch_setSetting(m_soundtouch, SETTING_SEQUENCE_MS, m_parameters.stretch_sequence_length_ms);
627
soundtouch_setSetting(m_soundtouch, SETTING_SEEKWINDOW_MS, m_parameters.stretch_seekwindow_ms);
628
soundtouch_setSetting(m_soundtouch, SETTING_OVERLAP_MS, m_parameters.stretch_overlap_ms);
629
630
if (m_parameters.stretch_mode == AudioStretchMode::Resample)
631
soundtouch_setRate(m_soundtouch, m_nominal_rate);
632
else
633
soundtouch_setTempo(m_soundtouch, m_nominal_rate);
634
635
m_stretch_reset = STRETCH_RESET_THRESHOLD;
636
m_stretch_inactive = false;
637
m_stretch_ok_count = 0;
638
m_dynamic_target_usage = 0.0f;
639
m_average_position = 0;
640
m_average_available = 0;
641
642
m_staging_buffer_pos = 0;
643
}
644
645
void CoreAudioStream::StretchUpdateParameters(const AudioStreamParameters& params)
646
{
647
if (m_parameters.stretch_mode == AudioStretchMode::Off)
648
return;
649
650
if (params.stretch_use_quickseek != m_parameters.stretch_use_quickseek)
651
{
652
m_parameters.stretch_use_quickseek = params.stretch_use_quickseek;
653
soundtouch_setSetting(m_soundtouch, SETTING_USE_QUICKSEEK, m_parameters.stretch_use_quickseek);
654
}
655
656
if (params.stretch_use_aa_filter != m_parameters.stretch_use_aa_filter)
657
{
658
m_parameters.stretch_use_aa_filter = params.stretch_use_aa_filter;
659
soundtouch_setSetting(m_soundtouch, SETTING_USE_AA_FILTER, m_parameters.stretch_use_aa_filter);
660
}
661
662
if (params.stretch_sequence_length_ms != m_parameters.stretch_sequence_length_ms)
663
{
664
m_parameters.stretch_sequence_length_ms = params.stretch_sequence_length_ms;
665
soundtouch_setSetting(m_soundtouch, SETTING_SEQUENCE_MS, m_parameters.stretch_sequence_length_ms);
666
}
667
668
if (params.stretch_seekwindow_ms != m_parameters.stretch_seekwindow_ms)
669
{
670
m_parameters.stretch_seekwindow_ms = params.stretch_seekwindow_ms;
671
soundtouch_setSetting(m_soundtouch, SETTING_SEEKWINDOW_MS, m_parameters.stretch_seekwindow_ms);
672
}
673
674
if (params.stretch_overlap_ms != m_parameters.stretch_overlap_ms)
675
{
676
m_parameters.stretch_overlap_ms = params.stretch_overlap_ms;
677
soundtouch_setSetting(m_soundtouch, SETTING_OVERLAP_MS, m_parameters.stretch_overlap_ms);
678
}
679
}
680
681
void CoreAudioStream::StretchDestroy()
682
{
683
if (m_soundtouch)
684
{
685
soundtouch_destroyInstance(m_soundtouch);
686
m_soundtouch = nullptr;
687
}
688
}
689
690
void CoreAudioStream::StretchWriteBlock(const float* block)
691
{
692
if (IsStretchEnabled())
693
{
694
soundtouch_putSamples(m_soundtouch, block, CHUNK_SIZE);
695
696
u32 tempProgress;
697
while (tempProgress = soundtouch_receiveSamples(m_soundtouch, m_float_buffer.get(), CHUNK_SIZE), tempProgress != 0)
698
{
699
FloatChunkToS16(m_staging_buffer.get(), m_float_buffer.get(), tempProgress * NUM_CHANNELS);
700
InternalWriteFrames(m_staging_buffer.get(), tempProgress);
701
}
702
703
if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
704
UpdateStretchTempo();
705
}
706
else
707
{
708
FloatChunkToS16(m_staging_buffer.get(), block, CHUNK_SIZE * NUM_CHANNELS);
709
InternalWriteFrames(m_staging_buffer.get(), CHUNK_SIZE);
710
}
711
}
712
713
float CoreAudioStream::AddAndGetAverageTempo(float val)
714
{
715
static constexpr u32 AVERAGING_WINDOW = 50;
716
717
// Build up a circular buffer for tempo averaging to prevent rapid tempo oscillations.
718
if (m_average_available < AVERAGING_BUFFER_SIZE)
719
m_average_available++;
720
721
m_average_fullness[m_average_position] = val;
722
m_average_position = (m_average_position + 1U) % AVERAGING_BUFFER_SIZE;
723
724
// The + AVERAGING_BUFFER_SIZE ensures we don't go negative when using modulo arithmetic.
725
const u32 actual_window = std::min<u32>(m_average_available, AVERAGING_WINDOW);
726
u32 index = (m_average_position - actual_window + AVERAGING_BUFFER_SIZE) % AVERAGING_BUFFER_SIZE;
727
float sum = 0.0f;
728
u32 count = 0;
729
730
#ifdef CPU_ARCH_SIMD
731
GSVector4 vsum = GSVector4::zero();
732
const u32 vcount = Common::AlignDownPow2(actual_window, 4);
733
for (; count < vcount; count += 4)
734
{
735
if ((index + 4) > AVERAGING_BUFFER_SIZE)
736
{
737
// wraparound
738
for (u32 i = 0; i < 4; i++)
739
{
740
sum += m_average_fullness[index];
741
index = (index + 1) % AVERAGING_BUFFER_SIZE;
742
}
743
}
744
else
745
{
746
vsum += GSVector4::load<false>(&m_average_fullness[index]);
747
index = (index + 4) % AVERAGING_BUFFER_SIZE;
748
}
749
}
750
sum += vsum.addv();
751
#endif
752
for (; count < actual_window; count++)
753
{
754
sum += m_average_fullness[index];
755
index = (index + 1) % AVERAGING_BUFFER_SIZE;
756
}
757
sum /= static_cast<float>(actual_window);
758
759
return (sum != 0.0f) ? sum : 1.0f;
760
}
761
762
void CoreAudioStream::UpdateStretchTempo()
763
{
764
static constexpr float MIN_TEMPO = 0.05f;
765
static constexpr float MAX_TEMPO = 500.0f;
766
767
// Hysteresis thresholds to prevent stretcher from constantly toggling on/off.
768
// i.e. this is the range we will run in 1:1 mode for.
769
static constexpr float INACTIVE_GOOD_FACTOR = 1.04f;
770
static constexpr float INACTIVE_BAD_FACTOR = 1.2f;
771
772
// Require sustained good performance before deactivating.
773
static constexpr u32 INACTIVE_MIN_OK_COUNT = 50;
774
static constexpr u32 COMPENSATION_DIVIDER = 100;
775
776
// Controls how aggressively we adjust the dynamic target. We want to keep the same target size regardless
777
// of the target speed, but need additional buffering when intentionally running below 100%.
778
float base_target_usage = static_cast<float>(m_target_buffer_size) / std::min(m_nominal_rate, 1.0f);
779
780
// tempo = current_buffer / target_buffer.
781
const u32 ibuffer_usage = GetBufferedFramesRelaxed();
782
float buffer_usage = static_cast<float>(ibuffer_usage);
783
float tempo = buffer_usage / m_dynamic_target_usage;
784
785
// Prevents the system from getting stuck in a bad state due to accumulated errors.
786
if (m_stretch_reset >= STRETCH_RESET_THRESHOLD)
787
{
788
VERBOSE_LOG("___ Stretcher is being reset.");
789
m_stretch_inactive = false;
790
m_stretch_ok_count = 0;
791
m_dynamic_target_usage = base_target_usage;
792
m_average_available = 0;
793
m_average_position = 0;
794
m_stretch_reset = 0;
795
tempo = m_nominal_rate;
796
}
797
else if (m_stretch_reset > 0)
798
{
799
// Back off resets if enough time has passed. That way a very occasional lag/overflow
800
// doesn't cascade into unnecessary tempo adjustment.
801
const u64 now = Timer::GetCurrentValue();
802
if (Timer::ConvertValueToSeconds(now - m_stretch_reset_time) >= 2.0f)
803
{
804
m_stretch_reset--;
805
m_stretch_reset_time = now;
806
}
807
}
808
809
// Apply temporal smoothing to prevent rapid tempo changes that cause artifacts.
810
tempo = AddAndGetAverageTempo(tempo);
811
812
// Apply non-linear dampening when close to target to reduce oscillation.
813
if (tempo < 2.0f)
814
tempo = std::sqrt(tempo);
815
816
tempo = std::clamp(tempo, MIN_TEMPO, MAX_TEMPO);
817
818
if (tempo < 1.0f)
819
base_target_usage /= std::sqrt(tempo);
820
821
// Gradually adjust our dynamic target toward what would give us the desired tempo.
822
m_dynamic_target_usage +=
823
static_cast<float>(base_target_usage / tempo - m_dynamic_target_usage) / static_cast<float>(COMPENSATION_DIVIDER);
824
825
// Snap back to baseline if we're very close.
826
if (IsInRange(tempo, 0.9f, 1.1f) &&
827
IsInRange(m_dynamic_target_usage, base_target_usage * 0.9f, base_target_usage * 1.1f))
828
{
829
m_dynamic_target_usage = base_target_usage;
830
}
831
832
// Are we changing the active state?
833
if (!m_stretch_inactive)
834
{
835
if (IsInRange(tempo, 1.0f / INACTIVE_GOOD_FACTOR, INACTIVE_GOOD_FACTOR))
836
m_stretch_ok_count++;
837
else
838
m_stretch_ok_count = 0;
839
840
if (m_stretch_ok_count >= INACTIVE_MIN_OK_COUNT)
841
{
842
VERBOSE_LOG("=== Stretcher is now inactive.");
843
m_stretch_inactive = true;
844
}
845
}
846
else if (!IsInRange(tempo, 1.0f / INACTIVE_BAD_FACTOR, INACTIVE_BAD_FACTOR))
847
{
848
VERBOSE_LOG("~~~ Stretcher is now active @ tempo {}.", tempo);
849
m_stretch_inactive = false;
850
m_stretch_ok_count = 0;
851
}
852
853
// If we're inactive, we don't want to change the tempo.
854
if (m_stretch_inactive)
855
tempo = m_nominal_rate;
856
857
if constexpr (LOG_TIMESTRETCH_STATS)
858
{
859
static float min_tempo = 0.0f;
860
static float max_tempo = 0.0f;
861
static float acc_tempo = 0.0f;
862
static u32 acc_cnt = 0;
863
acc_tempo += tempo;
864
acc_cnt++;
865
min_tempo = std::min(min_tempo, tempo);
866
max_tempo = std::max(max_tempo, tempo);
867
868
static int iterations = 0;
869
static u64 last_log_time = 0;
870
871
const u64 now = Timer::GetCurrentValue();
872
873
if (Timer::ConvertValueToSeconds(now - last_log_time) > 1.0f)
874
{
875
const float avg_tempo = (acc_cnt > 0) ? (acc_tempo / static_cast<float>(acc_cnt)) : 0.0f;
876
877
VERBOSE_LOG("{:3d} ms ({:3.0f}%), tempo: avg={:.2f} min={:.2f} max={:.2f}, comp: {:2.3f}, iters: {}, reset:{}",
878
(ibuffer_usage * 1000u) / m_sample_rate, 100.0f * buffer_usage / base_target_usage, avg_tempo,
879
min_tempo, max_tempo, m_dynamic_target_usage / base_target_usage, iterations, m_stretch_reset);
880
881
last_log_time = now;
882
iterations = 0;
883
884
min_tempo = std::numeric_limits<float>::max();
885
max_tempo = std::numeric_limits<float>::min();
886
acc_tempo = 0.0f;
887
acc_cnt = 0;
888
}
889
890
iterations++;
891
}
892
893
soundtouch_setTempo(m_soundtouch, tempo);
894
}
895
896
void CoreAudioStream::StretchUnderrun()
897
{
898
// Didn't produce enough frames in time.
899
m_stretch_reset++;
900
if (m_stretch_reset < STRETCH_RESET_THRESHOLD)
901
m_stretch_reset_time = Timer::GetCurrentValue();
902
}
903
904
void CoreAudioStream::StretchOverrun()
905
{
906
// Produced more frames than can fit in the buffer.
907
m_stretch_reset++;
908
if (m_stretch_reset < STRETCH_RESET_THRESHOLD)
909
m_stretch_reset_time = Timer::GetCurrentValue();
910
911
// Drop two packets to give the time stretcher a bit more time to slow things down.
912
// This prevents a cascading overrun situation where each overrun makes the next one more likely.
913
const u32 discard = CHUNK_SIZE * 2;
914
m_rpos.store((m_rpos.load(std::memory_order_acquire) + discard) % m_buffer_size, std::memory_order_release);
915
}
916
917
void CoreAudioStream::EmptyStretchBuffers()
918
{
919
if (!IsStretchEnabled())
920
return;
921
922
m_stretch_reset = STRETCH_RESET_THRESHOLD;
923
924
// Wipe soundtouch samples. If we don't do this and we're switching from a high tempo to low,
925
// we'll still have quite a large buffer of samples that will be played back at a low tempo,
926
// resulting in a long delay before the audio starts playing at the new tempo.
927
soundtouch_clear(m_soundtouch);
928
}
929
930