CoCalc -- audio_stream.cpp

GitHub Repository: stenzek/duckstation
Path: blob/master/src/util/audio_stream.cpp
4214 views
1
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <[email protected]>
2
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
3

4
#include "audio_stream.h"
5
#include "host.h"
6

7
#include "common/align.h"
8
#include "common/assert.h"
9
#include "common/error.h"
10
#include "common/gsvector.h"
11
#include "common/log.h"
12
#include "common/settings_interface.h"
13
#include "common/timer.h"
14

15
#include "soundtouch/SoundTouch.h"
16
#include "soundtouch/SoundTouchDLL.h"
17

18
#include <algorithm>
19
#include <cmath>
20
#include <cstring>
21
#include <limits>
22

23
LOG_CHANNEL(AudioStream);
24

25
// Compile-time switch: when true, AudioStream::UpdateStretchTempo() periodically logs time-stretch statistics.
static constexpr bool LOG_TIMESTRETCH_STATS = false;
26

27
/// Builds a device description.
/// Both strings are taken by value and moved into the members; the latency value is stored in
/// minimum_latency_frames unchanged.
AudioStream::DeviceInfo::DeviceInfo(std::string name_, std::string display_name_, u32 minimum_latency_)
  : name(std::move(name_)),
    display_name(std::move(display_name_)),
    minimum_latency_frames(minimum_latency_)
{
}
31

32
// Destructor is defaulted here, in the source file, rather than inline.
AudioStream::DeviceInfo::~DeviceInfo() = default;
33

34
void AudioStreamParameters::Load(const SettingsInterface& si, const char* section)
35
{
36
  stretch_mode =
37
    AudioStream::ParseStretchMode(
38
      si.GetStringValue(section, "StretchMode", AudioStream::GetStretchModeName(DEFAULT_STRETCH_MODE)).c_str())
39
      .value_or(DEFAULT_STRETCH_MODE);
40
  output_latency_ms = static_cast<u16>(std::min<u32>(
41
    si.GetUIntValue(section, "OutputLatencyMS", DEFAULT_OUTPUT_LATENCY_MS), std::numeric_limits<u16>::max()));
42
  output_latency_minimal = si.GetBoolValue(section, "OutputLatencyMinimal", DEFAULT_OUTPUT_LATENCY_MINIMAL);
43
  buffer_ms = static_cast<u16>(
44
    std::min<u32>(si.GetUIntValue(section, "BufferMS", DEFAULT_BUFFER_MS), std::numeric_limits<u16>::max()));
45

46
  stretch_sequence_length_ms =
47
    static_cast<u16>(std::min<u32>(si.GetUIntValue(section, "StretchSequenceLengthMS", DEFAULT_STRETCH_SEQUENCE_LENGTH),
48
                                   std::numeric_limits<u16>::max()));
49
  stretch_seekwindow_ms = static_cast<u16>(std::min<u32>(
50
    si.GetUIntValue(section, "StretchSeekWindowMS", DEFAULT_STRETCH_SEEKWINDOW), std::numeric_limits<u16>::max()));
51
  stretch_overlap_ms = static_cast<u16>(std::min<u32>(
52
    si.GetUIntValue(section, "StretchOverlapMS", DEFAULT_STRETCH_OVERLAP), std::numeric_limits<u16>::max()));
53
  stretch_use_quickseek = si.GetBoolValue(section, "StretchUseQuickSeek", DEFAULT_STRETCH_USE_QUICKSEEK);
54
  stretch_use_aa_filter = si.GetBoolValue(section, "StretchUseAAFilter", DEFAULT_STRETCH_USE_AA_FILTER);
55
}
56

57
/// Writes every field to the given settings section, using the same keys that Load() reads.
void AudioStreamParameters::Save(SettingsInterface& si, const char* section) const
{
  // The stretch mode is stored by name rather than by numeric value.
  const char* const mode_name = AudioStream::GetStretchModeName(stretch_mode);
  si.SetStringValue(section, "StretchMode", mode_name);

  // Buffering and latency.
  si.SetUIntValue(section, "BufferMS", buffer_ms);
  si.SetUIntValue(section, "OutputLatencyMS", output_latency_ms);
  si.SetBoolValue(section, "OutputLatencyMinimal", output_latency_minimal);

  // Time-stretcher tuning.
  si.SetUIntValue(section, "StretchSequenceLengthMS", stretch_sequence_length_ms);
  si.SetUIntValue(section, "StretchSeekWindowMS", stretch_seekwindow_ms);
  si.SetUIntValue(section, "StretchOverlapMS", stretch_overlap_ms);
  si.SetBoolValue(section, "StretchUseQuickSeek", stretch_use_quickseek);
  si.SetBoolValue(section, "StretchUseAAFilter", stretch_use_aa_filter);
}
70

71
/// Deletes every audio stream key from the given settings section.
void AudioStreamParameters::Clear(SettingsInterface& si, const char* section)
{
  // Keys are removed in this order. "ExpansionMode" is not read or written by Load()/Save() in
  // this file - presumably a key left behind by an older settings layout (TODO confirm).
  static constexpr const char* KEYS_TO_DELETE[] = {
    "StretchMode",
    "ExpansionMode",
    "BufferMS",
    "OutputLatencyMS",
    "OutputLatencyMinimal",
    "StretchSequenceLengthMS",
    "StretchSeekWindowMS",
    "StretchOverlapMS",
    "StretchUseQuickSeek",
    "StretchUseAAFilter",
  };

  for (const char* key : KEYS_TO_DELETE)
    si.DeleteValue(section, key);
}
85

86
bool AudioStreamParameters::operator!=(const AudioStreamParameters& rhs) const
87
{
88
  return (std::memcmp(this, &rhs, sizeof(*this)) != 0);
89
}
90

91
bool AudioStreamParameters::operator==(const AudioStreamParameters& rhs) const
92
{
93
  return (std::memcmp(this, &rhs, sizeof(*this)) == 0);
94
}
95

96
// Stores the sample rate and a copy of the parameters. No buffers are allocated here; that is done
// by BaseInitialize().
AudioStream::AudioStream(u32 sample_rate, const AudioStreamParameters& parameters)
97
  : m_sample_rate(sample_rate), m_parameters(parameters)
98
{
99
}
100

101
// Releases the SoundTouch instance (if one was created) and then the sample buffers.
AudioStream::~AudioStream()
102
{
103
  StretchDestroy();
104
  DestroyBuffer();
105
}
106

107
/// Creates a base AudioStream with stretching disabled and the requested buffer length.
/// @param sample_rate Sample rate passed to the stream constructor.
/// @param buffer_ms   Buffer length in milliseconds; converted to u16 with a plain cast.
std::unique_ptr<AudioStream> AudioStream::CreateNullStream(u32 sample_rate, u32 buffer_ms)
{
  // Stretching is pointless when nothing is output, so force it off.
  AudioStreamParameters null_params;
  null_params.stretch_mode = AudioStretchMode::Off;
  null_params.buffer_ms = static_cast<u16>(buffer_ms);

  std::unique_ptr<AudioStream> null_stream(new AudioStream(sample_rate, null_params));
  null_stream->BaseInitialize();
  return null_stream;
}
118

119
std::vector<std::pair<std::string, std::string>> AudioStream::GetDriverNames(AudioBackend backend)
120
{
121
  std::vector<std::pair<std::string, std::string>> ret;
122
  switch (backend)
123
  {
124
#ifndef __ANDROID__
125
    case AudioBackend::Cubeb:
126
      ret = GetCubebDriverNames();
127
      break;
128
#endif
129

130
    default:
131
      break;
132
  }
133

134
  return ret;
135
}
136

137
std::vector<AudioStream::DeviceInfo> AudioStream::GetOutputDevices(AudioBackend backend, const char* driver,
138
                                                                   u32 sample_rate)
139
{
140
  std::vector<AudioStream::DeviceInfo> ret;
141
  switch (backend)
142
  {
143
#ifndef __ANDROID__
144
    case AudioBackend::Cubeb:
145
      ret = GetCubebOutputDevices(driver, sample_rate);
146
      break;
147
#endif
148

149
    default:
150
      break;
151
  }
152

153
  return ret;
154
}
155

156
/// Creates a stream for the requested backend by dispatching to the backend-specific factory.
/// The driver and device names are only forwarded to the Cubeb factory.
/// @return The new stream, or nullptr with `error` set to "Unknown audio backend." when the backend
///         is not handled in this build. Whatever a backend factory returns is passed through as-is.
std::unique_ptr<AudioStream> AudioStream::CreateStream(AudioBackend backend, u32 sample_rate,
                                                       const AudioStreamParameters& parameters, const char* driver_name,
                                                       const char* device_name, Error* error /* = nullptr */)
{
  switch (backend)
  {
#ifndef __ANDROID__
    case AudioBackend::Cubeb:
      return CreateCubebAudioStream(sample_rate, parameters, driver_name, device_name, error);

    case AudioBackend::SDL:
      return CreateSDLAudioStream(sample_rate, parameters, error);
#else
    case AudioBackend::AAudio:
      return CreateAAudioAudioStream(sample_rate, parameters, error);

    case AudioBackend::OpenSLES:
      return CreateOpenSLESAudioStream(sample_rate, parameters, error);
#endif

    case AudioBackend::Null:
      return CreateNullStream(sample_rate, parameters.buffer_ms);

    default:
      break;
  }

  // Reached only for backends that have no case above.
  Error::SetStringView(error, "Unknown audio backend.");
  return nullptr;
}
184

185
/// Rounds a frame count with Common::AlignUpPow2() using CHUNK_SIZE as the alignment.
u32 AudioStream::GetAlignedBufferSize(u32 size)
{
  // The alignment passed to AlignUpPow2() must itself be a power of two.
  static_assert(Common::IsPow2(CHUNK_SIZE));

  const u32 aligned_size = Common::AlignUpPow2(size, CHUNK_SIZE);
  return aligned_size;
}
190

191
/// Converts a duration in milliseconds to a chunk-aligned frame count at the given sample rate.
/// @param sample_rate Frames per second.
/// @param ms          Duration in milliseconds.
/// @return Frame count passed through GetAlignedBufferSize().
u32 AudioStream::GetBufferSizeForMS(u32 sample_rate, u32 ms)
{
  // Multiply in 64 bits: ms * sample_rate does not fit in 32 bits for long durations at high
  // sample rates, which previously wrapped around and produced a far too small buffer.
  const u64 frames = (static_cast<u64>(ms) * sample_rate) / 1000u;
  return GetAlignedBufferSize(static_cast<u32>(frames));
}
195

196
/// Converts a frame count to milliseconds at the given sample rate.
/// The frame count is first aligned with GetAlignedBufferSize(), so the result describes the
/// aligned buffer rather than the raw input.
/// @param sample_rate Frames per second; 0 yields 0 instead of dividing by zero.
/// @param buffer_size Frame count.
u32 AudioStream::GetMSForBufferSize(u32 sample_rate, u32 buffer_size)
{
  if (sample_rate == 0)
    return 0;

  buffer_size = GetAlignedBufferSize(buffer_size);

  // Multiply in 64 bits: buffer_size * 1000 wraps in 32 bits for buffers above ~4.29M frames.
  return static_cast<u32>((static_cast<u64>(buffer_size) * 1000u) / sample_rate);
}
201

202
// Backend identifier strings, indexed by the numeric value of AudioBackend: ParseBackendName()
// returns the matching index cast to the enum, and GetBackendName() indexes this array by it.
// The entries after "Null" depend on whether this is an Android build.
static constexpr const std::array s_backend_names = {
203
  "Null",
204
#ifndef __ANDROID__
205
  "Cubeb",
206
  "SDL",
207
#else
208
  "AAudio",
209
  "OpenSLES",
210
#endif
211
};
212
// Display names for the backends, in the same order as s_backend_names. The non-Android entries
// are marked for translation with context "Settings" and disambiguation "AudioBackend"; the
// Android entries are plain string literals.
static constexpr const std::array s_backend_display_names = {
213
  TRANSLATE_DISAMBIG_NOOP("Settings", "Null (No Output)", "AudioBackend"),
214
#ifndef __ANDROID__
215
  TRANSLATE_DISAMBIG_NOOP("Settings", "Cubeb", "AudioBackend"),
216
  TRANSLATE_DISAMBIG_NOOP("Settings", "SDL", "AudioBackend"),
217
#else
218
  "AAudio",
219
  "OpenSL ES",
220
#endif
221
};
222

223
std::optional<AudioBackend> AudioStream::ParseBackendName(const char* str)
224
{
225
  int index = 0;
226
  for (const char* name : s_backend_names)
227
  {
228
    if (std::strcmp(name, str) == 0)
229
      return static_cast<AudioBackend>(index);
230

231
    index++;
232
  }
233

234
  return std::nullopt;
235
}
236

237
/// Returns the identifier string for a backend.
/// @return The entry of s_backend_names at the backend's numeric value, or an empty string when the
///         value is outside the table (same convention as GetStretchModeName()), instead of
///         indexing out of bounds.
const char* AudioStream::GetBackendName(AudioBackend backend)
{
  const size_t index = static_cast<size_t>(backend);
  return (index < s_backend_names.size()) ? s_backend_names[index] : "";
}
241

242
/// Returns the translated display name for a backend.
/// @return The translation of the s_backend_display_names entry at the backend's numeric value, or
///         an empty string when the value is outside the table.
const char* AudioStream::GetBackendDisplayName(AudioBackend backend)
{
  const size_t index = static_cast<size_t>(backend);
  if (index >= s_backend_display_names.size())
    return "";

  // The lookup must use the context and disambiguation the strings were registered with in
  // s_backend_display_names ("Settings" / "AudioBackend"), as GetStretchModeDisplayName() does for
  // its table; looking them up under a different context would not find the translations.
  return Host::TranslateToCString("Settings", s_backend_display_names[index], "AudioBackend");
}
246

247
// Stretch mode identifier strings, indexed by the numeric value of AudioStretchMode (see
// GetStretchModeName() and ParseStretchMode()).
static constexpr const std::array s_stretch_mode_names = {
248
  "None",
249
  "Resample",
250
  "TimeStretch",
251
};
252
// Display names for the stretch modes, in the same order as s_stretch_mode_names. Marked for
// translation with context "Settings" and disambiguation "AudioStretchMode", which is what
// GetStretchModeDisplayName() passes when looking them up.
static constexpr const std::array s_stretch_mode_display_names = {
253
  TRANSLATE_DISAMBIG_NOOP("Settings", "Off (Noisy)", "AudioStretchMode"),
254
  TRANSLATE_DISAMBIG_NOOP("Settings", "Resampling (Pitch Shift)", "AudioStretchMode"),
255
  TRANSLATE_DISAMBIG_NOOP("Settings", "Time Stretch (Tempo Change, Best Sound)", "AudioStretchMode"),
256
};
257

258
/// Returns the identifier string for a stretch mode, or an empty string when the mode's numeric
/// value is outside s_stretch_mode_names.
const char* AudioStream::GetStretchModeName(AudioStretchMode mode)
{
  const size_t index = static_cast<size_t>(mode);
  if (index >= s_stretch_mode_names.size())
    return "";

  return s_stretch_mode_names[index];
}
263

264
/// Returns the translated display name for a stretch mode, or an empty string when the mode's
/// numeric value is outside s_stretch_mode_display_names.
const char* AudioStream::GetStretchModeDisplayName(AudioStretchMode mode)
{
  const size_t index = static_cast<size_t>(mode);
  if (index >= s_stretch_mode_display_names.size())
    return "";

  // Same context and disambiguation the strings were registered with.
  return Host::TranslateToCString("Settings", s_stretch_mode_display_names[index], "AudioStretchMode");
}
271

272
std::optional<AudioStretchMode> AudioStream::ParseStretchMode(const char* name)
273
{
274
  for (size_t i = 0; i < static_cast<u8>(AudioStretchMode::Count); i++)
275
  {
276
    if (std::strcmp(name, s_stretch_mode_names[i]) == 0)
277
      return static_cast<AudioStretchMode>(i);
278
  }
279

280
  return std::nullopt;
281
}
282

283
/// Returns the number of frames between the read and write positions of the ring buffer.
/// Both positions are loaded with relaxed ordering, so the value is only a snapshot.
u32 AudioStream::GetBufferedFramesRelaxed() const
{
  const u32 read_pos = m_rpos.load(std::memory_order_relaxed);
  const u32 write_pos = m_wpos.load(std::memory_order_relaxed);

  // Adding the buffer size before subtracting keeps the intermediate value from wrapping below
  // zero when the write position has wrapped around behind the read position.
  const u32 distance = write_pos + m_buffer_size - read_pos;
  return distance % m_buffer_size;
}
289

290
/// Fills `samples` with `num_frames` interleaved frames taken from the ring buffer.
///
/// - While m_filling is set (after an underrun), silence is produced until a minimum number of
///   frames is buffered again.
/// - When fewer frames are buffered than requested, the frames that are available are spread
///   across the whole request with a nearest-sample resampler, and m_filling is set.
/// - When nothing can be read, the output is zeroed.
/// - Finally the output volume (m_volume, in percent) is applied unless it is exactly 100.
///
/// @param samples    Destination for num_frames * NUM_CHANNELS samples.
/// @param num_frames Number of frames to produce.
void AudioStream::ReadFrames(SampleType* samples, u32 num_frames)
{
  const u32 available_frames = GetBufferedFramesRelaxed();
  u32 frames_to_read = num_frames;
  u32 silence_frames = 0;

  if (m_filling)
  {
    // Wait for a fraction of the buffer to fill before resuming playback. The time stretcher uses a
    // much larger buffer (see AllocateBuffer()), hence the larger divisor.
    u32 toFill = m_buffer_size / ((m_parameters.stretch_mode != AudioStretchMode::TimeStretch) ? 32 : 400);
    toFill = GetAlignedBufferSize(toFill);

    if (available_frames < toFill)
    {
      silence_frames = num_frames;
      frames_to_read = 0;
    }
    else
    {
      m_filling = false;
      VERBOSE_LOG("Underrun compensation done ({} frames buffered)", toFill);
    }
  }

  if (available_frames < frames_to_read)
  {
    // Underrun: read what there is and make up the rest below.
    silence_frames = frames_to_read - available_frames;
    frames_to_read = available_frames;
    m_filling = true;

    if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
      StretchUnderrun();
  }

  if (frames_to_read > 0)
  {
    u32 rpos = m_rpos.load(std::memory_order_acquire);

    // Number of frames that can be copied before hitting the end of the ring buffer.
    u32 end = m_buffer_size - rpos;
    if (end > frames_to_read)
      end = frames_to_read;

    // towards the end of the buffer
    if (end > 0)
    {
      std::memcpy(samples, &m_buffer[rpos * NUM_CHANNELS], end * NUM_CHANNELS * sizeof(SampleType));
      rpos += end;
      rpos = (rpos == m_buffer_size) ? 0 : rpos;
    }

    // after wrapping around
    const u32 start = frames_to_read - end;
    if (start > 0)
    {
      std::memcpy(&samples[end * NUM_CHANNELS], &m_buffer[0], start * NUM_CHANNELS * sizeof(SampleType));
      rpos = start;
    }

    m_rpos.store(rpos, std::memory_order_release);
  }

  if (silence_frames > 0)
  {
    if (frames_to_read > 0)
    {
      // super basic resampler - spread the input samples evenly across the output samples. will sound like ass and have
      // aliasing, but better than popping by inserting silence.
      // 16.16 fixed-point step through the input per output frame.
      const u32 increment =
        static_cast<u32>(65536.0f * (static_cast<float>(frames_to_read) / static_cast<float>(num_frames)));

      // The output overwrites the input in place, so work from a temporary copy of the frames read.
      SampleType* resample_ptr = static_cast<SampleType*>(alloca(frames_to_read * NUM_CHANNELS * sizeof(SampleType)));
      std::memcpy(resample_ptr, samples, frames_to_read * NUM_CHANNELS * sizeof(SampleType));

      SampleType* out_ptr = samples;
      const u32 copy_stride = sizeof(SampleType) * NUM_CHANNELS;
      u32 resample_subpos = 0;
      for (u32 i = 0; i < num_frames; i++)
      {
        std::memcpy(out_ptr, resample_ptr, copy_stride);
        out_ptr += NUM_CHANNELS;

        // Advance the input by the integer part of the accumulated position, keep the fraction.
        resample_subpos += increment;
        resample_ptr += (resample_subpos >> 16) * NUM_CHANNELS;
        resample_subpos %= 65536u;
      }

      VERBOSE_LOG("Audio buffer underflow, resampled {} frames to {}", frames_to_read, num_frames);
    }
    else
    {
      // no data, fall back to silence. Nothing was read in this branch, so the silence starts at the
      // beginning of the output. Sized with SampleType like every other copy in this function.
      std::memset(samples, 0, silence_frames * NUM_CHANNELS * sizeof(SampleType));
    }
  }

  if (m_volume != 100)
  {
    u32 num_samples = num_frames * NUM_CHANNELS;

    // Groups of 8 samples go through the vector loop, the remainder through the scalar loop.
    const u32 aligned_samples = Common::AlignDownPow2(num_samples, 8);
    num_samples -= aligned_samples;

    const float volume_mult = static_cast<float>(m_volume) / 100.0f;
    const GSVector4 volume_multv = GSVector4(volume_mult);
    const SampleType* const aligned_samples_end = samples + aligned_samples;
    for (; samples != aligned_samples_end; samples += 8)
    {
      GSVector4i iv = GSVector4i::load<false>(samples); // [0, 1, 2, 3, 4, 5, 6, 7]
      GSVector4i iv1 = iv.upl16(iv);                    // [0, 0, 1, 1, 2, 2, 3, 3]
      GSVector4i iv2 = iv.uph16(iv);                    // [4, 4, 5, 5, 6, 6, 7, 7]
      iv1 = iv1.sra32<16>();                            // [0, 1, 2, 3]
      iv2 = iv2.sra32<16>();                            // [4, 5, 6, 7]
      GSVector4 fv1 = GSVector4(iv1);                   // [f0, f1, f2, f3]
      GSVector4 fv2 = GSVector4(iv2);                   // [f4, f5, f6, f7]
      fv1 = fv1 * volume_multv;                         // [f0, f1, f2, f3]
      fv2 = fv2 * volume_multv;                         // [f4, f5, f6, f7]
      iv1 = GSVector4i(fv1);                            // [0, 1, 2, 3]
      iv2 = GSVector4i(fv2);                            // [4, 5, 6, 7]
      iv = iv1.ps32(iv2);                               // [0, 1, 2, 3, 4, 5, 6, 7]
      GSVector4i::store<false>(samples, iv);
    }

    while (num_samples > 0)
    {
      *samples = static_cast<s16>(std::clamp(static_cast<float>(*samples) * volume_mult, -32768.0f, 32767.0f));
      samples++;
      num_samples--;
    }
  }
}
419

420
/// Producer side of the ring buffer: appends `num_frames` interleaved frames from `data`.
///
/// When there is not strictly more free space than frames to write, the chunk is dropped - unless
/// time stretching is active, in which case StretchOverrun() is called and the write goes ahead.
///
/// @param data       Source of num_frames * NUM_CHANNELS samples.
/// @param num_frames Number of frames to append.
void AudioStream::InternalWriteFrames(s16* data, u32 num_frames)
{
  const u32 free_frames = m_buffer_size - GetBufferedFramesRelaxed();
  if (free_frames <= num_frames)
  {
    if (m_parameters.stretch_mode != AudioStretchMode::TimeStretch)
    {
      DEBUG_LOG("Buffer overrun, chunk dropped");
      return;
    }

    StretchOverrun();
  }

  const u32 write_pos = m_wpos.load(std::memory_order_acquire);
  const u32 frames_until_end = m_buffer_size - write_pos;
  u32 new_write_pos;

  if (frames_until_end > num_frames)
  {
    // Fits before the end of the buffer: a single copy.
    std::memcpy(&m_buffer[write_pos * NUM_CHANNELS], data, num_frames * NUM_CHANNELS * sizeof(SampleType));
    new_write_pos = write_pos + num_frames;
  }
  else
  {
    // Reaches (or passes) the end of the buffer: copy the tail part, then wrap to the start.
    // The wrapped part is empty when the chunk ends exactly at the end of the buffer.
    const u32 wrapped_frames = num_frames - frames_until_end;

    std::memcpy(&m_buffer[write_pos * NUM_CHANNELS], data, frames_until_end * NUM_CHANNELS * sizeof(SampleType));
    if (wrapped_frames > 0)
    {
      std::memcpy(&m_buffer[0], data + frames_until_end * NUM_CHANNELS,
                  wrapped_frames * NUM_CHANNELS * sizeof(SampleType));
    }

    new_write_pos = wrapped_frames;
  }

  m_wpos.store(new_write_pos, std::memory_order_release);
}
461

462
// Allocates the sample buffers and then sets up the stretcher. StretchAllocate() returns
// immediately when the stretch mode is Off.
void AudioStream::BaseInitialize()
463
{
464
  AllocateBuffer();
465
  StretchAllocate();
466
}
467

468
void AudioStream::AllocateBuffer()
469
{
470
  // Stretcher can produce a large amount of samples from few samples when running slow, so allocate a larger buffer.
471
  // In most cases it's not going to be used, but better to have a larger buffer and not need it than overrun.
472
  const u32 multiplier = (m_parameters.stretch_mode == AudioStretchMode::TimeStretch) ?
473
                           16 :
474
                           ((m_parameters.stretch_mode == AudioStretchMode::Off) ? 1 : 2);
475
  m_buffer_size = GetAlignedBufferSize(((m_parameters.buffer_ms * multiplier) * m_sample_rate) / 1000);
476
  m_target_buffer_size = GetAlignedBufferSize((m_sample_rate * m_parameters.buffer_ms) / 1000u);
477

478
  m_buffer = Common::make_unique_aligned_for_overwrite<s16[]>(VECTOR_ALIGNMENT, m_buffer_size * NUM_CHANNELS);
479
  m_staging_buffer = Common::make_unique_aligned_for_overwrite<s16[]>(VECTOR_ALIGNMENT, CHUNK_SIZE * NUM_CHANNELS);
480
  m_float_buffer = Common::make_unique_aligned_for_overwrite<float[]>(VECTOR_ALIGNMENT, CHUNK_SIZE * NUM_CHANNELS);
481

482
  DEV_LOG("Allocated buffer of {} frames for buffer of {} ms [stretch {}, target size {}].", m_buffer_size,
483
          m_parameters.buffer_ms, GetStretchModeName(m_parameters.stretch_mode), m_target_buffer_size);
484
}
485

486
// Frees the staging, float and ring buffers, and resets the buffer size and both ring buffer
// positions to zero.
void AudioStream::DestroyBuffer()
487
{
488
  m_staging_buffer.reset();
489
  m_float_buffer.reset();
490
  m_buffer.reset();
491
  m_buffer_size = 0;
492
  m_wpos.store(0, std::memory_order_release);
493
  m_rpos.store(0, std::memory_order_release);
494
}
495

496
void AudioStream::EmptyBuffer()
497
{
498
  if (IsStretchEnabled())
499
  {
500
    soundtouch_clear(m_soundtouch);
501
    if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
502
      soundtouch_setTempo(m_soundtouch, m_nominal_rate);
503
  }
504

505
  m_wpos.store(m_rpos.load(std::memory_order_acquire), std::memory_order_release);
506
}
507

508
void AudioStream::SetNominalRate(float tempo)
509
{
510
  m_nominal_rate = tempo;
511
  if (m_parameters.stretch_mode == AudioStretchMode::Resample)
512
    soundtouch_setRate(m_soundtouch, tempo);
513
  else if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch && !m_stretch_inactive)
514
    soundtouch_setTempo(m_soundtouch, tempo);
515
}
516

517
/// Switches the stretch mode, rebuilding the buffers and the SoundTouch instance for the new mode.
/// Does nothing when the mode is unchanged. A running stream is paused for the duration of the
/// change and resumed afterwards; buffered and partially staged audio is discarded.
void AudioStream::SetStretchMode(AudioStretchMode mode)
{
  if (m_parameters.stretch_mode == mode)
    return;

  // The buffers can't be resized while the stream is running, so pause it first if needed.
  const bool was_paused = m_paused;
  if (!was_paused)
    SetPaused(true);

  DestroyBuffer();
  StretchDestroy();
  m_parameters.stretch_mode = mode;

  AllocateBuffer();
  if (m_parameters.stretch_mode != AudioStretchMode::Off)
    StretchAllocate();

  // The staging buffer was just reallocated without initialisation, so any partially written chunk
  // is gone. StretchAllocate() resets the position, but it is skipped for Off; without this reset
  // the next completed chunk would start with uninitialised samples.
  m_staging_buffer_pos = 0;

  if (!was_paused)
    SetPaused(false);
}
538

539
// Records the paused state. This base implementation only stores the flag.
void AudioStream::SetPaused(bool paused)
540
{
541
  m_paused = paused;
542
}
543

544
// Sets the output volume in percent: ReadFrames() scales samples by volume / 100 (100 leaves them
// untouched) and EndWrite() skips committing audio entirely while the volume is 0.
void AudioStream::SetOutputVolume(u32 volume)
545
{
546
  m_volume = volume;
547
}
548

549
/// Hands out the unused tail of the staging chunk for the caller to write samples into.
/// @param buffer_ptr Receives a pointer to the next free sample in the staging buffer.
/// @param num_frames Receives the number of frames that still fit in the current chunk.
void AudioStream::BeginWrite(SampleType** buffer_ptr, u32* num_frames)
{
  // TODO: Write directly to buffer when not using stretching.
  // m_staging_buffer_pos counts samples, so convert it to frames for the remaining-space figure.
  const auto staged_frames = m_staging_buffer_pos / NUM_CHANNELS;

  *buffer_ptr = &m_staging_buffer[m_staging_buffer_pos];
  *num_frames = CHUNK_SIZE - staged_frames;
}
555

556
static void S16ChunkToFloat(const s16* src, float* dst, u32 num_samples)
557
{
558
  constexpr GSVector4 S16_TO_FLOAT_V = GSVector4::cxpr(1.0f / 32767.0f);
559

560
  const u32 iterations = (num_samples + 7) / 8;
561
  for (u32 i = 0; i < iterations; i++)
562
  {
563
    const GSVector4i sv = GSVector4i::load<true>(src);
564
    src += 8;
565

566
    GSVector4i iv1 = sv.upl16(sv);  // [0, 0, 1, 1, 2, 2, 3, 3]
567
    GSVector4i iv2 = sv.uph16(sv);  // [4, 4, 5, 5, 6, 6, 7, 7]
568
    iv1 = iv1.sra32<16>();          // [0, 1, 2, 3]
569
    iv2 = iv2.sra32<16>();          // [4, 5, 6, 7]
570
    GSVector4 fv1 = GSVector4(iv1); // [f0, f1, f2, f3]
571
    GSVector4 fv2 = GSVector4(iv2); // [f4, f5, f6, f7]
572
    fv1 = fv1 * S16_TO_FLOAT_V;
573
    fv2 = fv2 * S16_TO_FLOAT_V;
574

575
    GSVector4::store<true>(dst + 0, fv1);
576
    GSVector4::store<true>(dst + 4, fv2);
577
    dst += 8;
578
  }
579
}
580

581
/// Converts floats to signed 16-bit samples by scaling with 32767, eight samples per iteration.
/// The sample count is rounded up to a multiple of 8, so both buffers must have room for that,
/// and both are accessed with aligned vector loads/stores.
static void FloatChunkToS16(s16* dst, const float* src, u32 num_samples)
{
  const GSVector4 FLOAT_TO_S16_V = GSVector4::cxpr(32767.0f);

  for (u32 remaining = (num_samples + 7) / 8; remaining > 0; remaining--)
  {
    const GSVector4 lo_f = GSVector4::load<true>(src + 0) * FLOAT_TO_S16_V;
    const GSVector4 hi_f = GSVector4::load<true>(src + 4) * FLOAT_TO_S16_V;

    const GSVector4i lo32 = GSVector4i(lo_f);
    const GSVector4i hi32 = GSVector4i(hi_f);

    // Pack the two 32-bit vectors back into eight 16-bit samples.
    const GSVector4i packed = lo32.ps32(hi32);
    GSVector4i::store<true>(dst, packed);

    src += 8;
    dst += 8;
  }
}
602

603
/// Commits `num_frames` frames that the caller wrote into the pointer returned by BeginWrite().
/// Frames accumulate in the staging buffer until a full chunk is present; the chunk is then either
/// written straight to the ring buffer or, when stretching is enabled, converted to float and
/// passed to the stretcher. Nothing is committed while the volume is 0.
void AudioStream::EndWrite(u32 num_frames)
{
  // don't bother committing anything when muted
  if (m_volume == 0)
    return;

  m_staging_buffer_pos += num_frames * NUM_CHANNELS;
  DebugAssert(m_staging_buffer_pos <= (CHUNK_SIZE * NUM_CHANNELS));

  // Keep accumulating until a whole chunk has been staged.
  const bool chunk_complete = ((m_staging_buffer_pos / NUM_CHANNELS) >= CHUNK_SIZE);
  if (!chunk_complete)
    return;

  m_staging_buffer_pos = 0;

  if (IsStretchEnabled())
  {
    S16ChunkToFloat(m_staging_buffer.get(), m_float_buffer.get(), CHUNK_SIZE * NUM_CHANNELS);
    StretchWriteBlock(m_float_buffer.get());
  }
  else
  {
    InternalWriteFrames(m_staging_buffer.get(), CHUNK_SIZE);
  }
}
625

626
// Time stretching algorithm based on PCSX2 implementation.
627

628
template<class T>
629
ALWAYS_INLINE static bool IsInRange(const T& val, const T& min, const T& max)
630
{
631
  return (min <= val && val <= max);
632
}
633

634
void AudioStream::StretchAllocate()
635
{
636
  if (m_parameters.stretch_mode == AudioStretchMode::Off)
637
    return;
638

639
  m_soundtouch = soundtouch_createInstance();
640
  soundtouch_setSampleRate(m_soundtouch, m_sample_rate);
641
  soundtouch_setChannels(m_soundtouch, NUM_CHANNELS);
642

643
  soundtouch_setSetting(m_soundtouch, SETTING_USE_QUICKSEEK, m_parameters.stretch_use_quickseek);
644
  soundtouch_setSetting(m_soundtouch, SETTING_USE_AA_FILTER, m_parameters.stretch_use_aa_filter);
645

646
  soundtouch_setSetting(m_soundtouch, SETTING_SEQUENCE_MS, m_parameters.stretch_sequence_length_ms);
647
  soundtouch_setSetting(m_soundtouch, SETTING_SEEKWINDOW_MS, m_parameters.stretch_seekwindow_ms);
648
  soundtouch_setSetting(m_soundtouch, SETTING_OVERLAP_MS, m_parameters.stretch_overlap_ms);
649

650
  if (m_parameters.stretch_mode == AudioStretchMode::Resample)
651
    soundtouch_setRate(m_soundtouch, m_nominal_rate);
652
  else
653
    soundtouch_setTempo(m_soundtouch, m_nominal_rate);
654

655
  m_stretch_reset = STRETCH_RESET_THRESHOLD;
656
  m_stretch_inactive = false;
657
  m_stretch_ok_count = 0;
658
  m_dynamic_target_usage = 0.0f;
659
  m_average_position = 0;
660
  m_average_available = 0;
661

662
  m_staging_buffer_pos = 0;
663
}
664

665
void AudioStream::StretchDestroy()
666
{
667
  if (m_soundtouch)
668
  {
669
    soundtouch_destroyInstance(m_soundtouch);
670
    m_soundtouch = nullptr;
671
  }
672
}
673

674
void AudioStream::StretchWriteBlock(const float* block)
675
{
676
  if (IsStretchEnabled())
677
  {
678
    soundtouch_putSamples(m_soundtouch, block, CHUNK_SIZE);
679

680
    u32 tempProgress;
681
    while (tempProgress = soundtouch_receiveSamples(m_soundtouch, m_float_buffer.get(), CHUNK_SIZE), tempProgress != 0)
682
    {
683
      FloatChunkToS16(m_staging_buffer.get(), m_float_buffer.get(), tempProgress * NUM_CHANNELS);
684
      InternalWriteFrames(m_staging_buffer.get(), tempProgress);
685
    }
686

687
    if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
688
      UpdateStretchTempo();
689
  }
690
  else
691
  {
692
    FloatChunkToS16(m_staging_buffer.get(), block, CHUNK_SIZE * NUM_CHANNELS);
693
    InternalWriteFrames(m_staging_buffer.get(), CHUNK_SIZE);
694
  }
695
}
696

697
float AudioStream::AddAndGetAverageTempo(float val)
698
{
699
  static constexpr u32 AVERAGING_WINDOW = 50;
700

701
  // Build up a circular buffer for tempo averaging to prevent rapid tempo oscillations.
702
  if (m_average_available < AVERAGING_BUFFER_SIZE)
703
    m_average_available++;
704

705
  m_average_fullness[m_average_position] = val;
706
  m_average_position = (m_average_position + 1U) % AVERAGING_BUFFER_SIZE;
707

708
  // The + AVERAGING_BUFFER_SIZE ensures we don't go negative when using modulo arithmetic.
709
  const u32 actual_window = std::min<u32>(m_average_available, AVERAGING_WINDOW);
710
  u32 index = (m_average_position - actual_window + AVERAGING_BUFFER_SIZE) % AVERAGING_BUFFER_SIZE;
711
  float sum = 0.0f;
712
  u32 count = 0;
713

714
#ifdef CPU_ARCH_SIMD
715
  GSVector4 vsum = GSVector4::zero();
716
  const u32 vcount = Common::AlignDownPow2(actual_window, 4);
717
  for (; count < vcount; count += 4)
718
  {
719
    if ((index + 4) > AVERAGING_BUFFER_SIZE)
720
    {
721
      // wraparound
722
      for (u32 i = 0; i < 4; i++)
723
      {
724
        sum += m_average_fullness[index];
725
        index = (index + 1) % AVERAGING_BUFFER_SIZE;
726
      }
727
    }
728
    else
729
    {
730
      vsum += GSVector4::load<false>(&m_average_fullness[index]);
731
      index = (index + 4) % AVERAGING_BUFFER_SIZE;
732
    }
733
  }
734
  sum += vsum.addv();
735
#endif
736
  for (; count < actual_window; count++)
737
  {
738
    sum += m_average_fullness[index];
739
    index = (index + 1) % AVERAGING_BUFFER_SIZE;
740
  }
741
  sum /= static_cast<float>(actual_window);
742

743
  return (sum != 0.0f) ? sum : 1.0f;
744
}
745

746
void AudioStream::UpdateStretchTempo()
747
{
748
  static constexpr float MIN_TEMPO = 0.05f;
749
  static constexpr float MAX_TEMPO = 500.0f;
750

751
  // Hysteresis thresholds to prevent stretcher from constantly toggling on/off.
752
  // i.e. this is the range we will run in 1:1 mode for.
753
  static constexpr float INACTIVE_GOOD_FACTOR = 1.04f;
754
  static constexpr float INACTIVE_BAD_FACTOR = 1.2f;
755

756
  // Require sustained good performance before deactivating.
757
  static constexpr u32 INACTIVE_MIN_OK_COUNT = 50;
758
  static constexpr u32 COMPENSATION_DIVIDER = 100;
759

760
  // Controls how aggressively we adjust the dynamic target. We want to keep the same target size regardless
761
  // of the target speed, but need additional buffering when intentionally running below 100%.
762
  float base_target_usage = static_cast<float>(m_target_buffer_size) / std::min(m_nominal_rate, 1.0f);
763

764
  // tempo = current_buffer / target_buffer.
765
  const u32 ibuffer_usage = GetBufferedFramesRelaxed();
766
  float buffer_usage = static_cast<float>(ibuffer_usage);
767
  float tempo = buffer_usage / m_dynamic_target_usage;
768

769
  // Prevents the system from getting stuck in a bad state due to accumulated errors.
770
  if (m_stretch_reset >= STRETCH_RESET_THRESHOLD)
771
  {
772
    VERBOSE_LOG("___ Stretcher is being reset.");
773
    m_stretch_inactive = false;
774
    m_stretch_ok_count = 0;
775
    m_dynamic_target_usage = base_target_usage;
776
    m_average_available = 0;
777
    m_average_position = 0;
778
    m_stretch_reset = 0;
779
    tempo = m_nominal_rate;
780
  }
781
  else if (m_stretch_reset > 0)
782
  {
783
    // Back off resets if enough time has passed. That way a very occasional lag/overflow
784
    // doesn't cascade into unnecessary tempo adjustment.
785
    const u64 now = Timer::GetCurrentValue();
786
    if (Timer::ConvertValueToSeconds(now - m_stretch_reset_time) >= 2.0f)
787
    {
788
      m_stretch_reset--;
789
      m_stretch_reset_time = now;
790
    }
791
  }
792

793
  // Apply temporal smoothing to prevent rapid tempo changes that cause artifacts.
794
  tempo = AddAndGetAverageTempo(tempo);
795

796
  // Apply non-linear dampening when close to target to reduce oscillation.
797
  if (tempo < 2.0f)
798
    tempo = std::sqrt(tempo);
799

800
  tempo = std::clamp(tempo, MIN_TEMPO, MAX_TEMPO);
801

802
  if (tempo < 1.0f)
803
    base_target_usage /= std::sqrt(tempo);
804

805
  // Gradually adjust our dynamic target toward what would give us the desired tempo.
806
  m_dynamic_target_usage +=
807
    static_cast<float>(base_target_usage / tempo - m_dynamic_target_usage) / static_cast<float>(COMPENSATION_DIVIDER);
808

809
  // Snap back to baseline if we're very close.
810
  if (IsInRange(tempo, 0.9f, 1.1f) &&
811
      IsInRange(m_dynamic_target_usage, base_target_usage * 0.9f, base_target_usage * 1.1f))
812
  {
813
    m_dynamic_target_usage = base_target_usage;
814
  }
815

816
  // Are we changing the active state?
817
  if (!m_stretch_inactive)
818
  {
819
    if (IsInRange(tempo, 1.0f / INACTIVE_GOOD_FACTOR, INACTIVE_GOOD_FACTOR))
820
      m_stretch_ok_count++;
821
    else
822
      m_stretch_ok_count = 0;
823

824
    if (m_stretch_ok_count >= INACTIVE_MIN_OK_COUNT)
825
    {
826
      VERBOSE_LOG("=== Stretcher is now inactive.");
827
      m_stretch_inactive = true;
828
    }
829
  }
830
  else if (!IsInRange(tempo, 1.0f / INACTIVE_BAD_FACTOR, INACTIVE_BAD_FACTOR))
831
  {
832
    VERBOSE_LOG("~~~ Stretcher is now active @ tempo {}.", tempo);
833
    m_stretch_inactive = false;
834
    m_stretch_ok_count = 0;
835
  }
836

837
  // If we're inactive, we don't want to change the tempo.
838
  if (m_stretch_inactive)
839
    tempo = m_nominal_rate;
840

841
  if constexpr (LOG_TIMESTRETCH_STATS)
842
  {
843
    static float min_tempo = 0.0f;
844
    static float max_tempo = 0.0f;
845
    static float acc_tempo = 0.0f;
846
    static u32 acc_cnt = 0;
847
    acc_tempo += tempo;
848
    acc_cnt++;
849
    min_tempo = std::min(min_tempo, tempo);
850
    max_tempo = std::max(max_tempo, tempo);
851

852
    static int iterations = 0;
853
    static u64 last_log_time = 0;
854

855
    const u64 now = Timer::GetCurrentValue();
856

857
    if (Timer::ConvertValueToSeconds(now - last_log_time) > 1.0f)
858
    {
859
      const float avg_tempo = (acc_cnt > 0) ? (acc_tempo / static_cast<float>(acc_cnt)) : 0.0f;
860

861
      VERBOSE_LOG("{:3d} ms ({:3.0f}%), tempo: avg={:.2f} min={:.2f} max={:.2f}, comp: {:2.3f}, iters: {}, reset:{}",
862
                  (ibuffer_usage * 1000u) / m_sample_rate, 100.0f * buffer_usage / base_target_usage, avg_tempo,
863
                  min_tempo, max_tempo, m_dynamic_target_usage / base_target_usage, iterations, m_stretch_reset);
864

865
      last_log_time = now;
866
      iterations = 0;
867

868
      min_tempo = std::numeric_limits<float>::max();
869
      max_tempo = std::numeric_limits<float>::min();
870
      acc_tempo = 0.0f;
871
      acc_cnt = 0;
872
    }
873

874
    iterations++;
875
  }
876

877
  soundtouch_setTempo(m_soundtouch, tempo);
878
}
879

880
void AudioStream::StretchUnderrun()
881
{
882
  // Didn't produce enough frames in time.
883
  m_stretch_reset++;
884
  if (m_stretch_reset < STRETCH_RESET_THRESHOLD)
885
    m_stretch_reset_time = Timer::GetCurrentValue();
886
}
887

888
void AudioStream::StretchOverrun()
889
{
890
  // Produced more frames than can fit in the buffer.
891
  m_stretch_reset++;
892
  if (m_stretch_reset < STRETCH_RESET_THRESHOLD)
893
    m_stretch_reset_time = Timer::GetCurrentValue();
894

895
  // Drop two packets to give the time stretcher a bit more time to slow things down.
896
  // This prevents a cascading overrun situation where each overrun makes the next one more likely.
897
  const u32 discard = CHUNK_SIZE * 2;
898
  m_rpos.store((m_rpos.load(std::memory_order_acquire) + discard) % m_buffer_size, std::memory_order_release);
899
}
900

901
void AudioStream::EmptyStretchBuffers()
902
{
903
  if (!IsStretchEnabled())
904
    return;
905

906
  m_stretch_reset = STRETCH_RESET_THRESHOLD;
907

908
  // Wipe soundtouch samples. If we don't do this and we're switching from a high tempo to low,
909
  // we'll still have quite a large buffer of samples that will be played back at a low tempo,
910
  // resulting in a long delay before the audio starts playing at the new tempo.
911
  soundtouch_clear(m_soundtouch);
912
}
913

914
Product

Resources

Company