CoCalc -- StereoResampler.cpp

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/HW/StereoResampler.cpp
Views: 1401
1
// Copyright (c) 2015- PPSSPP Project and Dolphin Project.
2

3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6

7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
// GNU General Public License 2.0 for more details.
11

12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14

15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17

18
// Adapted from Dolphin.
19

20
// 16 bit Stereo
21

22
// Ring buffer capacities, counted in stereo frames (the storage holds twice as many
// 16-bit values). Both must be powers of 2, since indices are wrapped with a bit mask.
#define MAX_BUFSIZE_DEFAULT (4096) // 2*64ms - had to double it for nVidia Shield which has huge buffers
#define MAX_BUFSIZE_EXTRA   (8192)

// Added on top of the system's frames-per-buffer when deriving the target fill level.
#define TARGET_BUFSIZE_MARGIN 512

// Fill levels (in stereo frames) that the rate controller in Mix() steers towards.
#define TARGET_BUFSIZE_DEFAULT 1680 // 40 ms
#define TARGET_BUFSIZE_EXTRA 3360 // 80 ms

// Parameters of the rate controller in Mix().
#define MAX_FREQ_SHIFT  600.0f  // how far off can we be from 44100 Hz
#define CONTROL_FACTOR  0.2f // in freq_shift per fifo size offset
#define CONTROL_AVG     32.0f // length of the running average applied to the fill level
34

35
#include "ppsspp_config.h"
36
#include <cstring>
37
#include <atomic>
38

39
#include "Common/Common.h"
40
#include "Common/System/System.h"
41
#include "Common/Math/math_util.h"
42
#include "Common/Serialize/Serializer.h"
43
#include "Common/Log.h"
44
#include "Common/TimeUtil.h"
45
#include "Core/Config.h"
46
#include "Core/ConfigValues.h"
47
#include "Core/HW/StereoResampler.h"
48
#include "Core/HLE/__sceAudio.h"
49
#include "Core/Util/AudioFormat.h"  // for clamp_u8
50
#include "Core/System.h"
51

52
#ifdef _M_SSE
53
#include <emmintrin.h>
54
#endif
55
#if PPSSPP_ARCH(ARM_NEON)
56
#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
57
#include <arm64_neon.h>
58
#else
59
#include <arm_neon.h>
60
#endif
61
#endif
62

63
// Allocates the sample ring buffer, picks the nominal input sample rate based on
// the display refresh rate, and derives the initial buffer sizing.
StereoResampler::StereoResampler()
	: m_maxBufsize(MAX_BUFSIZE_DEFAULT),
	  m_targetBufsize(TARGET_BUFSIZE_DEFAULT) {
	// Always allocate (zero-initialized) for the largest configuration, so a later
	// change of buffer size never requires a reallocation.
	m_buffer = new int16_t[MAX_BUFSIZE_EXTRA * 2]();

	// Some Android devices are v-synced to non-60Hz framerates. We simply timestretch audio to fit.
	// TODO: should only do this if auto frameskip is off?
	const float displayHz = System_GetPropertyFloat(SYSPROP_DISPLAY_REFRESH_RATE);
	const bool nearSixty = displayHz > 50.0f && displayHz < 70.0f;

	// Only adjust when the display is "close" to 60 Hz without being exactly 60 Hz.
	if (nearSixty && displayHz != 60.0f) {
		const int scaledRate = (int)(44100 * (displayHz / 60.0f));
		INFO_LOG(Log::Audio, "StereoResampler: Adjusting target sample rate to %dHz", scaledRate);
		m_input_sample_rate = scaledRate;
	}

	UpdateBufferSize();
}
82

83
// Releases the ring buffer allocated by the constructor.
StereoResampler::~StereoResampler() {
	// Detach the pointer from the object before freeing, so the member never
	// refers to released memory.
	auto *storage = m_buffer;
	m_buffer = nullptr;
	delete[] storage;
}
87

88
void StereoResampler::UpdateBufferSize() {
89
	if (g_Config.bExtraAudioBuffering) {
90
		m_maxBufsize = MAX_BUFSIZE_EXTRA;
91
		m_targetBufsize = TARGET_BUFSIZE_EXTRA;
92
	} else {
93
		m_maxBufsize = MAX_BUFSIZE_DEFAULT;
94
		m_targetBufsize = TARGET_BUFSIZE_DEFAULT;
95

96
		int systemBufsize = System_GetPropertyInt(SYSPROP_AUDIO_FRAMES_PER_BUFFER);
97
		if (systemBufsize > 0 && m_targetBufsize < systemBufsize + TARGET_BUFSIZE_MARGIN) {
98
			m_targetBufsize = std::min(4096, systemBufsize + TARGET_BUFSIZE_MARGIN);
99
			if (m_targetBufsize * 2 > MAX_BUFSIZE_DEFAULT)
100
				m_maxBufsize = MAX_BUFSIZE_EXTRA;
101
		}
102
	}
103
}
104

105
template<bool useShift>
106
inline void ClampBufferToS16(s16 *out, const s32 *in, size_t size, s8 volShift) {
107
#ifdef _M_SSE
108
	// Size will always be 16-byte aligned as the hwBlockSize is.
109
	while (size >= 8) {
110
		__m128i in1 = _mm_loadu_si128((__m128i *)in);
111
		__m128i in2 = _mm_loadu_si128((__m128i *)(in + 4));
112
		__m128i packed = _mm_packs_epi32(in1, in2);
113
		if (useShift) {
114
			packed = _mm_srai_epi16(packed, volShift);
115
		}
116
		_mm_storeu_si128((__m128i *)out, packed);
117
		out += 8;
118
		in += 8;
119
		size -= 8;
120
	}
121
#elif PPSSPP_ARCH(ARM_NEON)
122
	// Dynamic shifts can only be left, but it's signed - negate to shift right.
123
	int16x4_t signedVolShift = vdup_n_s16(-volShift);
124
	while (size >= 8) {
125
		int32x4_t in1 = vld1q_s32(in);
126
		int32x4_t in2 = vld1q_s32(in + 4);
127
		int16x4_t packed1 = vqmovn_s32(in1);
128
		int16x4_t packed2 = vqmovn_s32(in2);
129
		if (useShift) {
130
			packed1 = vshl_s16(packed1, signedVolShift);
131
			packed2 = vshl_s16(packed2, signedVolShift);
132
		}
133
		vst1_s16(out, packed1);
134
		vst1_s16(out + 4, packed2);
135
		out += 8;
136
		in += 8;
137
		size -= 8;
138
	}
139
#endif
140
	// This does the remainder if SIMD was used, otherwise it does it all.
141
	for (size_t i = 0; i < size; i++) {
142
		out[i] = clamp_s16(useShift ? (in[i] >> volShift) : in[i]);
143
	}
144
}
145

146
// Converts `size` 32-bit samples to 16-bit with the currently configured volume applied.
//
// The global volume is used, unless emulation is running at a non-normal FPS limit or
// fast-forwarding AND an alternate-speed volume is configured (anything but -1), in
// which case that one wins. Full volume converts without attenuation, "off" writes
// silence, and anything in between attenuates by a right shift of
// (VOLUME_FULL - volume) bits.
inline void ClampBufferToS16WithVolume(s16 *out, const s32 *in, size_t size) {
	const int globalVolume = g_Config.iGlobalVolume;
	const bool altSpeed = PSP_CoreParameter().fpsLimit != FPSLimit::NORMAL || PSP_CoreParameter().fastForward;
	const int volume = (altSpeed && g_Config.iAltSpeedVolume != -1) ? g_Config.iAltSpeedVolume : globalVolume;

	if (volume >= VOLUME_FULL) {
		ClampBufferToS16<false>(out, in, size, 0);
		return;
	}

	if (volume <= VOLUME_OFF) {
		memset(out, 0, size * sizeof(s16));
		return;
	}

	ClampBufferToS16<true>(out, in, size, VOLUME_FULL - (s8)volume);
}
162

163
void StereoResampler::Clear() {
164
	memset(m_buffer, 0, m_maxBufsize * 2 * sizeof(int16_t));
165
}
166

167
// Linearly interpolates between two consecutive samples of one channel.
//
// s1   - current sample.
// s2   - next sample.
// frac - position between them as a 0.16 fixed-point fraction (0 = s1, 65535 ~= s2).
//
// Returns the interpolated sample, limited to the symmetric range [-32767, 32767].
inline int16_t MixSingleSample(int16_t s1, int16_t s2, uint16_t frac) {
	// The sample delta spans up to +/-65535 and frac up to 65535, so their product does
	// not fit in 32 bits. Do the multiply in 64 bits to avoid signed overflow, which
	// previously produced garbage on large sample-to-sample transitions.
	const int64_t delta = (int64_t)s2 - (int64_t)s1;
	const int32_t value = (int32_t)(s1 + ((delta * frac) >> 16));
	if (value < -32767)
		return -32767;
	else if (value > 32767)
		return 32767;
	else
		return (int16_t)value;
}
176

177
// Executed from sound stream thread, pulling sound out of the buffer.
178
unsigned int StereoResampler::Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sample_rate) {
179
	if (!samples)
180
		return 0;
181

182
	unsigned int currentSample;
183

184
	// Cache access in non-volatile variable
185
	// This is the only function changing the read value, so it's safe to
186
	// cache it locally although it's written here.
187
	// The writing pointer will be modified outside, but it will only increase,
188
	// so we will just ignore new written data while interpolating (until it wraps...).
189
	// Without this cache, the compiler wouldn't be allowed to optimize the
190
	// interpolation loop.
191
	u32 indexR = m_indexR.load();
192
	u32 indexW = m_indexW.load();
193

194
	const int INDEX_MASK = (m_maxBufsize * 2 - 1);
195

196
	// This is only for debug visualization, not used for anything.
197
	lastBufSize_ = ((indexW - indexR) & INDEX_MASK) / 2;
198

199
	// Drift prevention mechanism.
200
	float numLeft = (float)(((indexW - indexR) & INDEX_MASK) / 2);
201
	// If we had to discard samples the last frame due to underrun,
202
	// apply an adjustment here. Otherwise we'll overestimate how many
203
	// samples we need.
204
	numLeft -= droppedSamples_;
205
	droppedSamples_ = 0;
206

207
	// m_numLeftI here becomes a lowpass filtered version of numLeft.
208
	m_numLeftI = (numLeft + m_numLeftI * (CONTROL_AVG - 1.0f)) / CONTROL_AVG;
209

210
	// Here we try to keep the buffer size around m_lowwatermark (which is
211
	// really now more like desired_buffer_size) by adjusting the speed.
212
	// Note that the speed of adjustment here does not take the buffer size into
213
	// account. Since this is called once per "output frame", the frame size
214
	// will affect how fast this algorithm reacts, which can't be a good thing.
215
	float offset = (m_numLeftI - (float)m_targetBufsize) * CONTROL_FACTOR;
216
	if (offset > MAX_FREQ_SHIFT) offset = MAX_FREQ_SHIFT;
217
	if (offset < -MAX_FREQ_SHIFT) offset = -MAX_FREQ_SHIFT;
218

219
	output_sample_rate_ = (float)(m_input_sample_rate + offset);
220
	const u32 ratio = (u32)(65536.0 * output_sample_rate_ / (double)sample_rate);
221
	ratio_ = ratio;
222
	// TODO: consider a higher-quality resampling algorithm.
223
	// TODO: Add a fast path for 1:1.
224
	u32 frac = m_frac;
225
	for (currentSample = 0; currentSample < numSamples * 2; currentSample += 2) {
226
		if (((indexW - indexR) & INDEX_MASK) <= 2) {
227
			// Ran out!
228
			// int missing = numSamples * 2 - currentSample;
229
			// ILOG("Resampler underrun: %d (numSamples: %d, currentSample: %d)", missing, numSamples, currentSample / 2);
230
			underrunCount_++;
231
			break;
232
		}
233
		u32 indexR2 = indexR + 2; //next sample
234
		s16 l1 = m_buffer[indexR & INDEX_MASK]; //current
235
		s16 r1 = m_buffer[(indexR + 1) & INDEX_MASK]; //current
236
		s16 l2 = m_buffer[indexR2 & INDEX_MASK]; //next
237
		s16 r2 = m_buffer[(indexR2 + 1) & INDEX_MASK]; //next
238
		samples[currentSample] = MixSingleSample(l1, l2, (u16)frac);
239
		samples[currentSample + 1] = MixSingleSample(r1, r2, (u16)frac);
240
		frac += ratio;
241
		indexR += 2 * (frac >> 16);
242
		frac &= 0xffff;
243
	}
244
	m_frac = frac;
245

246
	// Let's not count the underrun padding here.
247
	outputSampleCount_ += currentSample / 2;
248

249
	// Padding with the last value to reduce clicking
250
	short s[2];
251
	s[0] = clamp_s16(m_buffer[(indexR - 1) & INDEX_MASK]);
252
	s[1] = clamp_s16(m_buffer[(indexR - 2) & INDEX_MASK]);
253
	for (; currentSample < numSamples * 2; currentSample += 2) {
254
		samples[currentSample] = s[0];
255
		samples[currentSample + 1] = s[1];
256
	}
257

258
	// Flush cached variable
259
	m_indexR.store(indexR);
260

261
	// TODO: What should we actually return here?
262
	return currentSample / 2;
263
}
264

265
// Executes on the emulator thread, pushing sound into the buffer.
266
void StereoResampler::PushSamples(const s32 *samples, unsigned int numSamples) {
267
	inputSampleCount_ += numSamples;
268

269
	UpdateBufferSize();
270
	const int INDEX_MASK = (m_maxBufsize * 2 - 1);
271
	// Cache access in non-volatile variable
272
	// indexR isn't allowed to cache in the audio throttling loop as it
273
	// needs to get updates to not deadlock.
274
	u32 indexW = m_indexW.load();
275

276
	u32 cap = m_maxBufsize * 2;
277
	// If fast-forwarding, no need to fill up the entire buffer, just screws up timing after releasing the fast-forward button.
278
	if (PSP_CoreParameter().fastForward) {
279
		cap = m_targetBufsize * 2;
280
	}
281

282
	// Check if we have enough free space
283
	// indexW == m_indexR results in empty buffer, so indexR must always be smaller than indexW
284
	if (numSamples * 2 + ((indexW - m_indexR.load()) & INDEX_MASK) >= cap) {
285
		if (!PSP_CoreParameter().fastForward) {
286
			overrunCount_++;
287
		}
288
		// TODO: "Timestretch" by doing a windowed overlap with existing buffer content?
289
		return;
290
	}
291

292
	// Check if we need to roll over to the start of the buffer during the copy.
293
	unsigned int indexW_left_samples = m_maxBufsize * 2 - (indexW & INDEX_MASK);
294
	if (numSamples * 2 > indexW_left_samples) {
295
		ClampBufferToS16WithVolume(&m_buffer[indexW & INDEX_MASK], samples, indexW_left_samples);
296
		ClampBufferToS16WithVolume(&m_buffer[0], samples + indexW_left_samples, numSamples * 2 - indexW_left_samples);
297
	} else {
298
		ClampBufferToS16WithVolume(&m_buffer[indexW & INDEX_MASK], samples, numSamples * 2);
299
	}
300

301
	m_indexW += numSamples * 2;
302
	lastPushSize_ = numSamples;
303
}
304

305
void StereoResampler::GetAudioDebugStats(char *buf, size_t bufSize) {
306
	double elapsed = time_now_d() - startTime_;
307

308
	double effective_input_sample_rate = (double)inputSampleCount_ / elapsed;
309
	double effective_output_sample_rate = (double)outputSampleCount_ / elapsed;
310
	snprintf(buf, bufSize,
311
		"Audio buffer: %d/%d (target: %d)\n"
312
		"Filtered: %0.2f\n"
313
		"Underruns: %d\n"
314
		"Overruns: %d\n"
315
		"Sample rate: %d (input: %d)\n"
316
		"Effective input sample rate: %0.2f\n"
317
		"Effective output sample rate: %0.2f\n"
318
		"Push size: %d\n"
319
		"Ratio: %0.6f\n",
320
		lastBufSize_,
321
		m_maxBufsize,
322
		m_targetBufsize,
323
		m_numLeftI,
324
		underrunCountTotal_,
325
		overrunCountTotal_,
326
		(int)output_sample_rate_,
327
		m_input_sample_rate,
328
		effective_input_sample_rate,
329
		effective_output_sample_rate,
330
		lastPushSize_,
331
		(float)ratio_ / 65536.0f);
332
	underrunCountTotal_ += underrunCount_;
333
	overrunCountTotal_ += overrunCount_;
334
	underrunCount_ = 0;
335
	overrunCount_ = 0;
336

337
	// Use this to remove the bias from the startup.
338
	// if (elapsed > 3.0) {
339
		//ResetStatCounters();
340
	// }
341
}
342

343
void StereoResampler::ResetStatCounters() {
344
	underrunCount_ = 0;
345
	overrunCount_ = 0;
346
	underrunCountTotal_ = 0;
347
	overrunCountTotal_ = 0;
348
	inputSampleCount_ = 0;
349
	outputSampleCount_ = 0;
350
	startTime_ = time_now_d();
351
}
352

353
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

Product

Resources

Company