GitHub Repository: hrydgard/ppsspp
Path: blob/master/Windows/WASAPIContext.cpp
#include <windows.h>
#include <mmdeviceapi.h>
#include <functiondiscoverykeys_devpkey.h>
#include <audioclient.h>
#include <avrt.h>
#include <comdef.h>
#include <atomic>
#include <thread>
#include <vector>
#include <string_view>
#include <wrl/client.h>

#include "Common/Data/Encoding/Utf8.h"
#include "Common/Log.h"
#include "Common/Thread/ThreadUtil.h"
#include "WASAPIContext.h"

using Microsoft::WRL::ComPtr;

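// Scales a float sample (nominally in [-1.0f, 1.0f]) to 16-bit range and saturates anything outside it.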
// We must have one of these already...
static inline s16 ClampFloatToS16(float f) {
	f *= 32768.0f;
	if (f >= 32767) {
		return 32767;
	} else if (f < -32767) {
		return -32767;
	} else {
		return (s16)(s32)f;
	}
}

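// Builds a stereo 32-bit float WAVEFORMATEXTENSIBLE at the same sample rate as the
// device's mix format. Used below when the mix format has a channel count other than 2.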
void BuildStereoFloatFormat(const WAVEFORMATEXTENSIBLE *original, WAVEFORMATEXTENSIBLE *output) {
	// Zero-init all fields first.
	ZeroMemory(output, sizeof(WAVEFORMATEXTENSIBLE));

	// Fill the WAVEFORMATEX base part.
	output->Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
	output->Format.nChannels = 2;
	output->Format.nSamplesPerSec = original->Format.nSamplesPerSec;
	output->Format.wBitsPerSample = 32;  // 32-bit float
	output->Format.nBlockAlign = output->Format.nChannels * output->Format.wBitsPerSample / 8;
	output->Format.nAvgBytesPerSec = output->Format.nSamplesPerSec * output->Format.nBlockAlign;
	output->Format.cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);

	// Fill the extensible fields.
	output->Samples.wValidBitsPerSample = 32;
	output->dwChannelMask = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT;
	output->SubFormat = KSDATAFORMAT_SUBTYPE_IEEE_FLOAT;
}

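// The constructor only sets up device enumeration and default-device-change notifications;
// the actual output stream is created later by InitOutputDevice().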
WASAPIContext::WASAPIContext() : notificationClient_(this) {
	HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_ALL, IID_PPV_ARGS(&enumerator_));
	if (FAILED(hr)) {
		// Bad!
		enumerator_ = nullptr;
		return;
	}
	enumerator_->RegisterEndpointNotificationCallback(&notificationClient_);
}

WASAPIContext::~WASAPIContext() {
	if (!enumerator_) {
		// Nothing can have been happening.
		return;
	}
	Stop();
	enumerator_->UnregisterEndpointNotificationCallback(&notificationClient_);
	delete[] tempBuf_;
}

WASAPIContext::AudioFormat WASAPIContext::Classify(const WAVEFORMATEX *format) {
	if (format->wFormatTag == WAVE_FORMAT_PCM && format->wBitsPerSample == 16) {
		return AudioFormat::S16;
	} else if (format->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
		const WAVEFORMATEXTENSIBLE *ex = (const WAVEFORMATEXTENSIBLE *)format;
		if (ex->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT) {
			return AudioFormat::Float;
		}
	} else {
		WARN_LOG(Log::Audio, "Unhandled output format!");
	}
	return AudioFormat::Unhandled;
}

87
bool GetDeviceDesc(IMMDevice *device, AudioDeviceDesc *desc) {
88
ComPtr<IPropertyStore> props;
89
device->OpenPropertyStore(STGM_READ, &props);
90
PROPVARIANT nameProp;
91
PropVariantInit(&nameProp);
92
props->GetValue(PKEY_Device_FriendlyName, &nameProp);
93
LPWSTR id_str = 0;
94
bool success = false;
95
if (SUCCEEDED(device->GetId(&id_str))) {
96
desc->name = ConvertWStringToUTF8(nameProp.pwszVal);
97
desc->uniqueId = ConvertWStringToUTF8(id_str);
98
CoTaskMemFree(id_str);
99
success = true;
100
}
101
PropVariantClear(&nameProp);
102
return success;
103
}
104
105
void WASAPIContext::EnumerateDevices(std::vector<AudioDeviceDesc> *output, bool captureDevices) {
106
ComPtr<IMMDeviceCollection> collection;
107
enumerator_->EnumAudioEndpoints(captureDevices ? eCapture : eRender, DEVICE_STATE_ACTIVE, &collection);
108
109
if (!collection) {
110
ERROR_LOG(Log::Audio, "Failed to enumerate devices");
111
return;
112
}
113
114
UINT count = 0;
115
collection->GetCount(&count);
116
117
for (UINT i = 0; i < count; ++i) {
118
ComPtr<IMMDevice> device;
119
collection->Item(i, &device);
120
121
AudioDeviceDesc desc{};
122
if (GetDeviceDesc(device.Get(), &desc)) {
123
output->push_back(desc);
124
}
125
}
126
}
127
128
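// Opens the requested output device (or the default one), preferring the low-latency
// IAudioClient3 path and falling back to the legacy IAudioClient path when necessary.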
bool WASAPIContext::InitOutputDevice(std::string_view uniqueId, LatencyMode latencyMode, bool *revertedToDefault) {
	Stop();

	*revertedToDefault = false;

	ComPtr<IMMDevice> device;
	if (uniqueId.empty()) {
		// Use the default device.
		if (FAILED(enumerator_->GetDefaultAudioEndpoint(eRender, eConsole, &device))) {
			return false;
		}
	} else {
		// Use the specified device.
		std::wstring wId = ConvertUTF8ToWString(uniqueId);
		if (FAILED(enumerator_->GetDevice(wId.c_str(), &device))) {
			// Fall back to the default device.
			INFO_LOG(Log::Audio, "Falling back to default device...");
			*revertedToDefault = true;
			if (FAILED(enumerator_->GetDefaultAudioEndpoint(eRender, eConsole, &device))) {
				return false;
			}
		}
	}

	AudioDeviceDesc desc{};
	GetDeviceDesc(device.Get(), &desc);
	INFO_LOG(Log::Audio, "Activating audio device: %s", desc.name.c_str());

	deviceId_ = uniqueId;

	HRESULT hr = E_FAIL;
	// Try IAudioClient3 first if not in "safe" mode. It's probably safe anyway, but still, let's use the legacy client as a safe fallback option.
	if (latencyMode != LatencyMode::Safe) {
		hr = device->Activate(__uuidof(IAudioClient3), CLSCTX_ALL, nullptr, (void**)&audioClient3_);
	}

	// Get rid of any old tempBuf_.
	delete[] tempBuf_;
	tempBuf_ = nullptr;

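	// IAudioClient3 path: if the mix format is already stereo float, initialize with the
	// minimum shared-mode engine period for the lowest latency the engine will allow.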
	if (SUCCEEDED(hr)) {
		audioClient3_->GetMixFormat(&format_);
		// We only use AudioClient3 if we got the format we wanted (stereo float).
		if (format_->nChannels != 2 || Classify(format_) != AudioFormat::Float) {
			// Let's fall back to the old path. The docs seem to be wrong: if you try to create an
			// AudioClient3 with low latency audio with AUTOCONVERTPCM, you get the error 0x88890021.
			audioClient3_.Reset();
			// Fall through to AudioClient creation below.
		} else {
			audioClient3_->GetSharedModeEnginePeriod(format_, &defaultPeriodFrames, &fundamentalPeriodFrames, &minPeriodFrames, &maxPeriodFrames);

			INFO_LOG(Log::Audio, "AudioClient3: default: %d fundamental: %d min: %d max: %d", (int)defaultPeriodFrames, (int)fundamentalPeriodFrames, (int)minPeriodFrames, (int)maxPeriodFrames);
			INFO_LOG(Log::Audio, "Initializing with %d frame period at %d Hz, meaning %0.1f ms", (int)minPeriodFrames, (int)format_->nSamplesPerSec, FramesToMs(minPeriodFrames, format_->nSamplesPerSec));

			audioEvent_ = CreateEvent(nullptr, FALSE, FALSE, nullptr);
			HRESULT result = audioClient3_->InitializeSharedAudioStream(
				AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
				minPeriodFrames,
				format_,
				nullptr
			);
			if (FAILED(result)) {
				WARN_LOG(Log::Audio, "Error initializing AudioClient3 shared audio stream: %08lx", result);
				audioClient3_.Reset();
				return false;
			}
			actualPeriodFrames_ = minPeriodFrames;

			audioClient3_->GetBufferSize(&reportedBufferSize_);
			audioClient3_->SetEventHandle(audioEvent_);
			audioClient3_->GetService(IID_PPV_ARGS(&renderClient_));
		}
	}

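	// Legacy IAudioClient path. Here we negotiate the format ourselves: if the mix format
	// isn't stereo float, we either ask the engine for plain stereo or mix to a temp buffer
	// and convert in the audio loop.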
	if (!audioClient3_) {
		// Fall back to IAudioClient (older OS).
		HRESULT hr = device->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr, (void**)&audioClient_);
		if (FAILED(hr)) {
			ERROR_LOG(Log::Audio, "Failed to activate audio device: %08lx", hr);
			return false;
		}

		audioClient_->GetMixFormat(&format_);

		// If there are too many channels, try asking for a 2-channel output format.
		DWORD extraStreamFlags = 0;
		const AudioFormat fmt = Classify(format_);

		bool createBuffer = false;
		if (fmt == AudioFormat::Float) {
			if (format_->nChannels != 2) {
				INFO_LOG(Log::Audio, "Got %d channels, asking for stereo instead", format_->nChannels);
				WAVEFORMATEXTENSIBLE stereo;
				BuildStereoFloatFormat((const WAVEFORMATEXTENSIBLE *)format_, &stereo);

				WAVEFORMATEX *closestMatch = nullptr;
				const HRESULT result = audioClient_->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED, (const WAVEFORMATEX *)&stereo, &closestMatch);
				if (result == S_OK) {
					// We got the format! Use it and set as current (freeing the old mix format).
					_dbg_assert_(!closestMatch);
					CoTaskMemFree(format_);
					format_ = (WAVEFORMATEX *)CoTaskMemAlloc(sizeof(WAVEFORMATEXTENSIBLE));
					memcpy(format_, &stereo, sizeof(WAVEFORMATEX) + stereo.Format.cbSize);
					extraStreamFlags = AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM | AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY;
					INFO_LOG(Log::Audio, "Successfully asked for two channels");
				} else if (result == S_FALSE) {
					// We got offered another format. Meh, let's just use what we got.
					if (closestMatch) {
						WARN_LOG(Log::Audio, "Didn't get the format we wanted, but got: %lu Hz ch=%d", closestMatch->nSamplesPerSec, closestMatch->nChannels);
						CoTaskMemFree(closestMatch);
					} else {
						WARN_LOG(Log::Audio, "Failed to fall back to two channels. Using workarounds.");
					}
					createBuffer = true;
				} else {
					WARN_LOG(Log::Audio, "Got other error %08lx", result);
					_dbg_assert_(!closestMatch);
				}
			} else {
				// All good, nothing to convert.
			}
		} else {
			// Some other format.
			WARN_LOG(Log::Audio, "Format not float, applying conversion.");
			createBuffer = true;
		}

		// Get engine period info.
		REFERENCE_TIME defaultPeriod = 0, minPeriod = 0;
		audioClient_->GetDevicePeriod(&defaultPeriod, &minPeriod);

		audioEvent_ = CreateEvent(nullptr, FALSE, FALSE, nullptr);

		const REFERENCE_TIME duration = minPeriod;
		hr = audioClient_->Initialize(
			AUDCLNT_SHAREMODE_SHARED,
			AUDCLNT_STREAMFLAGS_EVENTCALLBACK | extraStreamFlags,
			duration,  // This is a minimum, the result might be larger. We use GetBufferSize to check.
			0,  // Periodicity; must be 0 in shared mode.
			format_,
			nullptr
		);

		if (FAILED(hr)) {
			WARN_LOG(Log::Audio, "Failed to initialize the audio client: %08lx", hr);
			audioClient_.Reset();
			return false;
		}

		audioClient_->GetBufferSize(&reportedBufferSize_);
		actualPeriodFrames_ = reportedBufferSize_;  // We don't have a better estimate.
		audioClient_->SetEventHandle(audioEvent_);
		audioClient_->GetService(IID_PPV_ARGS(&renderClient_));

		if (createBuffer) {
			tempBuf_ = new float[reportedBufferSize_ * 2];
		}
	}

	latencyMode_ = latencyMode;

	_dbg_assert_(audioClient_ || audioClient3_);

	Start();

	return true;
}

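// Start()/Stop() manage the audio thread. The WASAPI client itself is started
// and stopped by that thread, inside AudioLoop().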
void WASAPIContext::Start() {
	running_ = true;
	audioThread_ = std::thread([this]() { AudioLoop(); });
}

void WASAPIContext::Stop() {
	running_ = false;
	if (audioClient_) audioClient_->Stop();
	if (audioEvent_) SetEvent(audioEvent_);
	if (audioThread_.joinable()) audioThread_.join();

	renderClient_.Reset();
	audioClient_.Reset();
	audioClient3_.Reset();  // Otherwise a stale client would survive into the next InitOutputDevice().
	if (audioEvent_) {
		CloseHandle(audioEvent_);
		audioEvent_ = nullptr;
	}
	if (format_) {
		CoTaskMemFree(format_);
		format_ = nullptr;
	}
}

void WASAPIContext::FrameUpdate(bool allowAutoChange) {
	if (deviceId_.empty() && defaultDeviceChanged_ && allowAutoChange) {
		// The default output device changed, and we're following the default device.
		// Stop() alone tears down the client, so do a full re-initialization.
		defaultDeviceChanged_ = false;
		bool revertedToDefault = false;
		InitOutputDevice("", latencyMode_, &revertedToDefault);
	}
}

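// The audio thread: waits for the engine's event, then asks the callback for exactly
// as many frames as the engine wants, converting formats if needed.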
void WASAPIContext::AudioLoop() {
	SetCurrentThreadName("WASAPIAudioLoop");

	DWORD taskID = 0;
	HANDLE mmcssHandle = nullptr;
	if (latencyMode_ == LatencyMode::Aggressive) {
		mmcssHandle = AvSetMmThreadCharacteristics(L"Pro Audio", &taskID);
	}

	UINT32 available = 0;
	if (audioClient3_) {
		audioClient3_->Start();
		audioClient3_->GetBufferSize(&available);
	} else if (audioClient_) {
		audioClient_->Start();
		audioClient_->GetBufferSize(&available);
	} else {
		// No audio client, nothing to do.
		WARN_LOG(Log::Audio, "No audio client");
		return;
	}

	const AudioFormat format = Classify(format_);
	const int nChannels = format_->nChannels;

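	// available is the full buffer size and padding is how much of it is still queued,
	// so on each wakeup we top the buffer up with the difference.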
	while (running_) {
		const DWORD waitResult = WaitForSingleObject(audioEvent_, INFINITE);
		if (waitResult != WAIT_OBJECT_0) {
			// Something bad happened.
			break;
		}

		UINT32 padding = 0;
		if (audioClient3_) {
			audioClient3_->GetCurrentPadding(&padding);
		} else {
			audioClient_->GetCurrentPadding(&padding);
		}

		const UINT32 framesToWrite = available - padding;
		BYTE *buffer = nullptr;
		if (framesToWrite > 0 && SUCCEEDED(renderClient_->GetBuffer(framesToWrite, &buffer))) {
			if (!tempBuf_) {
				// Mix directly into the output buffer, avoiding a copy.
				if (buffer) {
					callback_(reinterpret_cast<float *>(buffer), framesToWrite, format_->nSamplesPerSec, userdata_);
				}
			} else {
				// We decided previously that we need conversion, so mix to our temp buffer...
				callback_(tempBuf_, framesToWrite, format_->nSamplesPerSec, userdata_);
				// ...and convert according to the format (we support multi-channel float and s16).
				if (format == AudioFormat::S16 && buffer) {
					// Need to convert.
					s16 *dest = reinterpret_cast<s16 *>(buffer);
					for (UINT32 i = 0; i < framesToWrite; i++) {
						if (nChannels == 1) {
							// Maybe some bluetooth speakers? Mix down to mono.
							float sum = 0.5f * (tempBuf_[i * 2] + tempBuf_[i * 2 + 1]);
							dest[i] = ClampFloatToS16(sum);
						} else {
							dest[i * nChannels] = ClampFloatToS16(tempBuf_[i * 2]);
							dest[i * nChannels + 1] = ClampFloatToS16(tempBuf_[i * 2 + 1]);
							// Zero any extra channels.
							for (int j = 2; j < nChannels; j++) {
								dest[i * nChannels + j] = 0;
							}
						}
					}
				} else if (format == AudioFormat::Float && buffer) {
					// We have a non-stereo channel count (otherwise tempBuf_ wouldn't exist), so we contract/expand.
					float *dest = reinterpret_cast<float *>(buffer);
					for (UINT32 i = 0; i < framesToWrite; i++) {
						if (nChannels == 1) {
							// Maybe some bluetooth speakers? Mix down to mono.
							dest[i] = 0.5f * (tempBuf_[i * 2] + tempBuf_[i * 2 + 1]);
						} else {
							dest[i * nChannels] = tempBuf_[i * 2];
							dest[i * nChannels + 1] = tempBuf_[i * 2 + 1];
							// Zero any extra channels.
							for (int j = 2; j < nChannels; j++) {
								dest[i * nChannels + j] = 0;
							}
						}
					}
				}
			}

			renderClient_->ReleaseBuffer(framesToWrite, 0);
		}

		// In the legacy mode, we estimate actualPeriodFrames_ from the smallest framesToWrite we see.
		if (audioClient_ && framesToWrite < actualPeriodFrames_) {
			actualPeriodFrames_ = framesToWrite;
		}
	}

	if (audioClient3_) {
		audioClient3_->Stop();
	} else {
		audioClient_->Stop();
	}

	if (mmcssHandle) {
		AvRevertMmThreadCharacteristics(mmcssHandle);
	}
}

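// Produces a short human-readable summary of the current output format, e.g. for logging or debug display.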
void WASAPIContext::DescribeOutputFormat(char *buffer, size_t bufferSize) const {
	const int numChannels = format_->nChannels;
	const int sampleBits = format_->wBitsPerSample;
	const int sampleRateHz = format_->nSamplesPerSec;
	const char *fmt = "N/A";
	if (format_->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
		const WAVEFORMATEXTENSIBLE *ex = (const WAVEFORMATEXTENSIBLE *)format_;
		if (ex->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT) {
			fmt = "float";
		} else {
			fmt = "PCM";
		}
	} else {
		fmt = "PCM";  // probably
	}
	snprintf(buffer, bufferSize, "%d Hz %s %d-bit, %d ch%s", sampleRateHz, fmt, sampleBits, numChannels, audioClient3_ ? " (ac3)" : " (ac)");
}