Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
stenzek
GitHub Repository: stenzek/duckstation
Path: blob/master/src/util/d3d12_device.cpp
4223 views
1
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <[email protected]>
2
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
3
4
#include "d3d12_device.h"
5
#include "d3d12_builders.h"
6
#include "d3d12_pipeline.h"
7
#include "d3d12_stream_buffer.h"
8
#include "d3d12_texture.h"
9
#include "d3d_common.h"
10
11
#include "common/align.h"
12
#include "common/assert.h"
13
#include "common/bitutils.h"
14
#include "common/error.h"
15
#include "common/file_system.h"
16
#include "common/log.h"
17
#include "common/path.h"
18
#include "common/scoped_guard.h"
19
#include "common/small_string.h"
20
#include "common/string_util.h"
21
22
#include "D3D12MemAlloc.h"
23
#include "fmt/format.h"
24
25
#include <limits>
26
#include <mutex>
27
28
LOG_CHANNEL(GPUDevice);
29
30
// Tweakables: compile-time capacity limits and buffer sizes used by this backend.
31
enum : u32
32
{
33
// Per-frame (per command list) limits. MAX_DESCRIPTORS_PER_FRAME and MAX_SAMPLERS_PER_FRAME size the
// per-command-list descriptor/sampler allocators created in CreateCommandLists().
MAX_DRAW_CALLS_PER_FRAME = 2048,
34
MAX_DESCRIPTORS_PER_FRAME = 32768,
35
MAX_SAMPLERS_PER_FRAME = D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE,
36
MAX_DESCRIPTOR_SETS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME,
37
38
// Capacities of the persistent descriptor heap managers created in CreateDescriptorHeaps().
MAX_PERSISTENT_DESCRIPTORS = 2048,
39
MAX_PERSISTENT_RTVS = 512,
40
MAX_PERSISTENT_DSVS = 128,
41
MAX_PERSISTENT_SAMPLERS = 512,
42
43
// Buffer sizes in bytes. NOTE(review): presumably consumed by CreateBuffers(), which is not visible here.
VERTEX_BUFFER_SIZE = 32 * 1024 * 1024,
44
INDEX_BUFFER_SIZE = 16 * 1024 * 1024,
45
VERTEX_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024,
46
FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024,
47
TEXTURE_BUFFER_SIZE = 64 * 1024 * 1024,
48
49
// NOTE(review): units (bytes) assumed from the name; the user of this constant is outside this view.
UNIFORM_PUSH_CONSTANTS_SIZE = 128,
50
51
// NOTE(review): units (bytes) assumed from the name; the user of this constant is outside this view.
MAX_UNIFORM_BUFFER_SIZE = 1024,
52
};
53
54
// We need to synchronize instance creation because of adapter enumeration from the UI thread.
// Held for the duration of CreateDeviceAndMainSwapChain() and DestroyDevice().
55
static std::mutex s_instance_mutex;
56
57
// Texture format used for swap chain buffers; translated to a DXGI format via D3DCommon::GetFormatMapping().
static constexpr GPUTexture::Format s_swap_chain_format = GPUTexture::Format::RGBA8;
58
59
// Backing storage for the blob handed to CreatePipelineLibrary() in ReadPipelineCache(). It only needs to be
// kept alive while the pipeline library exists; its contents are never read back through this variable.
// DestroyDevice() deallocates it, and the destructor asserts that it is empty.
60
static DynamicHeapArray<u8> s_pipeline_cache_data;
61
62
#ifdef ENABLE_GPU_OBJECT_NAMES
63
#include "WinPixEventRuntime/pix3.h"
64
// Debug scope nesting counter; reset to zero whenever a D3D12Device is constructed.
static u32 s_debug_scope_depth = 0;
65
#endif
66
67
// Embedded binary blob for the mipmap blit vertex shader (per the variable name), stored as 32-bit words.
// The first word, 0x43425844, is the ASCII container tag "DXBC". The blob is opaque data: do not edit by hand.
// NOTE(review): presumably generated offline by the HLSL compiler; regenerate rather than patching words.
static constexpr const u32 s_mipmap_blit_vs[] = {
68
0x43425844, 0xe0f571cf, 0x51234ef3, 0x3a6beab4, 0x141cd2ef, 0x00000001, 0x000003ac, 0x00000005, 0x00000034,
69
0x00000144, 0x00000178, 0x000001d0, 0x00000310, 0x46454452, 0x00000108, 0x00000001, 0x00000068, 0x00000001,
70
0x0000003c, 0xfffe0500, 0x00008100, 0x000000e0, 0x31314452, 0x0000003c, 0x00000018, 0x00000020, 0x00000028,
71
0x00000024, 0x0000000c, 0x00000000, 0x0000005c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
72
0x00000001, 0x00000001, 0x424f4255, 0x6b636f6c, 0xababab00, 0x0000005c, 0x00000001, 0x00000080, 0x00000010,
73
0x00000000, 0x00000000, 0x000000a8, 0x00000000, 0x00000010, 0x00000002, 0x000000bc, 0x00000000, 0xffffffff,
74
0x00000000, 0xffffffff, 0x00000000, 0x72735f75, 0x65725f63, 0x66007463, 0x74616f6c, 0xabab0034, 0x00030001,
75
0x00040001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x000000b3, 0x7263694d,
76
0x666f736f, 0x52282074, 0x4c482029, 0x53204c53, 0x65646168, 0x6f432072, 0x6c69706d, 0x31207265, 0x00312e30,
77
0x4e475349, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000006, 0x00000001, 0x00000000,
78
0x00000101, 0x565f5653, 0x65747265, 0x00444978, 0x4e47534f, 0x00000050, 0x00000002, 0x00000008, 0x00000038,
79
0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000c03, 0x00000041, 0x00000000, 0x00000001, 0x00000003,
80
0x00000001, 0x0000000f, 0x43584554, 0x44524f4f, 0x5f565300, 0x69736f50, 0x6e6f6974, 0xababab00, 0x58454853,
81
0x00000138, 0x00010050, 0x0000004e, 0x0100086a, 0x04000059, 0x00208e46, 0x00000000, 0x00000001, 0x04000060,
82
0x00101012, 0x00000000, 0x00000006, 0x03000065, 0x00102032, 0x00000000, 0x04000067, 0x001020f2, 0x00000001,
83
0x00000001, 0x02000068, 0x00000001, 0x0b00008c, 0x00100012, 0x00000000, 0x00004001, 0x00000001, 0x00004001,
84
0x00000001, 0x0010100a, 0x00000000, 0x00004001, 0x00000000, 0x07000001, 0x00100042, 0x00000000, 0x0010100a,
85
0x00000000, 0x00004001, 0x00000002, 0x05000056, 0x00100032, 0x00000000, 0x00100086, 0x00000000, 0x0b000032,
86
0x00102032, 0x00000000, 0x00100046, 0x00000000, 0x00208ae6, 0x00000000, 0x00000000, 0x00208046, 0x00000000,
87
0x00000000, 0x0f000032, 0x00102032, 0x00000001, 0x00100046, 0x00000000, 0x00004002, 0x40000000, 0xc0000000,
88
0x00000000, 0x00000000, 0x00004002, 0xbf800000, 0x3f800000, 0x00000000, 0x00000000, 0x08000036, 0x001020c2,
89
0x00000001, 0x00004002, 0x00000000, 0x00000000, 0x00000000, 0x3f800000, 0x0100003e, 0x54415453, 0x00000094,
90
0x00000007, 0x00000001, 0x00000000, 0x00000003, 0x00000002, 0x00000000, 0x00000001, 0x00000001, 0x00000000,
91
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
92
0x00000000, 0x00000001, 0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
93
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
94
0x00000000};
95
96
// Embedded binary blob for the mipmap blit pixel shader (per the variable name), stored as 32-bit words.
// The first word, 0x43425844, is the ASCII container tag "DXBC". The blob is opaque data: do not edit by hand.
// NOTE(review): presumably generated offline by the HLSL compiler; regenerate rather than patching words.
static constexpr const u32 s_mipmap_blit_ps[] = {
97
0x43425844, 0x25500f77, 0x71f24271, 0x5f83f8b8, 0x3f405943, 0x00000001, 0x0000026c, 0x00000005, 0x00000034,
98
0x000000f0, 0x00000124, 0x00000158, 0x000001d0, 0x46454452, 0x000000b4, 0x00000000, 0x00000000, 0x00000002,
99
0x0000003c, 0xffff0500, 0x00008100, 0x0000008b, 0x31314452, 0x0000003c, 0x00000018, 0x00000020, 0x00000028,
100
0x00000024, 0x0000000c, 0x00000000, 0x0000007c, 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
101
0x00000001, 0x00000001, 0x00000085, 0x00000002, 0x00000005, 0x00000004, 0xffffffff, 0x00000000, 0x00000001,
102
0x0000000d, 0x706d6173, 0x73735f30, 0x6d617300, 0x4d003070, 0x6f726369, 0x74666f73, 0x29522820, 0x534c4820,
103
0x6853204c, 0x72656461, 0x6d6f4320, 0x656c6970, 0x30312072, 0xab00312e, 0x4e475349, 0x0000002c, 0x00000001,
104
0x00000008, 0x00000020, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000303, 0x43584554, 0x44524f4f,
105
0xababab00, 0x4e47534f, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000000, 0x00000003,
106
0x00000000, 0x0000000f, 0x545f5653, 0x65677261, 0xabab0074, 0x58454853, 0x00000070, 0x00000050, 0x0000001c,
107
0x0100086a, 0x0300005a, 0x00106000, 0x00000000, 0x04001858, 0x00107000, 0x00000000, 0x00005555, 0x03001062,
108
0x00101032, 0x00000000, 0x03000065, 0x001020f2, 0x00000000, 0x8b000045, 0x800000c2, 0x00155543, 0x001020f2,
109
0x00000000, 0x00101046, 0x00000000, 0x00107e46, 0x00000000, 0x00106000, 0x00000000, 0x0100003e, 0x54415453,
110
0x00000094, 0x00000002, 0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000001,
111
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x00000000,
112
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
113
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
114
0x00000000, 0x00000000};
115
116
// Puts the object into its pre-creation state. No D3D12 objects are created here; that happens in
// CreateDeviceAndMainSwapChain().
D3D12Device::D3D12Device()
{
#ifdef ENABLE_GPU_OBJECT_NAMES
  // Start every device instance with no open debug scopes.
  s_debug_scope_depth = 0;
#endif

  // Set before device creation so the caller can pass a mode to CreateDeviceAndSwapChain().
  m_features.exclusive_fullscreen = true;
  m_render_api = RenderAPI::D3D12;
}
125
126
// The destructor releases nothing itself: DestroyDevice() must already have run, which is what the
// assertions below verify.
D3D12Device::~D3D12Device()
{
  // The device pointer is reset at the end of DestroyDevice().
  Assert(!m_device);

  // The pipeline cache backing store is deallocated by DestroyDevice() as well.
  Assert(s_pipeline_cache_data.empty());
}
131
132
// Serializes the given root signature description and creates a root signature object from it.
// Returns a null ComPtr on failure; in that case the failing step reports through `error`
// (serialization via D3DCommon::SerializeRootSignature(), creation via Error::SetHResult()).
D3D12Device::ComPtr<ID3D12RootSignature> D3D12Device::CreateRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc,
                                                                          Error* error)
{
  ComPtr<ID3D12RootSignature> root_signature;

  ComPtr<ID3DBlob> serialized = D3DCommon::SerializeRootSignature(desc, error);
  if (serialized)
  {
    const HRESULT hr = m_device->CreateRootSignature(0, serialized->GetBufferPointer(), serialized->GetBufferSize(),
                                                     IID_PPV_ARGS(root_signature.GetAddressOf()));
    if (FAILED(hr)) [[unlikely]]
    {
      Error::SetHResult(error, "CreateRootSignature() failed: ", hr);
      root_signature.Reset();
    }
  }

  return root_signature;
}
150
151
bool D3D12Device::CreateDeviceAndMainSwapChain(std::string_view adapter, CreateFlags create_flags,
152
const WindowInfo& wi, GPUVSyncMode vsync_mode,
153
bool allow_present_throttle,
154
const ExclusiveFullscreenMode* exclusive_fullscreen_mode,
155
std::optional<bool> exclusive_fullscreen_control, Error* error)
156
{
157
std::unique_lock lock(s_instance_mutex);
158
159
m_dxgi_factory = D3DCommon::CreateFactory(m_debug_device, error);
160
if (!m_dxgi_factory)
161
return false;
162
163
m_adapter = D3DCommon::GetAdapterByName(m_dxgi_factory.Get(), adapter);
164
165
HRESULT hr = S_OK;
166
167
// Enabling the debug layer will fail if the Graphics Tools feature is not installed.
168
if (m_debug_device)
169
{
170
ComPtr<ID3D12Debug> debug12;
171
if (D3DCommon::GetD3D12DebugInterface(&debug12, nullptr))
172
{
173
INFO_LOG("Enabling debug layer.");
174
debug12->EnableDebugLayer();
175
176
if (HasCreateFlag(create_flags, GPUDevice::CreateFlags::EnableGPUValidation))
177
{
178
ComPtr<ID3D12Debug1> debug12_1;
179
if (SUCCEEDED(debug12.As(&debug12_1)))
180
{
181
INFO_LOG("Enabling GPU-based validation.");
182
debug12_1->SetEnableGPUBasedValidation(true);
183
}
184
else
185
{
186
ERROR_LOG("GPU-based validation requested but not available.");
187
}
188
}
189
}
190
else
191
{
192
ERROR_LOG("Debug layer requested but not available.");
193
m_debug_device = false;
194
}
195
}
196
197
// Create the actual device.
198
D3D_FEATURE_LEVEL feature_level = D3D_FEATURE_LEVEL_1_0_CORE;
199
for (D3D_FEATURE_LEVEL try_feature_level : {D3D_FEATURE_LEVEL_12_0, D3D_FEATURE_LEVEL_11_0})
200
{
201
if (D3DCommon::CreateD3D12Device(m_adapter.Get(), try_feature_level, &m_device, error))
202
{
203
feature_level = try_feature_level;
204
break;
205
}
206
}
207
if (!m_device)
208
return false;
209
210
if (!m_adapter)
211
{
212
const LUID luid(m_device->GetAdapterLuid());
213
if (FAILED(m_dxgi_factory->EnumAdapterByLuid(luid, IID_PPV_ARGS(m_adapter.GetAddressOf()))))
214
ERROR_LOG("Failed to get lookup adapter by device LUID");
215
}
216
217
if (m_debug_device)
218
{
219
ComPtr<ID3D12InfoQueue> info_queue;
220
if (SUCCEEDED(m_device.As(&info_queue)))
221
{
222
if (IsDebuggerPresent())
223
{
224
info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE);
225
info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE);
226
}
227
228
D3D12_INFO_QUEUE_FILTER filter = {};
229
std::array<D3D12_MESSAGE_ID, 6> id_list{
230
D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE,
231
D3D12_MESSAGE_ID_CLEARDEPTHSTENCILVIEW_MISMATCHINGCLEARVALUE,
232
D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_RENDERTARGETVIEW_NOT_SET,
233
D3D12_MESSAGE_ID_CREATEINPUTLAYOUT_TYPE_MISMATCH,
234
D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE,
235
D3D12_MESSAGE_ID_LOADPIPELINE_NAMENOTFOUND,
236
};
237
filter.DenyList.NumIDs = static_cast<UINT>(id_list.size());
238
filter.DenyList.pIDList = id_list.data();
239
info_queue->PushStorageFilter(&filter);
240
}
241
}
242
243
GPUDriverType driver_type = GPUDriverType::Unknown;
244
if (std::string adapter_name = D3DCommon::GetAdapterName(m_adapter.Get(), &driver_type); adapter_name.empty())
245
INFO_LOG("D3D Adapter: {}", adapter_name);
246
SetDriverType(driver_type);
247
248
const D3D12_COMMAND_QUEUE_DESC queue_desc = {D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL,
249
D3D12_COMMAND_QUEUE_FLAG_NONE, 0u};
250
hr = m_device->CreateCommandQueue(&queue_desc, IID_PPV_ARGS(&m_command_queue));
251
if (FAILED(hr))
252
{
253
Error::SetHResult(error, "Failed to create command queue: ", hr);
254
return false;
255
}
256
257
D3D12MA::ALLOCATOR_DESC allocatorDesc = {};
258
allocatorDesc.pDevice = m_device.Get();
259
allocatorDesc.pAdapter = m_adapter.Get();
260
allocatorDesc.Flags =
261
D3D12MA::ALLOCATOR_FLAG_SINGLETHREADED |
262
D3D12MA::ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED /* | D3D12MA::ALLOCATOR_FLAG_ALWAYS_COMMITTED*/;
263
264
hr = D3D12MA::CreateAllocator(&allocatorDesc, m_allocator.GetAddressOf());
265
if (FAILED(hr))
266
{
267
Error::SetHResult(error, "D3D12MA::CreateAllocator() failed: ", hr);
268
return false;
269
}
270
271
hr = m_device->CreateFence(m_completed_fence_value, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_fence));
272
if (FAILED(hr))
273
{
274
Error::SetHResult(error, "Failed to create fence: ", hr);
275
return false;
276
}
277
278
m_fence_event = CreateEvent(nullptr, FALSE, FALSE, nullptr);
279
if (m_fence_event == NULL)
280
{
281
Error::SetWin32(error, "Failed to create fence event: ", GetLastError());
282
return false;
283
}
284
285
SetFeatures(feature_level, create_flags);
286
287
if (!CreateCommandLists(error) || !CreateDescriptorHeaps(error))
288
return false;
289
290
if (!wi.IsSurfaceless())
291
{
292
m_main_swap_chain = CreateSwapChain(wi, vsync_mode, allow_present_throttle, exclusive_fullscreen_mode,
293
exclusive_fullscreen_control, error);
294
if (!m_main_swap_chain)
295
return false;
296
}
297
298
if (!CreateRootSignatures(error) || !CreateBuffers(error))
299
return false;
300
301
CreateTimestampQuery();
302
return true;
303
}
304
305
void D3D12Device::DestroyDevice()
306
{
307
std::unique_lock lock(s_instance_mutex);
308
309
// Toss command list if we're recording...
310
if (InRenderPass())
311
EndRenderPass();
312
313
WaitForAllFences();
314
315
m_main_swap_chain.reset();
316
317
DestroyDeferredObjects(m_current_fence_value);
318
DestroyTimestampQuery();
319
DestroyBuffers();
320
DestroyDescriptorHeaps();
321
DestroyRootSignatures();
322
DestroyCommandLists();
323
324
m_pipeline_library.Reset();
325
s_pipeline_cache_data.deallocate();
326
m_fence.Reset();
327
if (m_fence_event != NULL)
328
{
329
CloseHandle(m_fence_event);
330
m_fence_event = NULL;
331
}
332
333
m_allocator.Reset();
334
m_command_queue.Reset();
335
m_device.Reset();
336
m_adapter.Reset();
337
m_dxgi_factory.Reset();
338
}
339
340
// Fills `hdr` with the values identifying the current device: the adapter LUID and the render API version.
// ReadPipelineCache() compares a stored header against this one byte-for-byte.
void D3D12Device::GetPipelineCacheHeader(PIPELINE_CACHE_HEADER* hdr)
{
  hdr->unused = 0;
  hdr->render_api_version = m_render_api_version;

  // Copied bytewise into the header's LUID field.
  const LUID device_luid = m_device->GetAdapterLuid();
  std::memcpy(&hdr->adapter_luid, &device_luid, sizeof(hdr->adapter_luid));
}
347
348
// Loads a previously serialized pipeline library from `data`, which must begin with a PIPELINE_CACHE_HEADER
// matching the current device. On success, ownership of the buffer moves into s_pipeline_cache_data.
// Returns false and sets `error` when the header does not match or the library cannot be created.
bool D3D12Device::ReadPipelineCache(DynamicHeapArray<u8> data, Error* error)
{
  constexpr size_t header_size = sizeof(PIPELINE_CACHE_HEADER);

  PIPELINE_CACHE_HEADER expected_header;
  GetPipelineCacheHeader(&expected_header);

  // The size check comes first so the comparison never reads past the end of the buffer.
  const bool header_ok =
    (data.size() >= header_size) && (std::memcmp(data.data(), &expected_header, header_size) == 0);
  if (!header_ok)
  {
    Error::SetStringView(error, "Pipeline cache header does not match current device.");
    return false;
  }

  const HRESULT hr = m_device->CreatePipelineLibrary(&data[header_size], data.size() - header_size,
                                                     IID_PPV_ARGS(m_pipeline_library.ReleaseAndGetAddressOf()));
  if (FAILED(hr))
  {
    Error::SetHResult(error, "CreatePipelineLibrary() failed: ", hr);
    return false;
  }

  // Have to keep the buffer around, DX doesn't take a copy.
  s_pipeline_cache_data = std::move(data);
  return true;
}
372
373
bool D3D12Device::CreatePipelineCache(const std::string& path, Error* error)
374
{
375
const HRESULT hr =
376
m_device->CreatePipelineLibrary(nullptr, 0, IID_PPV_ARGS(m_pipeline_library.ReleaseAndGetAddressOf()));
377
if (FAILED(hr))
378
{
379
Error::SetHResult(error, "CreatePipelineLibrary() failed: ", hr);
380
return false;
381
}
382
383
return true;
384
}
385
386
// Serializes the pipeline library into `data`, prefixed with a PIPELINE_CACHE_HEADER for the current device.
// Returns false when there is no pipeline library (without touching `error`) or when serialization fails
// (with `error` set and `data` deallocated). Returns true with `data` left untouched when the library
// reports a serialized size of zero.
bool D3D12Device::GetPipelineCacheData(DynamicHeapArray<u8>* data, Error* error)
{
  if (!m_pipeline_library)
    return false;

  const size_t library_size = m_pipeline_library->GetSerializedSize();
  if (library_size == 0)
  {
    WARNING_LOG("Empty serialized pipeline state returned.");
    return true;
  }

  constexpr size_t header_size = sizeof(PIPELINE_CACHE_HEADER);

  PIPELINE_CACHE_HEADER header;
  GetPipelineCacheHeader(&header);

  // Layout: [header][serialized library].
  data->resize(header_size + library_size);
  std::memcpy(data->data(), &header, header_size);

  const HRESULT hr = m_pipeline_library->Serialize(data->data() + header_size, library_size);
  if (SUCCEEDED(hr))
    return true;

  Error::SetHResult(error, "Serialize() failed: ", hr);
  data->deallocate();
  return false;
}
414
415
// Creates, for each of the NUM_COMMAND_LISTS slots, two command allocator/list pairs plus the per-frame
// descriptor and sampler allocators, then advances to the first command list so recording can begin.
// Returns false with `error` set on the first failure.
bool D3D12Device::CreateCommandLists(Error* error)
{
  for (u32 slot = 0; slot < NUM_COMMAND_LISTS; slot++)
  {
    CommandList& cl = m_command_lists[slot];

    // Index 0 is the "init" list (see GetInitCommandList()), index 1 is the main list.
    for (u32 n = 0; n < 2; n++)
    {
      auto& allocator = cl.command_allocators[n];
      auto& list = cl.command_lists[n];

      HRESULT hr =
        m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(allocator.GetAddressOf()));
      if (FAILED(hr))
      {
        Error::SetHResult(error, "CreateCommandAllocator() failed: ", hr);
        return false;
      }

      hr = m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, allocator.Get(), nullptr,
                                       IID_PPV_ARGS(list.GetAddressOf()));
      if (FAILED(hr))
      {
        Error::SetHResult(error, "CreateCommandList() failed: ", hr);
        return false;
      }

      // Close the command lists, since the first thing we do is reset them.
      hr = list->Close();
      if (FAILED(hr))
      {
        Error::SetHResult(error, "Close() for new command list failed: ", hr);
        return false;
      }
    }

    const bool descriptors_ok = cl.descriptor_allocator.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
                                                               MAX_DESCRIPTORS_PER_FRAME, error);
    if (!descriptors_ok)
    {
      Error::AddPrefix(error, "Failed to create per frame descriptor allocator: ");
      return false;
    }

    const bool samplers_ok = cl.sampler_allocator.Create(m_device.Get(), MAX_SAMPLERS_PER_FRAME, error);
    if (!samplers_ok)
    {
      Error::AddPrefix(error, "Failed to create per frame sampler allocator: ");
      return false;
    }
  }

  MoveToNextCommandList();
  return true;
}
466
467
void D3D12Device::MoveToNextCommandList()
468
{
469
m_current_command_list = (m_current_command_list + 1) % NUM_COMMAND_LISTS;
470
m_current_fence_value++;
471
472
// We may have to wait if this command list hasn't finished on the GPU.
473
CommandList& res = m_command_lists[m_current_command_list];
474
WaitForFence(res.fence_counter);
475
res.fence_counter = m_current_fence_value;
476
res.init_list_used = false;
477
478
// Begin command list.
479
res.command_allocators[1]->Reset();
480
res.command_lists[1]->Reset(res.command_allocators[1].Get(), nullptr);
481
res.descriptor_allocator.Reset();
482
if (res.sampler_allocator.ShouldReset())
483
res.sampler_allocator.Reset();
484
485
if (res.has_timestamp_query)
486
{
487
// readback timestamp from the last time this cmdlist was used.
488
// we don't need to worry about disjoint in dx12, the frequency is reliable within a single cmdlist.
489
const u32 offset = (m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST));
490
const D3D12_RANGE read_range = {offset, offset + (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST)};
491
void* map;
492
HRESULT hr = m_timestamp_query_buffer->Map(0, &read_range, &map);
493
if (SUCCEEDED(hr))
494
{
495
u64 timestamps[2];
496
std::memcpy(timestamps, static_cast<const u8*>(map) + offset, sizeof(timestamps));
497
m_accumulated_gpu_time +=
498
static_cast<float>(static_cast<double>(timestamps[1] - timestamps[0]) / m_timestamp_frequency);
499
500
const D3D12_RANGE write_range = {};
501
m_timestamp_query_buffer->Unmap(0, &write_range);
502
}
503
else
504
{
505
WARNING_LOG("Map() for timestamp query failed: {:08X}", static_cast<unsigned>(hr));
506
}
507
}
508
509
res.has_timestamp_query = m_gpu_timing_enabled;
510
if (m_gpu_timing_enabled)
511
{
512
res.command_lists[1]->EndQuery(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP,
513
m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST);
514
}
515
516
ID3D12DescriptorHeap* heaps[2] = {res.descriptor_allocator.GetDescriptorHeap(),
517
res.sampler_allocator.GetDescriptorHeap()};
518
res.command_lists[1]->SetDescriptorHeaps(static_cast<UINT>(std::size(heaps)), heaps);
519
520
m_allocator->SetCurrentFrameIndex(static_cast<UINT>(m_current_fence_value));
521
InvalidateCachedState();
522
}
523
524
void D3D12Device::DestroyCommandLists()
525
{
526
for (CommandList& resources : m_command_lists)
527
{
528
resources.descriptor_allocator.Destroy();
529
resources.sampler_allocator.Destroy();
530
for (u32 i = 0; i < 2; i++)
531
{
532
resources.command_lists[i].Reset();
533
resources.command_allocators[i].Reset();
534
}
535
}
536
}
537
538
// Creates the persistent descriptor heap managers (CBV/SRV/UAV, RTV, DSV, sampler), allocates the null SRV
// and UAV descriptors, and points every texture sampler slot at the default nearest sampler.
// Returns false on the first failure; `error` is set by the failing step.
bool D3D12Device::CreateDescriptorHeaps(Error* error)
{
  ID3D12Device* const device = m_device.Get();

  if (!m_descriptor_heap_manager.Create(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, MAX_PERSISTENT_DESCRIPTORS,
                                        false, error))
  {
    return false;
  }
  if (!m_rtv_heap_manager.Create(device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, MAX_PERSISTENT_RTVS, false, error))
    return false;
  if (!m_dsv_heap_manager.Create(device, D3D12_DESCRIPTOR_HEAP_TYPE_DSV, MAX_PERSISTENT_DSVS, false, error))
    return false;
  if (!m_sampler_heap_manager.Create(device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, MAX_PERSISTENT_SAMPLERS, false,
                                     error))
  {
    return false;
  }

  // Allocate null SRV descriptor for unbound textures.
  static constexpr D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc = {
    DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_SRV_DIMENSION_TEXTURE2D, D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, {}};
  if (!m_descriptor_heap_manager.Allocate(&m_null_srv_descriptor))
  {
    Error::SetStringView(error, "Failed to allocate null SRV descriptor");
    return false;
  }
  m_device->CreateShaderResourceView(nullptr, &null_srv_desc, m_null_srv_descriptor.cpu_handle);

  // Same for UAVs.
  static constexpr D3D12_UNORDERED_ACCESS_VIEW_DESC null_uav_desc = {
    DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_UAV_DIMENSION_TEXTURE2D, {}};
  if (!m_descriptor_heap_manager.Allocate(&m_null_uav_descriptor))
  {
    Error::SetStringView(error, "Failed to allocate null UAV descriptor");
    return false;
  }
  m_device->CreateUnorderedAccessView(nullptr, nullptr, &null_uav_desc, m_null_uav_descriptor.cpu_handle);

  // Same for samplers: every slot starts out bound to the nearest-filter sampler.
  GPUSampler* const default_sampler = GetSampler(GPUSampler::GetNearestConfig(), error);
  if (!default_sampler) [[unlikely]]
    return false;

  D3D12Sampler* const nearest_sampler = static_cast<D3D12Sampler*>(default_sampler);
  for (u32 slot = 0; slot < MAX_TEXTURE_SAMPLERS; slot++)
    m_current_samplers[slot] = nearest_sampler->GetDescriptor();

  return true;
}
578
579
void D3D12Device::DestroyDescriptorHeaps()
580
{
581
if (m_null_uav_descriptor)
582
m_descriptor_heap_manager.Free(&m_null_uav_descriptor);
583
if (m_null_srv_descriptor)
584
m_descriptor_heap_manager.Free(&m_null_srv_descriptor);
585
m_sampler_heap_manager.Destroy();
586
m_dsv_heap_manager.Destroy();
587
m_rtv_heap_manager.Destroy();
588
m_descriptor_heap_manager.Destroy();
589
}
590
591
// Returns the "init" command list (index 0) of the current slot, resetting it for recording the first
// time it is requested after the slot became current. SubmitCommandList() executes it ahead of the main list.
ID3D12GraphicsCommandList4* D3D12Device::GetInitCommandList()
{
  CommandList& cl = m_command_lists[m_current_command_list];
  ID3D12GraphicsCommandList4* const init_list = cl.command_lists[0].Get();

  // Already open for recording in this slot.
  if (cl.init_list_used)
    return init_list;

  HRESULT hr = cl.command_allocators[0]->Reset();
  AssertMsg(SUCCEEDED(hr), "Reset init command allocator failed");

  hr = init_list->Reset(cl.command_allocators[0].Get(), nullptr);
  AssertMsg(SUCCEEDED(hr), "Reset init command list failed");

  cl.init_list_used = true;
  return init_list;
}
606
607
void D3D12Device::SubmitCommandList(bool wait_for_completion)
608
{
609
DebugAssert(!InRenderPass());
610
if (m_device_was_lost) [[unlikely]]
611
return;
612
613
CommandList& res = m_command_lists[m_current_command_list];
614
HRESULT hr;
615
616
if (res.has_timestamp_query)
617
{
618
// write the timestamp back at the end of the cmdlist
619
res.command_lists[1]->EndQuery(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP,
620
(m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST) + 1);
621
res.command_lists[1]->ResolveQueryData(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP,
622
m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST,
623
NUM_TIMESTAMP_QUERIES_PER_CMDLIST, m_timestamp_query_buffer.Get(),
624
m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST));
625
}
626
627
// TODO: error handling
628
if (res.init_list_used)
629
{
630
hr = res.command_lists[0]->Close();
631
if (FAILED(hr)) [[unlikely]]
632
{
633
ERROR_LOG("Closing init command list failed with HRESULT {:08X}", static_cast<unsigned>(hr));
634
m_device_was_lost = true;
635
return;
636
}
637
}
638
639
// Close and queue command list.
640
hr = res.command_lists[1]->Close();
641
if (FAILED(hr)) [[unlikely]]
642
{
643
ERROR_LOG("Closing main command list failed with HRESULT {:08X}", static_cast<unsigned>(hr));
644
m_device_was_lost = true;
645
return;
646
}
647
648
if (res.init_list_used)
649
{
650
const std::array<ID3D12CommandList*, 2> execute_lists{res.command_lists[0].Get(), res.command_lists[1].Get()};
651
m_command_queue->ExecuteCommandLists(static_cast<UINT>(execute_lists.size()), execute_lists.data());
652
}
653
else
654
{
655
const std::array<ID3D12CommandList*, 1> execute_lists{res.command_lists[1].Get()};
656
m_command_queue->ExecuteCommandLists(static_cast<UINT>(execute_lists.size()), execute_lists.data());
657
}
658
659
// Update fence when GPU has completed.
660
hr = m_command_queue->Signal(m_fence.Get(), res.fence_counter);
661
if (FAILED(hr))
662
{
663
ERROR_LOG("Signal command queue fence failed with HRESULT {:08X}", static_cast<unsigned>(hr));
664
m_device_was_lost = true;
665
return;
666
}
667
668
MoveToNextCommandList();
669
670
if (wait_for_completion)
671
WaitForFence(res.fence_counter);
672
}
673
674
void D3D12Device::SubmitCommandList(bool wait_for_completion, const std::string_view reason)
675
{
676
WARNING_LOG("Executing command buffer due to '{}'", reason);
677
SubmitCommandList(wait_for_completion);
678
}
679
680
void D3D12Device::SubmitCommandListAndRestartRenderPass(const std::string_view reason)
681
{
682
if (InRenderPass())
683
EndRenderPass();
684
685
D3D12Pipeline* pl = m_current_pipeline;
686
SubmitCommandList(false, reason);
687
688
SetPipeline(pl);
689
BeginRenderPass();
690
}
691
692
// Blocks until the GPU fence has reached `fence`, then releases deferred objects up to the completed value.
// Returns immediately when the device was lost or the cached completed value already covers `fence`.
void D3D12Device::WaitForFence(u64 fence)
{
  if (m_device_was_lost) [[unlikely]]
    return;

  // Fast path: the cached value tells us this fence has already passed.
  const bool already_completed = (m_completed_fence_value >= fence);
  if (already_completed)
    return;

  // Try non-blocking check.
  m_completed_fence_value = m_fence->GetCompletedValue();
  if (m_completed_fence_value < fence)
  {
    // Fall back to event.
    const HRESULT hr = m_fence->SetEventOnCompletion(fence, m_fence_event);
    AssertMsg(SUCCEEDED(hr), "Set fence event on completion");
    WaitForSingleObject(m_fence_event, INFINITE);

    // Refresh the cached value now that the wait has returned.
    m_completed_fence_value = m_fence->GetCompletedValue();
  }

  // Release resources for as many command lists which have completed.
  DestroyDeferredObjects(m_completed_fence_value);
}
714
715
void D3D12Device::WaitForAllFences()
716
{
717
u32 index = (m_current_command_list + 1) % NUM_COMMAND_LISTS;
718
for (u32 i = 0; i < (NUM_COMMAND_LISTS - 1); i++)
719
{
720
WaitForFence(m_command_lists[index].fence_counter);
721
index = (index + 1) % NUM_COMMAND_LISTS;
722
}
723
}
724
725
void D3D12Device::FlushCommands()
726
{
727
if (InRenderPass())
728
EndRenderPass();
729
730
SubmitCommandList(false);
731
TrimTexturePool();
732
}
733
734
void D3D12Device::WaitForGPUIdle()
735
{
736
if (InRenderPass())
737
EndRenderPass();
738
739
SubmitCommandList(true);
740
}
741
742
bool D3D12Device::CreateTimestampQuery()
743
{
744
constexpr u32 QUERY_COUNT = NUM_TIMESTAMP_QUERIES_PER_CMDLIST * NUM_COMMAND_LISTS;
745
constexpr u32 BUFFER_SIZE = sizeof(u64) * QUERY_COUNT;
746
747
const D3D12_QUERY_HEAP_DESC desc = {D3D12_QUERY_HEAP_TYPE_TIMESTAMP, QUERY_COUNT, 0u};
748
HRESULT hr = m_device->CreateQueryHeap(&desc, IID_PPV_ARGS(m_timestamp_query_heap.GetAddressOf()));
749
if (FAILED(hr))
750
{
751
ERROR_LOG("CreateQueryHeap() for timestamp failed with {:08X}", static_cast<unsigned>(hr));
752
m_features.gpu_timing = false;
753
return false;
754
}
755
756
const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_READBACK,
757
D3D12_HEAP_FLAG_NONE, nullptr, nullptr};
758
const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER,
759
0,
760
BUFFER_SIZE,
761
1,
762
1,
763
1,
764
DXGI_FORMAT_UNKNOWN,
765
{1, 0},
766
D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
767
D3D12_RESOURCE_FLAG_NONE};
768
hr = m_allocator->CreateResource(&allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
769
m_timestamp_query_allocation.GetAddressOf(),
770
IID_PPV_ARGS(m_timestamp_query_buffer.GetAddressOf()));
771
if (FAILED(hr))
772
{
773
ERROR_LOG("CreateResource() for timestamp failed with {:08X}", static_cast<unsigned>(hr));
774
m_features.gpu_timing = false;
775
return false;
776
}
777
778
u64 frequency;
779
hr = m_command_queue->GetTimestampFrequency(&frequency);
780
if (FAILED(hr))
781
{
782
ERROR_LOG("GetTimestampFrequency() failed: {:08X}", static_cast<unsigned>(hr));
783
m_features.gpu_timing = false;
784
return false;
785
}
786
787
m_timestamp_frequency = static_cast<double>(frequency) / 1000.0;
788
return true;
789
}
790
791
void D3D12Device::DestroyTimestampQuery()
792
{
793
m_timestamp_query_buffer.Reset();
794
m_timestamp_query_allocation.Reset();
795
m_timestamp_query_heap.Reset();
796
}
797
798
float D3D12Device::GetAndResetAccumulatedGPUTime()
799
{
800
const float time = m_accumulated_gpu_time;
801
m_accumulated_gpu_time = 0.0f;
802
return time;
803
}
804
805
bool D3D12Device::SetGPUTimingEnabled(bool enabled)
806
{
807
m_gpu_timing_enabled = enabled && m_features.gpu_timing;
808
return (enabled == m_gpu_timing_enabled);
809
}
810
811
// Queues `resource` for release, tagged with the current fence value. The reference is detached from the
// ComPtr here and released later by DestroyDeferredObjects().
void D3D12Device::DeferObjectDestruction(ComPtr<ID3D12Object> resource)
{
  DebugAssert(resource);

  // No allocation accompanies a bare object, hence the null first element.
  ID3D12Object* const detached_object = resource.Detach();
  m_cleanup_resources.emplace_back(GetCurrentFenceValue(),
                                   std::pair<D3D12MA::Allocation*, ID3D12Object*>(nullptr, detached_object));
}
817
818
// Queues a resource together with its allocation for release, tagged with the current fence value.
// Both references are detached from their ComPtrs here and released later by DestroyDeferredObjects().
void D3D12Device::DeferResourceDestruction(ComPtr<D3D12MA::Allocation> allocation, ComPtr<ID3D12Resource> resource)
{
  DebugAssert(allocation && resource);

  D3D12MA::Allocation* const detached_allocation = allocation.Detach();
  ID3D12Object* const detached_resource = resource.Detach();
  m_cleanup_resources.emplace_back(
    GetCurrentFenceValue(),
    std::pair<D3D12MA::Allocation*, ID3D12Object*>(detached_allocation, detached_resource));
}
824
825
// Queues a copy of `descriptor` to be freed back to `heap`, tagged with the current fence value, and
// clears the caller's handle.
void D3D12Device::DeferDescriptorDestruction(D3D12DescriptorHeapManager& heap, D3D12DescriptorHandle* descriptor)
{
  DebugAssert(descriptor->index != D3D12DescriptorHandle::INVALID_INDEX);

  // The handle is stored by value, so clearing the caller's copy afterwards is safe.
  using PendingDescriptor = std::pair<D3D12DescriptorHeapManager*, D3D12DescriptorHandle>;
  m_cleanup_descriptors.emplace_back(GetCurrentFenceValue(), PendingDescriptor(&heap, *descriptor));

  descriptor->Clear();
}
832
833
// Releases every deferred descriptor and resource whose recorded fence value is <= `fence_value`.
// Both queues are consumed from the front and processing stops at the first entry that is still too new.
void D3D12Device::DestroyDeferredObjects(u64 fence_value)
{
  while (!m_cleanup_descriptors.empty() && m_cleanup_descriptors.front().first <= fence_value)
  {
    auto& [heap, handle] = m_cleanup_descriptors.front().second;
    heap->Free(handle.index);
    m_cleanup_descriptors.pop_front();
  }

  while (!m_cleanup_resources.empty() && m_cleanup_resources.front().first <= fence_value)
  {
    auto& [allocation, object] = m_cleanup_resources.front().second;

    // The object is released first, then its allocation; bare objects have no allocation.
    object->Release();
    if (allocation)
      allocation->Release();

    m_cleanup_resources.pop_front();
  }
}
857
858
// Stores the window and presentation settings in the base class and, when a fullscreen mode is requested,
// resolves it immediately. The result of that resolution is not checked here.
D3D12SwapChain::D3D12SwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode, bool allow_present_throttle,
                               const GPUDevice::ExclusiveFullscreenMode* fullscreen_mode)
  : GPUSwapChain(wi, vsync_mode, allow_present_throttle)
{
  // A null mode means exclusive fullscreen was not requested.
  if (fullscreen_mode != nullptr)
    InitializeExclusiveFullscreenMode(fullscreen_mode);
}
865
866
// Destroys the render target views first, then the swap chain itself.
D3D12SwapChain::~D3D12SwapChain()
{
  DestroyRTVs();

  DestroySwapChain();
}
871
872
// Resolves the requested exclusive fullscreen mode against the device's adapter and stores the result in
// m_fullscreen_mode, with the chosen output in m_fullscreen_output.
// Returns true when a mode description was obtained.
bool D3D12SwapChain::InitializeExclusiveFullscreenMode(const GPUDevice::ExclusiveFullscreenMode* mode)
{
  // The window's client rectangle is passed along with the requested mode.
  RECT client_rc{};
  const HWND window_hwnd = reinterpret_cast<HWND>(m_window_info.window_handle);
  GetClientRect(window_hwnd, &client_rc);

  const D3DCommon::DXGIFormatMapping& format_mapping = D3DCommon::GetFormatMapping(s_swap_chain_format);

  m_fullscreen_mode = D3DCommon::GetRequestedExclusiveFullscreenModeDesc(
    D3D12Device::GetInstance().GetAdapter(), client_rc, mode, format_mapping.resource_format,
    m_fullscreen_output.GetAddressOf());

  return m_fullscreen_mode.has_value();
}
884
885
// Returns the number of swap chain buffers to request for the given vsync mode.
u32 D3D12SwapChain::GetNewBufferCount(GPUVSyncMode vsync_mode)
{
  // Mailbox is triple buffered, so it needs three buffers.
  if (vsync_mode == GPUVSyncMode::Mailbox)
    return 3;

  // Vsync off and blocking vsync both only need two.
  return 2;
}
891
892
// Creates the DXGI swap chain for the window. An exclusive fullscreen swap chain is attempted first
// when a fullscreen mode is set; if that fails, the fullscreen state is dropped and a windowed swap
// chain is created instead. Returns false (with error set) only if the windowed creation fails.
bool D3D12SwapChain::CreateSwapChain(D3D12Device& dev, Error* error)
{
  const DXGI_FORMAT buffer_format = D3DCommon::GetFormatMapping(s_swap_chain_format).resource_format;
  const HWND hwnd = reinterpret_cast<HWND>(m_window_info.window_handle);

  RECT client_rect{};
  GetClientRect(hwnd, &client_rect);

  // Using mailbox-style no-allow-tearing causes tearing in exclusive fullscreen.
  if (m_vsync_mode == GPUVSyncMode::Mailbox && IsExclusiveFullscreen())
  {
    WARNING_LOG("Using FIFO instead of Mailbox vsync due to exclusive fullscreen.");
    m_vsync_mode = GPUVSyncMode::FIFO;
  }

  // Description shared by both paths; the fullscreen path works on a modified copy.
  DXGI_SWAP_CHAIN_DESC1 windowed_desc = {};
  windowed_desc.Width = static_cast<u32>(client_rect.right - client_rect.left);
  windowed_desc.Height = static_cast<u32>(client_rect.bottom - client_rect.top);
  windowed_desc.Format = buffer_format;
  windowed_desc.SampleDesc.Count = 1;
  windowed_desc.BufferCount = GetNewBufferCount(m_vsync_mode);
  windowed_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
  windowed_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;

  if (IsExclusiveFullscreen())
  {
    DXGI_SWAP_CHAIN_DESC1 fullscreen_sc_desc = windowed_desc;
    fullscreen_sc_desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH;
    fullscreen_sc_desc.Width = m_fullscreen_mode->Width;
    fullscreen_sc_desc.Height = m_fullscreen_mode->Height;

    DXGI_SWAP_CHAIN_FULLSCREEN_DESC fullscreen_desc = {};
    fullscreen_desc.RefreshRate = m_fullscreen_mode->RefreshRate;
    fullscreen_desc.ScanlineOrdering = m_fullscreen_mode->ScanlineOrdering;
    fullscreen_desc.Scaling = m_fullscreen_mode->Scaling;
    fullscreen_desc.Windowed = FALSE;

    VERBOSE_LOG("Creating a {}x{} exclusive fullscreen swap chain", fullscreen_sc_desc.Width,
                fullscreen_sc_desc.Height);
    const HRESULT fs_hr = dev.GetDXGIFactory()->CreateSwapChainForHwnd(
      dev.GetCommandQueue(), hwnd, &fullscreen_sc_desc, &fullscreen_desc, m_fullscreen_output.Get(),
      m_swap_chain.ReleaseAndGetAddressOf());
    if (FAILED(fs_hr))
    {
      // Clearing the mode makes IsExclusiveFullscreen() false, so the windowed path below runs.
      WARNING_LOG("Failed to create fullscreen swap chain, trying windowed.");
      m_fullscreen_output.Reset();
      m_fullscreen_mode.reset();
    }
  }

  if (!IsExclusiveFullscreen())
  {
    VERBOSE_LOG("Creating a {}x{} windowed swap chain", windowed_desc.Width, windowed_desc.Height);

    m_using_allow_tearing = D3DCommon::SupportsAllowTearing(dev.GetDXGIFactory());
    if (m_using_allow_tearing)
      windowed_desc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;

    const HRESULT windowed_hr = dev.GetDXGIFactory()->CreateSwapChainForHwnd(
      dev.GetCommandQueue(), hwnd, &windowed_desc, nullptr, nullptr, m_swap_chain.ReleaseAndGetAddressOf());
    if (FAILED(windowed_hr))
    {
      Error::SetHResult(error, "CreateSwapChainForHwnd() failed: ", windowed_hr);
      return false;
    }
  }

  // Failure here is only logged; the swap chain itself is still usable.
  if (FAILED(dev.GetDXGIFactory()->MakeWindowAssociation(hwnd, DXGI_MWA_NO_WINDOW_CHANGES)))
    WARNING_LOG("MakeWindowAssociation() to disable ALT+ENTER failed");

  return true;
}
963
964
// Fetches every buffer of the swap chain, allocates an RTV descriptor for each one and creates the
// view, then refreshes the surface size/format in m_window_info. While the swap chain reports the
// fullscreen state, the surface refresh rate is updated as well.
// Returns false with error set if the description, a buffer or a descriptor cannot be obtained; any
// views created up to that point are destroyed again.
bool D3D12SwapChain::CreateRTV(D3D12Device& dev, Error* error)
{
  DXGI_SWAP_CHAIN_DESC swap_chain_desc;
  HRESULT hr = m_swap_chain->GetDesc(&swap_chain_desc);
  if (FAILED(hr))
  {
    Error::SetHResult(error, "GetDesc() for swap chain failed: ", hr);
    return false;
  }

  const D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = {swap_chain_desc.BufferDesc.Format, D3D12_RTV_DIMENSION_TEXTURE2D, {}};

  for (u32 i = 0; i < swap_chain_desc.BufferCount; i++)
  {
    ComPtr<ID3D12Resource> backbuffer;
    hr = m_swap_chain->GetBuffer(i, IID_PPV_ARGS(backbuffer.GetAddressOf()));
    if (FAILED(hr))
    {
      Error::SetHResult(error, "GetBuffer for RTV failed: ", hr);
      DestroyRTVs();
      return false;
    }

    D3D12::SetObjectName(backbuffer.Get(), TinyString::from_format("Swap Chain Buffer #{}", i));

    D3D12DescriptorHandle rtv;
    if (!dev.GetRTVHeapManager().Allocate(&rtv))
    {
      Error::SetStringView(error, "Failed to allocate RTV handle.");
      DestroyRTVs();
      return false;
    }

    dev.GetDevice()->CreateRenderTargetView(backbuffer.Get(), &rtv_desc, rtv);
    m_swap_chain_buffers.emplace_back(std::move(backbuffer), rtv);
  }

  m_window_info.surface_width = static_cast<u16>(swap_chain_desc.BufferDesc.Width);
  m_window_info.surface_height = static_cast<u16>(swap_chain_desc.BufferDesc.Height);
  m_window_info.surface_format = s_swap_chain_format;
  VERBOSE_LOG("Swap chain buffer size: {}x{}", m_window_info.surface_width, m_window_info.surface_height);

  // The description fetched above already carries the refresh rate, so it is not queried again.
  // DXGI allows a 0/0 rational, so a zero denominator is skipped rather than dividing by it; the
  // previously stored refresh rate is left untouched in that case.
  BOOL fullscreen = FALSE;
  const DXGI_RATIONAL& refresh_rate = swap_chain_desc.BufferDesc.RefreshRate;
  if (SUCCEEDED(m_swap_chain->GetFullscreenState(&fullscreen, nullptr)) && fullscreen &&
      refresh_rate.Denominator != 0)
  {
    m_window_info.surface_refresh_rate =
      static_cast<float>(refresh_rate.Numerator) / static_cast<float>(refresh_rate.Denominator);
  }

  m_current_swap_chain_buffer = 0;
  return true;
}
1018
1019
void D3D12SwapChain::DestroyRTVs()
1020
{
1021
if (m_swap_chain_buffers.empty())
1022
return;
1023
1024
D3D12Device& dev = D3D12Device::GetInstance();
1025
1026
// Runtime gets cranky if we don't submit the current buffer...
1027
if (dev.InRenderPass())
1028
dev.EndRenderPass();
1029
dev.SubmitCommandList(true);
1030
1031
for (auto it = m_swap_chain_buffers.rbegin(); it != m_swap_chain_buffers.rend(); ++it)
1032
{
1033
dev.GetRTVHeapManager().Free(it->second.index);
1034
it->first.Reset();
1035
}
1036
m_swap_chain_buffers.clear();
1037
m_current_swap_chain_buffer = 0;
1038
}
1039
1040
void D3D12SwapChain::DestroySwapChain()
1041
{
1042
if (!m_swap_chain)
1043
return;
1044
1045
// switch out of fullscreen before destroying
1046
BOOL is_fullscreen;
1047
if (SUCCEEDED(m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr)) && is_fullscreen)
1048
m_swap_chain->SetFullscreenState(FALSE, nullptr);
1049
1050
m_swap_chain.Reset();
1051
}
1052
1053
// Applies a new vsync mode and present throttle setting. The swap chain and its views are only
// recreated when the new mode needs a different number of buffers.
// Returns false with error set if that recreation fails.
bool D3D12SwapChain::SetVSyncMode(GPUVSyncMode mode, bool allow_present_throttle, Error* error)
{
  m_allow_present_throttle = allow_present_throttle;

  // Using mailbox-style no-allow-tearing causes tearing in exclusive fullscreen.
  if (mode == GPUVSyncMode::Mailbox && IsExclusiveFullscreen())
  {
    WARNING_LOG("Using FIFO instead of Mailbox vsync due to exclusive fullscreen.");
    mode = GPUVSyncMode::FIFO;
  }

  if (mode == m_vsync_mode)
    return true;

  // Compare buffer counts before overwriting the stored mode.
  const bool buffer_count_changed = (GetNewBufferCount(m_vsync_mode) != GetNewBufferCount(mode));
  m_vsync_mode = mode;
  if (!buffer_count_changed)
    return true;

  // Buffer count change => needs recreation.
  DestroyRTVs();
  DestroySwapChain();

  D3D12Device& device = D3D12Device::GetInstance();
  if (!CreateSwapChain(device, error))
    return false;

  return CreateRTV(device, error);
}
1080
1081
bool D3D12SwapChain::IsExclusiveFullscreen() const
1082
{
1083
return m_fullscreen_mode.has_value();
1084
}
1085
1086
// Stores the new surface scale and, when the requested size differs from the recorded surface
// size, resizes the swap chain buffers and recreates the render target views.
// A failing ResizeBuffers() call is only logged; the result of CreateRTV() is what gets returned.
bool D3D12SwapChain::ResizeBuffers(u32 new_width, u32 new_height, float new_scale, Error* error)
{
  m_window_info.surface_scale = new_scale;

  const bool size_changed =
    (m_window_info.surface_width != new_width) || (m_window_info.surface_height != new_height);
  if (!size_changed)
    return true;

  // Views must go before the buffers they reference are resized.
  DestroyRTVs();

  // Zero width/height/count and DXGI_FORMAT_UNKNOWN are passed through as in the original call.
  const UINT resize_flags = m_using_allow_tearing ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0;
  const HRESULT resize_hr = m_swap_chain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN, resize_flags);
  if (FAILED(resize_hr))
    ERROR_LOG("ResizeBuffers() failed: 0x{:08X}", static_cast<unsigned>(resize_hr));

  return CreateRTV(D3D12Device::GetInstance(), error);
}
1101
1102
std::unique_ptr<GPUSwapChain> D3D12Device::CreateSwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode,
1103
bool allow_present_throttle,
1104
const ExclusiveFullscreenMode* exclusive_fullscreen_mode,
1105
std::optional<bool> exclusive_fullscreen_control,
1106
Error* error)
1107
{
1108
std::unique_ptr<D3D12SwapChain> ret;
1109
if (wi.type != WindowInfo::Type::Win32)
1110
{
1111
Error::SetStringView(error, "Cannot create a swap chain on non-win32 window.");
1112
return ret;
1113
}
1114
1115
ret = std::make_unique<D3D12SwapChain>(wi, vsync_mode, allow_present_throttle, exclusive_fullscreen_mode);
1116
if (ret->CreateSwapChain(*this, error) && ret->CreateRTV(*this, error))
1117
{
1118
// Render a frame as soon as possible to clear out whatever was previously being displayed.
1119
RenderBlankFrame(ret.get());
1120
}
1121
else
1122
{
1123
ret.reset();
1124
}
1125
1126
return ret;
1127
}
1128
1129
// Clears the swap chain's current buffer to opaque black, submits the command list, presents, and
// advances to the next buffer. Any open render pass is ended first.
void D3D12Device::RenderBlankFrame(D3D12SwapChain* swap_chain)
{
  if (InRenderPass())
    EndRenderPass();

  const D3D12SwapChain::BufferPair& target = swap_chain->GetCurrentBuffer();
  ID3D12Resource* const backbuffer = target.first.Get();
  ID3D12GraphicsCommandList4* cmdlist = GetCommandList();

  // COMMON -> RENDER_TARGET for the clear, then RENDER_TARGET -> PRESENT.
  D3D12Texture::TransitionSubresourceToState(cmdlist, backbuffer, 0, D3D12_RESOURCE_STATE_COMMON,
                                             D3D12_RESOURCE_STATE_RENDER_TARGET);
  cmdlist->ClearRenderTargetView(target.second, GSVector4::cxpr(0.0f, 0.0f, 0.0f, 1.0f).F32, 0, nullptr);
  D3D12Texture::TransitionSubresourceToState(cmdlist, backbuffer, 0, D3D12_RESOURCE_STATE_RENDER_TARGET,
                                             D3D12_RESOURCE_STATE_PRESENT);
  SubmitCommandList(false);

  const UINT present_flags = swap_chain->IsUsingAllowTearing() ? DXGI_PRESENT_ALLOW_TEARING : 0;
  swap_chain->GetSwapChain()->Present(0, present_flags);
  swap_chain->AdvanceBuffer();
}
1145
1146
// Returns true when the format maps to a known DXGI format and the device reports both 2D texture
// and shader sampling support for it.
bool D3D12Device::SupportsTextureFormat(GPUTexture::Format format) const
{
  constexpr u32 required_support = D3D12_FORMAT_SUPPORT1_TEXTURE2D | D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE;

  const DXGI_FORMAT dxgi_format = D3DCommon::GetFormatMapping(format).resource_format;
  if (dxgi_format == DXGI_FORMAT_UNKNOWN)
    return false;

  D3D12_FEATURE_DATA_FORMAT_SUPPORT query = {dxgi_format, {}, {}};
  if (FAILED(m_device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &query, sizeof(query))))
    return false;

  // Every required bit must be present, not just one of them.
  return (query.Support1 & required_support) == required_support;
}
1158
1159
std::string D3D12Device::GetDriverInfo() const
1160
{
1161
std::string ret = fmt::format("{} (Shader Model {})\n", D3DCommon::GetFeatureLevelString(m_render_api_version),
1162
D3DCommon::GetShaderModelForFeatureLevelNumber(m_render_api_version));
1163
1164
DXGI_ADAPTER_DESC desc;
1165
if (m_adapter && SUCCEEDED(m_adapter->GetDesc(&desc)))
1166
{
1167
fmt::format_to(std::back_inserter(ret), "VID: 0x{:04X} PID: 0x{:04X}\n", desc.VendorId, desc.DeviceId);
1168
ret += StringUtil::WideStringToUTF8String(desc.Description);
1169
ret += "\n";
1170
1171
const std::string driver_version(D3DCommon::GetDriverVersionFromLUID(desc.AdapterLuid));
1172
if (!driver_version.empty())
1173
{
1174
ret += "Driver Version: ";
1175
ret += driver_version;
1176
}
1177
}
1178
1179
return ret;
1180
}
1181
1182
// Starts rendering to the swap chain's current buffer: ends any open render pass, checks for a lost
// device or lost exclusive fullscreen, transitions the buffer to a render target and opens a render
// pass that clears it to clear_color.
// Returns OK once the pass is open, otherwise the reason presentation cannot start.
GPUDevice::PresentResult D3D12Device::BeginPresent(GPUSwapChain* swap_chain, u32 clear_color)
{
  D3D12SwapChain* const SC = static_cast<D3D12SwapChain*>(swap_chain);
  if (InRenderPass())
    EndRenderPass();

  if (m_device_was_lost) [[unlikely]]
    return PresentResult::DeviceLost;

  // TODO: Check if the device was lost.

  // Check if we lost exclusive fullscreen. If so, notify the host, so it can switch to windowed mode.
  // This might get called repeatedly if it takes a while to switch back, that's the host's problem.
  if (SC->IsExclusiveFullscreen())
  {
    BOOL is_fullscreen = FALSE;
    const bool still_fullscreen =
      SUCCEEDED(SC->GetSwapChain()->GetFullscreenState(&is_fullscreen, nullptr)) && is_fullscreen;
    if (!still_fullscreen)
    {
      FlushCommands();
      TrimTexturePool();
      return PresentResult::ExclusiveFullscreenLost;
    }
  }

  m_current_swap_chain = SC;

  const D3D12SwapChain::BufferPair& target = SC->GetCurrentBuffer();
  ID3D12GraphicsCommandList4* const cmdlist = GetCommandList();

  D3D12Texture::TransitionSubresourceToState(cmdlist, target.first.Get(), 0, D3D12_RESOURCE_STATE_COMMON,
                                             D3D12_RESOURCE_STATE_RENDER_TARGET);

  // All textures should be in shader read only optimal already, but just in case..
  const u32 active_texture_count = GetActiveTexturesForLayout(m_current_pipeline_layout);
  for (u32 slot = 0; slot < active_texture_count; slot++)
  {
    if (!m_current_textures[slot])
      continue;

    m_current_textures[slot]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
  }

  D3D12_RENDER_PASS_RENDER_TARGET_DESC rt_desc = {target.second,
                                                  {D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR, {}},
                                                  {D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, {}}};
  GSVector4::store<false>(rt_desc.BeginningAccess.Clear.ClearValue.Color, GSVector4::unorm8(clear_color));
  cmdlist->BeginRenderPass(1, &rt_desc, nullptr, D3D12_RENDER_PASS_FLAG_NONE);

  // No tracked render targets are bound while presenting.
  std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
  m_num_current_render_targets = 0;

  // Query the ROV state before the render pass flags are reset below.
  const bool was_using_rov = IsUsingROVRootSignature();
  m_dirty_flags &= ~DIRTY_FLAG_RT_UAVS;
  if (was_using_rov)
    m_dirty_flags |= DIRTY_FLAG_PIPELINE_LAYOUT;

  m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;
  m_current_depth_target = nullptr;
  m_in_render_pass = true;
  s_stats.num_render_passes++;

  // Clear pipeline, it's likely incompatible.
  m_current_pipeline = nullptr;

  return PresentResult::OK;
}
1240
1241
// Finishes the render pass opened by BeginPresent(), transitions the presented buffer to the
// PRESENT state, submits the command list and trims the texture pool. Unless explicit_present is
// set, the swap chain is presented immediately.
void D3D12Device::EndPresent(GPUSwapChain* swap_chain, bool explicit_present, u64 present_time)
{
  D3D12SwapChain* const SC = static_cast<D3D12SwapChain*>(swap_chain);
  DebugAssert(present_time == 0);
  DebugAssert(InRenderPass() && m_num_current_render_targets == 0 && !m_current_depth_target);
  EndRenderPass();

  DebugAssert(SC == m_current_swap_chain);
  m_current_swap_chain = nullptr;

  // Grab the buffer that was rendered to before the swap chain moves on to the next one.
  const D3D12SwapChain::BufferPair& presented = SC->GetCurrentBuffer();
  SC->AdvanceBuffer();

  ID3D12GraphicsCommandList* cmdlist = GetCommandList();
  D3D12Texture::TransitionSubresourceToState(cmdlist, presented.first.Get(), 0, D3D12_RESOURCE_STATE_RENDER_TARGET,
                                             D3D12_RESOURCE_STATE_PRESENT);

  SubmitCommandList(false);
  TrimTexturePool();

  if (explicit_present)
    return;

  SubmitPresent(swap_chain);
}
1264
1265
// Presents the swap chain unless the device was lost. The sync interval is 1 for FIFO and 0
// otherwise; tearing is only requested with vsync disabled on a swap chain that allows it.
void D3D12Device::SubmitPresent(GPUSwapChain* swap_chain)
{
  D3D12SwapChain* const SC = static_cast<D3D12SwapChain*>(swap_chain);
  if (m_device_was_lost) [[unlikely]]
    return;

  const UINT sync_interval = (SC->GetVSyncMode() == GPUVSyncMode::FIFO) ? 1u : 0u;

  UINT present_flags = 0;
  if (SC->GetVSyncMode() == GPUVSyncMode::Disabled && SC->IsUsingAllowTearing())
    present_flags = DXGI_PRESENT_ALLOW_TEARING;

  SC->GetSwapChain()->Present(sync_interval, present_flags);
}
1276
1277
#ifdef ENABLE_GPU_OBJECT_NAMES
1278
1279
// Evaluates a + b * cos(6.28318 * (c * phase + d)) per colour channel, scales each result to the
// 0..255 range with clamping, and packs the three channels with PIX_COLOR.
static UINT64 Palette(float phase, const std::array<float, 3>& a, const std::array<float, 3>& b,
                      const std::array<float, 3>& c, const std::array<float, 3>& d)
{
  std::array<BYTE, 3> rgb;
  for (size_t channel = 0; channel < rgb.size(); channel++)
  {
    const float value = a[channel] + b[channel] * std::cos(6.28318f * (c[channel] * phase + d[channel]));
    rgb[channel] = static_cast<BYTE>(std::clamp(value * 255.0f, 0.0f, 255.0f));
  }

  return PIX_COLOR(rgb[0], rgb[1], rgb[2]);
}
1291
1292
void D3D12Device::PushDebugGroup(const char* name)
1293
{
1294
if (!m_debug_device)
1295
return;
1296
1297
const UINT64 color = Palette(static_cast<float>(++s_debug_scope_depth), {0.5f, 0.5f, 0.5f}, {0.5f, 0.5f, 0.5f},
1298
{1.0f, 1.0f, 0.5f}, {0.8f, 0.90f, 0.30f});
1299
PIXBeginEvent(GetCommandList(), color, "%s", name);
1300
}
1301
1302
void D3D12Device::PopDebugGroup()
1303
{
1304
if (!m_debug_device)
1305
return;
1306
1307
s_debug_scope_depth = (s_debug_scope_depth == 0) ? 0 : (s_debug_scope_depth - 1u);
1308
PIXEndEvent(GetCommandList());
1309
}
1310
1311
void D3D12Device::InsertDebugMessage(const char* msg)
1312
{
1313
if (!m_debug_device)
1314
return;
1315
1316
PIXSetMarker(GetCommandList(), PIX_COLOR(0, 0, 0), "%s", msg);
1317
}
1318
1319
#endif
1320
1321
// Fills in the API version, texture/multisample limits and the feature flags for this device.
// Fixed capabilities are set directly; optional ones honour the "Disable*" create flags and, where
// needed, are probed on the device.
void D3D12Device::SetFeatures(D3D_FEATURE_LEVEL feature_level, CreateFlags create_flags)
{
  // True when the given "Disable*" create flag was NOT requested.
  const auto not_disabled = [&](CreateFlags flag) { return !HasCreateFlag(create_flags, flag); };

  m_render_api_version = D3DCommon::GetRenderAPIVersionForFeatureLevel(feature_level);
  m_max_texture_size = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION;

  // Probe each sample count from 2 up to (but excluding) D3D12_MAX_MULTISAMPLE_SAMPLE_COUNT for
  // RGBA8, keeping the largest one that reports at least one quality level.
  m_max_multisamples = 1;
  for (u32 sample_count = 2; sample_count < D3D12_MAX_MULTISAMPLE_SAMPLE_COUNT; sample_count++)
  {
    D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS query = {DXGI_FORMAT_R8G8B8A8_UNORM,
                                                           static_cast<UINT>(sample_count),
                                                           D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE, 0u};

    const bool query_ok =
      SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &query, sizeof(query)));
    if (!query_ok || query.NumQualityLevels == 0)
      continue;

    m_max_multisamples = static_cast<u16>(sample_count);
  }

  m_features.dual_source_blend = not_disabled(CreateFlags::DisableDualSourceBlend);
  m_features.framebuffer_fetch = false;
  m_features.per_sample_shading = true;
  m_features.noperspective_interpolation = true;

  // TODO: Support with Enhanced Barriers
  // (would otherwise be: !HasCreateFlag(create_flags, CreateFlag::DisableTextureCopyToSelf))
  m_features.texture_copy_to_self = false;

  m_features.texture_buffers = not_disabled(CreateFlags::DisableTextureBuffers);
  m_features.texture_buffers_emulated_with_ssbo = false;
  m_features.feedback_loops = false;
  m_features.geometry_shaders = not_disabled(CreateFlags::DisableGeometryShaders);
  m_features.compute_shaders = not_disabled(CreateFlags::DisableComputeShaders);
  m_features.partial_msaa_resolve = true;
  m_features.memory_import = false;
  m_features.exclusive_fullscreen = true;
  m_features.explicit_present = true;
  m_features.timed_present = false;
  m_features.gpu_timing = true;
  m_features.shader_cache = true;
  m_features.pipeline_cache = true;
  m_features.prefer_unused_textures = true;

  // ROVs are only reported when not disabled and the device says it supports them.
  m_features.raster_order_views = false;
  if (not_disabled(CreateFlags::DisableRasterOrderViews))
  {
    D3D12_FEATURE_DATA_D3D12_OPTIONS options = {};
    const bool options_ok =
      SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)));
    m_features.raster_order_views = options_ok && options.ROVsSupported;
  }

  // Compressed formats are only probed when compressed textures were not disabled.
  const bool compressed_allowed = not_disabled(CreateFlags::DisableCompressedTextures);
  m_features.dxt_textures = compressed_allowed && SupportsTextureFormat(GPUTexture::Format::BC1) &&
                            SupportsTextureFormat(GPUTexture::Format::BC2) &&
                            SupportsTextureFormat(GPUTexture::Format::BC3);
  m_features.bptc_textures = compressed_allowed && SupportsTextureFormat(GPUTexture::Format::BC7);
}
1375
1376
void D3D12Device::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
1377
GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width,
1378
u32 height)
1379
{
1380
D3D12Texture* const S = static_cast<D3D12Texture*>(src);
1381
D3D12Texture* const D = static_cast<D3D12Texture*>(dst);
1382
1383
if (S->GetState() == GPUTexture::State::Cleared)
1384
{
1385
// source is cleared. if destination is a render target, we can carry the clear forward
1386
if (D->IsRenderTargetOrDepthStencil())
1387
{
1388
if (dst_level == 0 && dst_x == 0 && dst_y == 0 && width == D->GetWidth() && height == D->GetHeight())
1389
{
1390
// pass it forward if we're clearing the whole thing
1391
if (S->IsDepthStencil())
1392
D->SetClearDepth(S->GetClearDepth());
1393
else
1394
D->SetClearColor(S->GetClearColor());
1395
1396
return;
1397
}
1398
1399
if (D->GetState() == GPUTexture::State::Cleared)
1400
{
1401
// destination is cleared, if it's the same colour and rect, we can just avoid this entirely
1402
if (D->IsDepthStencil())
1403
{
1404
if (D->GetClearDepth() == S->GetClearDepth())
1405
return;
1406
}
1407
else
1408
{
1409
if (D->GetClearColor() == S->GetClearColor())
1410
return;
1411
}
1412
}
1413
}
1414
1415
// commit the clear to the source first, then do normal copy
1416
S->CommitClear();
1417
}
1418
1419
// if the destination has been cleared, and we're not overwriting the whole thing, commit the clear first
1420
// (the area outside of where we're copying to)
1421
if (D->GetState() == GPUTexture::State::Cleared &&
1422
(dst_level != 0 || dst_x != 0 || dst_y != 0 || width != D->GetWidth() || height != D->GetHeight()))
1423
{
1424
D->CommitClear();
1425
}
1426
1427
s_stats.num_copies++;
1428
1429
// *now* we can do a normal image copy.
1430
if (InRenderPass())
1431
EndRenderPass();
1432
1433
S->TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE);
1434
S->SetUseFenceValue(GetCurrentFenceValue());
1435
1436
D->TransitionToState(D3D12_RESOURCE_STATE_COPY_DEST);
1437
D->SetUseFenceValue(GetCurrentFenceValue());
1438
1439
D3D12_TEXTURE_COPY_LOCATION srcloc;
1440
srcloc.pResource = S->GetResource();
1441
srcloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
1442
srcloc.SubresourceIndex = S->CalculateSubresource(src_layer, src_level);
1443
1444
D3D12_TEXTURE_COPY_LOCATION dstloc;
1445
dstloc.pResource = D->GetResource();
1446
dstloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
1447
dstloc.SubresourceIndex = D->CalculateSubresource(dst_layer, dst_level);
1448
1449
const D3D12_BOX srcbox{static_cast<UINT>(src_x), static_cast<UINT>(src_y), 0u,
1450
static_cast<UINT>(src_x + width), static_cast<UINT>(src_y + height), 1u};
1451
GetCommandList()->CopyTextureRegion(&dstloc, dst_x, dst_y, 0, &srcloc, &srcbox);
1452
1453
D->SetState(GPUTexture::State::Dirty);
1454
}
1455
1456
void D3D12Device::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
1457
GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height)
1458
{
1459
DebugAssert((src_x + width) <= src->GetWidth());
1460
DebugAssert((src_y + height) <= src->GetHeight());
1461
DebugAssert(src->IsMultisampled());
1462
DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers());
1463
DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level));
1464
DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level));
1465
DebugAssert(!dst->IsMultisampled() && src->IsMultisampled());
1466
1467
if (InRenderPass())
1468
EndRenderPass();
1469
1470
s_stats.num_copies++;
1471
1472
D3D12Texture* D = static_cast<D3D12Texture*>(dst);
1473
D3D12Texture* S = static_cast<D3D12Texture*>(src);
1474
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
1475
const u32 DSR = D->CalculateSubresource(dst_layer, dst_level);
1476
1477
S->CommitClear(cmdlist);
1478
D->CommitClear(cmdlist);
1479
1480
S->TransitionSubresourceToState(cmdlist, 0, S->GetResourceState(), D3D12_RESOURCE_STATE_RESOLVE_SOURCE);
1481
D->TransitionSubresourceToState(cmdlist, DSR, D->GetResourceState(), D3D12_RESOURCE_STATE_RESOLVE_DEST);
1482
1483
if (src_x == 0 && src_y == 0 && width == src->GetWidth() && height == src->GetHeight() && dst_x == 0 && dst_y == 0 &&
1484
width == dst->GetMipWidth(dst_level) && height == dst->GetMipHeight(dst_level))
1485
{
1486
cmdlist->ResolveSubresource(D->GetResource(), DSR, S->GetResource(), 0, S->GetDXGIFormat());
1487
}
1488
else
1489
{
1490
D3D12_RECT src_rc{static_cast<LONG>(src_x), static_cast<LONG>(src_y), static_cast<LONG>(src_x + width),
1491
static_cast<LONG>(src_y + height)};
1492
cmdlist->ResolveSubresourceRegion(D->GetResource(), D->CalculateSubresource(dst_level, dst_layer), dst_x, dst_y,
1493
S->GetResource(), 0, &src_rc, D->GetDXGIFormat(), D3D12_RESOLVE_MODE_AVERAGE);
1494
}
1495
1496
S->TransitionSubresourceToState(cmdlist, 0, D3D12_RESOURCE_STATE_RESOLVE_SOURCE, S->GetResourceState());
1497
D->TransitionSubresourceToState(cmdlist, DSR, D3D12_RESOURCE_STATE_RESOLVE_DEST, D->GetResourceState());
1498
}
1499
1500
// Performs the base-class clear of the render target, then ends the current render pass if the
// texture is bound to it.
void D3D12Device::ClearRenderTarget(GPUTexture* t, u32 c)
{
  GPUDevice::ClearRenderTarget(t, c);

  if (!InRenderPass())
    return;

  if (IsRenderTargetBound(t))
    EndRenderPass();
}
1506
1507
// Performs the base-class clear of the depth texture, then ends the current render pass if the
// texture is its depth target.
void D3D12Device::ClearDepth(GPUTexture* t, float d)
{
  GPUDevice::ClearDepth(t, d);

  if (!InRenderPass())
    return;

  if (m_current_depth_target == t)
    EndRenderPass();
}
1513
1514
// Performs the base-class invalidation, then ends the current render pass if the texture is bound
// to it, either as the depth target or as a colour target.
void D3D12Device::InvalidateRenderTarget(GPUTexture* t)
{
  GPUDevice::InvalidateRenderTarget(t);

  if (!InRenderPass())
    return;

  bool bound_to_pass;
  if (t->IsDepthStencil())
    bound_to_pass = (m_current_depth_target == t);
  else
    bound_to_pass = IsRenderTargetBound(t);

  if (bound_to_pass)
    EndRenderPass();
}
1520
1521
// Creates the vertex, index, uniform and texture upload stream buffers, in that order.
// Stops at the first failure, logs which buffer it was, and returns false.
bool D3D12Device::CreateBuffers(Error* error)
{
  const bool vertex_ok = m_vertex_buffer.Create(VERTEX_BUFFER_SIZE, error);
  if (!vertex_ok)
  {
    ERROR_LOG("Failed to allocate vertex buffer");
    return false;
  }

  const bool index_ok = m_index_buffer.Create(INDEX_BUFFER_SIZE, error);
  if (!index_ok)
  {
    ERROR_LOG("Failed to allocate index buffer");
    return false;
  }

  // A single uniform buffer is created, sized by the vertex uniform constant.
  const bool uniform_ok = m_uniform_buffer.Create(VERTEX_UNIFORM_BUFFER_SIZE, error);
  if (!uniform_ok)
  {
    ERROR_LOG("Failed to allocate uniform buffer");
    return false;
  }

  const bool upload_ok = m_texture_upload_buffer.Create(TEXTURE_BUFFER_SIZE, error);
  if (!upload_ok)
  {
    ERROR_LOG("Failed to allocate texture upload buffer");
    return false;
  }

  return true;
}
1549
1550
void D3D12Device::DestroyBuffers()
1551
{
1552
m_texture_upload_buffer.Destroy(false);
1553
m_uniform_buffer.Destroy(false);
1554
m_index_buffer.Destroy(false);
1555
m_vertex_buffer.Destroy(false);
1556
}
1557
1558
void D3D12Device::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
1559
u32* map_base_vertex)
1560
{
1561
const u32 req_size = vertex_size * vertex_count;
1562
if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size))
1563
{
1564
SubmitCommandListAndRestartRenderPass("out of vertex space");
1565
if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size))
1566
Panic("Failed to allocate vertex space");
1567
}
1568
1569
*map_ptr = m_vertex_buffer.GetCurrentHostPointer();
1570
*map_space = m_vertex_buffer.GetCurrentSpace() / vertex_size;
1571
*map_base_vertex = m_vertex_buffer.GetCurrentOffset() / vertex_size;
1572
}
1573
1574
// Commits vertex_count vertices of vertex_size bytes to the vertex stream buffer and adds the byte
// count to the streamed-bytes statistic.
void D3D12Device::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count)
{
  const u32 bytes_written = vertex_size * vertex_count;

  s_stats.buffer_streamed += bytes_written;
  m_vertex_buffer.CommitMemory(bytes_written);
}
1580
1581
// Reserves space for index_count indices in the index stream buffer.
// If the first reservation fails, the command list is submitted and the reservation retried once; a
// second failure panics.
// Outputs the write pointer, the available space in indices, and the base index of the reservation.
void D3D12Device::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index)
{
  const u32 bytes_needed = sizeof(DrawIndex) * index_count;

  bool reserved = m_index_buffer.ReserveMemory(bytes_needed, sizeof(DrawIndex));
  if (!reserved)
  {
    SubmitCommandListAndRestartRenderPass("out of index space");
    reserved = m_index_buffer.ReserveMemory(bytes_needed, sizeof(DrawIndex));
    if (!reserved)
      Panic("Failed to allocate index space");
  }

  // Space and offset are converted from bytes to whole indices.
  *map_ptr = reinterpret_cast<DrawIndex*>(m_index_buffer.GetCurrentHostPointer());
  *map_space = m_index_buffer.GetCurrentSpace() / sizeof(DrawIndex);
  *map_base_index = m_index_buffer.GetCurrentOffset() / sizeof(DrawIndex);
}
1595
1596
// Commits used_index_count indices to the index stream buffer and adds the byte count to the
// streamed-bytes statistic.
void D3D12Device::UnmapIndexBuffer(u32 used_index_count)
{
  const u32 bytes_written = sizeof(DrawIndex) * used_index_count;

  s_stats.buffer_streamed += bytes_written;
  m_index_buffer.CommitMemory(bytes_written);
}
1602
1603
void D3D12Device::PushUniformBuffer(const void* data, u32 data_size)
1604
{
1605
static constexpr std::array<u8, static_cast<u8>(GPUPipeline::Layout::MaxCount)> push_parameters = {
1606
0, // SingleTextureAndUBO
1607
2, // SingleTextureAndPushConstants
1608
1, // SingleTextureBufferAndPushConstants
1609
0, // MultiTextureAndUBO
1610
2, // MultiTextureAndPushConstants
1611
2, // ComputeSingleTextureAndPushConstants
1612
};
1613
1614
DebugAssert(data_size < UNIFORM_PUSH_CONSTANTS_SIZE);
1615
if (m_dirty_flags & DIRTY_FLAG_PIPELINE_LAYOUT)
1616
{
1617
m_dirty_flags &= ~DIRTY_FLAG_PIPELINE_LAYOUT;
1618
UpdateRootSignature();
1619
}
1620
1621
s_stats.buffer_streamed += data_size;
1622
1623
const u32 push_param =
1624
push_parameters[static_cast<u8>(m_current_pipeline_layout)] + BoolToUInt8(IsUsingROVRootSignature());
1625
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
1626
if (!IsUsingComputeRootSignature())
1627
cmdlist->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0);
1628
else
1629
cmdlist->SetComputeRoot32BitConstants(push_param, data_size / 4u, data, 0);
1630
}
1631
1632
// Reserves space in the uniform stream buffer and returns the host pointer to write to.
// The reservation is the aligned size plus MAX_UNIFORM_BUFFER_SIZE. If the first attempt fails, the
// command list is submitted and the reservation retried once; a second failure panics.
void* D3D12Device::MapUniformBuffer(u32 size)
{
  const u32 aligned_size = Common::AlignUpPow2(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
  const u32 reserve_size = aligned_size + MAX_UNIFORM_BUFFER_SIZE;

  bool reserved = m_uniform_buffer.ReserveMemory(reserve_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
  if (!reserved)
  {
    SubmitCommandListAndRestartRenderPass("out of uniform space");
    reserved = m_uniform_buffer.ReserveMemory(reserve_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
    if (!reserved)
      Panic("Failed to allocate uniform space.");
  }

  return m_uniform_buffer.GetCurrentHostPointer();
}
1646
1647
// Records where the just-written uniform data starts, commits size bytes to the uniform stream
// buffer, and marks the constant buffer binding dirty.
void D3D12Device::UnmapUniformBuffer(u32 size)
{
  s_stats.buffer_streamed += size;

  // The offset has to be read before CommitMemory() is called.
  const auto data_offset = m_uniform_buffer.GetCurrentOffset();
  m_uniform_buffer_position = data_offset;
  m_uniform_buffer.CommitMemory(size);

  m_dirty_flags |= DIRTY_FLAG_CONSTANT_BUFFER;
}
1654
1655
// Creates the root signature for every pipeline layout.
// Graphics layouts are built at m_root_signatures[0] and, when raster order views are supported,
// again at m_root_signatures[1] with an extra pixel-visible UAV table for the render targets.
// Compute layouts are only built at index 0. Returns false (with `error` set by the builder) on failure.
//
// The order parameters are added in here defines the root parameter indices used when binding.
bool D3D12Device::CreateRootSignatures(Error* error)
{
  D3D12::RootSignatureBuilder rsb;

  // Appends the render target UAV table used by the ROV variants of the graphics layouts.
  const auto add_rov_uav_table = [&rsb]() {
    rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
                           D3D12_SHADER_VISIBILITY_PIXEL);
  };

  // Appends the inline 32-bit constant block used by the push-constant layouts.
  const auto add_push_constants = [&rsb]() {
    rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
  };

  for (u32 rov = 0; rov < 2; rov++)
  {
    if (rov && !m_features.raster_order_views)
      break;

    const bool with_rov = (rov != 0);

    {
      auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::SingleTextureAndUBO)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
      if (with_rov)
        add_rov_uav_table();
      rs = rsb.Create(error, true);
      if (!rs)
        return false;
      D3D12::SetObjectName(rs.Get(), "Single Texture + UBO Pipeline Layout");
    }

    {
      auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::SingleTextureAndPushConstants)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      if (with_rov)
        add_rov_uav_table();
      add_push_constants();
      rs = rsb.Create(error, true);
      if (!rs)
        return false;
      D3D12::SetObjectName(rs.Get(), "Single Texture Pipeline Layout");
    }

    {
      auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::SingleTextureBufferAndPushConstants)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      if (with_rov)
        add_rov_uav_table();
      add_push_constants();
      rs = rsb.Create(error, true);
      if (!rs)
        return false;
      // This layout uses push constants, not a UBO; name it accordingly.
      D3D12::SetObjectName(rs.Get(), "Single Texture Buffer Pipeline Layout");
    }

    {
      auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::MultiTextureAndUBO)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS,
                             D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
      if (with_rov)
        add_rov_uav_table();
      rs = rsb.Create(error, true);
      if (!rs)
        return false;
      D3D12::SetObjectName(rs.Get(), "Multi Texture + UBO Pipeline Layout");
    }

    {
      auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::MultiTextureAndPushConstants)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS,
                             D3D12_SHADER_VISIBILITY_PIXEL);
      if (with_rov)
        add_rov_uav_table();
      add_push_constants();
      rs = rsb.Create(error, true);
      if (!rs)
        return false;
      D3D12::SetObjectName(rs.Get(), "Multi Texture Pipeline Layout");
    }
  }

  {
    auto& rs = m_root_signatures[0][static_cast<u8>(GPUPipeline::Layout::ComputeMultiTextureAndUBO)];

    rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
    rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
    rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS, D3D12_SHADER_VISIBILITY_ALL);
    rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
    rs = rsb.Create(error, true);
    if (!rs)
      return false;
    D3D12::SetObjectName(rs.Get(), "Compute Multi Texture + UBO Pipeline Layout");
  }

  {
    auto& rs = m_root_signatures[0][static_cast<u8>(GPUPipeline::Layout::ComputeMultiTextureAndPushConstants)];

    rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
    rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
    rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS, D3D12_SHADER_VISIBILITY_ALL);
    add_push_constants();
    rs = rsb.Create(error, true);
    if (!rs)
      return false;
    D3D12::SetObjectName(rs.Get(), "Compute Multi Texture Pipeline Layout");
  }

  return true;
}
1777
1778
void D3D12Device::DestroyRootSignatures()
1779
{
1780
m_root_signatures.enumerate([](auto& it) { it.Reset(); });
1781
}
1782
1783
void D3D12Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
1784
GPUPipeline::RenderPassFlag flags)
1785
{
1786
DebugAssert(
1787
!(flags & (GPUPipeline::RenderPassFlag::ColorFeedbackLoop | GPUPipeline::RenderPassFlag::SampleDepthBuffer)));
1788
1789
const bool image_bind_changed = ((m_current_render_pass_flags ^ flags) & GPUPipeline::BindRenderTargetsAsImages);
1790
bool changed =
1791
(m_num_current_render_targets != num_rts || m_current_depth_target != ds || m_current_render_pass_flags != flags);
1792
bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated());
1793
bool needs_rt_clear = false;
1794
1795
if (InRenderPass())
1796
EndRenderPass();
1797
1798
m_current_depth_target = static_cast<D3D12Texture*>(ds);
1799
for (u32 i = 0; i < num_rts; i++)
1800
{
1801
D3D12Texture* const RT = static_cast<D3D12Texture*>(rts[i]);
1802
changed |= m_current_render_targets[i] != RT;
1803
m_current_render_targets[i] = RT;
1804
needs_rt_clear |= RT->IsClearedOrInvalidated();
1805
}
1806
for (u32 i = num_rts; i < m_num_current_render_targets; i++)
1807
m_current_render_targets[i] = nullptr;
1808
m_num_current_render_targets = Truncate8(num_rts);
1809
m_current_render_pass_flags = flags;
1810
1811
// Don't end render pass unless it's necessary.
1812
if (changed)
1813
{
1814
if (InRenderPass())
1815
EndRenderPass();
1816
1817
// Need a root signature change if switching to UAVs.
1818
m_dirty_flags |= image_bind_changed ? LAYOUT_DEPENDENT_DIRTY_STATE : 0;
1819
m_dirty_flags = (flags & GPUPipeline::BindRenderTargetsAsImages) ? (m_dirty_flags | DIRTY_FLAG_RT_UAVS) :
1820
(m_dirty_flags & ~DIRTY_FLAG_RT_UAVS);
1821
}
1822
else if (needs_rt_clear || needs_ds_clear)
1823
{
1824
if (InRenderPass())
1825
EndRenderPass();
1826
}
1827
}
1828
1829
void D3D12Device::BeginRenderPass()
1830
{
1831
DebugAssert(!InRenderPass());
1832
1833
std::array<D3D12_RENDER_PASS_RENDER_TARGET_DESC, MAX_RENDER_TARGETS> rt_desc;
1834
D3D12_RENDER_PASS_DEPTH_STENCIL_DESC ds_desc;
1835
1836
D3D12_RENDER_PASS_RENDER_TARGET_DESC* rt_desc_p = nullptr;
1837
D3D12_RENDER_PASS_DEPTH_STENCIL_DESC* ds_desc_p = nullptr;
1838
u32 num_rt_descs = 0;
1839
1840
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
1841
1842
if (m_num_current_render_targets > 0 || m_current_depth_target) [[likely]]
1843
{
1844
if (!IsUsingROVRootSignature()) [[likely]]
1845
{
1846
for (u32 i = 0; i < m_num_current_render_targets; i++)
1847
{
1848
D3D12Texture* const rt = m_current_render_targets[i];
1849
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET);
1850
rt->SetUseFenceValue(GetCurrentFenceValue());
1851
1852
D3D12_RENDER_PASS_RENDER_TARGET_DESC& desc = rt_desc[i];
1853
desc.cpuDescriptor = rt->GetWriteDescriptor();
1854
desc.EndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE;
1855
1856
switch (rt->GetState())
1857
{
1858
case GPUTexture::State::Cleared:
1859
{
1860
desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR;
1861
std::memcpy(desc.BeginningAccess.Clear.ClearValue.Color, rt->GetUNormClearColor().data(),
1862
sizeof(desc.BeginningAccess.Clear.ClearValue.Color));
1863
rt->SetState(GPUTexture::State::Dirty);
1864
}
1865
break;
1866
1867
case GPUTexture::State::Invalidated:
1868
{
1869
desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD;
1870
rt->SetState(GPUTexture::State::Dirty);
1871
}
1872
break;
1873
1874
case GPUTexture::State::Dirty:
1875
{
1876
desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE;
1877
}
1878
break;
1879
1880
default:
1881
UnreachableCode();
1882
break;
1883
}
1884
}
1885
1886
rt_desc_p = (m_num_current_render_targets > 0) ? rt_desc.data() : nullptr;
1887
num_rt_descs = m_num_current_render_targets;
1888
}
1889
else
1890
{
1891
// Still need to clear the RTs.
1892
for (u32 i = 0; i < m_num_current_render_targets; i++)
1893
{
1894
D3D12Texture* const rt = m_current_render_targets[i];
1895
rt->SetUseFenceValue(GetCurrentFenceValue());
1896
rt->CommitClear(cmdlist);
1897
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
1898
rt->SetState(GPUTexture::State::Dirty);
1899
}
1900
}
1901
if (m_current_depth_target)
1902
{
1903
D3D12Texture* const ds = m_current_depth_target;
1904
ds->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_DEPTH_WRITE);
1905
ds->SetUseFenceValue(GetCurrentFenceValue());
1906
ds_desc.cpuDescriptor = ds->GetWriteDescriptor();
1907
ds_desc.DepthEndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE;
1908
ds_desc.StencilBeginningAccess = {};
1909
ds_desc.StencilEndingAccess = {};
1910
1911
switch (ds->GetState())
1912
{
1913
case GPUTexture::State::Cleared:
1914
{
1915
ds_desc.DepthBeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR;
1916
ds_desc.DepthBeginningAccess.Clear.ClearValue.DepthStencil.Depth = ds->GetClearDepth();
1917
ds->SetState(GPUTexture::State::Dirty);
1918
}
1919
break;
1920
1921
case GPUTexture::State::Invalidated:
1922
{
1923
ds_desc.DepthBeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD;
1924
ds->SetState(GPUTexture::State::Dirty);
1925
}
1926
break;
1927
1928
case GPUTexture::State::Dirty:
1929
{
1930
ds_desc.DepthBeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE;
1931
}
1932
break;
1933
1934
default:
1935
UnreachableCode();
1936
break;
1937
}
1938
1939
ds_desc_p = &ds_desc;
1940
}
1941
}
1942
else
1943
{
1944
// Re-rendering to swap chain.
1945
const auto& swap_chain_buf = m_current_swap_chain->GetCurrentBuffer();
1946
rt_desc[0] = {swap_chain_buf.second,
1947
{D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE, {}},
1948
{D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, {}}};
1949
rt_desc_p = &rt_desc[0];
1950
num_rt_descs = 1;
1951
}
1952
1953
// All textures should be in shader read only optimal already, but just in case..
1954
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
1955
for (u32 i = 0; i < num_textures; i++)
1956
{
1957
if (m_current_textures[i])
1958
m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
1959
}
1960
1961
DebugAssert(rt_desc_p || ds_desc_p || IsUsingROVRootSignature());
1962
cmdlist->BeginRenderPass(num_rt_descs, rt_desc_p, ds_desc_p, D3D12_RENDER_PASS_FLAG_NONE);
1963
1964
// TODO: Stats
1965
m_in_render_pass = true;
1966
s_stats.num_render_passes++;
1967
1968
// If this is a new command buffer, bind the pipeline and such.
1969
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
1970
SetInitialPipelineState();
1971
}
1972
1973
bool D3D12Device::InRenderPass()
1974
{
1975
return m_in_render_pass;
1976
}
1977
1978
void D3D12Device::EndRenderPass()
1979
{
1980
DebugAssert(m_in_render_pass);
1981
1982
// TODO: stats
1983
m_in_render_pass = false;
1984
1985
GetCommandList()->EndRenderPass();
1986
}
1987
1988
// Makes `pipeline` current, re-emitting only the state (topology, vertex stride, blend factor,
// layout-dependent bindings) that differs from what is already tracked.
void D3D12Device::SetPipeline(GPUPipeline* pipeline)
{
  D3D12Pipeline* const new_pipeline = static_cast<D3D12Pipeline*>(pipeline);

  // First draw? Bind everything.
  if (m_dirty_flags & DIRTY_FLAG_INITIAL)
  {
    m_current_pipeline = new_pipeline;
    if (new_pipeline)
      SetInitialPipelineState();
    return;
  }

  if (m_current_pipeline == pipeline)
    return;

  m_current_pipeline = new_pipeline;

  ID3D12GraphicsCommandList4* const cmdlist = GetCommandList();
  cmdlist->SetPipelineState(m_current_pipeline->GetPipeline());

  const D3D12_PRIMITIVE_TOPOLOGY new_topology = m_current_pipeline->GetTopology();
  if (new_topology != m_current_topology)
  {
    m_current_topology = new_topology;
    cmdlist->IASetPrimitiveTopology(new_topology);
  }

  // A stride of zero leaves the currently bound vertex buffer view untouched.
  const u32 new_stride = m_current_pipeline->GetVertexStride();
  if (new_stride > 0 && m_current_vertex_stride != new_stride)
  {
    m_current_vertex_stride = new_stride;
    SetVertexBuffer(cmdlist);
  }

  // TODO: we don't need to change the blend constant if blending isn't on.
  const u32 new_blend_constant = m_current_pipeline->GetBlendConstants();
  if (m_current_blend_constant != new_blend_constant)
  {
    m_current_blend_constant = new_blend_constant;
    cmdlist->OMSetBlendFactor(m_current_pipeline->GetBlendConstantsF().data());
  }

  const GPUPipeline::Layout new_layout = m_current_pipeline->GetLayout();
  if (m_current_pipeline_layout != new_layout)
  {
    m_current_pipeline_layout = new_layout;
    // Everything tied to the root signature must be rebound; RT UAVs only when they are in use.
    m_dirty_flags |= LAYOUT_DEPENDENT_DIRTY_STATE & (IsUsingROVRootSignature() ? ~0u : ~DIRTY_FLAG_RT_UAVS);
  }
}
2036
2037
// Drops the tracked pipeline pointer if it refers to `pl`.
void D3D12Device::UnbindPipeline(D3D12Pipeline* pl)
{
  if (m_current_pipeline == pl)
    m_current_pipeline = nullptr;
}
2044
2045
bool D3D12Device::IsRenderTargetBound(const GPUTexture* tex) const
2046
{
2047
for (u32 i = 0; i < m_num_current_render_targets; i++)
2048
{
2049
if (m_current_render_targets[i] == tex)
2050
return true;
2051
}
2052
2053
return false;
2054
}
2055
2056
void D3D12Device::InvalidateCachedState()
2057
{
2058
DebugAssert(!m_in_render_pass);
2059
m_dirty_flags = ALL_DIRTY_STATE &
2060
((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) ? ~0u : ~DIRTY_FLAG_RT_UAVS);
2061
}
2062
2063
void D3D12Device::SetInitialPipelineState()
2064
{
2065
DebugAssert(m_current_pipeline);
2066
m_dirty_flags &= ~DIRTY_FLAG_INITIAL;
2067
2068
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
2069
2070
m_current_vertex_stride = m_current_pipeline->GetVertexStride();
2071
SetVertexBuffer(cmdlist);
2072
const D3D12_INDEX_BUFFER_VIEW ib_view = {m_index_buffer.GetGPUPointer(), m_index_buffer.GetSize(),
2073
DXGI_FORMAT_R16_UINT};
2074
cmdlist->IASetIndexBuffer(&ib_view);
2075
2076
cmdlist->SetPipelineState(m_current_pipeline->GetPipeline());
2077
m_current_pipeline_layout = m_current_pipeline->GetLayout();
2078
2079
m_current_topology = m_current_pipeline->GetTopology();
2080
cmdlist->IASetPrimitiveTopology(m_current_topology);
2081
2082
m_current_blend_constant = m_current_pipeline->GetBlendConstants();
2083
cmdlist->OMSetBlendFactor(m_current_pipeline->GetBlendConstantsF().data());
2084
2085
SetViewport(cmdlist);
2086
SetScissor(cmdlist);
2087
}
2088
2089
// Binds the whole streaming vertex buffer to slot 0 using the currently tracked stride.
void D3D12Device::SetVertexBuffer(ID3D12GraphicsCommandList4* cmdlist)
{
  const D3D12_VERTEX_BUFFER_VIEW view = {.BufferLocation = m_vertex_buffer.GetGPUPointer(),
                                         .SizeInBytes = m_vertex_buffer.GetSize(),
                                         .StrideInBytes = m_current_vertex_stride};
  cmdlist->IASetVertexBuffers(0, 1, &view);
}
2095
2096
// Records the tracked viewport rectangle on `cmdlist` with a fixed [0, 1] depth range.
void D3D12Device::SetViewport(ID3D12GraphicsCommandList4* cmdlist)
{
  const D3D12_VIEWPORT viewport = {.TopLeftX = static_cast<float>(m_current_viewport.left),
                                   .TopLeftY = static_cast<float>(m_current_viewport.top),
                                   .Width = static_cast<float>(m_current_viewport.width()),
                                   .Height = static_cast<float>(m_current_viewport.height()),
                                   .MinDepth = 0.0f,
                                   .MaxDepth = 1.0f};
  cmdlist->RSSetViewports(1, &viewport);
}
2106
2107
// Records the tracked scissor rectangle on `cmdlist`.
void D3D12Device::SetScissor(ID3D12GraphicsCommandList4* cmdlist)
{
  // The vector is passed straight through as a D3D12_RECT, so the sizes must agree.
  static_assert(sizeof(GSVector4i) == sizeof(D3D12_RECT));

  const D3D12_RECT* const rect = reinterpret_cast<const D3D12_RECT*>(&m_current_scissor);
  cmdlist->RSSetScissorRects(1, rect);
}
2112
2113
// Binds `texture` and `sampler` to `slot`, flagging the corresponding descriptor tables dirty
// only when the binding actually changes. A null sampler falls back to m_nearest_sampler.
void D3D12Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler)
{
  D3D12Texture* const new_texture = static_cast<D3D12Texture*>(texture);
  if (m_current_textures[slot] != new_texture)
  {
    m_current_textures[slot] = new_texture;
    m_dirty_flags |= DIRTY_FLAG_TEXTURES;

    if (new_texture)
    {
      new_texture->CommitClear();
      new_texture->SetUseFenceValue(GetCurrentFenceValue());

      // The render pass is closed before the texture is moved to the shader-readable state.
      const bool needs_transition =
        (new_texture->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
      if (needs_transition)
      {
        if (InRenderPass())
          EndRenderPass();
        new_texture->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
      }
    }
  }

  const D3D12DescriptorHandle& sampler_handle =
    static_cast<D3D12Sampler*>(sampler ? sampler : m_nearest_sampler)->GetDescriptor();
  if (m_current_samplers[slot] != sampler_handle)
  {
    m_current_samplers[slot] = sampler_handle;
    m_dirty_flags |= DIRTY_FLAG_SAMPLERS;
  }
}
2142
2143
// Binds `buffer` as the single texture buffer (only slot 0 is supported).
void D3D12Device::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer)
{
  DebugAssert(slot == 0);

  if (m_current_texture_buffer != buffer)
  {
    m_current_texture_buffer = static_cast<D3D12TextureBuffer*>(buffer);

    // Only the texture-buffer layout consumes this binding, so only then does it need rebinding.
    const bool layout_uses_buffer =
      (m_current_pipeline_layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants);
    if (layout_uses_buffer)
      m_dirty_flags |= DIRTY_FLAG_TEXTURES;
  }
}
2153
2154
// Removes every reference to `tex` from the tracked bindings: sampled texture slots, and the
// colour or depth target it may be bound as. An active render pass is ended before a target is dropped.
void D3D12Device::UnbindTexture(D3D12Texture* tex)
{
  for (u32 slot = 0; slot < MAX_TEXTURE_SAMPLERS; slot++)
  {
    if (m_current_textures[slot] != tex)
      continue;

    m_current_textures[slot] = nullptr;
    m_dirty_flags |= DIRTY_FLAG_TEXTURES;
  }

  const bool can_be_color_target = (tex->IsRenderTarget() || tex->HasFlag(GPUTexture::Flags::AllowBindAsImage));
  if (can_be_color_target)
  {
    for (u32 slot = 0; slot < m_num_current_render_targets; slot++)
    {
      if (m_current_render_targets[slot] != tex)
        continue;

      if (InRenderPass())
        EndRenderPass();
      m_current_render_targets[slot] = nullptr;
    }
    return;
  }

  if (!tex->IsDepthStencil() || m_current_depth_target != tex)
    return;

  if (InRenderPass())
    EndRenderPass();
  m_current_depth_target = nullptr;
}
2187
2188
// Clears the tracked texture buffer if it is `buf`, flagging textures dirty when the current
// layout is the one that reads it.
void D3D12Device::UnbindTextureBuffer(D3D12TextureBuffer* buf)
{
  if (m_current_texture_buffer == buf)
  {
    m_current_texture_buffer = nullptr;

    const bool layout_uses_buffer =
      (m_current_pipeline_layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants);
    if (layout_uses_buffer)
      m_dirty_flags |= DIRTY_FLAG_TEXTURES;
  }
}
2198
2199
// Renders mip `dst_level` of `texture` by drawing a fullscreen triangle that samples `src_level`
// with the linear sampler. The per-format blit pipeline is created lazily (and cached in the
// pipeline library when one is available). On pipeline creation failure the error is logged and
// the mip is left untouched. Tracked viewport, scissor, pipeline and root bindings are restored
// or flagged dirty afterwards so the next regular draw rebinds them.
void D3D12Device::RenderTextureMipmap(D3D12Texture* texture, u32 dst_level, u32 dst_width, u32 dst_height,
                                      u32 src_level, u32 src_width, u32 src_height)
{
  ID3D12RootSignature* rootsig =
    m_root_signatures[0][static_cast<size_t>(GPUPipeline::Layout::SingleTextureAndPushConstants)].Get();
  ComPtr<ID3D12PipelineState>& pipeline = m_mipmap_render_pipelines[static_cast<size_t>(texture->GetFormat())];
  if (!pipeline)
  {
    D3D12::GraphicsPipelineBuilder gpb;
    gpb.SetRootSignature(rootsig);
    gpb.SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE);
    gpb.SetRenderTarget(0, texture->GetDXGIFormat());
    gpb.SetVertexShader(s_mipmap_blit_vs, std::size(s_mipmap_blit_vs));
    gpb.SetPixelShader(s_mipmap_blit_ps, std::size(s_mipmap_blit_ps));
    gpb.SetRasterizationState(D3D12_FILL_MODE_SOLID, D3D12_CULL_MODE_NONE, false);
    gpb.SetDepthState(false, false, D3D12_COMPARISON_FUNC_ALWAYS);
    gpb.SetBlendState(0, false, D3D12_BLEND_ZERO, D3D12_BLEND_ONE, D3D12_BLEND_OP_ADD, D3D12_BLEND_ZERO,
                      D3D12_BLEND_ONE, D3D12_BLEND_OP_ADD, D3D12_COLOR_WRITE_ENABLE_ALL);

    const std::wstring name = StringUtil::UTF8StringToWideString(
      TinyString::from_format("MipmapRender-{}", GPUTexture::GetFormatName(texture->GetFormat())));
    Error error;
    if (m_pipeline_library)
    {
      HRESULT hr =
        m_pipeline_library->LoadGraphicsPipeline(name.c_str(), gpb.GetDesc(), IID_PPV_ARGS(pipeline.GetAddressOf()));
      if (FAILED(hr))
      {
        // E_INVALIDARG = not found.
        if (hr != E_INVALIDARG)
          ERROR_LOG("LoadGraphicsPipeline() failed with HRESULT {:08X}", static_cast<unsigned>(hr));

        // Need to create it normally.
        pipeline = gpb.Create(m_device.Get(), &error, false);

        // Store if it wasn't an OOM or something else.
        if (pipeline && hr == E_INVALIDARG)
        {
          hr = m_pipeline_library->StorePipeline(name.c_str(), pipeline.Get());
          if (FAILED(hr))
            ERROR_LOG("StorePipeline() failed with HRESULT {:08X}", static_cast<unsigned>(hr));
        }
      }
    }
    else
    {
      pipeline = gpb.Create(m_device.Get(), &error, false);
    }
    if (!pipeline)
    {
      ERROR_LOG("Failed to compile mipmap render pipeline for {}: {}", GPUTexture::GetFormatName(texture->GetFormat()),
                error.GetDescription());
      return;
    }
  }

  // EndRenderPass() asserts that a pass is active, and this can be reached outside of one.
  if (InRenderPass())
    EndRenderPass();

  // we need a temporary SRV and RTV for each mip level
  // Safe to use the init buffer after exec, because everything will be done with the texture.
  D3D12DescriptorHandle rtv_handle;
  while (!GetRTVHeapManager().Allocate(&rtv_handle))
    SubmitCommandList(false, "Allocate RTV for RenderTextureMipmap()");

  D3D12DescriptorHandle srv_handle;
  while (!GetDescriptorHeapManager().Allocate(&srv_handle))
    SubmitCommandList(false, "Allocate SRV for RenderTextureMipmap()");

  // Setup views. This will be a partial view for the SRV.
  const D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = {.Format = texture->GetDXGIFormat(),
                                                  .ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D,
                                                  .Texture2D = {.MipSlice = dst_level, .PlaneSlice = 0}};
  m_device->CreateRenderTargetView(texture->GetResource(), &rtv_desc, rtv_handle);

  const D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {
    .Format = texture->GetDXGIFormat(),
    .ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D,
    .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
    .Texture2D = {.MostDetailedMip = src_level, .MipLevels = 1, .PlaneSlice = 0, .ResourceMinLODClamp = 0.0f}};
  m_device->CreateShaderResourceView(texture->GetResource(), &srv_desc, srv_handle);

  // *now* we don't have to worry about running out of anything.
  ID3D12GraphicsCommandList4* cmdlist = GetCommandList();

  // Move only the two involved subresources; the texture's tracked whole-resource state is
  // left as-is and the subresources are returned to it after the draw.
  if (texture->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)
  {
    texture->TransitionSubresourceToState(cmdlist, src_level, texture->GetResourceState(),
                                          D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
  }
  if (texture->GetResourceState() != D3D12_RESOURCE_STATE_RENDER_TARGET)
  {
    texture->TransitionSubresourceToState(cmdlist, dst_level, texture->GetResourceState(),
                                          D3D12_RESOURCE_STATE_RENDER_TARGET);
  }

  const D3D12_RENDER_PASS_RENDER_TARGET_DESC rt_desc = {
    .cpuDescriptor = rtv_handle,
    .BeginningAccess = {.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD, .Clear = {}},
    .EndingAccess = {.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, .Resolve = {}}};
  cmdlist->BeginRenderPass(1, &rt_desc, nullptr, D3D12_RENDER_PASS_FLAG_NONE);

  const D3D12_VIEWPORT vp = {0.0f, 0.0f, static_cast<float>(dst_width), static_cast<float>(dst_height), 0.0f, 1.0f};
  cmdlist->RSSetViewports(1, &vp);

  const D3D12_RECT scissor = {0, 0, static_cast<LONG>(dst_width), static_cast<LONG>(dst_height)};
  cmdlist->RSSetScissorRects(1, &scissor);

  // The blit pipeline was built against `rootsig`; bind it so the descriptor tables below land in
  // its parameters regardless of which layout the previous draw used.
  cmdlist->SetGraphicsRootSignature(rootsig);
  cmdlist->SetPipelineState(pipeline.Get());
  cmdlist->SetGraphicsRootDescriptorTable(0, srv_handle);
  cmdlist->SetGraphicsRootDescriptorTable(1, static_cast<D3D12Sampler*>(m_linear_sampler)->GetDescriptor());
  cmdlist->DrawInstanced(3, 1, 0, 0);

  cmdlist->EndRenderPass();

  if (texture->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)
  {
    texture->TransitionSubresourceToState(cmdlist, src_level, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
                                          texture->GetResourceState());
  }
  if (texture->GetResourceState() != D3D12_RESOURCE_STATE_RENDER_TARGET)
  {
    texture->TransitionSubresourceToState(cmdlist, dst_level, D3D12_RESOURCE_STATE_RENDER_TARGET,
                                          texture->GetResourceState());
  }

  // Must destroy after current cmdlist.
  DeferDescriptorDestruction(m_descriptor_heap_manager, &srv_handle);
  DeferDescriptorDestruction(m_rtv_heap_manager, &rtv_handle);

  // Restore for next normal draw.
  // SetPipeline() skips the bind when the pointer is unchanged, so put the tracked PSO back here.
  if (m_current_pipeline)
    cmdlist->SetPipelineState(m_current_pipeline->GetPipeline());
  SetViewport(GetCommandList());
  SetScissor(GetCommandList());
  m_dirty_flags |= LAYOUT_DEPENDENT_DIRTY_STATE;
}
2332
2333
void D3D12Device::SetViewport(const GSVector4i rc)
2334
{
2335
if (m_current_viewport.eq(rc))
2336
return;
2337
2338
m_current_viewport = rc;
2339
2340
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
2341
return;
2342
2343
SetViewport(GetCommandList());
2344
}
2345
2346
void D3D12Device::SetScissor(const GSVector4i rc)
2347
{
2348
if (m_current_scissor.eq(rc))
2349
return;
2350
2351
m_current_scissor = rc;
2352
2353
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
2354
return;
2355
2356
SetScissor(GetCommandList());
2357
}
2358
2359
void D3D12Device::PreDrawCheck()
2360
{
2361
// TODO: Flushing cmdbuffer because of descriptor OOM will lose push constants.
2362
2363
DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
2364
const u32 dirty = std::exchange(m_dirty_flags, 0);
2365
if (dirty != 0)
2366
{
2367
if (dirty & DIRTY_FLAG_PIPELINE_LAYOUT)
2368
{
2369
UpdateRootSignature();
2370
if (!UpdateRootParameters(dirty))
2371
{
2372
SubmitCommandListAndRestartRenderPass("out of descriptors");
2373
PreDrawCheck();
2374
return;
2375
}
2376
}
2377
else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS))
2378
{
2379
if (!UpdateRootParameters(dirty))
2380
{
2381
SubmitCommandListAndRestartRenderPass("out of descriptors");
2382
PreDrawCheck();
2383
return;
2384
}
2385
}
2386
}
2387
2388
if (!InRenderPass())
2389
BeginRenderPass();
2390
}
2391
2392
void D3D12Device::PreDispatchCheck()
2393
{
2394
if (InRenderPass())
2395
EndRenderPass();
2396
2397
// Transition images.
2398
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
2399
2400
// All textures should be in shader read only optimal already, but just in case..
2401
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
2402
for (u32 i = 0; i < num_textures; i++)
2403
{
2404
if (m_current_textures[i])
2405
m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
2406
}
2407
2408
if (m_num_current_render_targets > 0 && (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages))
2409
{
2410
// Still need to clear the RTs.
2411
for (u32 i = 0; i < m_num_current_render_targets; i++)
2412
{
2413
D3D12Texture* const rt = m_current_render_targets[i];
2414
rt->SetUseFenceValue(GetCurrentFenceValue());
2415
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
2416
rt->CommitClear(cmdlist);
2417
rt->SetState(GPUTexture::State::Dirty);
2418
}
2419
}
2420
2421
// If this is a new command buffer, bind the pipeline and such.
2422
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
2423
SetInitialPipelineState();
2424
2425
// TODO: Flushing cmdbuffer because of descriptor OOM will lose push constants.
2426
DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
2427
const u32 dirty = std::exchange(m_dirty_flags, 0);
2428
if (dirty != 0)
2429
{
2430
if (dirty & DIRTY_FLAG_PIPELINE_LAYOUT)
2431
{
2432
UpdateRootSignature();
2433
if (!UpdateRootParameters(dirty))
2434
{
2435
SubmitCommandList(false, "out of descriptors");
2436
PreDispatchCheck();
2437
return;
2438
}
2439
}
2440
else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS))
2441
{
2442
if (!UpdateRootParameters(dirty))
2443
{
2444
SubmitCommandList(false, "out of descriptors");
2445
PreDispatchCheck();
2446
return;
2447
}
2448
}
2449
}
2450
}
2451
2452
bool D3D12Device::IsUsingROVRootSignature() const
2453
{
2454
return ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != 0);
2455
}
2456
2457
bool D3D12Device::IsUsingComputeRootSignature() const
2458
{
2459
return IsComputeLayout(m_current_pipeline_layout);
2460
}
2461
2462
void D3D12Device::UpdateRootSignature()
2463
{
2464
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
2465
if (!IsUsingComputeRootSignature())
2466
{
2467
cmdlist->SetGraphicsRootSignature(
2468
m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast<u8>(m_current_pipeline_layout)].Get());
2469
}
2470
else
2471
{
2472
cmdlist->SetComputeRootSignature(m_root_signatures[0][static_cast<u8>(m_current_pipeline_layout)].Get());
2473
}
2474
}
2475
2476
template<GPUPipeline::Layout layout>
2477
bool D3D12Device::UpdateParametersForLayout(u32 dirty)
2478
{
2479
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
2480
2481
if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO ||
2482
layout == GPUPipeline::Layout::MultiTextureAndUBO ||
2483
layout == GPUPipeline::Layout::ComputeMultiTextureAndUBO)
2484
{
2485
if (dirty & DIRTY_FLAG_CONSTANT_BUFFER)
2486
{
2487
if constexpr (!IsComputeLayout(layout))
2488
cmdlist->SetGraphicsRootConstantBufferView(2, m_uniform_buffer.GetGPUPointer() + m_uniform_buffer_position);
2489
else
2490
cmdlist->SetComputeRootConstantBufferView(3, m_uniform_buffer.GetGPUPointer() + m_uniform_buffer_position);
2491
}
2492
}
2493
2494
constexpr u32 num_textures = GetActiveTexturesForLayout(layout);
2495
if (dirty & DIRTY_FLAG_TEXTURES && num_textures > 0)
2496
{
2497
D3D12DescriptorAllocator& allocator = m_command_lists[m_current_command_list].descriptor_allocator;
2498
D3D12DescriptorHandle gpu_handle;
2499
if (!allocator.Allocate(num_textures, &gpu_handle))
2500
return false;
2501
2502
if constexpr (num_textures == 1)
2503
{
2504
m_device->CopyDescriptorsSimple(
2505
1, gpu_handle, m_current_textures[0] ? m_current_textures[0]->GetSRVDescriptor() : m_null_srv_descriptor,
2506
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
2507
}
2508
else
2509
{
2510
D3D12_CPU_DESCRIPTOR_HANDLE src_handles[MAX_TEXTURE_SAMPLERS];
2511
UINT src_sizes[MAX_TEXTURE_SAMPLERS];
2512
for (u32 i = 0; i < num_textures; i++)
2513
{
2514
src_handles[i] = m_current_textures[i] ? m_current_textures[i]->GetSRVDescriptor() : m_null_srv_descriptor;
2515
src_sizes[i] = 1;
2516
}
2517
m_device->CopyDescriptors(1, &gpu_handle.cpu_handle, &num_textures, num_textures, src_handles, src_sizes,
2518
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
2519
}
2520
2521
if constexpr (!IsComputeLayout(layout))
2522
cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
2523
else
2524
cmdlist->SetComputeRootDescriptorTable(0, gpu_handle);
2525
}
2526
2527
if (dirty & DIRTY_FLAG_SAMPLERS && num_textures > 0)
2528
{
2529
auto& allocator = m_command_lists[m_current_command_list].sampler_allocator;
2530
D3D12DescriptorHandle gpu_handle;
2531
if constexpr (num_textures == 1)
2532
{
2533
if (!allocator.LookupSingle(m_device.Get(), &gpu_handle, m_current_samplers[0]))
2534
return false;
2535
}
2536
else
2537
{
2538
if (!allocator.LookupGroup(m_device.Get(), &gpu_handle, m_current_samplers.data()))
2539
return false;
2540
}
2541
2542
if constexpr (!IsComputeLayout(layout))
2543
cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle);
2544
else
2545
cmdlist->SetComputeRootDescriptorTable(1, gpu_handle);
2546
}
2547
2548
if (dirty & DIRTY_FLAG_TEXTURES && layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
2549
{
2550
D3D12DescriptorAllocator& allocator = m_command_lists[m_current_command_list].descriptor_allocator;
2551
D3D12DescriptorHandle gpu_handle;
2552
if (!allocator.Allocate(1, &gpu_handle))
2553
return false;
2554
2555
m_device->CopyDescriptorsSimple(
2556
1, gpu_handle, m_current_texture_buffer ? m_current_texture_buffer->GetDescriptor() : m_null_srv_descriptor,
2557
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
2558
cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
2559
}
2560
2561
if (dirty & DIRTY_FLAG_RT_UAVS)
2562
{
2563
DebugAssert(m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages);
2564
2565
D3D12DescriptorAllocator& allocator = m_command_lists[m_current_command_list].descriptor_allocator;
2566
D3D12DescriptorHandle gpu_handle;
2567
if (!allocator.Allocate(MAX_IMAGE_RENDER_TARGETS, &gpu_handle))
2568
return false;
2569
2570
D3D12_CPU_DESCRIPTOR_HANDLE src_handles[MAX_IMAGE_RENDER_TARGETS];
2571
UINT src_sizes[MAX_IMAGE_RENDER_TARGETS];
2572
const UINT dst_size = MAX_IMAGE_RENDER_TARGETS;
2573
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
2574
{
2575
src_handles[i] =
2576
m_current_render_targets[i] ? m_current_render_targets[i]->GetUAVDescriptor() : m_null_uav_descriptor;
2577
src_sizes[i] = 1;
2578
}
2579
m_device->CopyDescriptors(1, &gpu_handle.cpu_handle, &dst_size, MAX_IMAGE_RENDER_TARGETS, src_handles, src_sizes,
2580
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
2581
2582
constexpr u32 rov_param =
2583
IsComputeLayout(layout) ?
2584
2 :
2585
((layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) ?
2586
1 :
2587
((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) ?
2588
3 :
2589
2));
2590
if constexpr (!IsComputeLayout(layout))
2591
cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle);
2592
else
2593
cmdlist->SetComputeRootDescriptorTable(rov_param, gpu_handle);
2594
}
2595
2596
return true;
2597
}
2598
2599
// Forwards the dirty-flag mask to the UpdateParametersForLayout<> instantiation that matches the
// currently selected pipeline layout, and returns that call's result.
bool D3D12Device::UpdateRootParameters(u32 dirty)
{
  using Layout = GPUPipeline::Layout;

  // The layout is a template argument of the worker, so the runtime value has to be mapped onto a
  // compile-time constant one case at a time.
  switch (m_current_pipeline_layout)
  {
    // Graphics layouts.
    case Layout::SingleTextureAndUBO:
      return UpdateParametersForLayout<Layout::SingleTextureAndUBO>(dirty);

    case Layout::SingleTextureAndPushConstants:
      return UpdateParametersForLayout<Layout::SingleTextureAndPushConstants>(dirty);

    case Layout::SingleTextureBufferAndPushConstants:
      return UpdateParametersForLayout<Layout::SingleTextureBufferAndPushConstants>(dirty);

    case Layout::MultiTextureAndUBO:
      return UpdateParametersForLayout<Layout::MultiTextureAndUBO>(dirty);

    case Layout::MultiTextureAndPushConstants:
      return UpdateParametersForLayout<Layout::MultiTextureAndPushConstants>(dirty);

    // Compute layouts.
    case Layout::ComputeMultiTextureAndUBO:
      return UpdateParametersForLayout<Layout::ComputeMultiTextureAndUBO>(dirty);

    case Layout::ComputeMultiTextureAndPushConstants:
      return UpdateParametersForLayout<Layout::ComputeMultiTextureAndPushConstants>(dirty);

    // Any other value is treated as a programming error.
    default:
      UnreachableCode();
  }
}
2628
2629
// Records a non-indexed draw of vertex_count vertices, starting at base_vertex, on the current
// command list. PreDrawCheck() runs first (defined outside this section) and the draw counter in
// s_stats is bumped before the command is recorded.
void D3D12Device::Draw(u32 vertex_count, u32 base_vertex)
{
  // The draw goes through the instanced entry point: exactly one instance, starting at instance 0.
  constexpr UINT instance_count = 1;
  constexpr UINT first_instance = 0;

  PreDrawCheck();
  ++s_stats.num_draws;
  GetCommandList()->DrawInstanced(vertex_count, instance_count, base_vertex, first_instance);
}
2635
2636
// Records an indexed draw of index_count indices, starting at base_index in the index buffer, with
// base_vertex passed as the base vertex location. PreDrawCheck() runs first (defined outside this
// section) and the draw counter in s_stats is bumped before the command is recorded.
void D3D12Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
{
  // The draw goes through the instanced entry point: exactly one instance, starting at instance 0.
  constexpr UINT instance_count = 1;
  constexpr UINT first_instance = 0;

  PreDrawCheck();
  ++s_stats.num_draws;
  GetCommandList()->DrawIndexedInstanced(index_count, instance_count, base_index, base_vertex, first_instance);
}
2642
2643
// Barrier draws are not implemented by this backend: every call panics immediately and none of
// the arguments are read.
void D3D12Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type)
{
  Panic("Barriers are not supported");
}
2647
2648
// Records a compute dispatch on the current command list. The caller supplies the total thread
// count and the thread-group size per axis; the group count handed to D3D12 is their quotient.
// PreDispatchCheck() runs first (defined outside this section). Dispatches are counted in the
// same s_stats.num_draws counter as draws.
void D3D12Device::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
                           u32 group_size_z)
{
  PreDispatchCheck();
  ++s_stats.num_draws;

  // Unsigned integer division: a thread count that is not a multiple of the group size is rounded
  // down, so any remainder threads are not dispatched.
  // NOTE(review): presumably callers pass thread counts already aligned to the group size - confirm.
  GetCommandList()->Dispatch(threads_x / group_size_x, threads_y / group_size_y, threads_z / group_size_z);
}
2659
2660