Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
stenzek
GitHub Repository: stenzek/duckstation
Path: blob/master/src/util/d3d12_device.cpp
7347 views
1
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <[email protected]>
2
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
3
4
#include "d3d12_device.h"
5
#include "d3d12_builders.h"
6
#include "d3d12_pipeline.h"
7
#include "d3d12_stream_buffer.h"
8
#include "d3d12_texture.h"
9
#include "d3d_common.h"
10
11
#include "common/align.h"
12
#include "common/assert.h"
13
#include "common/bitutils.h"
14
#include "common/error.h"
15
#include "common/file_system.h"
16
#include "common/log.h"
17
#include "common/path.h"
18
#include "common/scoped_guard.h"
19
#include "common/small_string.h"
20
#include "common/string_util.h"
21
22
#include "D3D12MemAlloc.h"
23
#include "fmt/format.h"
24
25
#include <limits>
26
#include <mutex>
27
28
LOG_CHANNEL(GPUDevice);
29
30
// Tweakables
31
// Compile-time sizing constants for this backend. All values are u32.
enum : u32
32
{
33
// NOTE(review): presumably an upper bound on draws recorded per command list - not referenced in the visible code.
MAX_DRAW_CALLS_PER_FRAME = 2048,
34
// Capacity passed to each command list's CBV/SRV/UAV descriptor allocator in CreateCommandLists().
MAX_DESCRIPTORS_PER_FRAME = 32768,
35
// Capacity passed to each command list's sampler allocator in CreateCommandLists().
MAX_SAMPLERS_PER_FRAME = D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE,
36
MAX_DESCRIPTOR_SETS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME,
37
38
// Capacities of the persistent descriptor heap managers created in CreateDescriptorHeaps().
MAX_PERSISTENT_DESCRIPTORS = 2048,
39
MAX_PERSISTENT_RTVS = 512,
40
MAX_PERSISTENT_DSVS = 128,
41
MAX_PERSISTENT_SAMPLERS = 512,
42
43
// NOTE(review): presumably stream buffer sizes in bytes consumed by CreateBuffers() - confirm at the use site.
VERTEX_BUFFER_SIZE = 32 * 1024 * 1024,
44
INDEX_BUFFER_SIZE = 16 * 1024 * 1024,
45
VERTEX_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024,
46
FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024,
47
TEXTURE_BUFFER_SIZE = 64 * 1024 * 1024,
48
49
// NOTE(review): presumably the push-constant block size in bytes - confirm against root signature creation.
UNIFORM_PUSH_CONSTANTS_SIZE = 128,
50
51
// NOTE(review): presumably the largest single uniform upload in bytes - confirm at the use site.
MAX_UNIFORM_BUFFER_SIZE = 1024,
52
};
53
54
// We need to synchronize instance creation because of adapter enumeration from the UI thread.
// Held for the duration of CreateDeviceAndMainSwapChain() and DestroyDevice().
55
static std::mutex s_instance_mutex;
56
57
// Texture format looked up via D3DCommon::GetFormatMapping() when setting up the swap chain.
static constexpr GPUTextureFormat s_swap_chain_format = GPUTextureFormat::RGBA8;
58
59
// We just need to keep this alive, never reference it.
// Takes ownership of the blob in ReadPipelineCache() and is released in DestroyDevice();
// the D3D12Device destructor asserts that it is empty.
60
static DynamicHeapArray<u8> s_pipeline_cache_data;
61
62
#ifdef ENABLE_GPU_OBJECT_NAMES
63
#include "WinPixEventRuntime/pix3.h"
64
// Reset to zero by the D3D12Device constructor.
// NOTE(review): presumably tracks nesting of debug scopes/markers - confirm at the use site.
static u32 s_debug_scope_depth = 0;
65
#endif
66
67
// Precompiled shader blob stored as 32-bit words. The first word (0x43425844) is the ASCII tag "DXBC" in
// little-endian order, and the blob embeds the semantic names SV_VertexID, TEXCOORD and SV_Position.
// NOTE(review): the name suggests this is the vertex shader used for mipmap generation blits - confirm at the
// use site. Do not edit by hand; regenerate from the shader source instead.
static constexpr const u32 s_mipmap_blit_vs[] = {
68
0x43425844, 0x1790f572, 0x2810683a, 0xdff0fe9d, 0x8f210489, 0x00000001, 0x000002e0, 0x00000005, 0x00000034,
69
0x000000a0, 0x000000d4, 0x0000012c, 0x00000244, 0x46454452, 0x00000064, 0x00000000, 0x00000000, 0x00000000,
70
0x0000003c, 0xfffe0500, 0x00008100, 0x0000003c, 0x31314452, 0x0000003c, 0x00000018, 0x00000020, 0x00000028,
71
0x00000024, 0x0000000c, 0x00000000, 0x7263694d, 0x666f736f, 0x52282074, 0x4c482029, 0x53204c53, 0x65646168,
72
0x6f432072, 0x6c69706d, 0x31207265, 0x00312e30, 0x4e475349, 0x0000002c, 0x00000001, 0x00000008, 0x00000020,
73
0x00000000, 0x00000006, 0x00000001, 0x00000000, 0x00000101, 0x565f5653, 0x65747265, 0x00444978, 0x4e47534f,
74
0x00000050, 0x00000002, 0x00000008, 0x00000038, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000c03,
75
0x00000041, 0x00000000, 0x00000001, 0x00000003, 0x00000001, 0x0000000f, 0x43584554, 0x44524f4f, 0x5f565300,
76
0x69736f50, 0x6e6f6974, 0xababab00, 0x58454853, 0x00000110, 0x00010050, 0x00000044, 0x0100086a, 0x04000060,
77
0x00101012, 0x00000000, 0x00000006, 0x03000065, 0x00102032, 0x00000000, 0x04000067, 0x001020f2, 0x00000001,
78
0x00000001, 0x02000068, 0x00000001, 0x0b00008c, 0x00100012, 0x00000000, 0x00004001, 0x00000001, 0x00004001,
79
0x00000001, 0x0010100a, 0x00000000, 0x00004001, 0x00000000, 0x07000001, 0x00100042, 0x00000000, 0x0010100a,
80
0x00000000, 0x00004001, 0x00000002, 0x05000056, 0x00100032, 0x00000000, 0x00100086, 0x00000000, 0x05000036,
81
0x00102032, 0x00000000, 0x00100046, 0x00000000, 0x0f000032, 0x00102032, 0x00000001, 0x00100046, 0x00000000,
82
0x00004002, 0x40000000, 0xc0000000, 0x00000000, 0x00000000, 0x00004002, 0xbf800000, 0x3f800000, 0x00000000,
83
0x00000000, 0x08000036, 0x001020c2, 0x00000001, 0x00004002, 0x00000000, 0x00000000, 0x00000000, 0x3f800000,
84
0x0100003e, 0x54415453, 0x00000094, 0x00000007, 0x00000001, 0x00000000, 0x00000003, 0x00000001, 0x00000000,
85
0x00000001, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
86
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000001, 0x00000000, 0x00000000,
87
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
88
0x00000000, 0x00000000, 0x00000000, 0x00000000,
89
};
90
91
// Precompiled shader blob stored as 32-bit words. The first word (0x43425844) is the ASCII tag "DXBC" in
// little-endian order, and the blob embeds the names samp0_ss, samp0, TEXCOORD and SV_Target.
// NOTE(review): the name suggests this is the pixel shader used for mipmap generation blits - confirm at the
// use site. Do not edit by hand; regenerate from the shader source instead.
static constexpr const u32 s_mipmap_blit_ps[] = {
92
0x43425844, 0x25500f77, 0x71f24271, 0x5f83f8b8, 0x3f405943, 0x00000001, 0x0000026c, 0x00000005, 0x00000034,
93
0x000000f0, 0x00000124, 0x00000158, 0x000001d0, 0x46454452, 0x000000b4, 0x00000000, 0x00000000, 0x00000002,
94
0x0000003c, 0xffff0500, 0x00008100, 0x0000008b, 0x31314452, 0x0000003c, 0x00000018, 0x00000020, 0x00000028,
95
0x00000024, 0x0000000c, 0x00000000, 0x0000007c, 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
96
0x00000001, 0x00000001, 0x00000085, 0x00000002, 0x00000005, 0x00000004, 0xffffffff, 0x00000000, 0x00000001,
97
0x0000000d, 0x706d6173, 0x73735f30, 0x6d617300, 0x4d003070, 0x6f726369, 0x74666f73, 0x29522820, 0x534c4820,
98
0x6853204c, 0x72656461, 0x6d6f4320, 0x656c6970, 0x30312072, 0xab00312e, 0x4e475349, 0x0000002c, 0x00000001,
99
0x00000008, 0x00000020, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000303, 0x43584554, 0x44524f4f,
100
0xababab00, 0x4e47534f, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000000, 0x00000003,
101
0x00000000, 0x0000000f, 0x545f5653, 0x65677261, 0xabab0074, 0x58454853, 0x00000070, 0x00000050, 0x0000001c,
102
0x0100086a, 0x0300005a, 0x00106000, 0x00000000, 0x04001858, 0x00107000, 0x00000000, 0x00005555, 0x03001062,
103
0x00101032, 0x00000000, 0x03000065, 0x001020f2, 0x00000000, 0x8b000045, 0x800000c2, 0x00155543, 0x001020f2,
104
0x00000000, 0x00101046, 0x00000000, 0x00107e46, 0x00000000, 0x00106000, 0x00000000, 0x0100003e, 0x54415453,
105
0x00000094, 0x00000002, 0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000001,
106
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x00000000,
107
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
108
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
109
0x00000000, 0x00000000};
110
111
// Sets up backend identification only; no D3D12 objects are created until CreateDeviceAndMainSwapChain().
D3D12Device::D3D12Device()
{
  // Advertised up front so the caller can pass a mode to CreateDeviceAndSwapChain().
  m_features.exclusive_fullscreen = true;
  m_render_api = RenderAPI::D3D12;

#ifdef ENABLE_GPU_OBJECT_NAMES
  // Start each device instance with no open debug scopes.
  s_debug_scope_depth = 0;
#endif
}
120
121
// The destructor performs no cleanup itself: DestroyDevice() must already have released the device and the
// pipeline cache blob, which is what these assertions check.
D3D12Device::~D3D12Device()
{
  Assert(!m_device);
  Assert(s_pipeline_cache_data.empty());
}
126
127
// Serializes `desc` and creates a root signature from the resulting blob.
// Returns a null ComPtr on failure; serialization reports through `error`, and a failed creation stores the
// HRESULT in `error`.
D3D12Device::ComPtr<ID3D12RootSignature> D3D12Device::CreateRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc,
                                                                          Error* error)
{
  ComPtr<ID3DBlob> serialized = D3DCommon::SerializeRootSignature(desc, error);
  if (!serialized)
    return {};

  ComPtr<ID3D12RootSignature> root_signature;
  const HRESULT hr = m_device->CreateRootSignature(0, serialized->GetBufferPointer(), serialized->GetBufferSize(),
                                                   IID_PPV_ARGS(root_signature.GetAddressOf()));
  if (SUCCEEDED(hr)) [[likely]]
    return root_signature;

  Error::SetHResult(error, "CreateRootSignature() failed: ", hr);
  return {};
}
145
146
bool D3D12Device::CreateDeviceAndMainSwapChain(std::string_view adapter, CreateFlags create_flags, const WindowInfo& wi,
147
GPUVSyncMode vsync_mode,
148
const ExclusiveFullscreenMode* exclusive_fullscreen_mode,
149
std::optional<bool> exclusive_fullscreen_control, Error* error)
150
{
151
std::unique_lock lock(s_instance_mutex);
152
153
m_dxgi_factory = D3DCommon::CreateFactory(m_debug_device, error);
154
if (!m_dxgi_factory)
155
return false;
156
157
m_adapter = D3DCommon::GetAdapterByName(m_dxgi_factory.Get(), adapter);
158
159
HRESULT hr = S_OK;
160
161
// Enabling the debug layer will fail if the Graphics Tools feature is not installed.
162
if (m_debug_device)
163
{
164
ComPtr<ID3D12Debug> debug12;
165
if (D3DCommon::GetD3D12DebugInterface(&debug12, nullptr))
166
{
167
INFO_LOG("Enabling debug layer.");
168
debug12->EnableDebugLayer();
169
170
if (HasCreateFlag(create_flags, GPUDevice::CreateFlags::EnableGPUValidation))
171
{
172
ComPtr<ID3D12Debug1> debug12_1;
173
if (SUCCEEDED(debug12.As(&debug12_1)))
174
{
175
INFO_LOG("Enabling GPU-based validation.");
176
debug12_1->SetEnableGPUBasedValidation(true);
177
}
178
else
179
{
180
ERROR_LOG("GPU-based validation requested but not available.");
181
}
182
}
183
}
184
else
185
{
186
ERROR_LOG("Debug layer requested but not available.");
187
m_debug_device = false;
188
}
189
}
190
191
// Create the actual device.
192
D3D_FEATURE_LEVEL feature_level = D3D_FEATURE_LEVEL_1_0_CORE;
193
for (D3D_FEATURE_LEVEL try_feature_level : {D3D_FEATURE_LEVEL_12_0, D3D_FEATURE_LEVEL_11_0})
194
{
195
if (D3DCommon::CreateD3D12Device(m_adapter.Get(), try_feature_level, &m_device, error))
196
{
197
feature_level = try_feature_level;
198
break;
199
}
200
}
201
if (!m_device)
202
return false;
203
204
if (!m_adapter)
205
{
206
const LUID luid(m_device->GetAdapterLuid());
207
if (FAILED(m_dxgi_factory->EnumAdapterByLuid(luid, IID_PPV_ARGS(m_adapter.GetAddressOf()))))
208
ERROR_LOG("Failed to get lookup adapter by device LUID");
209
}
210
211
if (m_debug_device)
212
{
213
ComPtr<ID3D12InfoQueue> info_queue;
214
if (SUCCEEDED(m_device.As(&info_queue)))
215
{
216
if (IsDebuggerPresent())
217
{
218
info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE);
219
info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE);
220
}
221
222
D3D12_INFO_QUEUE_FILTER filter = {};
223
std::array<D3D12_MESSAGE_ID, 6> id_list{
224
D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE,
225
D3D12_MESSAGE_ID_CLEARDEPTHSTENCILVIEW_MISMATCHINGCLEARVALUE,
226
D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_RENDERTARGETVIEW_NOT_SET,
227
D3D12_MESSAGE_ID_CREATEINPUTLAYOUT_TYPE_MISMATCH,
228
D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE,
229
D3D12_MESSAGE_ID_LOADPIPELINE_NAMENOTFOUND,
230
};
231
filter.DenyList.NumIDs = static_cast<UINT>(id_list.size());
232
filter.DenyList.pIDList = id_list.data();
233
info_queue->PushStorageFilter(&filter);
234
}
235
}
236
237
GPUDriverType driver_type = GPUDriverType::Unknown;
238
if (std::string adapter_name = D3DCommon::GetAdapterName(m_adapter.Get(), &driver_type); adapter_name.empty())
239
INFO_LOG("D3D Adapter: {}", adapter_name);
240
SetDriverType(driver_type);
241
242
const D3D12_COMMAND_QUEUE_DESC queue_desc = {D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL,
243
D3D12_COMMAND_QUEUE_FLAG_NONE, 0u};
244
hr = m_device->CreateCommandQueue(&queue_desc, IID_PPV_ARGS(&m_command_queue));
245
if (FAILED(hr))
246
{
247
Error::SetHResult(error, "Failed to create command queue: ", hr);
248
return false;
249
}
250
251
D3D12MA::ALLOCATOR_DESC allocatorDesc = {};
252
allocatorDesc.pDevice = m_device.Get();
253
allocatorDesc.pAdapter = m_adapter.Get();
254
allocatorDesc.Flags =
255
D3D12MA::ALLOCATOR_FLAG_SINGLETHREADED |
256
D3D12MA::ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED /* | D3D12MA::ALLOCATOR_FLAG_ALWAYS_COMMITTED*/;
257
258
hr = D3D12MA::CreateAllocator(&allocatorDesc, m_allocator.GetAddressOf());
259
if (FAILED(hr))
260
{
261
Error::SetHResult(error, "D3D12MA::CreateAllocator() failed: ", hr);
262
return false;
263
}
264
265
hr = m_device->CreateFence(m_completed_fence_value, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_fence));
266
if (FAILED(hr))
267
{
268
Error::SetHResult(error, "Failed to create fence: ", hr);
269
return false;
270
}
271
272
m_fence_event = CreateEvent(nullptr, FALSE, FALSE, nullptr);
273
if (m_fence_event == NULL)
274
{
275
Error::SetWin32(error, "Failed to create fence event: ", GetLastError());
276
return false;
277
}
278
279
SetFeatures(feature_level, create_flags);
280
281
if (!CreateCommandLists(error) || !CreateDescriptorHeaps(error))
282
return false;
283
284
if (!wi.IsSurfaceless())
285
{
286
m_main_swap_chain = CreateSwapChain(wi, vsync_mode, exclusive_fullscreen_mode, exclusive_fullscreen_control, error);
287
if (!m_main_swap_chain)
288
return false;
289
}
290
291
if (!CreateRootSignatures(error) || !CreateBuffers(error))
292
return false;
293
294
CreateTimestampQuery();
295
return true;
296
}
297
298
void D3D12Device::DestroyDevice()
299
{
300
std::unique_lock lock(s_instance_mutex);
301
302
// Toss command list if we're recording...
303
if (InRenderPass())
304
EndRenderPass();
305
306
WaitForAllFences();
307
308
m_main_swap_chain.reset();
309
310
DestroyDeferredObjects(m_current_fence_value);
311
DestroyTimestampQuery();
312
DestroyBuffers();
313
DestroyDescriptorHeaps();
314
DestroyRootSignatures();
315
DestroyCommandLists();
316
317
m_pipeline_library.Reset();
318
s_pipeline_cache_data.deallocate();
319
m_fence.Reset();
320
if (m_fence_event != NULL)
321
{
322
CloseHandle(m_fence_event);
323
m_fence_event = NULL;
324
}
325
326
m_allocator.Reset();
327
m_command_queue.Reset();
328
m_device.Reset();
329
m_adapter.Reset();
330
m_dxgi_factory.Reset();
331
}
332
333
// Fills `hdr` with the values that identify the current device: the adapter LUID and the render API version.
// The `unused` field is zeroed so that whole headers can be compared bytewise.
void D3D12Device::GetPipelineCacheHeader(PIPELINE_CACHE_HEADER* hdr)
{
  hdr->unused = 0;
  hdr->render_api_version = m_render_api_version;

  const LUID luid = m_device->GetAdapterLuid();
  std::memcpy(&hdr->adapter_luid, &luid, sizeof(hdr->adapter_luid));
}
340
341
// Creates the pipeline library from a previously serialized cache blob.
//
// data: PIPELINE_CACHE_HEADER followed by the serialized library. Ownership is taken on success.
// error: receives the reason for rejection or the failing HRESULT.
//
// Returns false if the blob is shorter than a header, the header does not match the current device, or
// CreatePipelineLibrary() fails.
bool D3D12Device::ReadPipelineCache(DynamicHeapArray<u8> data, Error* error)
{
  constexpr size_t header_size = sizeof(PIPELINE_CACHE_HEADER);

  PIPELINE_CACHE_HEADER expected_header;
  GetPipelineCacheHeader(&expected_header);
  if (data.size() < header_size || std::memcmp(data.data(), &expected_header, header_size) != 0)
  {
    Error::SetStringView(error, "Pipeline cache header does not match current device.");
    return false;
  }

  // Use pointer arithmetic rather than operator[]: a blob holding only a header is accepted by the check
  // above, and indexing element `header_size` would then refer one past the end of the array.
  const u8* const library_data = data.data() + header_size;
  const size_t library_size = data.size() - header_size;

  const HRESULT hr = m_device->CreatePipelineLibrary(library_data, library_size,
                                                     IID_PPV_ARGS(m_pipeline_library.ReleaseAndGetAddressOf()));
  if (FAILED(hr))
  {
    Error::SetHResult(error, "CreatePipelineLibrary() failed: ", hr);
    return false;
  }

  // Have to keep the buffer around, DX doesn't take a copy.
  s_pipeline_cache_data = std::move(data);
  return true;
}
365
366
bool D3D12Device::CreatePipelineCache(const std::string& path, Error* error)
367
{
368
const HRESULT hr =
369
m_device->CreatePipelineLibrary(nullptr, 0, IID_PPV_ARGS(m_pipeline_library.ReleaseAndGetAddressOf()));
370
if (FAILED(hr))
371
{
372
Error::SetHResult(error, "CreatePipelineLibrary() failed: ", hr);
373
return false;
374
}
375
376
return true;
377
}
378
379
// Serializes the pipeline library into `data`, prefixed with a PIPELINE_CACHE_HEADER for the current device.
//
// data: resized to header + serialized size on success. Left untouched if the library reports a size of
//       zero, and deallocated if serialization fails.
// error: receives the failure reason.
//
// Returns false if there is no pipeline library or Serialize() fails. Returns true with `data` untouched if
// the library serializes to nothing.
bool D3D12Device::GetPipelineCacheData(DynamicHeapArray<u8>* data, Error* error)
{
  if (!m_pipeline_library)
  {
    // Populate the error so callers that report failures do not end up with an empty description.
    Error::SetStringView(error, "No pipeline library is loaded.");
    return false;
  }

  const size_t size = m_pipeline_library->GetSerializedSize();
  if (size == 0)
  {
    WARNING_LOG("Empty serialized pipeline state returned.");
    return true;
  }

  PIPELINE_CACHE_HEADER header;
  GetPipelineCacheHeader(&header);

  data->resize(sizeof(PIPELINE_CACHE_HEADER) + size);
  std::memcpy(data->data(), &header, sizeof(PIPELINE_CACHE_HEADER));

  // The library payload goes directly after the header.
  const HRESULT hr = m_pipeline_library->Serialize(data->data() + sizeof(PIPELINE_CACHE_HEADER), size);
  if (FAILED(hr))
  {
    Error::SetHResult(error, "Serialize() failed: ", hr);
    data->deallocate();
    return false;
  }

  return true;
}
407
408
// Creates, for each of the NUM_COMMAND_LISTS slots, two allocator/command-list pairs plus the per-frame
// descriptor and sampler allocators, then begins recording on slot 0.
// Returns false and fills `error` on the first failure.
bool D3D12Device::CreateCommandLists(Error* error)
{
  for (u32 list_index = 0; list_index < NUM_COMMAND_LISTS; list_index++)
  {
    CommandList& cmd = m_command_lists[list_index];

    for (u32 slot = 0; slot < 2; slot++)
    {
      auto& allocator = cmd.command_allocators[slot];
      auto& list = cmd.command_lists[slot];

      HRESULT hr =
        m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(allocator.GetAddressOf()));
      if (FAILED(hr))
      {
        Error::SetHResult(error, "CreateCommandAllocator() failed: ", hr);
        return false;
      }

      hr = m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, allocator.Get(), nullptr,
                                       IID_PPV_ARGS(list.GetAddressOf()));
      if (FAILED(hr))
      {
        Error::SetHResult(error, "CreateCommandList() failed: ", hr);
        return false;
      }

      // Close the command lists, since the first thing we do is reset them.
      hr = list->Close();
      if (FAILED(hr))
      {
        Error::SetHResult(error, "Close() for new command list failed: ", hr);
        return false;
      }
    }

    const bool descriptors_ok = cmd.descriptor_allocator.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
                                                                MAX_DESCRIPTORS_PER_FRAME, error);
    if (!descriptors_ok)
    {
      Error::AddPrefix(error, "Failed to create per frame descriptor allocator: ");
      return false;
    }

    const bool samplers_ok = cmd.sampler_allocator.Create(m_device.Get(), MAX_SAMPLERS_PER_FRAME, error);
    if (!samplers_ok)
    {
      Error::AddPrefix(error, "Failed to create per frame sampler allocator: ");
      return false;
    }
  }

  BeginCommandList(0);
  return true;
}
459
460
// Makes command list slot `index` current and starts recording on its main list (index 1 of the pair).
// Advances the fence value, waits for the slot's previous submission, resets its allocators, reads back the
// previous timestamp pair if one was recorded, and binds the slot's descriptor heaps.
void D3D12Device::BeginCommandList(u32 index)
{
  m_current_command_list = index;
  m_current_fence_value++;

  // We may have to wait if this command list hasn't finished on the GPU.
  CommandList& cmd = m_command_lists[index];
  WaitForFence(cmd.fence_counter);
  cmd.fence_counter = m_current_fence_value;
  cmd.init_list_used = false;

  // Begin command list.
  ID3D12CommandAllocator* const main_allocator = cmd.command_allocators[1].Get();
  main_allocator->Reset();
  cmd.command_lists[1]->Reset(main_allocator, nullptr);
  cmd.descriptor_allocator.Reset();
  if (cmd.sampler_allocator.ShouldReset())
    cmd.sampler_allocator.Reset();

  if (cmd.has_timestamp_query)
  {
    // readback timestamp from the last time this cmdlist was used.
    // we don't need to worry about disjoint in dx12, the frequency is reliable within a single cmdlist.
    const u32 offset = (m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST));
    const D3D12_RANGE read_range = {offset, offset + (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST)};
    void* mapped;
    const HRESULT hr = m_timestamp_query_buffer->Map(0, &read_range, &mapped);
    if (FAILED(hr))
    {
      WARNING_LOG("Map() for timestamp query failed: {:08X}", static_cast<unsigned>(hr));
    }
    else
    {
      // This slot's begin/end timestamp pair.
      u64 timestamps[2];
      std::memcpy(timestamps, static_cast<const u8*>(mapped) + offset, sizeof(timestamps));

      const double elapsed = static_cast<double>(timestamps[1] - timestamps[0]) / m_timestamp_frequency;
      m_accumulated_gpu_time += static_cast<float>(elapsed);

      // Empty written range: nothing was written by the CPU.
      const D3D12_RANGE write_range = {};
      m_timestamp_query_buffer->Unmap(0, &write_range);
    }
  }

  cmd.has_timestamp_query = m_gpu_timing_enabled;
  if (m_gpu_timing_enabled)
  {
    // First timestamp of the pair; SubmitCommandList() writes the second.
    cmd.command_lists[1]->EndQuery(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP,
                                   index * NUM_TIMESTAMP_QUERIES_PER_CMDLIST);
  }

  ID3D12DescriptorHeap* heaps[2] = {cmd.descriptor_allocator.GetDescriptorHeap(),
                                    cmd.sampler_allocator.GetDescriptorHeap()};
  cmd.command_lists[1]->SetDescriptorHeaps(static_cast<UINT>(std::size(heaps)), heaps);

  m_allocator->SetCurrentFrameIndex(static_cast<UINT>(m_current_fence_value));
  InvalidateCachedState();
}
516
517
void D3D12Device::DestroyCommandLists()
518
{
519
for (CommandList& resources : m_command_lists)
520
{
521
resources.descriptor_allocator.Destroy();
522
resources.sampler_allocator.Destroy();
523
for (u32 i = 0; i < 2; i++)
524
{
525
resources.command_lists[i].Reset();
526
resources.command_allocators[i].Reset();
527
}
528
}
529
}
530
531
// Creates the persistent CBV/SRV/UAV, RTV, DSV and sampler heap managers, allocates null SRV/UAV descriptors
// for unbound slots, and points every sampler slot at the default (nearest) sampler.
// Returns false and fills `error` on the first failure.
bool D3D12Device::CreateDescriptorHeaps(Error* error)
{
  ID3D12Device* const device = m_device.Get();

  if (!m_descriptor_heap_manager.Create(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, MAX_PERSISTENT_DESCRIPTORS,
                                        false, error))
  {
    return false;
  }
  if (!m_rtv_heap_manager.Create(device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, MAX_PERSISTENT_RTVS, false, error))
    return false;
  if (!m_dsv_heap_manager.Create(device, D3D12_DESCRIPTOR_HEAP_TYPE_DSV, MAX_PERSISTENT_DSVS, false, error))
    return false;
  if (!m_sampler_heap_manager.Create(device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, MAX_PERSISTENT_SAMPLERS, false,
                                     error))
  {
    return false;
  }

  // Allocate null SRV descriptor for unbound textures.
  static constexpr D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc = {
    DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_SRV_DIMENSION_TEXTURE2D, D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, {}};
  if (!m_descriptor_heap_manager.Allocate(&m_null_srv_descriptor))
  {
    Error::SetStringView(error, "Failed to allocate null SRV descriptor");
    return false;
  }
  m_device->CreateShaderResourceView(nullptr, &null_srv_desc, m_null_srv_descriptor.cpu_handle);

  // Same for UAVs.
  static constexpr D3D12_UNORDERED_ACCESS_VIEW_DESC null_uav_desc = {
    DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_UAV_DIMENSION_TEXTURE2D, {}};
  if (!m_descriptor_heap_manager.Allocate(&m_null_uav_descriptor))
  {
    Error::SetStringView(error, "Failed to allocate null UAV descriptor");
    return false;
  }
  m_device->CreateUnorderedAccessView(nullptr, nullptr, &null_uav_desc, m_null_uav_descriptor.cpu_handle);

  // Same for samplers.
  GPUSampler* const nearest_sampler = GetSampler(GPUSampler::GetNearestConfig(), error);
  if (!nearest_sampler) [[unlikely]]
    return false;

  for (u32 slot = 0; slot < MAX_TEXTURE_SAMPLERS; slot++)
    m_current_samplers[slot] = static_cast<D3D12Sampler*>(nearest_sampler)->GetDescriptor();

  return true;
}
571
572
void D3D12Device::DestroyDescriptorHeaps()
573
{
574
if (m_null_uav_descriptor)
575
m_descriptor_heap_manager.Free(&m_null_uav_descriptor);
576
if (m_null_srv_descriptor)
577
m_descriptor_heap_manager.Free(&m_null_srv_descriptor);
578
m_sampler_heap_manager.Destroy();
579
m_dsv_heap_manager.Destroy();
580
m_rtv_heap_manager.Destroy();
581
m_descriptor_heap_manager.Destroy();
582
}
583
584
// Returns the current slot's init command list (index 0 of the pair). SubmitCommandList() executes it ahead
// of the main list. It is reset lazily on first use after each BeginCommandList().
ID3D12GraphicsCommandList4* D3D12Device::GetInitCommandList()
{
  CommandList& cmd = m_command_lists[m_current_command_list];
  if (cmd.init_list_used)
    return cmd.command_lists[0].Get();

  HRESULT hr = cmd.command_allocators[0]->Reset();
  AssertMsg(SUCCEEDED(hr), "Reset init command allocator failed");

  hr = cmd.command_lists[0]->Reset(cmd.command_allocators[0].Get(), nullptr);
  AssertMsg(SUCCEEDED(hr), "Reset init command list failed");

  // Tells SubmitCommandList() that the init list must be closed and executed.
  cmd.init_list_used = true;
  return cmd.command_lists[0].Get();
}
599
600
void D3D12Device::SubmitCommandList(bool wait_for_completion)
601
{
602
DebugAssert(!InRenderPass());
603
if (m_device_was_lost) [[unlikely]]
604
return;
605
606
CommandList& res = m_command_lists[m_current_command_list];
607
HRESULT hr;
608
609
if (res.has_timestamp_query)
610
{
611
// write the timestamp back at the end of the cmdlist
612
res.command_lists[1]->EndQuery(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP,
613
(m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST) + 1);
614
res.command_lists[1]->ResolveQueryData(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP,
615
m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST,
616
NUM_TIMESTAMP_QUERIES_PER_CMDLIST, m_timestamp_query_buffer.Get(),
617
m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST));
618
}
619
620
// TODO: error handling
621
if (res.init_list_used)
622
{
623
hr = res.command_lists[0]->Close();
624
if (FAILED(hr)) [[unlikely]]
625
{
626
ERROR_LOG("Closing init command list failed with HRESULT {:08X}", static_cast<unsigned>(hr));
627
m_device_was_lost = true;
628
return;
629
}
630
}
631
632
// Close and queue command list.
633
hr = res.command_lists[1]->Close();
634
if (FAILED(hr)) [[unlikely]]
635
{
636
ERROR_LOG("Closing main command list failed with HRESULT {:08X}", static_cast<unsigned>(hr));
637
m_device_was_lost = true;
638
return;
639
}
640
641
if (res.init_list_used)
642
{
643
const std::array<ID3D12CommandList*, 2> execute_lists{res.command_lists[0].Get(), res.command_lists[1].Get()};
644
m_command_queue->ExecuteCommandLists(static_cast<UINT>(execute_lists.size()), execute_lists.data());
645
}
646
else
647
{
648
const std::array<ID3D12CommandList*, 1> execute_lists{res.command_lists[1].Get()};
649
m_command_queue->ExecuteCommandLists(static_cast<UINT>(execute_lists.size()), execute_lists.data());
650
}
651
652
// Update fence when GPU has completed.
653
hr = m_command_queue->Signal(m_fence.Get(), res.fence_counter);
654
if (FAILED(hr))
655
{
656
ERROR_LOG("Signal command queue fence failed with HRESULT {:08X}", static_cast<unsigned>(hr));
657
m_device_was_lost = true;
658
return;
659
}
660
661
// Wait before if the next command buffer has not already been waited for.
662
// Waiting afterwards ends up slightly faster because we can do the resets and such before blocking on the
663
// fence wait, but only if the next buffer is definitely not in use. Otherwise we'll do 2 fence waits.
664
const u32 next_command_list_index = (m_current_command_list + 1) % NUM_COMMAND_LISTS;
665
if (wait_for_completion)
666
{
667
const u64 fence_counter = res.fence_counter;
668
if (m_completed_fence_value >= m_command_lists[next_command_list_index].fence_counter)
669
{
670
BeginCommandList(next_command_list_index);
671
WaitForFence(fence_counter);
672
}
673
else
674
{
675
WaitForFence(fence_counter);
676
BeginCommandList(next_command_list_index);
677
}
678
}
679
else
680
{
681
BeginCommandList(next_command_list_index);
682
}
683
}
684
685
void D3D12Device::SubmitCommandList(bool wait_for_completion, const std::string_view reason)
686
{
687
WARNING_LOG("Executing command buffer due to '{}'", reason);
688
SubmitCommandList(wait_for_completion);
689
}
690
691
void D3D12Device::SubmitCommandListAndRestartRenderPass(const std::string_view reason)
692
{
693
if (InRenderPass())
694
EndRenderPass();
695
696
D3D12Pipeline* pl = m_current_pipeline;
697
SubmitCommandList(false, reason);
698
699
SetPipeline(pl);
700
BeginRenderPass();
701
}
702
703
// Blocks until the GPU fence reaches `fence`, then releases deferred objects up to the completed value.
// Returns immediately if the device is lost or the cached completed value already covers `fence`.
void D3D12Device::WaitForFence(u64 fence)
{
  if (m_device_was_lost) [[unlikely]]
    return;

  if (m_completed_fence_value >= fence)
    return;

  // Try non-blocking check.
  m_completed_fence_value = m_fence->GetCompletedValue();
  const bool still_pending = (m_completed_fence_value < fence);
  if (still_pending)
  {
    // Fall back to event.
    const HRESULT hr = m_fence->SetEventOnCompletion(fence, m_fence_event);
    AssertMsg(SUCCEEDED(hr), "Set fence event on completion");
    WaitForSingleObject(m_fence_event, INFINITE);

    // Refresh the cached value; the fence may have advanced past `fence`.
    m_completed_fence_value = m_fence->GetCompletedValue();
  }

  // Release resources for as many command lists which have completed.
  DestroyDeferredObjects(m_completed_fence_value);
}
725
726
void D3D12Device::WaitForAllFences()
727
{
728
u32 index = (m_current_command_list + 1) % NUM_COMMAND_LISTS;
729
for (u32 i = 0; i < (NUM_COMMAND_LISTS - 1); i++)
730
{
731
WaitForFence(m_command_lists[index].fence_counter);
732
index = (index + 1) % NUM_COMMAND_LISTS;
733
}
734
}
735
736
void D3D12Device::FlushCommands()
737
{
738
if (InRenderPass())
739
EndRenderPass();
740
741
SubmitCommandList(false);
742
TrimTexturePool();
743
}
744
745
void D3D12Device::WaitForGPUIdle()
746
{
747
if (InRenderPass())
748
EndRenderPass();
749
750
SubmitCommandList(true);
751
}
752
753
bool D3D12Device::CreateTimestampQuery()
754
{
755
constexpr u32 QUERY_COUNT = NUM_TIMESTAMP_QUERIES_PER_CMDLIST * NUM_COMMAND_LISTS;
756
constexpr u32 BUFFER_SIZE = sizeof(u64) * QUERY_COUNT;
757
758
const D3D12_QUERY_HEAP_DESC desc = {D3D12_QUERY_HEAP_TYPE_TIMESTAMP, QUERY_COUNT, 0u};
759
HRESULT hr = m_device->CreateQueryHeap(&desc, IID_PPV_ARGS(m_timestamp_query_heap.GetAddressOf()));
760
if (FAILED(hr))
761
{
762
ERROR_LOG("CreateQueryHeap() for timestamp failed with {:08X}", static_cast<unsigned>(hr));
763
m_features.gpu_timing = false;
764
return false;
765
}
766
767
const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_READBACK,
768
D3D12_HEAP_FLAG_NONE, nullptr, nullptr};
769
const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER,
770
0,
771
BUFFER_SIZE,
772
1,
773
1,
774
1,
775
DXGI_FORMAT_UNKNOWN,
776
{1, 0},
777
D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
778
D3D12_RESOURCE_FLAG_NONE};
779
hr = m_allocator->CreateResource(&allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
780
m_timestamp_query_allocation.GetAddressOf(),
781
IID_PPV_ARGS(m_timestamp_query_buffer.GetAddressOf()));
782
if (FAILED(hr))
783
{
784
ERROR_LOG("CreateResource() for timestamp failed with {:08X}", static_cast<unsigned>(hr));
785
m_features.gpu_timing = false;
786
return false;
787
}
788
789
u64 frequency;
790
hr = m_command_queue->GetTimestampFrequency(&frequency);
791
if (FAILED(hr))
792
{
793
ERROR_LOG("GetTimestampFrequency() failed: {:08X}", static_cast<unsigned>(hr));
794
m_features.gpu_timing = false;
795
return false;
796
}
797
798
m_timestamp_frequency = static_cast<double>(frequency) / 1000.0;
799
return true;
800
}
801
802
void D3D12Device::DestroyTimestampQuery()
803
{
804
m_timestamp_query_buffer.Reset();
805
m_timestamp_query_allocation.Reset();
806
m_timestamp_query_heap.Reset();
807
}
808
809
float D3D12Device::GetAndResetAccumulatedGPUTime()
810
{
811
const float time = m_accumulated_gpu_time;
812
m_accumulated_gpu_time = 0.0f;
813
return time;
814
}
815
816
bool D3D12Device::SetGPUTimingEnabled(bool enabled)
817
{
818
m_gpu_timing_enabled = enabled && m_features.gpu_timing;
819
return (enabled == m_gpu_timing_enabled);
820
}
821
822
// Queues `resource` for release by DestroyDeferredObjects() once the current fence value has completed.
// Ownership of the reference moves into the cleanup queue.
void D3D12Device::DeferObjectDestruction(ComPtr<ID3D12Object> resource)
{
  DebugAssert(resource);

  // Detach so the queue holds the reference; there is no allocation to release alongside it.
  ID3D12Object* const object = resource.Detach();
  m_cleanup_resources.emplace_back(GetCurrentFenceValue(),
                                   std::pair<D3D12MA::Allocation*, ID3D12Object*>(nullptr, object));
}
828
829
// Queues a resource and its allocation for release by DestroyDeferredObjects() once the current fence
// value has completed. Ownership of both references moves into the cleanup queue.
void D3D12Device::DeferResourceDestruction(ComPtr<D3D12MA::Allocation> allocation, ComPtr<ID3D12Resource> resource)
{
  DebugAssert(allocation && resource);

  D3D12MA::Allocation* const raw_allocation = allocation.Detach();
  ID3D12Object* const raw_resource = resource.Detach();
  m_cleanup_resources.emplace_back(GetCurrentFenceValue(),
                                   std::pair<D3D12MA::Allocation*, ID3D12Object*>(raw_allocation, raw_resource));
}
835
836
// Queues `descriptor` to be returned to `heap` by DestroyDeferredObjects() once the current fence value has
// completed. The caller's handle is cleared immediately.
void D3D12Device::DeferDescriptorDestruction(D3D12DescriptorHeapManager& heap, D3D12DescriptorHandle* descriptor)
{
  DebugAssert(descriptor->index != D3D12DescriptorHandle::INVALID_INDEX);

  // The handle is copied into the queue before the caller's copy is cleared.
  using PendingDescriptor = std::pair<D3D12DescriptorHeapManager*, D3D12DescriptorHandle>;
  m_cleanup_descriptors.emplace_back(GetCurrentFenceValue(), PendingDescriptor(&heap, *descriptor));
  descriptor->Clear();
}
843
844
// Frees every queued descriptor and releases every queued resource whose fence value is <= `fence_value`.
// Each queue is drained from the front and draining stops at the first entry with a later fence value.
void D3D12Device::DestroyDeferredObjects(u64 fence_value)
{
  while (!m_cleanup_descriptors.empty() && m_cleanup_descriptors.front().first <= fence_value)
  {
    auto& [heap, handle] = m_cleanup_descriptors.front().second;
    heap->Free(handle.index);
    m_cleanup_descriptors.pop_front();
  }

  while (!m_cleanup_resources.empty() && m_cleanup_resources.front().first <= fence_value)
  {
    auto& [allocation, object] = m_cleanup_resources.front().second;

    // Release the object first, then its allocation (absent for plain objects).
    object->Release();
    if (allocation)
      allocation->Release();
    m_cleanup_resources.pop_front();
  }
}
868
869
// Stores the window info and vsync mode in the base class. If a fullscreen mode is supplied, it is resolved
// immediately; the result of that lookup is not checked here.
D3D12SwapChain::D3D12SwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode,
                               const GPUDevice::ExclusiveFullscreenMode* fullscreen_mode)
  : GPUSwapChain(wi, vsync_mode)
{
  if (!fullscreen_mode)
    return;

  InitializeExclusiveFullscreenMode(fullscreen_mode);
}
876
877
// Releases the render target views before the swap chain itself.
D3D12SwapChain::~D3D12SwapChain()
{
  DestroyRTVs();

  DestroySwapChain();
}
882
883
// Resolves the requested exclusive fullscreen `mode` via D3DCommon, using the device's adapter, the
// window's client rectangle and the swap chain format. Stores the result in m_fullscreen_mode and the
// output in m_fullscreen_output.
// Returns true if a mode description was obtained.
bool D3D12SwapChain::InitializeExclusiveFullscreenMode(const GPUDevice::ExclusiveFullscreenMode* mode)
{
  const HWND hwnd = reinterpret_cast<HWND>(m_window_info.window_handle);
  RECT client_rect{};
  GetClientRect(hwnd, &client_rect);

  const D3DCommon::DXGIFormatMapping& format_mapping = D3DCommon::GetFormatMapping(s_swap_chain_format);

  m_fullscreen_mode = D3DCommon::GetRequestedExclusiveFullscreenModeDesc(
    D3D12Device::GetInstance().GetAdapter(), client_rect, mode, format_mapping.resource_format,
    m_fullscreen_output.GetAddressOf());
  return m_fullscreen_mode.has_value();
}
895
896
// Returns the number of swap chain buffers to request for the given vsync mode.
u32 D3D12SwapChain::GetNewBufferCount(GPUVSyncMode vsync_mode)
{
  // With triple buffering, we need three.
  if (vsync_mode == GPUVSyncMode::Mailbox)
    return 3;

  // With vsync off, we only need two buffers. Same for blocking vsync.
  return 2;
}
902
903
// Creates the DXGI swap chain for the window in m_window_info.
// When an exclusive fullscreen mode is selected, a fullscreen swap chain is attempted first; if that fails the
// fullscreen selection is dropped and a windowed swap chain is created instead.
// Returns false (and fills `error`) only when the windowed swap chain cannot be created.
bool D3D12SwapChain::CreateSwapChain(D3D12Device& dev, Error* error)
{
  const D3DCommon::DXGIFormatMapping& format_mapping = D3DCommon::GetFormatMapping(s_swap_chain_format);

  const HWND hwnd = reinterpret_cast<HWND>(m_window_info.window_handle);
  RECT window_rect{};
  GetClientRect(hwnd, &window_rect);

  // Using mailbox-style no-allow-tearing causes tearing in exclusive fullscreen.
  if (m_vsync_mode == GPUVSyncMode::Mailbox && IsExclusiveFullscreen())
  {
    WARNING_LOG("Using FIFO instead of Mailbox vsync due to exclusive fullscreen.");
    m_vsync_mode = GPUVSyncMode::FIFO;
  }

  // Base description, sized to the client area. The fullscreen path works on a modified copy.
  DXGI_SWAP_CHAIN_DESC1 windowed_desc = {};
  windowed_desc.Width = static_cast<u32>(window_rect.right - window_rect.left);
  windowed_desc.Height = static_cast<u32>(window_rect.bottom - window_rect.top);
  windowed_desc.Format = format_mapping.resource_format;
  windowed_desc.SampleDesc.Count = 1;
  windowed_desc.BufferCount = GetNewBufferCount(m_vsync_mode);
  windowed_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
  windowed_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;

  HRESULT hr = S_OK;

  if (IsExclusiveFullscreen())
  {
    DXGI_SWAP_CHAIN_DESC1 fullscreen_sc_desc = windowed_desc;
    fullscreen_sc_desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH;
    fullscreen_sc_desc.Width = m_fullscreen_mode->Width;
    fullscreen_sc_desc.Height = m_fullscreen_mode->Height;

    DXGI_SWAP_CHAIN_FULLSCREEN_DESC fullscreen_desc = {};
    fullscreen_desc.RefreshRate = m_fullscreen_mode->RefreshRate;
    fullscreen_desc.ScanlineOrdering = m_fullscreen_mode->ScanlineOrdering;
    fullscreen_desc.Scaling = m_fullscreen_mode->Scaling;
    fullscreen_desc.Windowed = FALSE;

    VERBOSE_LOG("Creating a {}x{} exclusive fullscreen swap chain", fullscreen_sc_desc.Width,
                fullscreen_sc_desc.Height);
    hr = dev.GetDXGIFactory()->CreateSwapChainForHwnd(dev.GetCommandQueue(), hwnd, &fullscreen_sc_desc,
                                                      &fullscreen_desc, m_fullscreen_output.Get(),
                                                      m_swap_chain.ReleaseAndGetAddressOf());
    if (FAILED(hr))
    {
      // Forget the fullscreen selection so that the windowed path below runs.
      WARNING_LOG("Failed to create fullscreen swap chain, trying windowed.");
      m_fullscreen_output.Reset();
      m_fullscreen_mode.reset();
    }
  }

  if (!IsExclusiveFullscreen())
  {
    VERBOSE_LOG("Creating a {}x{} windowed swap chain", windowed_desc.Width, windowed_desc.Height);

    m_using_allow_tearing = D3DCommon::SupportsAllowTearing(dev.GetDXGIFactory());
    if (m_using_allow_tearing)
      windowed_desc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;

    hr = dev.GetDXGIFactory()->CreateSwapChainForHwnd(dev.GetCommandQueue(), hwnd, &windowed_desc, nullptr, nullptr,
                                                      m_swap_chain.ReleaseAndGetAddressOf());
    if (FAILED(hr))
    {
      Error::SetHResult(error, "CreateSwapChainForHwnd() failed: ", hr);
      return false;
    }
  }

  // Failure here is not fatal; it is only logged.
  hr = dev.GetDXGIFactory()->MakeWindowAssociation(hwnd, DXGI_MWA_NO_WINDOW_CHANGES);
  if (FAILED(hr))
    WARNING_LOG("MakeWindowAssociation() to disable ALT+ENTER failed");

  return true;
}
974
975
// Fetches every swap chain backbuffer, creates a render target view for each, and records the resulting
// surface size/format (and, in fullscreen, the refresh rate) in m_window_info.
// On failure any RTVs created so far are destroyed, `error` is filled, and false is returned.
bool D3D12SwapChain::CreateRTV(D3D12Device& dev, Error* error)
{
  DXGI_SWAP_CHAIN_DESC swap_chain_desc;
  HRESULT hr = m_swap_chain->GetDesc(&swap_chain_desc);
  if (FAILED(hr))
  {
    Error::SetHResult(error, "GetDesc() for swap chain failed: ", hr);
    return false;
  }

  const D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = {swap_chain_desc.BufferDesc.Format, D3D12_RTV_DIMENSION_TEXTURE2D, {}};

  for (u32 i = 0; i < swap_chain_desc.BufferCount; i++)
  {
    ComPtr<ID3D12Resource> backbuffer;
    hr = m_swap_chain->GetBuffer(i, IID_PPV_ARGS(backbuffer.GetAddressOf()));
    if (FAILED(hr))
    {
      Error::SetHResult(error, "GetBuffer for RTV failed: ", hr);
      DestroyRTVs();
      return false;
    }

    D3D12::SetObjectName(backbuffer.Get(), TinyString::from_format("Swap Chain Buffer #{}", i));

    D3D12DescriptorHandle rtv;
    if (!dev.GetRTVHeapManager().Allocate(&rtv))
    {
      Error::SetStringView(error, "Failed to allocate RTV handle.");
      DestroyRTVs();
      return false;
    }

    dev.GetDevice()->CreateRenderTargetView(backbuffer.Get(), &rtv_desc, rtv);
    m_swap_chain_buffers.emplace_back(std::move(backbuffer), rtv);
  }

  m_window_info.surface_width = static_cast<u16>(swap_chain_desc.BufferDesc.Width);
  m_window_info.surface_height = static_cast<u16>(swap_chain_desc.BufferDesc.Height);
  m_window_info.surface_format = s_swap_chain_format;
  VERBOSE_LOG("Swap chain buffer size: {}x{}", m_window_info.surface_width, m_window_info.surface_height);

  // The refresh rate is only taken from the swap chain while it is in fullscreen state. The description
  // fetched at the top of this function is reused instead of querying it a second time.
  BOOL fullscreen = FALSE;
  if (SUCCEEDED(m_swap_chain->GetFullscreenState(&fullscreen, nullptr)) && fullscreen)
  {
    // DXGI permits a 0/0 rational, so never divide by a zero denominator; in that case the previously
    // stored refresh rate is left untouched.
    const DXGI_RATIONAL& refresh_rate = swap_chain_desc.BufferDesc.RefreshRate;
    if (refresh_rate.Denominator != 0)
    {
      m_window_info.surface_refresh_rate =
        static_cast<float>(refresh_rate.Numerator) / static_cast<float>(refresh_rate.Denominator);
    }
  }

  m_current_swap_chain_buffer = 0;
  return true;
}
1029
1030
void D3D12SwapChain::DestroyRTVs()
1031
{
1032
if (m_swap_chain_buffers.empty())
1033
return;
1034
1035
D3D12Device& dev = D3D12Device::GetInstance();
1036
1037
// Runtime gets cranky if we don't submit the current buffer...
1038
if (dev.InRenderPass())
1039
dev.EndRenderPass();
1040
dev.SubmitCommandList(true);
1041
1042
for (auto it = m_swap_chain_buffers.rbegin(); it != m_swap_chain_buffers.rend(); ++it)
1043
{
1044
dev.GetRTVHeapManager().Free(it->second.index);
1045
it->first.Reset();
1046
}
1047
m_swap_chain_buffers.clear();
1048
m_current_swap_chain_buffer = 0;
1049
}
1050
1051
void D3D12SwapChain::DestroySwapChain()
1052
{
1053
if (!m_swap_chain)
1054
return;
1055
1056
// switch out of fullscreen before destroying
1057
BOOL is_fullscreen;
1058
if (SUCCEEDED(m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr)) && is_fullscreen)
1059
m_swap_chain->SetFullscreenState(FALSE, nullptr);
1060
1061
m_swap_chain.Reset();
1062
}
1063
1064
// Switches the vsync mode. The swap chain and its RTVs are recreated only when the new mode needs a different
// buffer count; otherwise only m_vsync_mode is updated.
// Returns false (with `error` filled by the failing step) if recreation fails.
bool D3D12SwapChain::SetVSyncMode(GPUVSyncMode mode, Error* error)
{
  // Using mailbox-style no-allow-tearing causes tearing in exclusive fullscreen.
  if (IsExclusiveFullscreen() && mode == GPUVSyncMode::Mailbox)
  {
    WARNING_LOG("Using FIFO instead of Mailbox vsync due to exclusive fullscreen.");
    mode = GPUVSyncMode::FIFO;
  }

  if (mode == m_vsync_mode)
    return true;

  const bool buffer_count_changed = (GetNewBufferCount(m_vsync_mode) != GetNewBufferCount(mode));
  m_vsync_mode = mode;
  if (!buffer_count_changed)
    return true;

  // Buffer count change => needs recreation.
  DestroyRTVs();
  DestroySwapChain();

  D3D12Device& device = D3D12Device::GetInstance();
  if (!CreateSwapChain(device, error))
    return false;

  return CreateRTV(device, error);
}
1089
1090
bool D3D12SwapChain::IsExclusiveFullscreen() const
1091
{
1092
return m_fullscreen_mode.has_value();
1093
}
1094
1095
// Resizes the swap chain buffers when the requested size differs from the recorded surface size.
// new_width/new_height are only used for that comparison; the DXGI call itself passes zero for the buffer
// count and both dimensions, and DXGI_FORMAT_UNKNOWN for the format.
// A failing ResizeBuffers() is logged but not treated as fatal: RTV creation is attempted regardless and its
// result is what gets returned.
bool D3D12SwapChain::ResizeBuffers(u32 new_width, u32 new_height, Error* error)
{
  const bool size_unchanged =
    (m_window_info.surface_width == new_width && m_window_info.surface_height == new_height);
  if (size_unchanged)
    return true;

  // Backbuffer references are dropped before the resize.
  DestroyRTVs();

  const UINT resize_flags = m_using_allow_tearing ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0;
  const HRESULT hr = m_swap_chain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN, resize_flags);
  if (FAILED(hr))
    ERROR_LOG("ResizeBuffers() failed: 0x{:08X}", static_cast<unsigned>(hr));

  return CreateRTV(D3D12Device::GetInstance(), error);
}
1109
1110
std::unique_ptr<GPUSwapChain> D3D12Device::CreateSwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode,
1111
const ExclusiveFullscreenMode* exclusive_fullscreen_mode,
1112
std::optional<bool> exclusive_fullscreen_control,
1113
Error* error)
1114
{
1115
std::unique_ptr<D3D12SwapChain> ret;
1116
if (wi.type != WindowInfoType::Win32)
1117
{
1118
Error::SetStringView(error, "Cannot create a swap chain on non-win32 window.");
1119
return ret;
1120
}
1121
1122
ret = std::make_unique<D3D12SwapChain>(wi, vsync_mode, exclusive_fullscreen_mode);
1123
if (ret->CreateSwapChain(*this, error) && ret->CreateRTV(*this, error))
1124
{
1125
// Render a frame as soon as possible to clear out whatever was previously being displayed.
1126
RenderBlankFrame(ret.get());
1127
}
1128
else
1129
{
1130
ret.reset();
1131
}
1132
1133
return ret;
1134
}
1135
1136
// Clears the swap chain's current buffer to opaque black, presents it, and advances to the next buffer.
// Any active render pass is ended first.
void D3D12Device::RenderBlankFrame(D3D12SwapChain* swap_chain)
{
  if (InRenderPass())
    EndRenderPass();

  const D3D12SwapChain::BufferPair& current_buffer = swap_chain->GetCurrentBuffer();
  ID3D12Resource* const backbuffer = current_buffer.first.Get();
  ID3D12GraphicsCommandList4* const cmdlist = GetCommandList();

  // COMMON -> RENDER_TARGET, clear, then RENDER_TARGET -> PRESENT.
  D3D12Texture::TransitionSubresourceToState(cmdlist, backbuffer, 0, D3D12_RESOURCE_STATE_COMMON,
                                             D3D12_RESOURCE_STATE_RENDER_TARGET);
  cmdlist->ClearRenderTargetView(current_buffer.second, GSVector4::cxpr(0.0f, 0.0f, 0.0f, 1.0f).F32, 0, nullptr);
  D3D12Texture::TransitionSubresourceToState(cmdlist, backbuffer, 0, D3D12_RESOURCE_STATE_RENDER_TARGET,
                                             D3D12_RESOURCE_STATE_PRESENT);
  SubmitCommandList(false);

  const UINT present_flags = swap_chain->IsUsingAllowTearing() ? DXGI_PRESENT_ALLOW_TEARING : 0;
  swap_chain->GetSwapChain()->Present(0, present_flags);
  swap_chain->AdvanceBuffer();
}
1152
1153
// Returns true when `format` maps to a DXGI format that the device reports as usable both as a 2D texture and
// for shader sampling.
bool D3D12Device::SupportsTextureFormat(GPUTextureFormat format) const
{
  constexpr u32 required_support = D3D12_FORMAT_SUPPORT1_TEXTURE2D | D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE;

  const DXGI_FORMAT dxgi_format = D3DCommon::GetFormatMapping(format).resource_format;
  if (dxgi_format == DXGI_FORMAT_UNKNOWN)
    return false;

  D3D12_FEATURE_DATA_FORMAT_SUPPORT query = {dxgi_format, {}, {}};
  if (FAILED(m_device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &query, sizeof(query))))
    return false;

  return (query.Support1 & required_support) == required_support;
}
1165
1166
std::string D3D12Device::GetDriverInfo() const
1167
{
1168
std::string ret = fmt::format("{} (Shader Model {})\n", D3DCommon::GetFeatureLevelString(m_render_api_version),
1169
D3DCommon::GetShaderModelForFeatureLevelNumber(m_render_api_version));
1170
1171
DXGI_ADAPTER_DESC desc;
1172
if (m_adapter && SUCCEEDED(m_adapter->GetDesc(&desc)))
1173
{
1174
fmt::format_to(std::back_inserter(ret), "VID: 0x{:04X} PID: 0x{:04X}\n", desc.VendorId, desc.DeviceId);
1175
ret += StringUtil::WideStringToUTF8String(desc.Description);
1176
ret += "\n";
1177
1178
const std::string driver_version(D3DCommon::GetDriverVersionFromLUID(desc.AdapterLuid));
1179
if (!driver_version.empty())
1180
{
1181
ret += "Driver Version: ";
1182
ret += driver_version;
1183
}
1184
}
1185
1186
return ret;
1187
}
1188
1189
// Starts rendering to the swap chain's current backbuffer.
// Ends any active render pass, bails out on a lost device or lost exclusive fullscreen, then transitions the
// backbuffer to the render target state and begins a render pass that clears it to `clear_color`.
// On PresentResult::OK the device is left inside that render pass with no tracked render targets, depth
// target, or pipeline.
GPUDevice::PresentResult D3D12Device::BeginPresent(GPUSwapChain* swap_chain, u32 clear_color)
{
  D3D12SwapChain* const d3d_swap_chain = static_cast<D3D12SwapChain*>(swap_chain);
  if (InRenderPass())
    EndRenderPass();

  if (m_device_was_lost) [[unlikely]]
    return PresentResult::DeviceLost;

  // TODO: Check if the device was lost.

  // Check if we lost exclusive fullscreen. If so, notify the host, so it can switch to windowed mode.
  // This might get called repeatedly if it takes a while to switch back, that's the host's problem.
  if (d3d_swap_chain->IsExclusiveFullscreen())
  {
    BOOL is_fullscreen;
    const bool still_fullscreen =
      SUCCEEDED(d3d_swap_chain->GetSwapChain()->GetFullscreenState(&is_fullscreen, nullptr)) && is_fullscreen;
    if (!still_fullscreen)
    {
      FlushCommands();
      TrimTexturePool();
      return PresentResult::ExclusiveFullscreenLost;
    }
  }

  m_current_swap_chain = d3d_swap_chain;

  const D3D12SwapChain::BufferPair& backbuffer = d3d_swap_chain->GetCurrentBuffer();
  ID3D12GraphicsCommandList4* const cmdlist = GetCommandList();

  D3D12Texture::TransitionSubresourceToState(cmdlist, backbuffer.first.Get(), 0, D3D12_RESOURCE_STATE_COMMON,
                                             D3D12_RESOURCE_STATE_RENDER_TARGET);

  // All textures should be in shader read only optimal already, but just in case..
  const u32 active_texture_count = GetActiveTexturesForLayout(m_current_pipeline_layout);
  for (u32 slot = 0; slot < active_texture_count; slot++)
  {
    if (!m_current_textures[slot])
      continue;

    m_current_textures[slot]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
  }

  // Single colour attachment: cleared on entry, preserved on exit.
  D3D12_RENDER_PASS_RENDER_TARGET_DESC rt_desc = {backbuffer.second,
                                                  {D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR, {}},
                                                  {D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, {}}};
  GSVector4::store<false>(rt_desc.BeginningAccess.Clear.ClearValue.Color, GSVector4::unorm8(clear_color));
  cmdlist->BeginRenderPass(1, &rt_desc, nullptr, D3D12_RENDER_PASS_FLAG_NONE);

  // Reset the tracked render pass state: no render targets, no depth target.
  std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
  m_num_current_render_targets = 0;
  m_current_depth_target = nullptr;
  m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;
  m_dirty_flags =
    (m_dirty_flags & ~DIRTY_FLAG_RT_UAVS) | ((IsUsingROVRootSignature()) ? DIRTY_FLAG_PIPELINE_LAYOUT : 0);
  m_in_render_pass = true;
  s_stats.num_render_passes++;

  // Clear pipeline, it's likely incompatible.
  m_current_pipeline = nullptr;

  return PresentResult::OK;
}
1247
1248
// Finishes the render pass started by BeginPresent(), transitions the backbuffer to the present state, submits
// the command list, and presents immediately unless `explicit_present` is set.
// present_time must be zero (asserted in debug builds).
void D3D12Device::EndPresent(GPUSwapChain* swap_chain, bool explicit_present, u64 present_time)
{
  D3D12SwapChain* const d3d_swap_chain = static_cast<D3D12SwapChain*>(swap_chain);
  DebugAssert(present_time == 0);
  DebugAssert(InRenderPass() && m_num_current_render_targets == 0 && !m_current_depth_target);
  EndRenderPass();

  DebugAssert(d3d_swap_chain == m_current_swap_chain);
  m_current_swap_chain = nullptr;

  // Grab the buffer that was just rendered to before moving on to the next one.
  ID3D12Resource* const rendered_buffer = d3d_swap_chain->GetCurrentBuffer().first.Get();
  d3d_swap_chain->AdvanceBuffer();

  ID3D12GraphicsCommandList* const cmdlist = GetCommandList();
  D3D12Texture::TransitionSubresourceToState(cmdlist, rendered_buffer, 0, D3D12_RESOURCE_STATE_RENDER_TARGET,
                                             D3D12_RESOURCE_STATE_PRESENT);

  SubmitCommandList(false);
  TrimTexturePool();

  if (explicit_present)
    return;

  SubmitPresent(swap_chain);
}
1271
1272
// Presents the swap chain. Skipped entirely when the device has been lost.
// The sync interval is 1 for FIFO vsync and 0 otherwise; tearing is only requested when vsync is disabled and
// the swap chain was created with allow-tearing.
void D3D12Device::SubmitPresent(GPUSwapChain* swap_chain)
{
  D3D12SwapChain* const d3d_swap_chain = static_cast<D3D12SwapChain*>(swap_chain);
  if (m_device_was_lost) [[unlikely]]
    return;

  const GPUVSyncMode vsync_mode = d3d_swap_chain->GetVSyncMode();
  const UINT sync_interval = (vsync_mode == GPUVSyncMode::FIFO) ? 1u : 0u;

  UINT present_flags = 0;
  if (vsync_mode == GPUVSyncMode::Disabled && d3d_swap_chain->IsUsingAllowTearing())
    present_flags = DXGI_PRESENT_ALLOW_TEARING;

  d3d_swap_chain->GetSwapChain()->Present(sync_interval, present_flags);
}
1283
1284
#ifdef ENABLE_GPU_OBJECT_NAMES
1285
1286
// Evaluates a cosine colour palette, a + b * cos(6.28318 * (c * phase + d)), independently per channel and
// packs the result as a PIX colour. Each channel is scaled to 0..255 and clamped before conversion.
static UINT64 Palette(float phase, const std::array<float, 3>& a, const std::array<float, 3>& b,
                      const std::array<float, 3>& c, const std::array<float, 3>& d)
{
  std::array<BYTE, 3> channels;
  for (size_t i = 0; i < channels.size(); i++)
  {
    const float value = a[i] + b[i] * std::cos(6.28318f * (c[i] * phase + d[i]));
    channels[i] = static_cast<BYTE>(std::clamp(value * 255.0f, 0.0f, 255.0f));
  }

  return PIX_COLOR(channels[0], channels[1], channels[2]);
}
1298
1299
void D3D12Device::PushDebugGroup(const char* name)
1300
{
1301
if (!m_debug_device)
1302
return;
1303
1304
const UINT64 color = Palette(static_cast<float>(++s_debug_scope_depth), {0.5f, 0.5f, 0.5f}, {0.5f, 0.5f, 0.5f},
1305
{1.0f, 1.0f, 0.5f}, {0.8f, 0.90f, 0.30f});
1306
PIXBeginEvent(GetCommandList(), color, "%s", name);
1307
}
1308
1309
void D3D12Device::PopDebugGroup()
1310
{
1311
if (!m_debug_device)
1312
return;
1313
1314
s_debug_scope_depth = (s_debug_scope_depth == 0) ? 0 : (s_debug_scope_depth - 1u);
1315
PIXEndEvent(GetCommandList());
1316
}
1317
1318
void D3D12Device::InsertDebugMessage(const char* msg)
1319
{
1320
if (!m_debug_device)
1321
return;
1322
1323
PIXSetMarker(GetCommandList(), PIX_COLOR(0, 0, 0), "%s", msg);
1324
}
1325
1326
#endif
1327
1328
// Fills in the API version, limits, and m_features for this device, honouring the Disable* creation flags.
void D3D12Device::SetFeatures(D3D_FEATURE_LEVEL feature_level, CreateFlags create_flags)
{
  m_render_api_version = D3DCommon::GetRenderAPIVersionForFeatureLevel(feature_level);
  m_max_texture_size = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION;

  // Probe each sample count from 2 up to (but not including) D3D12_MAX_MULTISAMPLE_SAMPLE_COUNT and keep the
  // highest one that reports at least one quality level for RGBA8.
  m_max_multisamples = 1;
  for (u32 sample_count = 2; sample_count < D3D12_MAX_MULTISAMPLE_SAMPLE_COUNT; sample_count++)
  {
    D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS query = {DXGI_FORMAT_R8G8B8A8_UNORM,
                                                           static_cast<UINT>(sample_count),
                                                           D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE, 0u};

    const bool supported =
      SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &query, sizeof(query))) &&
      query.NumQualityLevels > 0;
    if (supported)
      m_max_multisamples = static_cast<u16>(sample_count);
  }

  // Features that can be switched off through creation flags.
  m_features.dual_source_blend = !HasCreateFlag(create_flags, CreateFlags::DisableDualSourceBlend);
  m_features.texture_buffers = !HasCreateFlag(create_flags, CreateFlags::DisableTextureBuffers);
  m_features.geometry_shaders = !HasCreateFlag(create_flags, CreateFlags::DisableGeometryShaders);
  m_features.compute_shaders = !HasCreateFlag(create_flags, CreateFlags::DisableComputeShaders);

  // Fixed capabilities.
  m_features.framebuffer_fetch = false;
  m_features.per_sample_shading = true;
  m_features.noperspective_interpolation = true;
  m_features.texture_copy_to_self =
    /*!HasCreateFlag(create_flags, CreateFlag::DisableTextureCopyToSelf)*/ false; // TODO: Support with Enhanced
                                                                                  // Barriers
  m_features.texture_buffers_emulated_with_ssbo = false;
  m_features.feedback_loops = false;
  m_features.partial_msaa_resolve = true;
  m_features.memory_import = false;
  m_features.exclusive_fullscreen = true;
  m_features.explicit_present = true;
  m_features.timed_present = false;
  m_features.gpu_timing = true;
  m_features.shader_cache = true;
  m_features.pipeline_cache = true;
  m_features.prefer_unused_textures = true;

  // ROVs need both the flag to be absent and the device to report support.
  D3D12_FEATURE_DATA_D3D12_OPTIONS options = {};
  m_features.raster_order_views =
    !HasCreateFlag(create_flags, CreateFlags::DisableRasterOrderViews) &&
    SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options))) &&
    options.ROVsSupported;

  // Compressed formats: DXT requires BC1, BC2 and BC3 together; BPTC requires BC7.
  const bool compressed_textures_allowed = !HasCreateFlag(create_flags, CreateFlags::DisableCompressedTextures);
  m_features.dxt_textures =
    (compressed_textures_allowed &&
     (SupportsTextureFormat(GPUTextureFormat::BC1) && SupportsTextureFormat(GPUTextureFormat::BC2) &&
      SupportsTextureFormat(GPUTextureFormat::BC3)));
  m_features.bptc_textures = (compressed_textures_allowed && SupportsTextureFormat(GPUTextureFormat::BC7));
}
1383
1384
void D3D12Device::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
1385
GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width,
1386
u32 height)
1387
{
1388
D3D12Texture* const S = static_cast<D3D12Texture*>(src);
1389
D3D12Texture* const D = static_cast<D3D12Texture*>(dst);
1390
1391
if (S->GetState() == GPUTexture::State::Cleared)
1392
{
1393
// source is cleared. if destination is a render target, we can carry the clear forward
1394
if (D->IsRenderTargetOrDepthStencil())
1395
{
1396
if (dst_level == 0 && dst_x == 0 && dst_y == 0 && width == D->GetWidth() && height == D->GetHeight())
1397
{
1398
// pass it forward if we're clearing the whole thing
1399
if (S->IsDepthStencil())
1400
D->SetClearDepth(S->GetClearDepth());
1401
else
1402
D->SetClearColor(S->GetClearColor());
1403
1404
return;
1405
}
1406
1407
if (D->GetState() == GPUTexture::State::Cleared)
1408
{
1409
// destination is cleared, if it's the same colour and rect, we can just avoid this entirely
1410
if (D->IsDepthStencil())
1411
{
1412
if (D->GetClearDepth() == S->GetClearDepth())
1413
return;
1414
}
1415
else
1416
{
1417
if (D->GetClearColor() == S->GetClearColor())
1418
return;
1419
}
1420
}
1421
}
1422
1423
// commit the clear to the source first, then do normal copy
1424
S->CommitClear();
1425
}
1426
1427
// if the destination has been cleared, and we're not overwriting the whole thing, commit the clear first
1428
// (the area outside of where we're copying to)
1429
if (D->GetState() == GPUTexture::State::Cleared &&
1430
(dst_level != 0 || dst_x != 0 || dst_y != 0 || width != D->GetWidth() || height != D->GetHeight()))
1431
{
1432
D->CommitClear();
1433
}
1434
1435
s_stats.num_copies++;
1436
1437
// *now* we can do a normal image copy.
1438
if (InRenderPass())
1439
EndRenderPass();
1440
1441
S->TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE);
1442
S->SetUseFenceValue(GetCurrentFenceValue());
1443
1444
D->TransitionToState(D3D12_RESOURCE_STATE_COPY_DEST);
1445
D->SetUseFenceValue(GetCurrentFenceValue());
1446
1447
D3D12_TEXTURE_COPY_LOCATION srcloc;
1448
srcloc.pResource = S->GetResource();
1449
srcloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
1450
srcloc.SubresourceIndex = S->CalculateSubresource(src_layer, src_level);
1451
1452
D3D12_TEXTURE_COPY_LOCATION dstloc;
1453
dstloc.pResource = D->GetResource();
1454
dstloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
1455
dstloc.SubresourceIndex = D->CalculateSubresource(dst_layer, dst_level);
1456
1457
const D3D12_BOX srcbox{static_cast<UINT>(src_x), static_cast<UINT>(src_y), 0u,
1458
static_cast<UINT>(src_x + width), static_cast<UINT>(src_y + height), 1u};
1459
GetCommandList()->CopyTextureRegion(&dstloc, dst_x, dst_y, 0, &srcloc, &srcbox);
1460
1461
D->SetState(GPUTexture::State::Dirty);
1462
}
1463
1464
void D3D12Device::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
1465
GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height)
1466
{
1467
DebugAssert((src_x + width) <= src->GetWidth());
1468
DebugAssert((src_y + height) <= src->GetHeight());
1469
DebugAssert(src->IsMultisampled());
1470
DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers());
1471
DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level));
1472
DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level));
1473
DebugAssert(!dst->IsMultisampled() && src->IsMultisampled());
1474
1475
if (InRenderPass())
1476
EndRenderPass();
1477
1478
s_stats.num_copies++;
1479
1480
D3D12Texture* D = static_cast<D3D12Texture*>(dst);
1481
D3D12Texture* S = static_cast<D3D12Texture*>(src);
1482
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
1483
const u32 DSR = D->CalculateSubresource(dst_layer, dst_level);
1484
1485
S->CommitClear(cmdlist);
1486
D->CommitClear(cmdlist);
1487
1488
S->TransitionSubresourceToState(cmdlist, 0, S->GetResourceState(), D3D12_RESOURCE_STATE_RESOLVE_SOURCE);
1489
D->TransitionSubresourceToState(cmdlist, DSR, D->GetResourceState(), D3D12_RESOURCE_STATE_RESOLVE_DEST);
1490
1491
if (src_x == 0 && src_y == 0 && width == src->GetWidth() && height == src->GetHeight() && dst_x == 0 && dst_y == 0 &&
1492
width == dst->GetMipWidth(dst_level) && height == dst->GetMipHeight(dst_level))
1493
{
1494
cmdlist->ResolveSubresource(D->GetResource(), DSR, S->GetResource(), 0, S->GetDXGIFormat());
1495
}
1496
else
1497
{
1498
D3D12_RECT src_rc{static_cast<LONG>(src_x), static_cast<LONG>(src_y), static_cast<LONG>(src_x + width),
1499
static_cast<LONG>(src_y + height)};
1500
cmdlist->ResolveSubresourceRegion(D->GetResource(), D->CalculateSubresource(dst_level, dst_layer), dst_x, dst_y,
1501
S->GetResource(), 0, &src_rc, D->GetDXGIFormat(), D3D12_RESOLVE_MODE_AVERAGE);
1502
}
1503
1504
S->TransitionSubresourceToState(cmdlist, 0, D3D12_RESOURCE_STATE_RESOLVE_SOURCE, S->GetResourceState());
1505
D->TransitionSubresourceToState(cmdlist, DSR, D3D12_RESOURCE_STATE_RESOLVE_DEST, D->GetResourceState());
1506
}
1507
1508
// Records the clear through the base class, then ends the active render pass if `t` is one of its bound
// render targets.
// NOTE(review): presumably the pass is ended so the clear is picked up when the next pass begins - confirm.
void D3D12Device::ClearRenderTarget(GPUTexture* t, u32 c)
{
  GPUDevice::ClearRenderTarget(t, c);

  if (!InRenderPass())
    return;

  if (IsRenderTargetBound(t))
    EndRenderPass();
}
1514
1515
// Records the depth clear through the base class, then ends the active render pass if `t` is its depth target.
void D3D12Device::ClearDepth(GPUTexture* t, float d)
{
  GPUDevice::ClearDepth(t, d);

  if (!InRenderPass())
    return;

  if (m_current_depth_target == t)
    EndRenderPass();
}
1521
1522
// Records the invalidation through the base class, then ends the active render pass if `t` is attached to it,
// either as the depth target or as one of the bound render targets.
void D3D12Device::InvalidateRenderTarget(GPUTexture* t)
{
  GPUDevice::InvalidateRenderTarget(t);

  if (!InRenderPass())
    return;

  const bool attached_to_pass = t->IsDepthStencil() ? (m_current_depth_target == t) : IsRenderTargetBound(t);
  if (attached_to_pass)
    EndRenderPass();
}
1528
1529
// Creates the vertex, index, uniform, and texture upload stream buffers, in that order.
// Stops at the first failure, logs which buffer failed, and returns false; `error` is passed to each Create().
bool D3D12Device::CreateBuffers(Error* error)
{
  const bool vertex_ok = m_vertex_buffer.Create(VERTEX_BUFFER_SIZE, error);
  if (!vertex_ok)
  {
    ERROR_LOG("Failed to allocate vertex buffer");
    return false;
  }

  const bool index_ok = m_index_buffer.Create(INDEX_BUFFER_SIZE, error);
  if (!index_ok)
  {
    ERROR_LOG("Failed to allocate index buffer");
    return false;
  }

  const bool uniform_ok = m_uniform_buffer.Create(VERTEX_UNIFORM_BUFFER_SIZE, error);
  if (!uniform_ok)
  {
    ERROR_LOG("Failed to allocate uniform buffer");
    return false;
  }

  const bool texture_upload_ok = m_texture_upload_buffer.Create(TEXTURE_BUFFER_SIZE, error);
  if (!texture_upload_ok)
  {
    ERROR_LOG("Failed to allocate texture upload buffer");
    return false;
  }

  return true;
}
1557
1558
void D3D12Device::DestroyBuffers()
1559
{
1560
m_texture_upload_buffer.Destroy(false);
1561
m_uniform_buffer.Destroy(false);
1562
m_index_buffer.Destroy(false);
1563
m_vertex_buffer.Destroy(false);
1564
}
1565
1566
void D3D12Device::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
1567
u32* map_base_vertex)
1568
{
1569
const u32 req_size = vertex_size * vertex_count;
1570
if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size))
1571
{
1572
SubmitCommandListAndRestartRenderPass("out of vertex space");
1573
if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size))
1574
Panic("Failed to allocate vertex space");
1575
}
1576
1577
*map_ptr = m_vertex_buffer.GetCurrentHostPointer();
1578
*map_space = m_vertex_buffer.GetCurrentSpace() / vertex_size;
1579
*map_base_vertex = m_vertex_buffer.GetCurrentOffset() / vertex_size;
1580
}
1581
1582
// Commits `vertex_count` vertices of `vertex_size` bytes to the vertex stream buffer and adds the byte count
// to the streamed-bytes statistic.
void D3D12Device::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count)
{
  const u32 bytes_written = vertex_size * vertex_count;

  s_stats.buffer_streamed += bytes_written;
  m_vertex_buffer.CommitMemory(bytes_written);
}
1588
1589
// Reserves space for `index_count` indices in the index stream buffer.
// If the first reservation fails, the command list is submitted (restarting the render pass) and the
// reservation is retried once; a second failure panics.
// Outputs: *map_ptr is the write pointer, *map_space the available space in indices, and *map_base_index the
// buffer offset expressed in indices.
void D3D12Device::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index)
{
  const u32 bytes_needed = sizeof(DrawIndex) * index_count;

  bool reserved = m_index_buffer.ReserveMemory(bytes_needed, sizeof(DrawIndex));
  if (!reserved)
  {
    SubmitCommandListAndRestartRenderPass("out of index space");
    reserved = m_index_buffer.ReserveMemory(bytes_needed, sizeof(DrawIndex));
    if (!reserved)
      Panic("Failed to allocate index space");
  }

  *map_ptr = reinterpret_cast<DrawIndex*>(m_index_buffer.GetCurrentHostPointer());
  *map_space = m_index_buffer.GetCurrentSpace() / sizeof(DrawIndex);
  *map_base_index = m_index_buffer.GetCurrentOffset() / sizeof(DrawIndex);
}
1603
1604
// Commits `used_index_count` indices to the index stream buffer and adds the byte count to the streamed-bytes
// statistic.
void D3D12Device::UnmapIndexBuffer(u32 used_index_count)
{
  const u32 bytes_written = sizeof(DrawIndex) * used_index_count;

  s_stats.buffer_streamed += bytes_written;
  m_index_buffer.CommitMemory(bytes_written);
}
1610
1611
void D3D12Device::PushUniformBuffer(ID3D12GraphicsCommandList4* const cmdlist, bool compute, const void* data,
1612
u32 data_size)
1613
{
1614
static constexpr std::array<u8, static_cast<u8>(GPUPipeline::Layout::MaxCount)> push_parameters = {
1615
0, // SingleTextureAndUBO
1616
2, // SingleTextureAndPushConstants
1617
1, // SingleTextureBufferAndPushConstants
1618
0, // MultiTextureAndUBO
1619
2, // MultiTextureAndPushConstants
1620
3, // MultiTextureAndUBOAndPushConstants
1621
0, // ComputeMultiTextureAndUBO
1622
2, // ComputeSingleTextureAndPushConstants
1623
};
1624
1625
s_stats.buffer_streamed += data_size;
1626
1627
const u32 push_param =
1628
push_parameters[static_cast<u8>(m_current_pipeline_layout)] + BoolToUInt8(IsUsingROVRootSignature());
1629
if (!compute)
1630
cmdlist->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0);
1631
else
1632
cmdlist->SetComputeRoot32BitConstants(push_param, data_size / 4u, data, 0);
1633
}
1634
1635
// Reserves space in the uniform stream buffer and returns the write pointer.
// The reservation is `size` rounded up to the constant buffer placement alignment, plus MAX_UNIFORM_BUFFER_SIZE.
// If the first reservation fails, the command list is submitted (restarting the render pass) and the
// reservation is retried once; a second failure panics.
void* D3D12Device::MapUniformBuffer(u32 size)
{
  const u32 aligned_size = Common::AlignUpPow2(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
  const u32 reserve_size = aligned_size + MAX_UNIFORM_BUFFER_SIZE;

  bool reserved = m_uniform_buffer.ReserveMemory(reserve_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
  if (!reserved)
  {
    SubmitCommandListAndRestartRenderPass("out of uniform space");
    reserved = m_uniform_buffer.ReserveMemory(reserve_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
    if (!reserved)
      Panic("Failed to allocate uniform space.");
  }

  return m_uniform_buffer.GetCurrentHostPointer();
}
1649
1650
// Commits `size` bytes of uniform data and flags the constant buffer binding as dirty.
// The buffer offset is captured before the commit, so m_uniform_buffer_position refers to the start of the
// data that was just written.
void D3D12Device::UnmapUniformBuffer(u32 size)
{
  s_stats.buffer_streamed += size;

  const auto write_offset = m_uniform_buffer.GetCurrentOffset();
  m_uniform_buffer_position = write_offset;
  m_uniform_buffer.CommitMemory(size);

  m_dirty_flags |= DIRTY_FLAG_CONSTANT_BUFFER;
}
1657
1658
// Builds the root signature for every pipeline layout, plus the one used for mipmap rendering.
// Graphics layouts are built once without, and (if raster order views are supported) once with, an
// extra UAV table for render targets bound as images. Compute layouts are only built in slot [0].
// Returns false and leaves details in `error` as soon as any root signature fails to create.
//
// The order of the Add*() calls within each block matters: other code in this file addresses root
// parameters by fixed index (e.g. PushUniformBuffer()).
// NOTE(review): the same builder is reused for every signature, so Create(error, true) presumably
// resets it - confirm against RootSignatureBuilder.
bool D3D12Device::CreateRootSignatures(Error* error)
{
  D3D12::RootSignatureBuilder rsb;

  for (u32 rov = 0; rov < 2; rov++)
  {
    // Skip the ROV variants entirely when the feature is unavailable.
    if (rov && !m_features.raster_order_views)
      break;

    // SRV table, sampler table, CBV, [UAV table].
    {
      auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::SingleTextureAndUBO)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
      if (rov)
      {
        rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
                               D3D12_SHADER_VISIBILITY_PIXEL);
      }
      if (!(rs = rsb.Create(error, true)))
        return false;
      D3D12::SetObjectName(rs.Get(), "Single Texture + UBO Pipeline Layout");
    }

    // SRV table, sampler table, [UAV table], 32-bit constants.
    {
      auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::SingleTextureAndPushConstants)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      if (rov)
      {
        rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
                               D3D12_SHADER_VISIBILITY_PIXEL);
      }
      rsb.Add32BitConstants(1, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
      if (!(rs = rsb.Create(error, true)))
        return false;
      D3D12::SetObjectName(rs.Get(), "Single Texture Pipeline Layout");
    }

    // SRV table (texture buffer), [UAV table], 32-bit constants. No sampler and no CBV.
    {
      auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::SingleTextureBufferAndPushConstants)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      if (rov)
      {
        rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
                               D3D12_SHADER_VISIBILITY_PIXEL);
      }
      rsb.Add32BitConstants(1, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
      if (!(rs = rsb.Create(error, true)))
        return false;
      // This layout uses push constants, not a UBO; name it accordingly.
      D3D12::SetObjectName(rs.Get(), "Single Texture Buffer + Push Constant Pipeline Layout");
    }

    // SRV table, sampler table, CBV, [UAV table].
    {
      auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::MultiTextureAndUBO)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS,
                             D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
      if (rov)
      {
        rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
                               D3D12_SHADER_VISIBILITY_PIXEL);
      }
      if (!(rs = rsb.Create(error, true)))
        return false;
      D3D12::SetObjectName(rs.Get(), "Multi Texture + UBO Pipeline Layout");
    }

    // SRV table, sampler table, [UAV table], 32-bit constants.
    {
      auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::MultiTextureAndPushConstants)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS,
                             D3D12_SHADER_VISIBILITY_PIXEL);
      if (rov)
      {
        rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
                               D3D12_SHADER_VISIBILITY_PIXEL);
      }
      rsb.Add32BitConstants(1, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
      if (!(rs = rsb.Create(error, true)))
        return false;
      D3D12::SetObjectName(rs.Get(), "Multi Texture + Push Constant Pipeline Layout");
    }

    // SRV table, sampler table, CBV, [UAV table], 32-bit constants.
    {
      auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::MultiTextureAndUBOAndPushConstants)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS,
                             D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
      if (rov)
      {
        rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
                               D3D12_SHADER_VISIBILITY_PIXEL);
      }
      rsb.Add32BitConstants(1, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
      if (!(rs = rsb.Create(error, true)))
        return false;
      D3D12::SetObjectName(rs.Get(), "Multi Texture + UBO + Push Constant Pipeline Layout");
    }
  }

  // Compute: SRV table, sampler table, UAV table, CBV. The UAV table is unconditional here.
  {
    auto& rs = m_root_signatures[0][static_cast<u8>(GPUPipeline::Layout::ComputeMultiTextureAndUBO)];

    rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
    rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
    rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS, D3D12_SHADER_VISIBILITY_ALL);
    rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
    if (!(rs = rsb.Create(error, true)))
      return false;
    D3D12::SetObjectName(rs.Get(), "Compute Multi Texture + UBO Pipeline Layout");
  }

  // Compute: SRV table, sampler table, UAV table, 32-bit constants.
  {
    auto& rs = m_root_signatures[0][static_cast<u8>(GPUPipeline::Layout::ComputeMultiTextureAndPushConstants)];

    rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
    rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
    rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS, D3D12_SHADER_VISIBILITY_ALL);
    rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
    if (!(rs = rsb.Create(error, true)))
      return false;
    D3D12::SetObjectName(rs.Get(), "Compute Multi Texture Pipeline Layout");
  }

  // Mipmap rendering: a single SRV plus a static linear sampler.
  {
    auto& rs = m_mipmap_render_root_signature;
    rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
    rsb.AddStaticSampler(0, D3D12Sampler::GetD3DSamplerDesc(GPUSampler::GetLinearConfig()),
                         D3D12_SHADER_VISIBILITY_PIXEL);
    if (!(rs = rsb.Create(error, true)))
      return false;
    D3D12::SetObjectName(rs.Get(), "Render Mipmap Pipeline Layout");
  }

  return true;
}
1809
1810
void D3D12Device::DestroyRootSignatures()
1811
{
1812
for (ComPtr<ID3D12PipelineState>& it : m_mipmap_render_pipelines)
1813
it.Reset();
1814
m_mipmap_render_root_signature.Reset();
1815
m_root_signatures.enumerate([](auto& it) { it.Reset(); });
1816
}
1817
1818
void D3D12Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
1819
GPUPipeline::RenderPassFlag flags)
1820
{
1821
DebugAssert(
1822
!(flags & (GPUPipeline::RenderPassFlag::ColorFeedbackLoop | GPUPipeline::RenderPassFlag::SampleDepthBuffer)));
1823
1824
const bool image_bind_changed = ((m_current_render_pass_flags ^ flags) & GPUPipeline::BindRenderTargetsAsImages);
1825
bool changed =
1826
(m_num_current_render_targets != num_rts || m_current_depth_target != ds || m_current_render_pass_flags != flags);
1827
bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated());
1828
bool needs_rt_clear = false;
1829
1830
m_current_depth_target = static_cast<D3D12Texture*>(ds);
1831
for (u32 i = 0; i < num_rts; i++)
1832
{
1833
D3D12Texture* const RT = static_cast<D3D12Texture*>(rts[i]);
1834
changed |= m_current_render_targets[i] != RT;
1835
m_current_render_targets[i] = RT;
1836
needs_rt_clear |= RT->IsClearedOrInvalidated();
1837
}
1838
for (u32 i = num_rts; i < m_num_current_render_targets; i++)
1839
m_current_render_targets[i] = nullptr;
1840
m_num_current_render_targets = Truncate8(num_rts);
1841
m_current_render_pass_flags = flags;
1842
1843
// Don't end render pass unless it's necessary.
1844
if (changed)
1845
{
1846
if (InRenderPass())
1847
EndRenderPass();
1848
1849
// Need a root signature change if switching to UAVs.
1850
m_dirty_flags |= image_bind_changed ? LAYOUT_DEPENDENT_DIRTY_STATE : 0;
1851
m_dirty_flags = (flags & GPUPipeline::BindRenderTargetsAsImages) ? (m_dirty_flags | DIRTY_FLAG_RT_UAVS) :
1852
(m_dirty_flags & ~DIRTY_FLAG_RT_UAVS);
1853
}
1854
else if (needs_rt_clear || needs_ds_clear)
1855
{
1856
if (InRenderPass())
1857
EndRenderPass();
1858
}
1859
}
1860
1861
void D3D12Device::BeginRenderPass()
1862
{
1863
DebugAssert(!InRenderPass());
1864
1865
std::array<D3D12_RENDER_PASS_RENDER_TARGET_DESC, MAX_RENDER_TARGETS> rt_desc;
1866
D3D12_RENDER_PASS_DEPTH_STENCIL_DESC ds_desc;
1867
1868
D3D12_RENDER_PASS_RENDER_TARGET_DESC* rt_desc_p = nullptr;
1869
D3D12_RENDER_PASS_DEPTH_STENCIL_DESC* ds_desc_p = nullptr;
1870
u32 num_rt_descs = 0;
1871
1872
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
1873
1874
if (m_num_current_render_targets > 0 || m_current_depth_target) [[likely]]
1875
{
1876
if (!IsUsingROVRootSignature()) [[likely]]
1877
{
1878
for (u32 i = 0; i < m_num_current_render_targets; i++)
1879
{
1880
D3D12Texture* const rt = m_current_render_targets[i];
1881
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET);
1882
rt->SetUseFenceValue(GetCurrentFenceValue());
1883
1884
D3D12_RENDER_PASS_RENDER_TARGET_DESC& desc = rt_desc[i];
1885
desc.cpuDescriptor = rt->GetWriteDescriptor();
1886
desc.EndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE;
1887
1888
switch (rt->GetState())
1889
{
1890
case GPUTexture::State::Cleared:
1891
{
1892
desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR;
1893
std::memcpy(desc.BeginningAccess.Clear.ClearValue.Color, rt->GetUNormClearColor().data(),
1894
sizeof(desc.BeginningAccess.Clear.ClearValue.Color));
1895
rt->SetState(GPUTexture::State::Dirty);
1896
}
1897
break;
1898
1899
case GPUTexture::State::Invalidated:
1900
{
1901
desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD;
1902
rt->SetState(GPUTexture::State::Dirty);
1903
}
1904
break;
1905
1906
case GPUTexture::State::Dirty:
1907
{
1908
desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE;
1909
}
1910
break;
1911
1912
default:
1913
UnreachableCode();
1914
break;
1915
}
1916
}
1917
1918
rt_desc_p = (m_num_current_render_targets > 0) ? rt_desc.data() : nullptr;
1919
num_rt_descs = m_num_current_render_targets;
1920
}
1921
else
1922
{
1923
// Still need to clear the RTs.
1924
for (u32 i = 0; i < m_num_current_render_targets; i++)
1925
{
1926
D3D12Texture* const rt = m_current_render_targets[i];
1927
rt->SetUseFenceValue(GetCurrentFenceValue());
1928
rt->CommitClear(cmdlist);
1929
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
1930
rt->SetState(GPUTexture::State::Dirty);
1931
}
1932
}
1933
if (m_current_depth_target)
1934
{
1935
D3D12Texture* const ds = m_current_depth_target;
1936
ds->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_DEPTH_WRITE);
1937
ds->SetUseFenceValue(GetCurrentFenceValue());
1938
ds_desc.cpuDescriptor = ds->GetWriteDescriptor();
1939
ds_desc.DepthEndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE;
1940
ds_desc.StencilBeginningAccess = {D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS, {}};
1941
ds_desc.StencilEndingAccess = {D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS, {}};
1942
1943
switch (ds->GetState())
1944
{
1945
case GPUTexture::State::Cleared:
1946
{
1947
ds_desc.DepthBeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR;
1948
ds_desc.DepthBeginningAccess.Clear.ClearValue.DepthStencil.Depth = ds->GetClearDepth();
1949
ds->SetState(GPUTexture::State::Dirty);
1950
}
1951
break;
1952
1953
case GPUTexture::State::Invalidated:
1954
{
1955
ds_desc.DepthBeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD;
1956
ds->SetState(GPUTexture::State::Dirty);
1957
}
1958
break;
1959
1960
case GPUTexture::State::Dirty:
1961
{
1962
ds_desc.DepthBeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE;
1963
}
1964
break;
1965
1966
default:
1967
UnreachableCode();
1968
break;
1969
}
1970
1971
ds_desc_p = &ds_desc;
1972
}
1973
}
1974
else
1975
{
1976
// Re-rendering to swap chain.
1977
const auto& swap_chain_buf = m_current_swap_chain->GetCurrentBuffer();
1978
rt_desc[0] = {swap_chain_buf.second,
1979
{D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE, {}},
1980
{D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, {}}};
1981
rt_desc_p = &rt_desc[0];
1982
num_rt_descs = 1;
1983
}
1984
1985
// All textures should be in shader read only optimal already, but just in case..
1986
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
1987
for (u32 i = 0; i < num_textures; i++)
1988
{
1989
if (m_current_textures[i])
1990
m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
1991
}
1992
1993
DebugAssert(rt_desc_p || ds_desc_p || IsUsingROVRootSignature());
1994
cmdlist->BeginRenderPass(num_rt_descs, rt_desc_p, ds_desc_p, D3D12_RENDER_PASS_FLAG_NONE);
1995
1996
// TODO: Stats
1997
m_in_render_pass = true;
1998
s_stats.num_render_passes++;
1999
2000
// If this is a new command buffer, bind the pipeline and such.
2001
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
2002
SetInitialPipelineState();
2003
}
2004
2005
bool D3D12Device::InRenderPass()
2006
{
2007
return m_in_render_pass;
2008
}
2009
2010
void D3D12Device::EndRenderPass()
2011
{
2012
DebugAssert(m_in_render_pass);
2013
2014
// TODO: stats
2015
m_in_render_pass = false;
2016
2017
GetCommandList()->EndRenderPass();
2018
}
2019
2020
// Makes `pipeline` the current pipeline. On a fresh command list (DIRTY_FLAG_INITIAL) all initial
// state is bound; otherwise only the pieces that differ from the cached state are re-sent.
void D3D12Device::SetPipeline(GPUPipeline* pipeline)
{
  D3D12Pipeline* const new_pipeline = static_cast<D3D12Pipeline*>(pipeline);

  // First draw? Bind everything.
  if (m_dirty_flags & DIRTY_FLAG_INITIAL)
  {
    m_current_pipeline = new_pipeline;
    if (new_pipeline)
      SetInitialPipelineState();

    return;
  }

  // Already bound, nothing to do.
  if (m_current_pipeline == pipeline)
    return;

  m_current_pipeline = new_pipeline;

  ID3D12GraphicsCommandList4* const cmdlist = GetCommandList();
  cmdlist->SetPipelineState(m_current_pipeline->GetPipeline());

  const D3D12_PRIMITIVE_TOPOLOGY topology = m_current_pipeline->GetTopology();
  if (topology != m_current_topology)
  {
    m_current_topology = topology;
    cmdlist->IASetPrimitiveTopology(topology);
  }

  // A stride of zero leaves the currently-bound vertex buffer view untouched.
  const u32 vertex_stride = m_current_pipeline->GetVertexStride();
  if (vertex_stride > 0 && m_current_vertex_stride != vertex_stride)
  {
    m_current_vertex_stride = vertex_stride;
    SetVertexBuffer(cmdlist);
  }

  // TODO: we don't need to change the blend constant if blending isn't on.
  const u32 blend_constants = m_current_pipeline->GetBlendConstants();
  if (m_current_blend_constant != blend_constants)
  {
    m_current_blend_constant = blend_constants;
    cmdlist->OMSetBlendFactor(m_current_pipeline->GetBlendConstantsF().data());
  }

  // A layout change means a different root signature, so all root parameters need re-sending.
  // The RT UAV flag is only raised when render targets are bound as images.
  const GPUPipeline::Layout layout = m_current_pipeline->GetLayout();
  if (m_current_pipeline_layout != layout)
  {
    m_current_pipeline_layout = layout;
    m_dirty_flags |= LAYOUT_DEPENDENT_DIRTY_STATE & (IsUsingROVRootSignature() ? ~0u : ~DIRTY_FLAG_RT_UAVS);
  }
}
2068
2069
// Forgets `pl` if it is the currently-bound pipeline; otherwise does nothing.
void D3D12Device::UnbindPipeline(D3D12Pipeline* pl)
{
  if (m_current_pipeline == pl)
    m_current_pipeline = nullptr;
}
2076
2077
bool D3D12Device::IsRenderTargetBound(const GPUTexture* tex) const
2078
{
2079
for (u32 i = 0; i < m_num_current_render_targets; i++)
2080
{
2081
if (m_current_render_targets[i] == tex)
2082
return true;
2083
}
2084
2085
return false;
2086
}
2087
2088
void D3D12Device::InvalidateCachedState()
2089
{
2090
DebugAssert(!m_in_render_pass);
2091
m_dirty_flags = ALL_DIRTY_STATE &
2092
((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) ? ~0u : ~DIRTY_FLAG_RT_UAVS);
2093
}
2094
2095
void D3D12Device::SetInitialPipelineState()
2096
{
2097
DebugAssert(m_current_pipeline);
2098
m_dirty_flags &= ~DIRTY_FLAG_INITIAL;
2099
2100
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
2101
2102
m_current_vertex_stride = m_current_pipeline->GetVertexStride();
2103
SetVertexBuffer(cmdlist);
2104
const D3D12_INDEX_BUFFER_VIEW ib_view = {m_index_buffer.GetGPUPointer(), m_index_buffer.GetSize(),
2105
DXGI_FORMAT_R16_UINT};
2106
cmdlist->IASetIndexBuffer(&ib_view);
2107
2108
cmdlist->SetPipelineState(m_current_pipeline->GetPipeline());
2109
m_current_pipeline_layout = m_current_pipeline->GetLayout();
2110
2111
m_current_topology = m_current_pipeline->GetTopology();
2112
cmdlist->IASetPrimitiveTopology(m_current_topology);
2113
2114
m_current_blend_constant = m_current_pipeline->GetBlendConstants();
2115
cmdlist->OMSetBlendFactor(m_current_pipeline->GetBlendConstantsF().data());
2116
2117
SetViewport(cmdlist);
2118
SetScissor(cmdlist);
2119
}
2120
2121
// Binds the whole vertex stream buffer to slot 0 using the currently-cached vertex stride.
void D3D12Device::SetVertexBuffer(ID3D12GraphicsCommandList4* cmdlist)
{
  const D3D12_VERTEX_BUFFER_VIEW view = {.BufferLocation = m_vertex_buffer.GetGPUPointer(),
                                         .SizeInBytes = m_vertex_buffer.GetSize(),
                                         .StrideInBytes = m_current_vertex_stride};
  cmdlist->IASetVertexBuffers(0, 1, &view);
}
2127
2128
// Sends the cached viewport rectangle to `cmdlist` with a fixed [0, 1] depth range.
void D3D12Device::SetViewport(ID3D12GraphicsCommandList4* cmdlist)
{
  const D3D12_VIEWPORT viewport = {.TopLeftX = static_cast<float>(m_current_viewport.left),
                                   .TopLeftY = static_cast<float>(m_current_viewport.top),
                                   .Width = static_cast<float>(m_current_viewport.width()),
                                   .Height = static_cast<float>(m_current_viewport.height()),
                                   .MinDepth = 0.0f,
                                   .MaxDepth = 1.0f};
  cmdlist->RSSetViewports(1, &viewport);
}
2138
2139
// Sends the cached scissor rectangle to `cmdlist`.
void D3D12Device::SetScissor(ID3D12GraphicsCommandList4* cmdlist)
{
  // The cached rectangle is passed to D3D12 directly, so the two types must at least be the same size.
  static_assert(sizeof(GSVector4i) == sizeof(D3D12_RECT));

  const D3D12_RECT* const rect = reinterpret_cast<const D3D12_RECT*>(&m_current_scissor);
  cmdlist->RSSetScissorRects(1, rect);
}
2144
2145
// Binds `texture` and `sampler` to `slot`. A null sampler falls back to the nearest sampler.
// A newly-bound texture has any pending clear committed and is moved to the pixel shader resource
// state, ending the active render pass first if a transition is required.
void D3D12Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler)
{
  D3D12Texture* const new_texture = static_cast<D3D12Texture*>(texture);
  if (m_current_textures[slot] != new_texture)
  {
    m_current_textures[slot] = new_texture;
    m_dirty_flags |= DIRTY_FLAG_TEXTURES;

    if (new_texture)
    {
      new_texture->CommitClear();
      new_texture->SetUseFenceValue(GetCurrentFenceValue());

      const bool needs_transition =
        (new_texture->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
      if (needs_transition)
      {
        if (InRenderPass())
          EndRenderPass();
        new_texture->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
      }
    }
  }

  // Samplers are tracked by descriptor handle rather than by object.
  GPUSampler* const effective_sampler = sampler ? sampler : m_nearest_sampler;
  const D3D12DescriptorHandle& sampler_handle = static_cast<D3D12Sampler*>(effective_sampler)->GetDescriptor();
  if (m_current_samplers[slot] != sampler_handle)
  {
    m_current_samplers[slot] = sampler_handle;
    m_dirty_flags |= DIRTY_FLAG_SAMPLERS;
  }
}
2174
2175
// Binds `buffer` as the current texture buffer. Only slot 0 is supported.
void D3D12Device::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer)
{
  DebugAssert(slot == 0);

  if (m_current_texture_buffer != buffer)
  {
    m_current_texture_buffer = static_cast<D3D12TextureBuffer*>(buffer);

    // Only the texture-buffer layout uses this binding, so only then do descriptors need refreshing.
    const bool layout_uses_buffer =
      (m_current_pipeline_layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants);
    if (layout_uses_buffer)
      m_dirty_flags |= DIRTY_FLAG_TEXTURES;
  }
}
2185
2186
// Removes every reference to `tex` from the cached bindings: sampled texture slots, colour target
// slots and the depth target. Any active render pass is ended if `tex` was one of its targets.
void D3D12Device::UnbindTexture(D3D12Texture* tex)
{
  for (u32 slot = 0; slot < MAX_TEXTURE_SAMPLERS; slot++)
  {
    if (m_current_textures[slot] != tex)
      continue;

    m_current_textures[slot] = nullptr;
    m_dirty_flags |= DIRTY_FLAG_TEXTURES;
  }

  const bool may_be_color_target = (tex->IsRenderTarget() || tex->HasFlag(GPUTexture::Flags::AllowBindAsImage));
  if (may_be_color_target)
  {
    // The slot is nulled in place; the bound target count is left unchanged.
    for (u32 slot = 0; slot < m_num_current_render_targets; slot++)
    {
      if (m_current_render_targets[slot] != tex)
        continue;

      if (InRenderPass())
        EndRenderPass();
      m_current_render_targets[slot] = nullptr;
    }
  }
  else if (tex->IsDepthStencil() && m_current_depth_target == tex)
  {
    if (InRenderPass())
      EndRenderPass();
    m_current_depth_target = nullptr;
  }
}
2219
2220
// Forgets `buf` if it is the currently-bound texture buffer; otherwise does nothing.
void D3D12Device::UnbindTextureBuffer(D3D12TextureBuffer* buf)
{
  if (m_current_texture_buffer == buf)
  {
    m_current_texture_buffer = nullptr;

    // Descriptors only reference the texture buffer under this layout.
    const bool layout_uses_buffer =
      (m_current_pipeline_layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants);
    if (layout_uses_buffer)
      m_dirty_flags |= DIRTY_FLAG_TEXTURES;
  }
}
2230
2231
// Renders mip level `dst_level` of `texture` (dst_width x dst_height) by drawing a fullscreen
// triangle that samples mip level `src_level` of the same texture.
//
// A per-format pipeline is created on first use, and loaded from/stored to the pipeline library when
// one is available. Any active render pass is ended. Afterwards the viewport, scissor, pipeline state
// object and primitive topology that the device has cached are re-applied, and all root state is
// marked dirty, so that the next normal draw sees consistent state.
//
// `src_width`/`src_height` are not used by this implementation.
// Returns early, without rendering, if the pipeline cannot be created or the device is lost while
// making room for descriptors.
void D3D12Device::RenderTextureMipmap(D3D12Texture* texture, u32 dst_level, u32 dst_width, u32 dst_height,
                                      u32 src_level, u32 src_width, u32 src_height)
{
  ComPtr<ID3D12PipelineState>& pipeline = m_mipmap_render_pipelines[static_cast<size_t>(texture->GetFormat())];
  if (!pipeline)
  {
    D3D12::GraphicsPipelineBuilder gpb;
    gpb.SetRootSignature(m_mipmap_render_root_signature.Get());
    gpb.SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE);
    gpb.SetRenderTarget(0, texture->GetDXGIFormat());
    gpb.SetVertexShader(s_mipmap_blit_vs, sizeof(s_mipmap_blit_vs));
    gpb.SetPixelShader(s_mipmap_blit_ps, sizeof(s_mipmap_blit_ps));
    gpb.SetRasterizationState(D3D12_FILL_MODE_SOLID, D3D12_CULL_MODE_NONE, false);
    gpb.SetDepthState(false, false, D3D12_COMPARISON_FUNC_ALWAYS);
    gpb.SetBlendState(0, false, D3D12_BLEND_ZERO, D3D12_BLEND_ONE, D3D12_BLEND_OP_ADD, D3D12_BLEND_ZERO,
                      D3D12_BLEND_ONE, D3D12_BLEND_OP_ADD, D3D12_COLOR_WRITE_ENABLE_ALL);

    const std::wstring name = StringUtil::UTF8StringToWideString(
      TinyString::from_format("MipmapRender-{}", GPUTexture::GetFormatName(texture->GetFormat())));
    Error error;
    if (m_pipeline_library)
    {
      HRESULT hr =
        m_pipeline_library->LoadGraphicsPipeline(name.c_str(), gpb.GetDesc(), IID_PPV_ARGS(pipeline.GetAddressOf()));
      if (FAILED(hr))
      {
        // E_INVALIDARG = not found.
        if (hr != E_INVALIDARG)
          ERROR_LOG("LoadGraphicsPipeline() failed with HRESULT {:08X}", static_cast<unsigned>(hr));

        // Need to create it normally.
        pipeline = gpb.Create(m_device.Get(), &error, false);

        // Store if it wasn't an OOM or something else.
        if (pipeline && hr == E_INVALIDARG)
        {
          hr = m_pipeline_library->StorePipeline(name.c_str(), pipeline.Get());
          if (FAILED(hr))
            ERROR_LOG("StorePipeline() failed with HRESULT {:08X}", static_cast<unsigned>(hr));
        }
      }
    }
    else
    {
      pipeline = gpb.Create(m_device.Get(), &error, false);
    }
    if (!pipeline)
    {
      ERROR_LOG("Failed to compile mipmap render pipeline for {}: {}", GPUTexture::GetFormatName(texture->GetFormat()),
                error.GetDescription());
      return;
    }
  }

  if (InRenderPass())
    EndRenderPass();

  // We need a temporary RTV and SRV for this mip level. The RTV is allocated first: the SRV comes from
  // the current command list's allocator, so it has to be allocated after any command list submission
  // that making room for the RTV may trigger.
  D3D12DescriptorHandle rtv_handle;
  while (!GetRTVHeapManager().Allocate(&rtv_handle))
  {
    SubmitCommandList(false, "Allocate RTV for RenderTextureMipmap()");
    if (m_device_was_lost)
      return;
  }

  D3D12DescriptorHandle srv_handle;
  while (!m_command_lists[m_current_command_list].descriptor_allocator.Allocate(1, &srv_handle))
  {
    SubmitCommandList(false, "Allocate SRV/sampler for RenderTextureMipmap()");
    if (m_device_was_lost)
      return;
  }

  // Setup views. This will be a partial view for the SRV.
  const D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = {.Format = texture->GetDXGIFormat(),
                                                  .ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D,
                                                  .Texture2D = {.MipSlice = dst_level, .PlaneSlice = 0}};
  m_device->CreateRenderTargetView(texture->GetResource(), &rtv_desc, rtv_handle);

  const D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {
    .Format = texture->GetDXGIFormat(),
    .ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D,
    .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
    .Texture2D = {.MostDetailedMip = src_level, .MipLevels = 1, .PlaneSlice = 0, .ResourceMinLODClamp = 0.0f}};
  m_device->CreateShaderResourceView(texture->GetResource(), &srv_desc, srv_handle);

  // *now* we don't have to worry about running out of anything.
  ID3D12GraphicsCommandList4* cmdlist = GetCommandList();

  // Move only the two subresources involved into the states needed for the blit; the texture's
  // tracked state is left untouched and restored per-subresource below.
  if (texture->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)
  {
    texture->TransitionSubresourceToState(cmdlist, src_level, texture->GetResourceState(),
                                          D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
  }
  if (texture->GetResourceState() != D3D12_RESOURCE_STATE_RENDER_TARGET)
  {
    texture->TransitionSubresourceToState(cmdlist, dst_level, texture->GetResourceState(),
                                          D3D12_RESOURCE_STATE_RENDER_TARGET);
  }

  const D3D12_RENDER_PASS_RENDER_TARGET_DESC rt_desc = {
    .cpuDescriptor = rtv_handle,
    .BeginningAccess = {.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD, .Clear = {}},
    .EndingAccess = {.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, .Resolve = {}}};
  cmdlist->BeginRenderPass(1, &rt_desc, nullptr, D3D12_RENDER_PASS_FLAG_NONE);

  const D3D12_VIEWPORT vp = {0.0f, 0.0f, static_cast<float>(dst_width), static_cast<float>(dst_height), 0.0f, 1.0f};
  cmdlist->RSSetViewports(1, &vp);

  const D3D12_RECT scissor = {0, 0, static_cast<LONG>(dst_width), static_cast<LONG>(dst_height)};
  cmdlist->RSSetScissorRects(1, &scissor);

  cmdlist->SetPipelineState(pipeline.Get());
  cmdlist->SetGraphicsRootSignature(m_mipmap_render_root_signature.Get());
  cmdlist->SetGraphicsRootDescriptorTable(0, srv_handle);

  // Don't rely on whatever topology the last draw left behind (there may not have been one on this
  // command list yet); the pipeline was built for triangles.
  cmdlist->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
  cmdlist->DrawInstanced(3, 1, 0, 0);

  cmdlist->EndRenderPass();

  if (texture->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)
  {
    texture->TransitionSubresourceToState(cmdlist, src_level, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
                                          texture->GetResourceState());
  }
  if (texture->GetResourceState() != D3D12_RESOURCE_STATE_RENDER_TARGET)
  {
    texture->TransitionSubresourceToState(cmdlist, dst_level, D3D12_RESOURCE_STATE_RENDER_TARGET,
                                          texture->GetResourceState());
  }

  // Must destroy after current cmdlist.
  DeferDescriptorDestruction(m_rtv_heap_manager, &rtv_handle);

  // Restore for next normal draw.
  SetViewport(cmdlist);
  SetScissor(cmdlist);

  // SetPipeline() skips re-binding when the requested pipeline matches the cached one, so the cached
  // pipeline state object and topology have to be put back on the command list here. When
  // DIRTY_FLAG_INITIAL is set, SetInitialPipelineState() will bind everything before the next draw.
  if (!(m_dirty_flags & DIRTY_FLAG_INITIAL))
  {
    if (m_current_pipeline)
      cmdlist->SetPipelineState(m_current_pipeline->GetPipeline());
    cmdlist->IASetPrimitiveTopology(m_current_topology);
  }

  m_dirty_flags |= LAYOUT_DEPENDENT_DIRTY_STATE;
}
2370
2371
void D3D12Device::SetViewport(const GSVector4i rc)
2372
{
2373
if (m_current_viewport.eq(rc))
2374
return;
2375
2376
m_current_viewport = rc;
2377
2378
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
2379
return;
2380
2381
SetViewport(GetCommandList());
2382
}
2383
2384
void D3D12Device::SetScissor(const GSVector4i rc)
2385
{
2386
if (m_current_scissor.eq(rc))
2387
return;
2388
2389
m_current_scissor = rc;
2390
2391
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
2392
return;
2393
2394
SetScissor(GetCommandList());
2395
}
2396
2397
void D3D12Device::PreDrawCheck()
2398
{
2399
// TODO: Flushing cmdbuffer because of descriptor OOM will lose push constants.
2400
2401
DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
2402
const u32 dirty = std::exchange(m_dirty_flags, 0);
2403
if (dirty != 0)
2404
{
2405
if (dirty & DIRTY_FLAG_PIPELINE_LAYOUT)
2406
{
2407
UpdateRootSignature();
2408
if (!UpdateRootParameters(dirty))
2409
{
2410
SubmitCommandListAndRestartRenderPass("out of descriptors");
2411
PreDrawCheck();
2412
return;
2413
}
2414
}
2415
else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS))
2416
{
2417
if (!UpdateRootParameters(dirty))
2418
{
2419
SubmitCommandListAndRestartRenderPass("out of descriptors");
2420
PreDrawCheck();
2421
return;
2422
}
2423
}
2424
}
2425
2426
if (!InRenderPass())
2427
BeginRenderPass();
2428
}
2429
2430
void D3D12Device::PreDispatchCheck()
2431
{
2432
if (InRenderPass())
2433
EndRenderPass();
2434
2435
// Transition images.
2436
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
2437
2438
// All textures should be in shader read only optimal already, but just in case..
2439
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
2440
for (u32 i = 0; i < num_textures; i++)
2441
{
2442
if (m_current_textures[i])
2443
m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
2444
}
2445
2446
if (m_num_current_render_targets > 0 && (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages))
2447
{
2448
// Still need to clear the RTs.
2449
for (u32 i = 0; i < m_num_current_render_targets; i++)
2450
{
2451
D3D12Texture* const rt = m_current_render_targets[i];
2452
rt->SetUseFenceValue(GetCurrentFenceValue());
2453
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
2454
rt->CommitClear(cmdlist);
2455
rt->SetState(GPUTexture::State::Dirty);
2456
}
2457
}
2458
2459
// If this is a new command buffer, bind the pipeline and such.
2460
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
2461
SetInitialPipelineState();
2462
2463
// TODO: Flushing cmdbuffer because of descriptor OOM will lose push constants.
2464
DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
2465
const u32 dirty = std::exchange(m_dirty_flags, 0);
2466
if (dirty != 0)
2467
{
2468
if (dirty & DIRTY_FLAG_PIPELINE_LAYOUT)
2469
{
2470
UpdateRootSignature();
2471
if (!UpdateRootParameters(dirty))
2472
{
2473
SubmitCommandList(false, "out of descriptors");
2474
PreDispatchCheck();
2475
return;
2476
}
2477
}
2478
else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS))
2479
{
2480
if (!UpdateRootParameters(dirty))
2481
{
2482
SubmitCommandList(false, "out of descriptors");
2483
PreDispatchCheck();
2484
return;
2485
}
2486
}
2487
}
2488
}
2489
2490
bool D3D12Device::IsUsingROVRootSignature() const
2491
{
2492
return ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != 0);
2493
}
2494
2495
bool D3D12Device::IsUsingComputeRootSignature() const
2496
{
2497
return IsComputeLayout(m_current_pipeline_layout);
2498
}
2499
2500
void D3D12Device::UpdateRootSignature()
2501
{
2502
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
2503
if (!IsUsingComputeRootSignature())
2504
{
2505
cmdlist->SetGraphicsRootSignature(
2506
m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast<u8>(m_current_pipeline_layout)].Get());
2507
}
2508
else
2509
{
2510
cmdlist->SetComputeRootSignature(m_root_signatures[0][static_cast<u8>(m_current_pipeline_layout)].Get());
2511
}
2512
}
2513
2514
template<GPUPipeline::Layout layout>
2515
bool D3D12Device::UpdateParametersForLayout(u32 dirty)
2516
{
2517
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
2518
2519
if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO ||
2520
layout == GPUPipeline::Layout::MultiTextureAndUBO ||
2521
layout == GPUPipeline::Layout::MultiTextureAndUBOAndPushConstants ||
2522
layout == GPUPipeline::Layout::ComputeMultiTextureAndUBO)
2523
{
2524
if (dirty & DIRTY_FLAG_CONSTANT_BUFFER)
2525
{
2526
if constexpr (!IsComputeLayout(layout))
2527
cmdlist->SetGraphicsRootConstantBufferView(2, m_uniform_buffer.GetGPUPointer() + m_uniform_buffer_position);
2528
else
2529
cmdlist->SetComputeRootConstantBufferView(3, m_uniform_buffer.GetGPUPointer() + m_uniform_buffer_position);
2530
}
2531
}
2532
2533
constexpr u32 num_textures = GetActiveTexturesForLayout(layout);
2534
if (dirty & DIRTY_FLAG_TEXTURES && num_textures > 0)
2535
{
2536
D3D12DescriptorAllocator& allocator = m_command_lists[m_current_command_list].descriptor_allocator;
2537
D3D12DescriptorHandle gpu_handle;
2538
if (!allocator.Allocate(num_textures, &gpu_handle))
2539
return false;
2540
2541
if constexpr (num_textures == 1)
2542
{
2543
m_device->CopyDescriptorsSimple(
2544
1, gpu_handle, m_current_textures[0] ? m_current_textures[0]->GetSRVDescriptor() : m_null_srv_descriptor,
2545
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
2546
}
2547
else
2548
{
2549
D3D12_CPU_DESCRIPTOR_HANDLE src_handles[MAX_TEXTURE_SAMPLERS];
2550
UINT src_sizes[MAX_TEXTURE_SAMPLERS];
2551
for (u32 i = 0; i < num_textures; i++)
2552
{
2553
src_handles[i] = m_current_textures[i] ? m_current_textures[i]->GetSRVDescriptor() : m_null_srv_descriptor;
2554
src_sizes[i] = 1;
2555
}
2556
m_device->CopyDescriptors(1, &gpu_handle.cpu_handle, &num_textures, num_textures, src_handles, src_sizes,
2557
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
2558
}
2559
2560
if constexpr (!IsComputeLayout(layout))
2561
cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
2562
else
2563
cmdlist->SetComputeRootDescriptorTable(0, gpu_handle);
2564
}
2565
2566
if (dirty & DIRTY_FLAG_SAMPLERS && num_textures > 0)
2567
{
2568
auto& allocator = m_command_lists[m_current_command_list].sampler_allocator;
2569
D3D12DescriptorHandle gpu_handle;
2570
if constexpr (num_textures == 1)
2571
{
2572
if (!allocator.LookupSingle(m_device.Get(), &gpu_handle, m_current_samplers[0]))
2573
return false;
2574
}
2575
else
2576
{
2577
if (!allocator.LookupGroup(m_device.Get(), &gpu_handle, m_current_samplers.data()))
2578
return false;
2579
}
2580
2581
if constexpr (!IsComputeLayout(layout))
2582
cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle);
2583
else
2584
cmdlist->SetComputeRootDescriptorTable(1, gpu_handle);
2585
}
2586
2587
if (dirty & DIRTY_FLAG_TEXTURES && layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
2588
{
2589
D3D12DescriptorAllocator& allocator = m_command_lists[m_current_command_list].descriptor_allocator;
2590
D3D12DescriptorHandle gpu_handle;
2591
if (!allocator.Allocate(1, &gpu_handle))
2592
return false;
2593
2594
m_device->CopyDescriptorsSimple(
2595
1, gpu_handle, m_current_texture_buffer ? m_current_texture_buffer->GetDescriptor() : m_null_srv_descriptor,
2596
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
2597
cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
2598
}
2599
2600
if (dirty & DIRTY_FLAG_RT_UAVS)
2601
{
2602
DebugAssert(m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages);
2603
2604
D3D12DescriptorAllocator& allocator = m_command_lists[m_current_command_list].descriptor_allocator;
2605
D3D12DescriptorHandle gpu_handle;
2606
if (!allocator.Allocate(MAX_IMAGE_RENDER_TARGETS, &gpu_handle))
2607
return false;
2608
2609
D3D12_CPU_DESCRIPTOR_HANDLE src_handles[MAX_IMAGE_RENDER_TARGETS];
2610
UINT src_sizes[MAX_IMAGE_RENDER_TARGETS];
2611
const UINT dst_size = MAX_IMAGE_RENDER_TARGETS;
2612
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
2613
{
2614
src_handles[i] =
2615
m_current_render_targets[i] ? m_current_render_targets[i]->GetUAVDescriptor() : m_null_uav_descriptor;
2616
src_sizes[i] = 1;
2617
}
2618
m_device->CopyDescriptors(1, &gpu_handle.cpu_handle, &dst_size, MAX_IMAGE_RENDER_TARGETS, src_handles, src_sizes,
2619
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
2620
2621
constexpr u32 rov_param =
2622
IsComputeLayout(layout) ?
2623
2 :
2624
((layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) ?
2625
1 :
2626
((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO ||
2627
layout == GPUPipeline::Layout::MultiTextureAndUBOAndPushConstants) ?
2628
3 :
2629
2));
2630
if constexpr (!IsComputeLayout(layout))
2631
cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle);
2632
else
2633
cmdlist->SetComputeRootDescriptorTable(rov_param, gpu_handle);
2634
}
2635
2636
return true;
2637
}
2638
2639
// Forwards to the UpdateParametersForLayout<> instantiation matching the current pipeline
// layout and returns its result. Any layout without a case here hits UnreachableCode().
bool D3D12Device::UpdateRootParameters(u32 dirty)
{
  using PipelineLayout = GPUPipeline::Layout;

  switch (m_current_pipeline_layout)
  {
    // Graphics layouts.
    case PipelineLayout::SingleTextureAndUBO:
      return UpdateParametersForLayout<PipelineLayout::SingleTextureAndUBO>(dirty);
    case PipelineLayout::SingleTextureAndPushConstants:
      return UpdateParametersForLayout<PipelineLayout::SingleTextureAndPushConstants>(dirty);
    case PipelineLayout::SingleTextureBufferAndPushConstants:
      return UpdateParametersForLayout<PipelineLayout::SingleTextureBufferAndPushConstants>(dirty);
    case PipelineLayout::MultiTextureAndUBO:
      return UpdateParametersForLayout<PipelineLayout::MultiTextureAndUBO>(dirty);
    case PipelineLayout::MultiTextureAndPushConstants:
      return UpdateParametersForLayout<PipelineLayout::MultiTextureAndPushConstants>(dirty);
    case PipelineLayout::MultiTextureAndUBOAndPushConstants:
      return UpdateParametersForLayout<PipelineLayout::MultiTextureAndUBOAndPushConstants>(dirty);

    // Compute layouts.
    case PipelineLayout::ComputeMultiTextureAndUBO:
      return UpdateParametersForLayout<PipelineLayout::ComputeMultiTextureAndUBO>(dirty);
    case PipelineLayout::ComputeMultiTextureAndPushConstants:
      return UpdateParametersForLayout<PipelineLayout::ComputeMultiTextureAndPushConstants>(dirty);

    default:
      UnreachableCode();
  }
}
2671
2672
// Issues a non-indexed draw of `vertex_count` vertices starting at `base_vertex`, as a single
// instance, after running the pre-draw check and bumping the draw counter.
void D3D12Device::Draw(u32 vertex_count, u32 base_vertex)
{
  PreDrawCheck();
  ++s_stats.num_draws;

  ID3D12GraphicsCommandList4* const cmdlist = GetCommandList();
  cmdlist->DrawInstanced(vertex_count, 1, base_vertex, 0);
}
2678
2679
// Same as Draw(), but uploads `push_constants_size` bytes from `push_constants` through
// PushUniformBuffer() before recording the draw.
// NOTE(review): the `false` argument presumably selects the graphics (non-compute) path, since
// the dispatch variant passes `true` -- confirm against PushUniformBuffer().
void D3D12Device::DrawWithPushConstants(u32 vertex_count, u32 base_vertex, const void* push_constants,
                                        u32 push_constants_size)
{
  PreDrawCheck();
  ++s_stats.num_draws;

  ID3D12GraphicsCommandList4* const list = GetCommandList();
  PushUniformBuffer(list, false, push_constants, push_constants_size);
  list->DrawInstanced(vertex_count, 1, base_vertex, 0);
}
2689
2690
// Issues an indexed draw of `index_count` indices starting at `base_index`, with `base_vertex`
// as the base vertex location, as a single instance, after running the pre-draw check and
// bumping the draw counter.
void D3D12Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
{
  PreDrawCheck();
  ++s_stats.num_draws;

  ID3D12GraphicsCommandList4* const cmdlist = GetCommandList();
  cmdlist->DrawIndexedInstanced(index_count, 1, base_index, base_vertex, 0);
}
2696
2697
void D3D12Device::DrawIndexedWithPushConstants(u32 index_count, u32 base_index, u32 base_vertex,
2698
const void* push_constants, u32 push_constants_size)
2699
{
2700
PreDrawCheck();
2701
s_stats.num_draws++;
2702
2703
ID3D12GraphicsCommandList4* const cmdlist = GetCommandList();
2704
PushUniformBuffer(cmdlist, false, push_constants, push_constants_size);
2705
cmdlist->DrawIndexedInstanced(index_count, 1, base_index, base_vertex, 0);
2706
}
2707
2708
// Records a compute dispatch. The thread group count on each axis is the requested thread
// count divided by the group size on that axis; the dispatch is also counted in num_draws.
// NOTE(review): the division truncates, so thread counts that are not a multiple of the group
// size leave the remainder uncovered, and a zero group size divides by zero -- confirm callers
// always pass aligned, non-zero values.
void D3D12Device::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
                           u32 group_size_z)
{
  PreDispatchCheck();
  ++s_stats.num_draws;

  const u32 group_counts[3] = {threads_x / group_size_x, threads_y / group_size_y, threads_z / group_size_z};
  GetCommandList()->Dispatch(group_counts[0], group_counts[1], group_counts[2]);
}
2719
2720
// Same as Dispatch(), but uploads `push_constants_size` bytes from `push_constants` through
// PushUniformBuffer() (with its second argument set to true) before recording the dispatch.
// NOTE(review): the group-count division truncates, and a zero group size divides by zero --
// confirm callers always pass aligned, non-zero values.
void D3D12Device::DispatchWithPushConstants(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x,
                                            u32 group_size_y, u32 group_size_z, const void* push_constants,
                                            u32 push_constants_size)
{
  PreDispatchCheck();
  ++s_stats.num_draws;

  ID3D12GraphicsCommandList4* const list = GetCommandList();
  PushUniformBuffer(list, true, push_constants, push_constants_size);

  const u32 group_counts[3] = {threads_x / group_size_x, threads_y / group_size_y, threads_z / group_size_z};
  list->Dispatch(group_counts[0], group_counts[1], group_counts[2]);
}
2735
2736