// Source: GitHub repository stenzek/duckstation
// Path: blob/master/src/util/d3d12_device.h
1
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <[email protected]>
2
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
3
4
#pragma once
5
6
#include "d3d12_descriptor_heap_manager.h"
7
#include "d3d12_stream_buffer.h"
8
#include "gpu_device.h"
9
#include "gpu_texture.h"
10
11
#include "common/dimensional_array.h"
12
#include "common/windows_headers.h"
13
14
#include <array>
15
#include <atomic>
16
#include <condition_variable>
17
#include <d3d12.h>
18
#include <deque>
19
#include <dxgi1_5.h>
20
#include <functional>
21
#include <memory>
22
#include <mutex>
23
#include <string>
24
#include <thread>
25
#include <unordered_map>
26
#include <vector>
27
#include <wrl/client.h>
28
29
// Forward declarations of the D3D12 backend types referenced by pointer/reference
// in this header. Each type is declared exactly once.
class D3D12Framebuffer;
class D3D12Pipeline;
class D3D12SwapChain;
class D3D12Texture;
class D3D12TextureBuffer;
class D3D12DownloadTexture;

// Forward declaration of the D3D12MA allocator type, used below only through ComPtr.
namespace D3D12MA {
class Allocator;
}
41
42
class D3D12Device final : public GPUDevice
43
{
44
public:
45
friend D3D12Texture;
46
friend D3D12DownloadTexture;
47
48
template<typename T>
49
using ComPtr = Microsoft::WRL::ComPtr<T>;
50
51
enum : u32
52
{
53
NUM_COMMAND_LISTS = 3,
54
55
/// Start/End timestamp queries.
56
NUM_TIMESTAMP_QUERIES_PER_CMDLIST = 2,
57
};
58
59
public:
60
D3D12Device();
61
~D3D12Device() override;
62
63
std::string GetDriverInfo() const override;
64
65
void FlushCommands() override;
66
void WaitForGPUIdle() override;
67
68
std::unique_ptr<GPUSwapChain> CreateSwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode,
69
const ExclusiveFullscreenMode* exclusive_fullscreen_mode,
70
std::optional<bool> exclusive_fullscreen_control,
71
Error* error) override;
72
std::unique_ptr<GPUTexture> CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples,
73
GPUTexture::Type type, GPUTextureFormat format, GPUTexture::Flags flags,
74
const void* data = nullptr, u32 data_stride = 0,
75
Error* error = nullptr) override;
76
std::unique_ptr<GPUSampler> CreateSampler(const GPUSampler::Config& config, Error* error = nullptr) override;
77
std::unique_ptr<GPUTextureBuffer> CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements,
78
Error* error = nullptr) override;
79
80
std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTextureFormat format,
81
Error* error = nullptr) override;
82
std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTextureFormat format,
83
void* memory, size_t memory_size, u32 memory_stride,
84
Error* error = nullptr) override;
85
86
bool SupportsTextureFormat(GPUTextureFormat format) const override;
87
void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src,
88
u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override;
89
void ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src,
90
u32 src_x, u32 src_y, u32 width, u32 height) override;
91
void ClearRenderTarget(GPUTexture* t, u32 c) override;
92
void ClearDepth(GPUTexture* t, float d) override;
93
void InvalidateRenderTarget(GPUTexture* t) override;
94
95
std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data,
96
Error* error) override;
97
std::unique_ptr<GPUShader> CreateShaderFromSource(GPUShaderStage stage, GPUShaderLanguage language,
98
std::string_view source, const char* entry_point,
99
DynamicHeapArray<u8>* out_binary, Error* error) override;
100
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
101
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
102
103
#ifdef ENABLE_GPU_OBJECT_NAMES
104
void PushDebugGroup(const char* name) override;
105
void PopDebugGroup() override;
106
void InsertDebugMessage(const char* msg) override;
107
#endif
108
109
void MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
110
u32* map_base_vertex) override;
111
void UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) override;
112
void MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) override;
113
void UnmapIndexBuffer(u32 used_index_count) override;
114
void* MapUniformBuffer(u32 size) override;
115
void UnmapUniformBuffer(u32 size) override;
116
void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
117
GPUPipeline::RenderPassFlag flags = GPUPipeline::NoRenderPassFlags) override;
118
void SetPipeline(GPUPipeline* pipeline) override;
119
void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
120
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
121
void SetViewport(const GSVector4i rc) override;
122
void SetScissor(const GSVector4i rc) override;
123
void Draw(u32 vertex_count, u32 base_vertex) override;
124
void DrawWithPushConstants(u32 vertex_count, u32 base_vertex, const void* push_constants,
125
u32 push_constants_size) override;
126
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
127
void DrawIndexedWithPushConstants(u32 index_count, u32 base_index, u32 base_vertex, const void* push_constants,
128
u32 push_constants_size) override;
129
void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
130
u32 group_size_z) override;
131
void DispatchWithPushConstants(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
132
u32 group_size_z, const void* push_constants, u32 push_constants_size) override;
133
134
bool SetGPUTimingEnabled(bool enabled) override;
135
float GetAndResetAccumulatedGPUTime() override;
136
137
PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color) override;
138
void EndPresent(GPUSwapChain* swap_chain, bool explicit_present, u64 present_time) override;
139
void SubmitPresent(GPUSwapChain* swap_chain) override;
140
141
// Global state accessors
142
ALWAYS_INLINE static D3D12Device& GetInstance() { return *static_cast<D3D12Device*>(g_gpu_device.get()); }
143
ALWAYS_INLINE IDXGIAdapter1* GetAdapter() const { return m_adapter.Get(); }
144
ALWAYS_INLINE ID3D12Device1* GetDevice() const { return m_device.Get(); }
145
ALWAYS_INLINE ID3D12CommandQueue* GetCommandQueue() const { return m_command_queue.Get(); }
146
ALWAYS_INLINE IDXGIFactory5* GetDXGIFactory() { return m_dxgi_factory.Get(); }
147
ALWAYS_INLINE D3D12MA::Allocator* GetAllocator() const { return m_allocator.Get(); }
148
149
void WaitForAllFences();
150
151
// Descriptor manager access.
152
D3D12DescriptorHeapManager& GetDescriptorHeapManager() { return m_descriptor_heap_manager; }
153
D3D12DescriptorHeapManager& GetRTVHeapManager() { return m_rtv_heap_manager; }
154
D3D12DescriptorHeapManager& GetDSVHeapManager() { return m_dsv_heap_manager; }
155
D3D12DescriptorHeapManager& GetSamplerHeapManager() { return m_sampler_heap_manager; }
156
const D3D12DescriptorHandle& GetNullSRVDescriptor() const { return m_null_srv_descriptor; }
157
158
// These command buffers are allocated per-frame. They are valid until the command buffer
159
// is submitted, after that you should call these functions again.
160
ALWAYS_INLINE ID3D12GraphicsCommandList4* GetCommandList() const
161
{
162
return m_command_lists[m_current_command_list].command_lists[1].Get();
163
}
164
ALWAYS_INLINE D3D12StreamBuffer& GetTextureUploadBuffer() { return m_texture_upload_buffer; }
165
ID3D12GraphicsCommandList4* GetInitCommandList();
166
167
// Root signature access.
168
ComPtr<ID3D12RootSignature> CreateRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc, Error* error);
169
170
/// Fence value for current command list.
171
u64 GetCurrentFenceValue() const { return m_current_fence_value; }
172
173
/// Last "completed" fence.
174
u64 GetCompletedFenceValue() const { return m_completed_fence_value; }
175
176
// Schedule a d3d12 resource for destruction later on. This will occur when the command buffer
177
// is next re-used, and the GPU has finished working with the specified resource.
178
void DeferObjectDestruction(ComPtr<ID3D12Object> resource);
179
void DeferResourceDestruction(ComPtr<D3D12MA::Allocation> allocation, ComPtr<ID3D12Resource> resource);
180
void DeferDescriptorDestruction(D3D12DescriptorHeapManager& heap, D3D12DescriptorHandle* descriptor);
181
182
// Wait for a fence to be completed.
183
// Also invokes callbacks for completion.
184
void WaitForFence(u64 fence_counter);
185
186
// Ends a render pass if we're currently in one.
187
// When Bind() is next called, the pass will be restarted.
188
void BeginRenderPass();
189
void EndRenderPass();
190
bool InRenderPass();
191
192
/// Ends any render pass, executes the command buffer, and invalidates cached state.
193
void SubmitCommandList(bool wait_for_completion);
194
void SubmitCommandList(bool wait_for_completion, const std::string_view reason);
195
void SubmitCommandListAndRestartRenderPass(const std::string_view reason);
196
197
void UnbindPipeline(D3D12Pipeline* pl);
198
void UnbindTexture(D3D12Texture* tex);
199
void UnbindTextureBuffer(D3D12TextureBuffer* buf);
200
201
void RenderTextureMipmap(D3D12Texture* texture, u32 dst_level, u32 dst_width, u32 dst_height, u32 src_level,
202
u32 src_width, u32 src_height);
203
204
protected:
205
bool CreateDeviceAndMainSwapChain(std::string_view adapter, CreateFlags create_flags, const WindowInfo& wi,
206
GPUVSyncMode vsync_mode, const ExclusiveFullscreenMode* exclusive_fullscreen_mode,
207
std::optional<bool> exclusive_fullscreen_control, Error* error) override;
208
void DestroyDevice() override;
209
210
bool ReadPipelineCache(DynamicHeapArray<u8> data, Error* error) override;
211
bool CreatePipelineCache(const std::string& path, Error* error) override;
212
bool GetPipelineCacheData(DynamicHeapArray<u8>* data, Error* error) override;
213
214
private:
215
enum DIRTY_FLAG : u32
216
{
217
DIRTY_FLAG_INITIAL = (1 << 0),
218
DIRTY_FLAG_PIPELINE_LAYOUT = (1 << 1),
219
DIRTY_FLAG_CONSTANT_BUFFER = (1 << 2),
220
DIRTY_FLAG_TEXTURES = (1 << 3),
221
DIRTY_FLAG_SAMPLERS = (1 << 3),
222
DIRTY_FLAG_RT_UAVS = (1 << 4),
223
224
LAYOUT_DEPENDENT_DIRTY_STATE = DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES |
225
DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS,
226
ALL_DIRTY_STATE = DIRTY_FLAG_INITIAL | LAYOUT_DEPENDENT_DIRTY_STATE,
227
};
228
229
struct CommandList
230
{
231
// [0] - Init (upload) command buffer, [1] - draw command buffer
232
std::array<ComPtr<ID3D12CommandAllocator>, 2> command_allocators;
233
std::array<ComPtr<ID3D12GraphicsCommandList4>, 2> command_lists;
234
D3D12DescriptorAllocator descriptor_allocator;
235
D3D12GroupedSamplerAllocator<MAX_TEXTURE_SAMPLERS> sampler_allocator;
236
u64 fence_counter = 0;
237
bool init_list_used = false;
238
bool needs_fence_wait = false;
239
bool has_timestamp_query = false;
240
};
241
242
struct PIPELINE_CACHE_HEADER
243
{
244
u64 adapter_luid;
245
u32 render_api_version;
246
u32 unused;
247
};
248
static_assert(sizeof(PIPELINE_CACHE_HEADER) == 16);
249
250
void GetPipelineCacheHeader(PIPELINE_CACHE_HEADER* hdr);
251
void SetFeatures(D3D_FEATURE_LEVEL feature_level, CreateFlags create_flags);
252
253
bool CreateCommandLists(Error* error);
254
void DestroyCommandLists();
255
bool CreateRootSignatures(Error* error);
256
void DestroyRootSignatures();
257
bool CreateBuffers(Error* error);
258
void DestroyBuffers();
259
bool CreateDescriptorHeaps(Error* error);
260
void DestroyDescriptorHeaps();
261
bool CreateTimestampQuery();
262
void DestroyTimestampQuery();
263
void DestroyDeferredObjects(u64 fence_value);
264
265
void RenderBlankFrame(D3D12SwapChain* swap_chain);
266
void BeginCommandList(u32 index);
267
268
bool CreateSRVDescriptor(ID3D12Resource* resource, u32 layers, u32 levels, u32 samples, DXGI_FORMAT format,
269
D3D12DescriptorHandle* dh, Error* error);
270
bool CreateRTVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, D3D12DescriptorHandle* dh,
271
Error* error);
272
bool CreateDSVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, D3D12DescriptorHandle* dh,
273
Error* error);
274
bool CreateUAVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, D3D12DescriptorHandle* dh,
275
Error* error);
276
277
bool IsRenderTargetBound(const GPUTexture* tex) const;
278
279
/// Set dirty flags on everything to force re-bind at next draw time.
280
void InvalidateCachedState();
281
void SetVertexBuffer(ID3D12GraphicsCommandList4* cmdlist);
282
void SetViewport(ID3D12GraphicsCommandList4* cmdlist);
283
void SetScissor(ID3D12GraphicsCommandList4* cmdlist);
284
285
/// Applies any changed state.
286
void SetInitialPipelineState();
287
void PreDrawCheck();
288
void PreDispatchCheck();
289
void PushUniformBuffer(ID3D12GraphicsCommandList4* const cmdlist, bool compute, const void* data, u32 data_size);
290
291
bool IsUsingROVRootSignature() const;
292
bool IsUsingComputeRootSignature() const;
293
void UpdateRootSignature();
294
template<GPUPipeline::Layout layout>
295
bool UpdateParametersForLayout(u32 dirty);
296
bool UpdateRootParameters(u32 dirty);
297
298
ComPtr<IDXGIAdapter1> m_adapter;
299
ComPtr<ID3D12Device1> m_device;
300
ComPtr<ID3D12CommandQueue> m_command_queue;
301
ComPtr<D3D12MA::Allocator> m_allocator;
302
303
ComPtr<ID3D12Fence> m_fence;
304
HANDLE m_fence_event = {};
305
u64 m_current_fence_value = 0;
306
u64 m_completed_fence_value = 0;
307
308
std::array<CommandList, NUM_COMMAND_LISTS> m_command_lists;
309
u32 m_current_command_list = NUM_COMMAND_LISTS - 1;
310
bool m_device_was_lost = false;
311
312
ComPtr<IDXGIFactory5> m_dxgi_factory;
313
314
D3D12DescriptorHeapManager m_descriptor_heap_manager;
315
D3D12DescriptorHeapManager m_rtv_heap_manager;
316
D3D12DescriptorHeapManager m_dsv_heap_manager;
317
D3D12DescriptorHeapManager m_sampler_heap_manager;
318
D3D12DescriptorHandle m_null_srv_descriptor;
319
D3D12DescriptorHandle m_null_uav_descriptor;
320
321
ComPtr<ID3D12QueryHeap> m_timestamp_query_heap;
322
ComPtr<ID3D12Resource> m_timestamp_query_buffer;
323
ComPtr<D3D12MA::Allocation> m_timestamp_query_allocation;
324
double m_timestamp_frequency = 0.0;
325
float m_accumulated_gpu_time = 0.0f;
326
327
std::deque<std::pair<u64, std::pair<D3D12MA::Allocation*, ID3D12Object*>>> m_cleanup_resources;
328
std::deque<std::pair<u64, std::pair<D3D12DescriptorHeapManager*, D3D12DescriptorHandle>>> m_cleanup_descriptors;
329
330
DimensionalArray<ComPtr<ID3D12RootSignature>, static_cast<u8>(GPUPipeline::Layout::MaxCount), 2> m_root_signatures =
331
{};
332
333
D3D12StreamBuffer m_vertex_buffer;
334
D3D12StreamBuffer m_index_buffer;
335
D3D12StreamBuffer m_uniform_buffer;
336
D3D12StreamBuffer m_texture_upload_buffer;
337
338
u32 m_uniform_buffer_position = 0;
339
bool m_in_render_pass = false;
340
341
ComPtr<ID3D12PipelineLibrary> m_pipeline_library;
342
343
// Which bindings/state has to be updated before the next draw.
344
u32 m_dirty_flags = ALL_DIRTY_STATE;
345
346
D3D12Pipeline* m_current_pipeline = nullptr;
347
D3D12_PRIMITIVE_TOPOLOGY m_current_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
348
u8 m_num_current_render_targets = 0;
349
GPUPipeline::RenderPassFlag m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;
350
std::array<D3D12Texture*, MAX_RENDER_TARGETS> m_current_render_targets = {};
351
D3D12Texture* m_current_depth_target = nullptr;
352
u32 m_current_vertex_stride = 0;
353
u32 m_current_blend_constant = 0;
354
GPUPipeline::Layout m_current_pipeline_layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
355
356
std::array<D3D12Texture*, MAX_TEXTURE_SAMPLERS> m_current_textures = {};
357
std::array<D3D12DescriptorHandle, MAX_TEXTURE_SAMPLERS> m_current_samplers = {};
358
D3D12TextureBuffer* m_current_texture_buffer = nullptr;
359
GSVector4i m_current_viewport = GSVector4i::cxpr(0, 0, 1, 1);
360
GSVector4i m_current_scissor = {};
361
362
D3D12SwapChain* m_current_swap_chain = nullptr;
363
364
ComPtr<ID3D12RootSignature> m_mipmap_render_root_signature;
365
std::array<ComPtr<ID3D12PipelineState>, static_cast<size_t>(GPUTextureFormat::MaxCount)> m_mipmap_render_pipelines =
366
{};
367
};
368
369
class D3D12SwapChain : public GPUSwapChain
370
{
371
public:
372
template<typename T>
373
using ComPtr = Microsoft::WRL::ComPtr<T>;
374
375
friend D3D12Device;
376
377
using BufferPair = std::pair<ComPtr<ID3D12Resource>, D3D12DescriptorHandle>;
378
379
D3D12SwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode,
380
const GPUDevice::ExclusiveFullscreenMode* fullscreen_mode);
381
~D3D12SwapChain() override;
382
383
ALWAYS_INLINE IDXGISwapChain1* GetSwapChain() const { return m_swap_chain.Get(); }
384
ALWAYS_INLINE const BufferPair& GetCurrentBuffer() const { return m_swap_chain_buffers[m_current_swap_chain_buffer]; }
385
ALWAYS_INLINE bool IsUsingAllowTearing() const { return m_using_allow_tearing; }
386
387
void AdvanceBuffer()
388
{
389
m_current_swap_chain_buffer = ((m_current_swap_chain_buffer + 1) % static_cast<u32>(m_swap_chain_buffers.size()));
390
}
391
392
bool ResizeBuffers(u32 new_width, u32 new_height, Error* error) override;
393
bool SetVSyncMode(GPUVSyncMode mode, Error* error) override;
394
395
bool IsExclusiveFullscreen() const override;
396
397
private:
398
static u32 GetNewBufferCount(GPUVSyncMode vsync_mode);
399
400
bool InitializeExclusiveFullscreenMode(const GPUDevice::ExclusiveFullscreenMode* mode);
401
402
bool CreateSwapChain(D3D12Device& dev, Error* error);
403
bool CreateRTV(D3D12Device& dev, Error* error);
404
405
void DestroySwapChain();
406
void DestroyRTVs();
407
408
ComPtr<IDXGISwapChain1> m_swap_chain;
409
std::vector<BufferPair> m_swap_chain_buffers;
410
u32 m_current_swap_chain_buffer = 0;
411
bool m_using_allow_tearing = false;
412
413
ComPtr<IDXGIOutput> m_fullscreen_output;
414
std::optional<DXGI_MODE_DESC> m_fullscreen_mode;
415
};
416
417