GitHub Repository: stenzek/duckstation
Path: blob/master/src/util/d3d12_stream_buffer.cpp
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <[email protected]>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0

#include "d3d12_stream_buffer.h"
#include "d3d12_device.h"

#include "common/align.h"
#include "common/assert.h"
#include "common/error.h"
#include "common/log.h"

#include "D3D12MemAlloc.h"

#include <algorithm>

LOG_CHANNEL(GPUDevice);

D3D12StreamBuffer::D3D12StreamBuffer() = default;

D3D12StreamBuffer::~D3D12StreamBuffer()
{
  Destroy();
}

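// Creates the persistently-mapped upload-heap buffer. The allocation goes through D3D12MA as a
// committed resource; upload-heap resources can stay mapped for their whole lifetime, so both the
// CPU pointer and the GPU virtual address are cached for later use.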
bool D3D12StreamBuffer::Create(u32 size, Error* error)
{
  const D3D12_RESOURCE_DESC resource_desc = {
    D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
    D3D12_RESOURCE_FLAG_NONE};

  D3D12MA::ALLOCATION_DESC allocationDesc = {};
  allocationDesc.Flags = D3D12MA::ALLOCATION_FLAG_COMMITTED;
  allocationDesc.HeapType = D3D12_HEAP_TYPE_UPLOAD;

  Microsoft::WRL::ComPtr<ID3D12Resource> buffer;
  Microsoft::WRL::ComPtr<D3D12MA::Allocation> allocation;
  HRESULT hr = D3D12Device::GetInstance().GetAllocator()->CreateResource(
    &allocationDesc, &resource_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.ReleaseAndGetAddressOf(),
    IID_PPV_ARGS(buffer.GetAddressOf()));
  if (FAILED(hr)) [[unlikely]]
  {
    Error::SetHResult(error, "CreateResource() for stream buffer failed: ", hr);
    return false;
  }

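  // An empty read range tells D3D12 that the CPU will not read from the mapping; the buffer is
  // only ever written through this pointer.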
  static const D3D12_RANGE read_range = {};
  u8* host_pointer;
  hr = buffer->Map(0, &read_range, reinterpret_cast<void**>(&host_pointer));
  if (FAILED(hr)) [[unlikely]]
  {
    Error::SetHResult(error, "Map() for stream buffer failed: ", hr);
    return false;
  }

  Destroy(true);

  m_buffer = std::move(buffer);
  m_allocation = std::move(allocation);
  m_host_pointer = host_pointer;
  m_size = size;
  m_gpu_pointer = m_buffer->GetGPUVirtualAddress();
  return true;
}

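// Reserves space for an upcoming write, treating the buffer as a ring: first try the region after
// the current offset, then wrap to the start behind the GPU's read position, and finally fall back
// to waiting on a tracked fence. Returns false if the space is held by the command list that is
// still being recorded, in which case the caller has to submit it and retry.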
bool D3D12StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment)
{
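  // Reserve the worst case: aligning the write offset can introduce up to (alignment - 1) bytes
  // of padding on top of the data itself.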
  const u32 required_bytes = num_bytes + alignment;

  // Check for sane allocations
  if (num_bytes > m_size) [[unlikely]]
  {
    ERROR_LOG("Attempting to allocate {} bytes from a {} byte stream buffer", static_cast<u32>(num_bytes),
              static_cast<u32>(m_size));
    Panic("Stream buffer overflow");
  }

  // Is the GPU behind or up to date with our current offset?
  UpdateCurrentFencePosition();
  if (m_current_offset >= m_current_gpu_position)
  {
    const u32 aligned_required_bytes = (m_current_offset > 0) ? required_bytes : num_bytes;
    const u32 remaining_bytes = m_size - m_current_offset;
    if (aligned_required_bytes <= remaining_bytes)
    {
      // Place at the current position, after the GPU position.
      m_current_offset = Common::AlignUp(m_current_offset, alignment);
      m_current_space = m_size - m_current_offset;
      return true;
    }

    // Check for space at the start of the buffer
    // We use < here because we don't want to have the case of m_current_offset ==
    // m_current_gpu_position. That would mean the code above would assume the
    // GPU has caught up to us, which it hasn't.
    if (required_bytes < m_current_gpu_position)
    {
      // Reset offset to zero, since we're allocating behind the gpu now
      m_current_offset = 0;
      m_current_space = m_current_gpu_position;
      return true;
    }
  }

  // Is the GPU ahead of our current offset?
  if (m_current_offset < m_current_gpu_position)
  {
    // We have from m_current_offset..m_current_gpu_position space to use.
    const u32 remaining_bytes = m_current_gpu_position - m_current_offset;
    if (required_bytes < remaining_bytes)
    {
      // Place at the current position, since this is still behind the GPU.
      m_current_offset = Common::AlignUp(m_current_offset, alignment);
      m_current_space = m_current_gpu_position - m_current_offset;
      return true;
    }
  }

  // Can we find a fence to wait on that will give us enough memory?
  if (WaitForClearSpace(required_bytes))
  {
    const u32 align_diff = Common::AlignUp(m_current_offset, alignment) - m_current_offset;
    m_current_offset += align_diff;
    m_current_space -= align_diff;
    return true;
  }

  // We tried everything we could, and still couldn't get anything. This means that too much space
  // in the buffer is being used by the command buffer currently being recorded. Therefore, the
  // only option is to execute it, and wait until it's done.
  return false;
}

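// Finalizes a reservation made by ReserveMemory(), advancing the write offset by the number of
// bytes that were actually written (which may be less than the amount reserved).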
void D3D12StreamBuffer::CommitMemory(u32 final_num_bytes)
{
  DebugAssert((m_current_offset + final_num_bytes) <= m_size);
  DebugAssert(final_num_bytes <= m_current_space);
  m_current_offset += final_num_bytes;
  m_current_space -= final_num_bytes;
}

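// Releases the buffer. When defer is set, the resource and its allocation are handed to the
// device's deferred-destruction queue instead of being freed immediately, so work the GPU has
// already been issued can still read from it.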
void D3D12StreamBuffer::Destroy(bool defer)
{
  if (m_host_pointer)
  {
    const D3D12_RANGE written_range = {0, m_size};
    m_buffer->Unmap(0, &written_range);
    m_host_pointer = nullptr;
  }

  if (m_buffer && defer)
    D3D12Device::GetInstance().DeferResourceDestruction(std::move(m_allocation), std::move(m_buffer));
  m_buffer.Reset();
  m_allocation.Reset();

  m_current_offset = 0;
  m_current_space = 0;
  m_current_gpu_position = 0;
  m_tracked_fences.clear();
}

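// Records the current write offset against the device's current fence value, so that once the
// fence is signalled the space up to that offset can be treated as consumed by the GPU.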
void D3D12StreamBuffer::UpdateCurrentFencePosition()
{
  // Don't create a tracking entry if the GPU is caught up with the buffer.
  if (m_current_offset == m_current_gpu_position)
    return;

  // Has the offset changed since the last fence?
  const u64 fence = D3D12Device::GetInstance().GetCurrentFenceValue();
  if (!m_tracked_fences.empty() && m_tracked_fences.back().first == fence)
  {
    // Still haven't executed a command buffer, so just update the offset.
    m_tracked_fences.back().second = m_current_offset;
    return;
  }

  UpdateGPUPosition();
  m_tracked_fences.emplace_back(fence, m_current_offset);
}

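// Advances m_current_gpu_position past every tracked fence the GPU has already completed, and
// drops those entries from the tracking list.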
void D3D12StreamBuffer::UpdateGPUPosition()
{
  auto start = m_tracked_fences.begin();
  auto end = start;

  const u64 completed_counter = D3D12Device::GetInstance().GetCompletedFenceValue();
  while (end != m_tracked_fences.end() && completed_counter >= end->first)
  {
    m_current_gpu_position = end->second;
    ++end;
  }

  if (start != end)
    m_tracked_fences.erase(start, end);
}

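// Last-resort path for ReserveMemory(): walks the tracked fences looking for the oldest one whose
// completion would free at least num_bytes, then blocks on it. Returns false if no pending fence
// can provide the space (i.e. it is held by the command list currently being recorded).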
bool D3D12StreamBuffer::WaitForClearSpace(u32 num_bytes)
{
  u32 new_offset = 0;
  u32 new_space = 0;
  u32 new_gpu_position = 0;

  auto iter = m_tracked_fences.begin();
  for (; iter != m_tracked_fences.end(); ++iter)
  {
    // Would this fence bring us in line with the GPU?
    // This is the "last resort" case, where a command buffer execution has been forced
    // after no additional data has been written to it, so we can assume that after the
    // fence has been signaled the entire buffer is now consumed.
    u32 gpu_position = iter->second;
    if (m_current_offset == gpu_position)
    {
      new_offset = 0;
      new_space = m_size;
      new_gpu_position = 0;
      break;
    }

    // Assuming that we wait for this fence, are we allocating in front of the GPU?
    if (m_current_offset > gpu_position)
    {
      // This would suggest the GPU has now followed us and wrapped around, so we have from
      // m_current_offset..m_size free, as well as 0..gpu_position.
      const u32 remaining_space_after_offset = m_size - m_current_offset;
      if (remaining_space_after_offset >= num_bytes)
      {
        // Switch to allocating in front of the GPU, using the remainder of the buffer.
        new_offset = m_current_offset;
        new_space = m_size - m_current_offset;
        new_gpu_position = gpu_position;
        break;
      }

      // We can wrap around to the start, behind the GPU, if there is enough space.
      // We use > here because otherwise we'd end up lining up with the GPU, and then the
      // allocator would assume that the GPU has consumed what we just wrote.
      if (gpu_position > num_bytes)
      {
        new_offset = 0;
        new_space = gpu_position;
        new_gpu_position = gpu_position;
        break;
      }
    }
    else
    {
      // We're currently allocating behind the GPU. This would give us between the current
      // offset and the GPU position worth of space to work with. Again, > because we can't
      // align the GPU position with the buffer offset.
      u32 available_space_inbetween = gpu_position - m_current_offset;
      if (available_space_inbetween > num_bytes)
      {
        // Leave the offset as-is, but update the GPU position.
        new_offset = m_current_offset;
        new_space = gpu_position - m_current_offset;
        new_gpu_position = gpu_position;
        break;
      }
    }
  }

  // Did any fences satisfy this condition?
  // Has the command buffer been executed yet? If not, the caller should execute it.
  if (iter == m_tracked_fences.end() || iter->first == D3D12Device::GetInstance().GetCurrentFenceValue())
    return false;

  // Wait until this fence is signaled. This will fire the callback, updating the GPU position.
  D3D12Device::GetInstance().WaitForFence(iter->first);
  m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter);
  m_current_offset = new_offset;
  m_current_space = new_space;
  m_current_gpu_position = new_gpu_position;
  return true;
}