Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
stenzek
GitHub Repository: stenzek/duckstation
Path: blob/master/src/util/gpu_device.h
7367 views
1
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <[email protected]>
2
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
3
4
#pragma once
5
6
#include "gpu_shader_cache.h"
7
#include "gpu_texture.h"
8
#include "window_info.h"
9
10
#include "common/bitfield.h"
11
#include "common/gsvector.h"
12
#include "common/heap_array.h"
13
#include "common/small_string.h"
14
#include "common/types.h"
15
16
#include "fmt/base.h"
17
18
#include <array>
#include <cstring>
#include <deque>
#include <memory>
#include <optional>
#include <span>
#include <string>
#include <string_view>
#include <tuple>
#include <vector>
27
28
class Error;
29
class Image;
30
31
// Enables debug event generation and object names for graphics debuggers.
32
#if defined(_DEBUG) || defined(_DEVEL)
33
#define ENABLE_GPU_OBJECT_NAMES
34
#endif
35
36
class GPUSampler
37
{
38
public:
39
enum class Filter
40
{
41
Nearest,
42
Linear,
43
44
MaxCount
45
};
46
47
enum class AddressMode
48
{
49
Repeat,
50
ClampToEdge,
51
ClampToBorder,
52
MirrorRepeat,
53
54
MaxCount
55
};
56
57
union Config
58
{
59
static constexpr u8 LOD_MAX = 15;
60
61
BitField<u64, Filter, 0, 1> min_filter;
62
BitField<u64, Filter, 1, 1> mag_filter;
63
BitField<u64, Filter, 2, 1> mip_filter;
64
BitField<u64, AddressMode, 3, 2> address_u;
65
BitField<u64, AddressMode, 5, 2> address_v;
66
BitField<u64, AddressMode, 7, 2> address_w;
67
BitField<u64, u8, 9, 5> anisotropy;
68
BitField<u64, u8, 14, 4> min_lod;
69
BitField<u64, u8, 18, 4> max_lod;
70
BitField<u64, u32, 32, 32> border_color;
71
u64 key;
72
73
// clang-format off
74
ALWAYS_INLINE float GetBorderRed() const { return static_cast<float>(border_color.GetValue() & 0xFF) / 255.0f; }
75
ALWAYS_INLINE float GetBorderGreen() const { return static_cast<float>((border_color.GetValue() >> 8) & 0xFF) / 255.0f; }
76
ALWAYS_INLINE float GetBorderBlue() const { return static_cast<float>((border_color.GetValue() >> 16) & 0xFF) / 255.0f; }
77
ALWAYS_INLINE float GetBorderAlpha() const { return static_cast<float>((border_color.GetValue() >> 24) & 0xFF) / 255.0f; }
78
// clang-format on
79
ALWAYS_INLINE std::array<float, 4> GetBorderFloatColor() const
80
{
81
return std::array<float, 4>{GetBorderRed(), GetBorderGreen(), GetBorderBlue(), GetBorderAlpha()};
82
}
83
};
84
85
GPUSampler();
86
virtual ~GPUSampler();
87
88
#ifdef ENABLE_GPU_OBJECT_NAMES
89
virtual void SetDebugName(std::string_view name) = 0;
90
template<typename... T>
91
void SetDebugName(fmt::format_string<T...> fmt, T&&... args)
92
{
93
SetDebugName(TinyString::from_vformat(fmt, fmt::make_format_args(args...)));
94
}
95
#endif
96
97
static Config GetNearestConfig();
98
static Config GetLinearConfig();
99
};
100
101
class GPUShader
102
{
103
public:
104
explicit GPUShader(GPUShaderStage stage);
105
virtual ~GPUShader();
106
107
static const char* GetStageName(GPUShaderStage stage);
108
109
ALWAYS_INLINE GPUShaderStage GetStage() const { return m_stage; }
110
111
#ifdef ENABLE_GPU_OBJECT_NAMES
112
virtual void SetDebugName(std::string_view name) = 0;
113
template<typename... T>
114
void SetDebugName(fmt::format_string<T...> fmt, T&&... args)
115
{
116
SetDebugName(TinyString::from_vformat(fmt, fmt::make_format_args(args...)));
117
}
118
#endif
119
120
protected:
121
GPUShaderStage m_stage;
122
};
123
124
class GPUPipeline
125
{
126
public:
127
enum class Layout : u8
128
{
129
// 1 streamed UBO, 1 texture in PS.
130
SingleTextureAndUBO,
131
132
// 128 byte UBO via push constants, 1 texture.
133
SingleTextureAndPushConstants,
134
135
// 128 byte UBO via push constants, 1 texture buffer/SSBO.
136
SingleTextureBufferAndPushConstants,
137
138
// Multiple textures, 1 streamed UBO.
139
MultiTextureAndUBO,
140
141
// Multiple textures, 128 byte UBO via push constants.
142
MultiTextureAndPushConstants,
143
144
// Multiple textures, 1 streamed UBO, 128 byte push constants.
145
MultiTextureAndUBOAndPushConstants,
146
147
// Multiple textures, 1 streamed UBO, compute shader.
148
ComputeMultiTextureAndUBO,
149
150
// 128 byte UBO via push constants, multiple textures, compute shader.
151
ComputeMultiTextureAndPushConstants,
152
153
MaxCount
154
};
155
156
enum RenderPassFlag : u8
157
{
158
NoRenderPassFlags = 0,
159
ColorFeedbackLoop = (1 << 0),
160
ColorFeedbackLoopActive = (1 << 1),
161
SampleDepthBuffer = (1 << 2),
162
BindRenderTargetsAsImages = (1 << 3),
163
};
164
165
enum class Primitive : u8
166
{
167
Points,
168
Lines,
169
Triangles,
170
TriangleStrips,
171
172
MaxCount
173
};
174
175
union VertexAttribute
176
{
177
static constexpr u32 MaxAttributes = 16;
178
179
enum class Semantic : u8
180
{
181
Position,
182
TexCoord,
183
Color,
184
185
MaxCount
186
};
187
188
enum class Type : u8
189
{
190
Float,
191
UInt8,
192
SInt8,
193
UNorm8,
194
UInt16,
195
SInt16,
196
UNorm16,
197
UInt32,
198
SInt32,
199
200
MaxCount
201
};
202
203
BitField<u32, u8, 0, 4> index;
204
BitField<u32, Semantic, 4, 2> semantic;
205
BitField<u32, u8, 6, 2> semantic_index;
206
BitField<u32, Type, 8, 4> type;
207
BitField<u32, u8, 12, 3> components;
208
BitField<u32, u16, 16, 16> offset;
209
210
u32 key;
211
212
// clang-format off
213
ALWAYS_INLINE VertexAttribute() = default;
214
ALWAYS_INLINE constexpr VertexAttribute(const VertexAttribute& rhs) = default;
215
ALWAYS_INLINE VertexAttribute& operator=(const VertexAttribute& rhs) = default;
216
ALWAYS_INLINE bool operator==(const VertexAttribute& rhs) const { return key == rhs.key; }
217
ALWAYS_INLINE bool operator!=(const VertexAttribute& rhs) const { return key != rhs.key; }
218
ALWAYS_INLINE bool operator<(const VertexAttribute& rhs) const { return key < rhs.key; }
219
// clang-format on
220
221
static constexpr VertexAttribute Make(u8 index, Semantic semantic, u8 semantic_index, Type type, u8 components,
222
u16 offset)
223
{
224
// Nasty :/ can't access an inactive element of a union here..
225
return VertexAttribute((static_cast<u32>(index) & 0xf) | ((static_cast<u32>(semantic) & 0x3) << 4) |
226
((static_cast<u32>(semantic_index) & 0x3) << 6) | ((static_cast<u32>(type) & 0xf) << 8) |
227
((static_cast<u32>(components) & 0x7) << 12) |
228
((static_cast<u32>(offset) & 0xffff) << 16));
229
}
230
231
private:
232
ALWAYS_INLINE constexpr VertexAttribute(u32 key_) : key(key_) {}
233
};
234
235
struct InputLayout
236
{
237
std::span<const VertexAttribute> vertex_attributes;
238
u32 vertex_stride;
239
240
bool operator==(const InputLayout& rhs) const;
241
bool operator!=(const InputLayout& rhs) const;
242
};
243
244
struct InputLayoutHash
245
{
246
size_t operator()(const InputLayout& il) const;
247
};
248
249
enum class CullMode : u8
250
{
251
None,
252
Front,
253
Back,
254
255
MaxCount
256
};
257
258
enum class DepthFunc : u8
259
{
260
Never,
261
Always,
262
Less,
263
LessEqual,
264
Greater,
265
GreaterEqual,
266
Equal,
267
268
MaxCount
269
};
270
271
enum class BlendFunc : u8
272
{
273
Zero,
274
One,
275
SrcColor,
276
InvSrcColor,
277
DstColor,
278
InvDstColor,
279
SrcAlpha,
280
InvSrcAlpha,
281
SrcAlpha1,
282
InvSrcAlpha1,
283
DstAlpha,
284
InvDstAlpha,
285
ConstantColor,
286
InvConstantColor,
287
288
MaxCount
289
};
290
291
enum class BlendOp : u8
292
{
293
Add,
294
Subtract,
295
ReverseSubtract,
296
Min,
297
Max,
298
299
MaxCount
300
};
301
302
union RasterizationState
303
{
304
u16 key;
305
306
BitField<u16, CullMode, 0, 2> cull_mode;
307
BitField<u16, u8, 2, 6> multisamples;
308
BitField<u16, bool, 8, 1> per_sample_shading;
309
310
// clang-format off
311
ALWAYS_INLINE bool operator==(const RasterizationState& rhs) const { return key == rhs.key; }
312
ALWAYS_INLINE bool operator!=(const RasterizationState& rhs) const { return key != rhs.key; }
313
ALWAYS_INLINE bool operator<(const RasterizationState& rhs) const { return key < rhs.key; }
314
// clang-format on
315
316
static RasterizationState GetNoCullState(u8 multisamples = 1, bool per_sample_shading = false);
317
};
318
319
union DepthState
320
{
321
u8 key;
322
323
BitField<u8, DepthFunc, 0, 3> depth_test;
324
BitField<u8, bool, 4, 1> depth_write;
325
326
// clang-format off
327
ALWAYS_INLINE bool operator==(const DepthState& rhs) const { return key == rhs.key; }
328
ALWAYS_INLINE bool operator!=(const DepthState& rhs) const { return key != rhs.key; }
329
ALWAYS_INLINE bool operator<(const DepthState& rhs) const { return key < rhs.key; }
330
// clang-format on
331
332
static DepthState GetNoTestsState();
333
static DepthState GetAlwaysWriteState();
334
};
335
336
union BlendState
337
{
338
u64 key;
339
340
BitField<u64, bool, 0, 1> enable;
341
BitField<u64, BlendFunc, 1, 4> src_blend;
342
BitField<u64, BlendFunc, 5, 4> src_alpha_blend;
343
BitField<u64, BlendFunc, 9, 4> dst_blend;
344
BitField<u64, BlendFunc, 13, 4> dst_alpha_blend;
345
BitField<u64, BlendOp, 17, 3> blend_op;
346
BitField<u64, BlendOp, 20, 3> alpha_blend_op;
347
BitField<u64, bool, 24, 1> write_r;
348
BitField<u64, bool, 25, 1> write_g;
349
BitField<u64, bool, 26, 1> write_b;
350
BitField<u64, bool, 27, 1> write_a;
351
BitField<u64, u8, 24, 4> write_mask;
352
BitField<u64, u32, 32, 32> constant;
353
354
BitField<u64, u16, 1, 16> blend_factors;
355
BitField<u64, u8, 17, 6> blend_ops;
356
357
// clang-format off
358
ALWAYS_INLINE bool operator==(const BlendState& rhs) const { return key == rhs.key; }
359
ALWAYS_INLINE bool operator!=(const BlendState& rhs) const { return key != rhs.key; }
360
ALWAYS_INLINE bool operator<(const BlendState& rhs) const { return key < rhs.key; }
361
// clang-format on
362
363
// clang-format off
364
ALWAYS_INLINE float GetConstantRed() const { return static_cast<float>(constant.GetValue() & 0xFF) / 255.0f; }
365
ALWAYS_INLINE float GetConstantGreen() const { return static_cast<float>((constant.GetValue() >> 8) & 0xFF) / 255.0f; }
366
ALWAYS_INLINE float GetConstantBlue() const { return static_cast<float>((constant.GetValue() >> 16) & 0xFF) / 255.0f; }
367
ALWAYS_INLINE float GetConstantAlpha() const { return static_cast<float>((constant.GetValue() >> 24) & 0xFF) / 255.0f; }
368
// clang-format on
369
ALWAYS_INLINE std::array<float, 4> GetConstantFloatColor() const
370
{
371
return std::array<float, 4>{GetConstantRed(), GetConstantGreen(), GetConstantBlue(), GetConstantAlpha()};
372
}
373
374
static BlendState GetNoBlendingState();
375
static BlendState GetAlphaBlendingState();
376
};
377
378
struct GraphicsConfig
379
{
380
InputLayout input_layout;
381
GPUShader* vertex_shader;
382
GPUShader* geometry_shader;
383
GPUShader* fragment_shader;
384
385
BlendState blend;
386
RasterizationState rasterization;
387
DepthState depth;
388
389
Layout layout;
390
Primitive primitive;
391
392
GPUTextureFormat color_formats[4];
393
GPUTextureFormat depth_format;
394
RenderPassFlag render_pass_flags;
395
396
void SetTargetFormats(GPUTextureFormat color_format, GPUTextureFormat depth_format_ = GPUTextureFormat::Unknown);
397
u32 GetRenderTargetCount() const;
398
};
399
400
struct ComputeConfig
401
{
402
Layout layout;
403
GPUShader* compute_shader;
404
};
405
406
GPUPipeline();
407
virtual ~GPUPipeline();
408
409
#ifdef ENABLE_GPU_OBJECT_NAMES
410
virtual void SetDebugName(std::string_view name) = 0;
411
template<typename... T>
412
void SetDebugName(fmt::format_string<T...> fmt, T&&... args)
413
{
414
SetDebugName(TinyString::from_vformat(fmt, fmt::make_format_args(args...)));
415
}
416
#endif
417
};
418
419
class GPUTextureBuffer
420
{
421
public:
422
enum class Format
423
{
424
R16UI,
425
426
MaxCount
427
};
428
429
GPUTextureBuffer(Format format, u32 size_in_elements);
430
virtual ~GPUTextureBuffer();
431
432
static u32 GetElementSize(Format format);
433
434
ALWAYS_INLINE Format GetFormat() const { return m_format; }
435
ALWAYS_INLINE u32 GetSizeInElements() const { return m_size_in_elements; }
436
ALWAYS_INLINE u32 GetSizeInBytes() const { return m_size_in_elements * GetElementSize(m_format); }
437
ALWAYS_INLINE u32 GetCurrentPosition() const { return m_current_position; }
438
439
virtual void* Map(u32 required_elements) = 0;
440
virtual void Unmap(u32 used_elements) = 0;
441
442
#ifdef ENABLE_GPU_OBJECT_NAMES
443
virtual void SetDebugName(std::string_view name) = 0;
444
template<typename... T>
445
void SetDebugName(fmt::format_string<T...> fmt, T&&... args)
446
{
447
SetDebugName(TinyString::from_vformat(fmt, fmt::make_format_args(args...)));
448
}
449
#endif
450
451
protected:
452
Format m_format;
453
u32 m_size_in_elements;
454
u32 m_current_position = 0;
455
};
456
457
class GPUSwapChain
458
{
459
public:
460
GPUSwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode);
461
virtual ~GPUSwapChain();
462
463
ALWAYS_INLINE const WindowInfo& GetWindowInfo() const { return m_window_info; }
464
ALWAYS_INLINE u32 GetWidth() const { return m_window_info.surface_width; }
465
ALWAYS_INLINE u32 GetHeight() const { return m_window_info.surface_height; }
466
ALWAYS_INLINE u32 GetPostRotatedWidth() const { return m_window_info.GetPostRotatedWidth(); }
467
ALWAYS_INLINE u32 GetPostRotatedHeight() const { return m_window_info.GetPostRotatedHeight(); }
468
ALWAYS_INLINE float GetScale() const { return m_window_info.surface_scale; }
469
ALWAYS_INLINE float GetRefreshRate() const { return m_window_info.surface_refresh_rate; }
470
ALWAYS_INLINE void SetScale(float scale) { m_window_info.surface_scale = scale; }
471
ALWAYS_INLINE void SetRefreshRate(float refresh_rate) { m_window_info.surface_refresh_rate = refresh_rate; }
472
ALWAYS_INLINE WindowInfoPrerotation GetPreRotation() const { return m_window_info.surface_prerotation; }
473
ALWAYS_INLINE GPUTextureFormat GetFormat() const { return m_window_info.surface_format; }
474
ALWAYS_INLINE GSVector2i GetSizeVec() const
475
{
476
return GSVector2i(m_window_info.surface_width, m_window_info.surface_height);
477
}
478
ALWAYS_INLINE GSVector2i GetPostRotatedSizeVec() const
479
{
480
return GSVector2i(m_window_info.GetPostRotatedWidth(), m_window_info.GetPostRotatedHeight());
481
}
482
483
ALWAYS_INLINE GPUVSyncMode GetVSyncMode() const { return m_vsync_mode; }
484
ALWAYS_INLINE bool IsVSyncModeBlocking() const { return (m_vsync_mode == GPUVSyncMode::FIFO); }
485
486
virtual bool ResizeBuffers(u32 new_width, u32 new_height, Error* error) = 0;
487
virtual bool SetVSyncMode(GPUVSyncMode mode, Error* error) = 0;
488
489
/// Returns true if exclusive fullscreen is currently active on this swap chain.
490
virtual bool IsExclusiveFullscreen() const;
491
492
static GSVector4i PreRotateClipRect(WindowInfoPrerotation prerotation, const GSVector2i surface_size,
493
const GSVector4i& v);
494
495
protected:
496
// TODO: Merge WindowInfo into this struct...
497
WindowInfo m_window_info;
498
499
GPUVSyncMode m_vsync_mode = GPUVSyncMode::Disabled;
500
};
501
502
class GPUDevice
503
{
504
public:
505
friend GPUTexture;
506
507
using DrawIndex = u16;
508
509
enum class CreateFlags : u32
510
{
511
None = 0,
512
PreferGLESContext = (1 << 0),
513
EnableDebugDevice = (1 << 1),
514
EnableGPUValidation = (1 << 2),
515
DisableShaderCache = (1 << 3),
516
DisableDualSourceBlend = (1 << 4),
517
DisableFeedbackLoops = (1 << 5),
518
DisableFramebufferFetch = (1 << 6),
519
DisableTextureBuffers = (1 << 7),
520
DisableGeometryShaders = (1 << 8),
521
DisableComputeShaders = (1 << 9),
522
DisableTextureCopyToSelf = (1 << 10),
523
DisableMemoryImport = (1 << 11),
524
DisableRasterOrderViews = (1 << 12),
525
DisableCompressedTextures = (1 << 13),
526
};
527
528
enum class DrawBarrier : u32
529
{
530
None,
531
One,
532
Full
533
};
534
535
enum class PresentResult : u32
536
{
537
OK,
538
SkipPresent,
539
ExclusiveFullscreenLost,
540
DeviceLost,
541
};
542
543
struct Features
544
{
545
bool dual_source_blend : 1;
546
bool framebuffer_fetch : 1;
547
bool per_sample_shading : 1;
548
bool noperspective_interpolation : 1;
549
bool texture_copy_to_self : 1;
550
bool texture_buffers : 1;
551
bool texture_buffers_emulated_with_ssbo : 1;
552
bool feedback_loops : 1;
553
bool geometry_shaders : 1;
554
bool compute_shaders : 1;
555
bool partial_msaa_resolve : 1;
556
bool memory_import : 1;
557
bool exclusive_fullscreen : 1;
558
bool explicit_present : 1;
559
bool timed_present : 1;
560
bool gpu_timing : 1;
561
bool shader_cache : 1;
562
bool pipeline_cache : 1;
563
bool prefer_unused_textures : 1;
564
bool raster_order_views : 1;
565
bool dxt_textures : 1;
566
bool bptc_textures : 1;
567
};
568
569
struct Statistics
570
{
571
size_t buffer_streamed;
572
u32 num_draws;
573
u32 num_barriers;
574
u32 num_render_passes;
575
u32 num_copies;
576
u32 num_downloads;
577
u32 num_uploads;
578
};
579
580
// Parameters for exclusive fullscreen.
581
struct ExclusiveFullscreenMode
582
{
583
u32 width;
584
u32 height;
585
float refresh_rate;
586
587
TinyString ToString() const;
588
589
bool operator==(const ExclusiveFullscreenMode& rhs) const;
590
bool operator!=(const ExclusiveFullscreenMode& rhs) const;
591
bool operator<(const ExclusiveFullscreenMode& rhs) const;
592
593
static std::optional<ExclusiveFullscreenMode> Parse(std::string_view str);
594
};
595
596
struct AdapterInfo
597
{
598
std::string name;
599
std::vector<ExclusiveFullscreenMode> fullscreen_modes;
600
u32 max_texture_size;
601
u32 max_multisamples;
602
GPUDriverType driver_type;
603
bool supports_sample_shading;
604
};
605
using AdapterInfoList = std::vector<AdapterInfo>;
606
607
struct PooledTextureDeleter
608
{
609
void operator()(GPUTexture* const tex);
610
};
611
using AutoRecycleTexture = std::unique_ptr<GPUTexture, PooledTextureDeleter>;
612
613
static constexpr u32 MAX_TEXTURE_SAMPLERS = 8;
614
static constexpr u32 MIN_TEXEL_BUFFER_ELEMENTS = 4 * 1024 * 512;
615
static constexpr u32 MAX_RENDER_TARGETS = 4;
616
static constexpr u32 MAX_IMAGE_RENDER_TARGETS = 2;
617
static constexpr u32 DEFAULT_CLEAR_COLOR = 0xFF000000u;
618
static constexpr u32 PIPELINE_CACHE_HASH_SIZE = 20;
619
static constexpr u32 BASE_UNIFORM_BUFFER_ALIGNMENT = 16;
620
static_assert(sizeof(GPUPipeline::GraphicsConfig::color_formats) == sizeof(GPUTextureFormat) * MAX_RENDER_TARGETS);
621
622
GPUDevice();
623
virtual ~GPUDevice();
624
625
/// Returns the default/preferred API for the system.
626
static RenderAPI GetPreferredAPI(WindowInfoType window_type);
627
628
/// Returns a string representing the specified API.
629
static const char* RenderAPIToString(RenderAPI api);
630
631
/// Returns a string representing the specified language.
632
static const char* ShaderLanguageToString(GPUShaderLanguage language);
633
634
/// Returns a string representing the specified vsync mode.
635
static const char* VSyncModeToString(GPUVSyncMode mode);
636
637
/// Returns a new device for the specified API.
638
static std::unique_ptr<GPUDevice> CreateDeviceForAPI(RenderAPI api);
639
640
/// Returns true if the render API is the same (e.g. GLES and GL).
641
static bool IsSameRenderAPI(RenderAPI lhs, RenderAPI rhs);
642
643
/// Returns a list of adapters for the given API.
644
static std::optional<AdapterInfoList> GetAdapterListForAPI(RenderAPI api, WindowInfoType window_type, Error* error);
645
646
/// Dumps out a shader that failed compilation.
647
static void DumpBadShader(std::string_view code, std::string_view errors);
648
649
/// Converts a RGBA8 value to 4 floating-point values.
650
static std::array<float, 4> RGBA8ToFloat(u32 rgba);
651
652
/// Returns true if the given device creation flag is present.
653
static constexpr bool HasCreateFlag(CreateFlags flags, CreateFlags flag)
654
{
655
return ((static_cast<u32>(flags) & static_cast<u32>(flag)) != 0);
656
}
657
658
/// Returns the number of texture bindings for a given pipeline layout.
659
static constexpr u32 GetActiveTexturesForLayout(GPUPipeline::Layout layout)
660
{
661
constexpr std::array<u8, static_cast<u8>(GPUPipeline::Layout::MaxCount)> counts = {
662
1, // SingleTextureAndUBO
663
1, // SingleTextureAndPushConstants
664
0, // SingleTextureBufferAndPushConstants
665
MAX_TEXTURE_SAMPLERS, // MultiTextureAndUBO
666
MAX_TEXTURE_SAMPLERS, // MultiTextureAndPushConstants
667
MAX_TEXTURE_SAMPLERS, // MultiTextureAndUBOAndPushConstants
668
MAX_TEXTURE_SAMPLERS, // ComputeMultiTextureAndUBO
669
MAX_TEXTURE_SAMPLERS, // ComputeMultiTextureAndPushConstants
670
};
671
672
return counts[static_cast<u8>(layout)];
673
}
674
675
/// Returns true if the given pipeline layout is used for compute shaders.
676
static constexpr bool IsComputeLayout(GPUPipeline::Layout layout)
677
{
678
return (layout >= GPUPipeline::Layout::ComputeMultiTextureAndUBO);
679
}
680
681
/// Returns the number of thread groups to dispatch on each axis: every `count_*` divided by the
/// matching `local_size_*`, rounded up.
/// Every local size must be non-zero, since each axis is divided by its local size.
static constexpr std::tuple<u32, u32, u32> GetDispatchCount(u32 count_x, u32 count_y, u32 count_z, u32 local_size_x,
                                                            u32 local_size_y, u32 local_size_z)
{
  // Quotient plus one extra group for any remainder. The usual
  // (count + local_size - 1) / local_size form wraps around in u32 when count is close to
  // the maximum and would then report too few groups; this form cannot overflow.
  constexpr auto div_round_up = [](u32 count, u32 local_size) -> u32 {
    return (count / local_size) + (((count % local_size) != 0) ? 1u : 0u);
  };

  return std::make_tuple(div_round_up(count_x, local_size_x), div_round_up(count_y, local_size_y),
                         div_round_up(count_z, local_size_z));
}
688
689
/// Determines the driver type for a given adapter.
690
static GPUDriverType GuessDriverType(u32 pci_vendor_id, std::string_view vendor_name, std::string_view adapter_name);
691
692
ALWAYS_INLINE const Features& GetFeatures() const { return m_features; }
693
ALWAYS_INLINE RenderAPI GetRenderAPI() const { return m_render_api; }
694
ALWAYS_INLINE u32 GetRenderAPIVersion() const { return m_render_api_version; }
695
ALWAYS_INLINE u32 GetMaxTextureSize() const { return m_max_texture_size; }
696
ALWAYS_INLINE u32 GetMaxMultisamples() const { return m_max_multisamples; }
697
698
ALWAYS_INLINE GPUSwapChain* GetMainSwapChain() const { return m_main_swap_chain.get(); }
699
ALWAYS_INLINE bool HasMainSwapChain() const { return static_cast<bool>(m_main_swap_chain); }
700
701
ALWAYS_INLINE GPUTexture* GetEmptyTexture() const { return m_empty_texture.get(); }
702
ALWAYS_INLINE GPUSampler* GetLinearSampler() const { return m_linear_sampler; }
703
ALWAYS_INLINE GPUSampler* GetNearestSampler() const { return m_nearest_sampler; }
704
705
ALWAYS_INLINE bool IsGPUTimingEnabled() const { return m_gpu_timing_enabled; }
706
707
bool Create(std::string_view adapter, CreateFlags create_flags, std::string_view shader_dump_path,
708
std::string_view shader_cache_path, u32 shader_cache_version, const WindowInfo& wi, GPUVSyncMode vsync,
709
const ExclusiveFullscreenMode* exclusive_fullscreen_mode,
710
std::optional<bool> exclusive_fullscreen_control, Error* error);
711
void Destroy();
712
713
virtual std::unique_ptr<GPUSwapChain> CreateSwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode,
714
const ExclusiveFullscreenMode* exclusive_fullscreen_mode,
715
std::optional<bool> exclusive_fullscreen_control,
716
Error* error) = 0;
717
virtual bool SwitchToSurfacelessRendering(Error* error);
718
719
bool RecreateMainSwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode,
720
const ExclusiveFullscreenMode* exclusive_fullscreen_mode,
721
std::optional<bool> exclusive_fullscreen_control, Error* error);
722
void DestroyMainSwapChain();
723
724
virtual std::string GetDriverInfo() const = 0;
725
726
// Flushes current command buffer, but does not wait for completion.
727
virtual void FlushCommands() = 0;
728
729
// Executes current command buffer, waits for its completion, and destroys all pending resources.
730
virtual void WaitForGPUIdle() = 0;
731
732
virtual std::unique_ptr<GPUTexture> CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples,
733
GPUTexture::Type type, GPUTextureFormat format,
734
GPUTexture::Flags flags, const void* data = nullptr,
735
u32 data_stride = 0, Error* error = nullptr) = 0;
736
virtual std::unique_ptr<GPUSampler> CreateSampler(const GPUSampler::Config& config, Error* error = nullptr) = 0;
737
virtual std::unique_ptr<GPUTextureBuffer> CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements,
738
Error* error = nullptr) = 0;
739
740
GPUSampler* GetSampler(const GPUSampler::Config& config, Error* error = nullptr);
741
742
// Texture pooling.
743
std::unique_ptr<GPUTexture> FetchTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples,
744
GPUTexture::Type type, GPUTextureFormat format, GPUTexture::Flags flags,
745
const void* data = nullptr, u32 data_stride = 0, Error* error = nullptr);
746
AutoRecycleTexture FetchAutoRecycleTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples,
747
GPUTexture::Type type, GPUTextureFormat format, GPUTexture::Flags flags,
748
const void* data = nullptr, u32 data_stride = 0, Error* error = nullptr);
749
std::unique_ptr<GPUTexture> FetchAndUploadTextureImage(const Image& image,
750
GPUTexture::Flags flags = GPUTexture::Flags::None,
751
Error* error = nullptr);
752
void RecycleTexture(std::unique_ptr<GPUTexture> texture);
753
void PurgeTexturePool();
754
755
virtual std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTextureFormat format,
756
Error* error = nullptr) = 0;
757
virtual std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTextureFormat format,
758
void* memory, size_t memory_size, u32 memory_stride,
759
Error* error = nullptr) = 0;
760
761
virtual void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src,
762
u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) = 0;
763
virtual void ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
764
GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height) = 0;
765
virtual void ClearRenderTarget(GPUTexture* t, u32 c);
766
virtual void ClearDepth(GPUTexture* t, float d);
767
virtual void InvalidateRenderTarget(GPUTexture* t);
768
769
/// Shader abstraction.
770
std::unique_ptr<GPUShader> CreateShader(GPUShaderStage stage, GPUShaderLanguage language, std::string_view source,
771
Error* error = nullptr, const char* entry_point = "main");
772
virtual std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config,
773
Error* error = nullptr) = 0;
774
virtual std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config,
775
Error* error = nullptr) = 0;
776
777
#ifdef ENABLE_GPU_OBJECT_NAMES
778
/// Debug messaging.
779
virtual void PushDebugGroup(const char* name) = 0;
780
virtual void PopDebugGroup() = 0;
781
virtual void InsertDebugMessage(const char* msg) = 0;
782
783
/// Formatted debug variants.
784
template<typename... T>
785
void PushDebugGroup(fmt::format_string<T...> fmt, T&&... args)
786
{
787
PushDebugGroup(TinyString::from_vformat(fmt, fmt::make_format_args(args...)));
788
}
789
template<typename... T>
790
void InsertDebugMessage(fmt::format_string<T...> fmt, T&&... args)
791
{
792
InsertDebugMessage(TinyString::from_vformat(fmt, fmt::make_format_args(args...)));
793
}
794
#endif
795
796
/// Vertex/index buffer abstraction.
797
virtual void MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
798
u32* map_base_vertex) = 0;
799
virtual void UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) = 0;
800
virtual void MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) = 0;
801
virtual void UnmapIndexBuffer(u32 used_size) = 0;
802
803
void UploadVertexBuffer(const void* vertices, u32 vertex_size, u32 vertex_count, u32* base_vertex);
804
void UploadIndexBuffer(const DrawIndex* indices, u32 index_count, u32* base_index);
805
806
/// Uniform buffer abstraction.
807
virtual void* MapUniformBuffer(u32 size) = 0;
808
virtual void UnmapUniformBuffer(u32 size) = 0;
809
void UploadUniformBuffer(const void* data, u32 data_size);
810
811
/// Drawing setup abstraction.
812
virtual void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
813
GPUPipeline::RenderPassFlag flags = GPUPipeline::NoRenderPassFlags) = 0;
814
virtual void SetPipeline(GPUPipeline* pipeline) = 0;
815
virtual void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) = 0;
816
virtual void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) = 0;
817
virtual void SetViewport(const GSVector4i rc) = 0;
818
virtual void SetScissor(const GSVector4i rc) = 0;
819
void SetRenderTarget(GPUTexture* rt, GPUTexture* ds = nullptr,
820
GPUPipeline::RenderPassFlag flags = GPUPipeline::NoRenderPassFlags);
821
void SetViewport(s32 x, s32 y, s32 width, s32 height);
822
void SetScissor(s32 x, s32 y, s32 width, s32 height);
823
void SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height);
824
void SetViewportAndScissor(const GSVector4i rc);
825
826
// Drawing abstraction.
827
virtual void Draw(u32 vertex_count, u32 base_vertex) = 0;
828
virtual void DrawWithPushConstants(u32 vertex_count, u32 base_vertex, const void* push_constants,
829
u32 push_constants_size) = 0;
830
virtual void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) = 0;
831
virtual void DrawIndexedWithPushConstants(u32 index_count, u32 base_index, u32 base_vertex,
832
const void* push_constants, u32 push_constants_size) = 0;
833
virtual void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type);
834
virtual void DrawIndexedWithBarrierWithPushConstants(u32 index_count, u32 base_index, u32 base_vertex,
835
const void* push_constants, u32 push_constants_size,
836
DrawBarrier type);
837
virtual void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
838
u32 group_size_z) = 0;
839
virtual void DispatchWithPushConstants(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x,
840
u32 group_size_y, u32 group_size_z, const void* push_constants,
841
u32 push_constants_size) = 0;
842
843
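/// Begins presenting to the given swap chain and reports the outcome as a PresentResult.
/// NOTE(review): this used to read "returns false if the window was completely occluded", but the
/// return type is PresentResult, not bool; presumably SkipPresent covers the occluded case - confirm.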
844
virtual PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color = DEFAULT_CLEAR_COLOR) = 0;
845
virtual void EndPresent(GPUSwapChain* swap_chain, bool explicit_submit, u64 submit_time = 0) = 0;
846
virtual void SubmitPresent(GPUSwapChain* swap_chain) = 0;
847
848
ALWAYS_INLINE bool IsDebugDevice() const { return m_debug_device; }
849
ALWAYS_INLINE size_t GetVRAMUsage() const { return s_total_vram_usage; }
850
851
bool UsesLowerLeftOrigin() const;
852
static GSVector4i FlipToLowerLeft(GSVector4i rc, s32 target_height);
853
bool ResizeTexture(std::unique_ptr<GPUTexture>* tex, u32 new_width, u32 new_height, GPUTexture::Type type,
854
GPUTextureFormat format, GPUTexture::Flags flags, bool preserve = true, Error* error = nullptr);
855
bool ResizeTexture(std::unique_ptr<GPUTexture>* tex, u32 new_width, u32 new_height, GPUTexture::Type type,
856
GPUTextureFormat format, GPUTexture::Flags flags, const void* replace_data, u32 replace_data_pitch,
857
Error* error = nullptr);
858
859
virtual bool SupportsTextureFormat(GPUTextureFormat format) const = 0;
860
861
/// Enables/disables GPU frame timing.
862
virtual bool SetGPUTimingEnabled(bool enabled);
863
864
/// Returns the amount of GPU time utilized since the last time this method was called.
865
virtual float GetAndResetAccumulatedGPUTime();
866
867
ALWAYS_INLINE static Statistics& GetStatistics() { return s_stats; }
868
static void ResetStatistics();
869
870
/// Releases dynamic libraries and other resources used by the GPU device system.
871
static void UnloadDynamicLibraries();
872
873
protected:
874
virtual bool CreateDeviceAndMainSwapChain(std::string_view adapter, CreateFlags create_flags, const WindowInfo& wi,
875
GPUVSyncMode vsync_mode,
876
const ExclusiveFullscreenMode* exclusive_fullscreen_mode,
877
std::optional<bool> exclusive_fullscreen_control, Error* error) = 0;
878
virtual void DestroyDevice() = 0;
879
880
std::string GetShaderCacheBaseName(std::string_view type) const;
881
virtual bool OpenPipelineCache(const std::string& path, Error* error);
882
virtual bool CreatePipelineCache(const std::string& path, Error* error);
883
virtual bool ReadPipelineCache(DynamicHeapArray<u8> data, Error* error);
884
virtual bool GetPipelineCacheData(DynamicHeapArray<u8>* data, Error* error);
885
virtual bool ClosePipelineCache(const std::string& path, Error* error);
886
887
virtual std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data,
888
Error* error) = 0;
889
virtual std::unique_ptr<GPUShader> CreateShaderFromSource(GPUShaderStage stage, GPUShaderLanguage language,
890
std::string_view source, const char* entry_point,
891
DynamicHeapArray<u8>* out_binary, Error* error) = 0;
892
893
/// NOTE(review): presumably evicts entries from the texture/target pools; definition not visible here.
void TrimTexturePool();
bool CompileGLSLShaderToVulkanSpv(GPUShaderStage stage, GPUShaderLanguage source_language, std::string_view source,
896
const char* entry_point, bool optimization, bool nonsemantic_debug_info,
897
DynamicHeapArray<u8>* out_binary, Error* error);
898
bool TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GPUShaderStage stage,
899
GPUShaderLanguage target_language, u32 target_version, std::string* output,
900
Error* error);
901
std::unique_ptr<GPUShader> TranspileAndCreateShaderFromSource(GPUShaderStage stage, GPUShaderLanguage source_language,
902
std::string_view source, const char* entry_point,
903
GPUShaderLanguage target_language, u32 target_version,
904
DynamicHeapArray<u8>* out_binary, Error* error);
905
static std::optional<DynamicHeapArray<u8>> OptimizeVulkanSpv(const std::span<const u8> spirv, Error* error);
906
907
/// NOTE(review): presumably stores type in m_driver_type; any further side effects are not visible here.
void SetDriverType(GPUDriverType type);
Features m_features = {};
910
RenderAPI m_render_api = RenderAPI::None;
911
u32 m_render_api_version = 0;
912
u32 m_max_texture_size = 0;
913
GPUDriverType m_driver_type = GPUDriverType::Unknown;
914
u16 m_max_multisamples = 0;
915
916
std::unique_ptr<GPUSwapChain> m_main_swap_chain;
917
std::unique_ptr<GPUTexture> m_empty_texture;
918
GPUSampler* m_nearest_sampler = nullptr;
919
GPUSampler* m_linear_sampler = nullptr;
920
921
GPUShaderCache m_shader_cache;
922
923
private:
924
static constexpr u32 MAX_TEXTURE_POOL_SIZE = 125;
925
static constexpr u32 MAX_TARGET_POOL_SIZE = 50;
926
static constexpr u32 POOL_PURGE_DELAY = 300;
927
928
struct TexturePoolKey
929
{
930
u16 width;
931
u16 height;
932
u8 layers;
933
u8 levels;
934
u8 samples;
935
GPUTexture::Type type;
936
GPUTextureFormat format;
937
GPUTexture::Flags flags;
938
939
ALWAYS_INLINE bool operator==(const TexturePoolKey& rhs) const
940
{
941
return std::memcmp(this, &rhs, sizeof(TexturePoolKey)) == 0;
942
}
943
ALWAYS_INLINE bool operator!=(const TexturePoolKey& rhs) const
944
{
945
return std::memcmp(this, &rhs, sizeof(TexturePoolKey)) != 0;
946
}
947
};
948
struct TexturePoolEntry
949
{
950
std::unique_ptr<GPUTexture> texture;
951
u32 use_counter;
952
TexturePoolKey key;
953
};
954
955
using TexturePool = std::deque<TexturePoolEntry>;
956
using SamplerMap = std::unordered_map<u64, std::unique_ptr<GPUSampler>>;
957
958
#ifdef __APPLE__
// We have to define these in the base class, because they're in Objective C++.
static std::unique_ptr<GPUDevice> WrapNewMetalDevice();
static AdapterInfoList WrapGetMetalAdapterList();
#endif
void OpenShaderCache(std::string_view base_path, u32 version);
965
void CloseShaderCache();
966
bool CreateResources(Error* error);
967
void DestroyResources();
968
static bool IsTexturePoolType(GPUTexture::Type type);
969
970
static size_t s_total_vram_usage;
971
972
SamplerMap m_sampler_map;
973
974
TexturePool m_texture_pool;
975
TexturePool m_target_pool;
976
size_t m_pool_vram_usage = 0;
977
u32 m_texture_pool_counter = 0;
978
979
protected:
// Backing storage for GetStatistics().
static Statistics s_stats;

bool m_gpu_timing_enabled = false;
// NOTE(review): presumably the value reported by IsDebugDevice(), which the GL_* debug macros check -- confirm.
bool m_debug_device = false;
};
985
986
extern std::unique_ptr<GPUDevice> g_gpu_device;
987
988
// Project macro; NOTE(review): presumably generates the bitwise operators for the CreateFlags enum class.
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPUDevice::CreateFlags);
/// Deleter for pooled textures: re-wraps the raw pointer in a unique_ptr and hands ownership to
/// g_gpu_device->RecycleTexture(). g_gpu_device is dereferenced unconditionally, so the global device must
/// still exist whenever a pointer using this deleter is released.
ALWAYS_INLINE void GPUDevice::PooledTextureDeleter::operator()(GPUTexture* const tex)
{
  g_gpu_device->RecycleTexture(std::unique_ptr<GPUTexture>(tex));
}
// C preprocessor workarounds.
// GL_TOKEN_PASTE glues its arguments together verbatim. GL_TOKEN_PASTE2 adds one level of indirection so that
// macro arguments such as __LINE__ are expanded before they are pasted.
#define GL_TOKEN_PASTE(x, y) x##y
#define GL_TOKEN_PASTE2(x, y) GL_TOKEN_PASTE(x, y)
// Macros for debug messages.
// With ENABLE_GPU_OBJECT_NAMES defined, the macros forward to g_gpu_device, but only while
// g_gpu_device->IsDebugDevice() returns true. Otherwise every macro expands to (void)0.
#ifdef ENABLE_GPU_OBJECT_NAMES
/// Scope guard behind GL_SCOPE()/GL_SCOPE_FMT(): pushes a debug group when constructed and pops it when
/// destroyed. Both steps re-check IsDebugDevice() and dereference g_gpu_device unconditionally.
struct GLAutoPop
{
  /// Pushes a debug group with a plain, pre-formatted name.
  GLAutoPop(const char* name)
  {
    if (g_gpu_device->IsDebugDevice()) [[unlikely]]
      g_gpu_device->PushDebugGroup(name);
  }

  /// Pushes a debug group whose name is formatted from fmt/args; formatting only happens on debug devices.
  template<typename... T>
  GLAutoPop(fmt::format_string<T...> fmt, T&&... args)
  {
    if (g_gpu_device->IsDebugDevice()) [[unlikely]]
      g_gpu_device->PushDebugGroup(SmallString::from_vformat(fmt, fmt::make_format_args(args...)));
  }

  // A copy would pop the same debug group a second time, so the guard is non-copyable (and thus non-movable).
  GLAutoPop(const GLAutoPop&) = delete;
  GLAutoPop& operator=(const GLAutoPop&) = delete;

  ~GLAutoPop()
  {
    if (g_gpu_device->IsDebugDevice()) [[unlikely]]
      g_gpu_device->PopDebugGroup();
  }
};

// Declares a uniquely named (per source line) GLAutoPop that lives until the end of the enclosing scope.
#define GL_SCOPE(name) GLAutoPop GL_TOKEN_PASTE2(gl_auto_pop_, __LINE__)(name)
#define GL_INS(msg)                                                            \
  do                                                                           \
  {                                                                            \
    if (g_gpu_device->IsDebugDevice()) [[unlikely]]                            \
      g_gpu_device->InsertDebugMessage(msg);                                   \
  } while (0)
#define GL_OBJECT_NAME(obj, name)                                              \
  do                                                                           \
  {                                                                            \
    if (g_gpu_device->IsDebugDevice()) [[unlikely]]                            \
      (obj)->SetDebugName(name);                                               \
  } while (0)

// Variadic variants: the arguments are forwarded unchanged to the same calls as above.
#define GL_SCOPE_FMT(...) GLAutoPop GL_TOKEN_PASTE2(gl_auto_pop_, __LINE__)(__VA_ARGS__)
#define GL_INS_FMT(...)                                                        \
  do                                                                           \
  {                                                                            \
    if (g_gpu_device->IsDebugDevice()) [[unlikely]]                            \
      g_gpu_device->InsertDebugMessage(__VA_ARGS__);                           \
  } while (0)
#define GL_OBJECT_NAME_FMT(obj, ...)                                           \
  do                                                                           \
  {                                                                            \
    if (g_gpu_device->IsDebugDevice()) [[unlikely]]                            \
      (obj)->SetDebugName(__VA_ARGS__);                                        \
  } while (0)
#else
// No-op fallbacks: the macro arguments are discarded by the preprocessor and never evaluated.
#define GL_SCOPE(name) (void)0
#define GL_INS(msg) (void)0
#define GL_OBJECT_NAME(obj, name) (void)0

#define GL_SCOPE_FMT(...) (void)0
#define GL_INS_FMT(...) (void)0
#define GL_OBJECT_NAME_FMT(obj, ...) (void)0
#endif