// Copied from a web code viewer (CoCalc); page navigation text removed.
// GitHub Repository: stenzek/duckstation
// Path: blob/master/src/util/gpu_device.h
1
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <[email protected]>
2
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
3
4
#pragma once
5
6
#include "gpu_shader_cache.h"
#include "gpu_texture.h"
#include "window_info.h"

#include "common/bitfield.h"
#include "common/gsvector.h"
#include "common/heap_array.h"
#include "common/small_string.h"
#include "common/types.h"

#include "fmt/base.h"

#include <array>
#include <cstring>
#include <deque>
#include <memory>
#include <optional>
#include <span>
#include <string>
#include <string_view>
#include <tuple>
#include <vector>
27
28
class Error;
29
class Image;
30
31
// Enables debug event generation and object names for graphics debuggers.
// Only defined for builds that define _DEBUG or _DEVEL; the SetDebugName()/debug-group
// declarations further down are compiled out otherwise.
#if defined(_DEBUG) || defined(_DEVEL)
#define ENABLE_GPU_OBJECT_NAMES
#endif
35
36
/// Identifies a rendering API. Stored as a single byte.
enum class RenderAPI : u8
{
  None,
  D3D11,
  D3D12,
  Vulkan,
  OpenGL,
  OpenGLES,
  Metal
};
46
47
/// Vertical sync mode requested for a swap chain.
/// Count is not a mode; it is the number of real enumerators.
enum class GPUVSyncMode : u8
{
  Disabled,
  FIFO,
  Mailbox,
  Count
};
54
55
class GPUSampler
56
{
57
public:
58
enum class Filter
59
{
60
Nearest,
61
Linear,
62
63
MaxCount
64
};
65
66
enum class AddressMode
67
{
68
Repeat,
69
ClampToEdge,
70
ClampToBorder,
71
MirrorRepeat,
72
73
MaxCount
74
};
75
76
union Config
77
{
78
static constexpr u8 LOD_MAX = 15;
79
80
BitField<u64, Filter, 0, 1> min_filter;
81
BitField<u64, Filter, 1, 1> mag_filter;
82
BitField<u64, Filter, 2, 1> mip_filter;
83
BitField<u64, AddressMode, 3, 2> address_u;
84
BitField<u64, AddressMode, 5, 2> address_v;
85
BitField<u64, AddressMode, 7, 2> address_w;
86
BitField<u64, u8, 9, 5> anisotropy;
87
BitField<u64, u8, 14, 4> min_lod;
88
BitField<u64, u8, 18, 4> max_lod;
89
BitField<u64, u32, 32, 32> border_color;
90
u64 key;
91
92
// clang-format off
93
ALWAYS_INLINE float GetBorderRed() const { return static_cast<float>(border_color.GetValue() & 0xFF) / 255.0f; }
94
ALWAYS_INLINE float GetBorderGreen() const { return static_cast<float>((border_color.GetValue() >> 8) & 0xFF) / 255.0f; }
95
ALWAYS_INLINE float GetBorderBlue() const { return static_cast<float>((border_color.GetValue() >> 16) & 0xFF) / 255.0f; }
96
ALWAYS_INLINE float GetBorderAlpha() const { return static_cast<float>((border_color.GetValue() >> 24) & 0xFF) / 255.0f; }
97
// clang-format on
98
ALWAYS_INLINE std::array<float, 4> GetBorderFloatColor() const
99
{
100
return std::array<float, 4>{GetBorderRed(), GetBorderGreen(), GetBorderBlue(), GetBorderAlpha()};
101
}
102
};
103
104
GPUSampler();
105
virtual ~GPUSampler();
106
107
#ifdef ENABLE_GPU_OBJECT_NAMES
108
virtual void SetDebugName(std::string_view name) = 0;
109
template<typename... T>
110
void SetDebugName(fmt::format_string<T...> fmt, T&&... args)
111
{
112
SetDebugName(TinyString::from_vformat(fmt, fmt::make_format_args(args...)));
113
}
114
#endif
115
116
static Config GetNearestConfig();
117
static Config GetLinearConfig();
118
};
119
120
/// Pipeline stage a shader is compiled for. MaxCount is the number of real stages, not a stage itself.
enum class GPUShaderStage : u8
{
  Vertex,
  Fragment,
  Geometry,
  Compute,

  MaxCount
};
129
130
/// Shader source/binary language. Count is the number of enumerators before it, not a language.
enum class GPUShaderLanguage : u8
{
  None,
  HLSL,
  GLSL,
  GLSLES,
  GLSLVK,
  MSL,
  SPV,
  Count
};
141
142
/// Driver family of a GPU adapter.
/// The low byte is an index within a category; MobileFlag and SoftwareFlag are category bits that are
/// OR'ed into the mobile and software-rasterizer entries respectively, so they can be tested with a mask.
enum class GPUDriverType : u16
{
  MobileFlag = 0x100,
  SoftwareFlag = 0x200,

  Unknown = 0,
  AMDProprietary = 1,
  AMDMesa = 2,
  IntelProprietary = 3,
  IntelMesa = 4,
  NVIDIAProprietary = 5,
  NVIDIAMesa = 6,
  AppleProprietary = 7,
  AppleMesa = 8,
  DozenMesa = 9,

  ImaginationProprietary = MobileFlag | 1,
  ImaginationMesa = MobileFlag | 2,
  ARMProprietary = MobileFlag | 3,
  ARMMesa = MobileFlag | 4,
  QualcommProprietary = MobileFlag | 5,
  QualcommMesa = MobileFlag | 6,
  BroadcomProprietary = MobileFlag | 7,
  BroadcomMesa = MobileFlag | 8,

  LLVMPipe = SoftwareFlag | 1,
  SwiftShader = SoftwareFlag | 2,
};
170
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPUDriverType);
171
172
class GPUShader
173
{
174
public:
175
explicit GPUShader(GPUShaderStage stage);
176
virtual ~GPUShader();
177
178
static const char* GetStageName(GPUShaderStage stage);
179
180
ALWAYS_INLINE GPUShaderStage GetStage() const { return m_stage; }
181
182
#ifdef ENABLE_GPU_OBJECT_NAMES
183
virtual void SetDebugName(std::string_view name) = 0;
184
template<typename... T>
185
void SetDebugName(fmt::format_string<T...> fmt, T&&... args)
186
{
187
SetDebugName(TinyString::from_vformat(fmt, fmt::make_format_args(args...)));
188
}
189
#endif
190
191
protected:
192
GPUShaderStage m_stage;
193
};
194
195
class GPUPipeline
196
{
197
public:
198
enum class Layout : u8
199
{
200
// 1 streamed UBO, 1 texture in PS.
201
SingleTextureAndUBO,
202
203
// 128 byte UBO via push constants, 1 texture.
204
SingleTextureAndPushConstants,
205
206
// 128 byte UBO via push constants, 1 texture buffer/SSBO.
207
SingleTextureBufferAndPushConstants,
208
209
// Multiple textures, 1 streamed UBO.
210
MultiTextureAndUBO,
211
212
// Multiple textures, 128 byte UBO via push constants.
213
MultiTextureAndPushConstants,
214
215
// Multiple textures, 1 streamed UBO, compute shader.
216
ComputeMultiTextureAndUBO,
217
218
// 128 byte UBO via push constants, multiple textures, compute shader.
219
ComputeMultiTextureAndPushConstants,
220
221
MaxCount
222
};
223
224
enum RenderPassFlag : u8
225
{
226
NoRenderPassFlags = 0,
227
ColorFeedbackLoop = (1 << 0),
228
SampleDepthBuffer = (1 << 1),
229
BindRenderTargetsAsImages = (1 << 2),
230
};
231
232
enum class Primitive : u8
233
{
234
Points,
235
Lines,
236
Triangles,
237
TriangleStrips,
238
239
MaxCount
240
};
241
242
union VertexAttribute
243
{
244
static constexpr u32 MaxAttributes = 16;
245
246
enum class Semantic : u8
247
{
248
Position,
249
TexCoord,
250
Color,
251
252
MaxCount
253
};
254
255
enum class Type : u8
256
{
257
Float,
258
UInt8,
259
SInt8,
260
UNorm8,
261
UInt16,
262
SInt16,
263
UNorm16,
264
UInt32,
265
SInt32,
266
267
MaxCount
268
};
269
270
BitField<u32, u8, 0, 4> index;
271
BitField<u32, Semantic, 4, 2> semantic;
272
BitField<u32, u8, 6, 2> semantic_index;
273
BitField<u32, Type, 8, 4> type;
274
BitField<u32, u8, 12, 3> components;
275
BitField<u32, u16, 16, 16> offset;
276
277
u32 key;
278
279
// clang-format off
280
ALWAYS_INLINE VertexAttribute() = default;
281
ALWAYS_INLINE constexpr VertexAttribute(const VertexAttribute& rhs) = default;
282
ALWAYS_INLINE VertexAttribute& operator=(const VertexAttribute& rhs) = default;
283
ALWAYS_INLINE bool operator==(const VertexAttribute& rhs) const { return key == rhs.key; }
284
ALWAYS_INLINE bool operator!=(const VertexAttribute& rhs) const { return key != rhs.key; }
285
ALWAYS_INLINE bool operator<(const VertexAttribute& rhs) const { return key < rhs.key; }
286
// clang-format on
287
288
static constexpr VertexAttribute Make(u8 index, Semantic semantic, u8 semantic_index, Type type, u8 components,
289
u16 offset)
290
{
291
// Nasty :/ can't access an inactive element of a union here..
292
return VertexAttribute((static_cast<u32>(index) & 0xf) | ((static_cast<u32>(semantic) & 0x3) << 4) |
293
((static_cast<u32>(semantic_index) & 0x3) << 6) | ((static_cast<u32>(type) & 0xf) << 8) |
294
((static_cast<u32>(components) & 0x7) << 12) |
295
((static_cast<u32>(offset) & 0xffff) << 16));
296
}
297
298
private:
299
ALWAYS_INLINE constexpr VertexAttribute(u32 key_) : key(key_) {}
300
};
301
302
struct InputLayout
303
{
304
std::span<const VertexAttribute> vertex_attributes;
305
u32 vertex_stride;
306
307
bool operator==(const InputLayout& rhs) const;
308
bool operator!=(const InputLayout& rhs) const;
309
};
310
311
struct InputLayoutHash
312
{
313
size_t operator()(const InputLayout& il) const;
314
};
315
316
enum class CullMode : u8
317
{
318
None,
319
Front,
320
Back,
321
322
MaxCount
323
};
324
325
enum class DepthFunc : u8
326
{
327
Never,
328
Always,
329
Less,
330
LessEqual,
331
Greater,
332
GreaterEqual,
333
Equal,
334
335
MaxCount
336
};
337
338
enum class BlendFunc : u8
339
{
340
Zero,
341
One,
342
SrcColor,
343
InvSrcColor,
344
DstColor,
345
InvDstColor,
346
SrcAlpha,
347
InvSrcAlpha,
348
SrcAlpha1,
349
InvSrcAlpha1,
350
DstAlpha,
351
InvDstAlpha,
352
ConstantColor,
353
InvConstantColor,
354
355
MaxCount
356
};
357
358
enum class BlendOp : u8
359
{
360
Add,
361
Subtract,
362
ReverseSubtract,
363
Min,
364
Max,
365
366
MaxCount
367
};
368
369
// TODO: purge this?
370
union RasterizationState
371
{
372
u8 key;
373
374
BitField<u8, CullMode, 0, 2> cull_mode;
375
376
// clang-format off
377
ALWAYS_INLINE bool operator==(const RasterizationState& rhs) const { return key == rhs.key; }
378
ALWAYS_INLINE bool operator!=(const RasterizationState& rhs) const { return key != rhs.key; }
379
ALWAYS_INLINE bool operator<(const RasterizationState& rhs) const { return key < rhs.key; }
380
// clang-format on
381
382
static RasterizationState GetNoCullState();
383
};
384
385
union DepthState
386
{
387
u8 key;
388
389
BitField<u8, DepthFunc, 0, 3> depth_test;
390
BitField<u8, bool, 4, 1> depth_write;
391
392
// clang-format off
393
ALWAYS_INLINE bool operator==(const DepthState& rhs) const { return key == rhs.key; }
394
ALWAYS_INLINE bool operator!=(const DepthState& rhs) const { return key != rhs.key; }
395
ALWAYS_INLINE bool operator<(const DepthState& rhs) const { return key < rhs.key; }
396
// clang-format on
397
398
static DepthState GetNoTestsState();
399
static DepthState GetAlwaysWriteState();
400
};
401
402
union BlendState
403
{
404
u64 key;
405
406
BitField<u64, bool, 0, 1> enable;
407
BitField<u64, BlendFunc, 1, 4> src_blend;
408
BitField<u64, BlendFunc, 5, 4> src_alpha_blend;
409
BitField<u64, BlendFunc, 9, 4> dst_blend;
410
BitField<u64, BlendFunc, 13, 4> dst_alpha_blend;
411
BitField<u64, BlendOp, 17, 3> blend_op;
412
BitField<u64, BlendOp, 20, 3> alpha_blend_op;
413
BitField<u64, bool, 24, 1> write_r;
414
BitField<u64, bool, 25, 1> write_g;
415
BitField<u64, bool, 26, 1> write_b;
416
BitField<u64, bool, 27, 1> write_a;
417
BitField<u64, u8, 24, 4> write_mask;
418
BitField<u64, u32, 32, 32> constant;
419
420
BitField<u64, u16, 1, 16> blend_factors;
421
BitField<u64, u8, 17, 6> blend_ops;
422
423
// clang-format off
424
ALWAYS_INLINE bool operator==(const BlendState& rhs) const { return key == rhs.key; }
425
ALWAYS_INLINE bool operator!=(const BlendState& rhs) const { return key != rhs.key; }
426
ALWAYS_INLINE bool operator<(const BlendState& rhs) const { return key < rhs.key; }
427
// clang-format on
428
429
// clang-format off
430
ALWAYS_INLINE float GetConstantRed() const { return static_cast<float>(constant.GetValue() & 0xFF) / 255.0f; }
431
ALWAYS_INLINE float GetConstantGreen() const { return static_cast<float>((constant.GetValue() >> 8) & 0xFF) / 255.0f; }
432
ALWAYS_INLINE float GetConstantBlue() const { return static_cast<float>((constant.GetValue() >> 16) & 0xFF) / 255.0f; }
433
ALWAYS_INLINE float GetConstantAlpha() const { return static_cast<float>((constant.GetValue() >> 24) & 0xFF) / 255.0f; }
434
// clang-format on
435
ALWAYS_INLINE std::array<float, 4> GetConstantFloatColor() const
436
{
437
return std::array<float, 4>{GetConstantRed(), GetConstantGreen(), GetConstantBlue(), GetConstantAlpha()};
438
}
439
440
static BlendState GetNoBlendingState();
441
static BlendState GetAlphaBlendingState();
442
};
443
444
struct GraphicsConfig
445
{
446
Layout layout;
447
448
Primitive primitive;
449
InputLayout input_layout;
450
451
RasterizationState rasterization;
452
DepthState depth;
453
BlendState blend;
454
455
GPUShader* vertex_shader;
456
GPUShader* geometry_shader;
457
GPUShader* fragment_shader;
458
459
GPUTexture::Format color_formats[4];
460
GPUTexture::Format depth_format;
461
u8 samples;
462
bool per_sample_shading;
463
RenderPassFlag render_pass_flags;
464
465
void SetTargetFormats(GPUTexture::Format color_format,
466
GPUTexture::Format depth_format_ = GPUTexture::Format::Unknown);
467
u32 GetRenderTargetCount() const;
468
};
469
470
struct ComputeConfig
471
{
472
Layout layout;
473
GPUShader* compute_shader;
474
};
475
476
GPUPipeline();
477
virtual ~GPUPipeline();
478
479
#ifdef ENABLE_GPU_OBJECT_NAMES
480
virtual void SetDebugName(std::string_view name) = 0;
481
template<typename... T>
482
void SetDebugName(fmt::format_string<T...> fmt, T&&... args)
483
{
484
SetDebugName(TinyString::from_vformat(fmt, fmt::make_format_args(args...)));
485
}
486
#endif
487
};
488
489
class GPUTextureBuffer
490
{
491
public:
492
enum class Format
493
{
494
R16UI,
495
496
MaxCount
497
};
498
499
GPUTextureBuffer(Format format, u32 size_in_elements);
500
virtual ~GPUTextureBuffer();
501
502
static u32 GetElementSize(Format format);
503
504
ALWAYS_INLINE Format GetFormat() const { return m_format; }
505
ALWAYS_INLINE u32 GetSizeInElements() const { return m_size_in_elements; }
506
ALWAYS_INLINE u32 GetSizeInBytes() const { return m_size_in_elements * GetElementSize(m_format); }
507
ALWAYS_INLINE u32 GetCurrentPosition() const { return m_current_position; }
508
509
virtual void* Map(u32 required_elements) = 0;
510
virtual void Unmap(u32 used_elements) = 0;
511
512
#ifdef ENABLE_GPU_OBJECT_NAMES
513
virtual void SetDebugName(std::string_view name) = 0;
514
template<typename... T>
515
void SetDebugName(fmt::format_string<T...> fmt, T&&... args)
516
{
517
SetDebugName(TinyString::from_vformat(fmt, fmt::make_format_args(args...)));
518
}
519
#endif
520
521
protected:
522
Format m_format;
523
u32 m_size_in_elements;
524
u32 m_current_position = 0;
525
};
526
527
class GPUSwapChain
528
{
529
public:
530
GPUSwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode, bool allow_present_throttle);
531
virtual ~GPUSwapChain();
532
533
ALWAYS_INLINE const WindowInfo& GetWindowInfo() const { return m_window_info; }
534
ALWAYS_INLINE u32 GetWidth() const { return m_window_info.surface_width; }
535
ALWAYS_INLINE u32 GetHeight() const { return m_window_info.surface_height; }
536
ALWAYS_INLINE u32 GetPostRotatedWidth() const { return m_window_info.GetPostRotatedWidth(); }
537
ALWAYS_INLINE u32 GetPostRotatedHeight() const { return m_window_info.GetPostRotatedHeight(); }
538
ALWAYS_INLINE float GetScale() const { return m_window_info.surface_scale; }
539
ALWAYS_INLINE WindowInfo::PreRotation GetPreRotation() const { return m_window_info.surface_prerotation; }
540
ALWAYS_INLINE GPUTexture::Format GetFormat() const { return m_window_info.surface_format; }
541
ALWAYS_INLINE GSVector2i GetSizeVec() const
542
{
543
return GSVector2i(m_window_info.surface_width, m_window_info.surface_height);
544
}
545
ALWAYS_INLINE GSVector2i GetPostRotatedSizeVec() const
546
{
547
return GSVector2i(m_window_info.GetPostRotatedWidth(), m_window_info.GetPostRotatedHeight());
548
}
549
550
ALWAYS_INLINE GPUVSyncMode GetVSyncMode() const { return m_vsync_mode; }
551
ALWAYS_INLINE bool IsVSyncModeBlocking() const { return (m_vsync_mode == GPUVSyncMode::FIFO); }
552
ALWAYS_INLINE bool IsPresentThrottleAllowed() const { return m_allow_present_throttle; }
553
554
virtual bool ResizeBuffers(u32 new_width, u32 new_height, float new_scale, Error* error) = 0;
555
virtual bool SetVSyncMode(GPUVSyncMode mode, bool allow_present_throttle, Error* error) = 0;
556
557
/// Returns true if exclusive fullscreen is currently active on this swap chain.
558
virtual bool IsExclusiveFullscreen() const;
559
560
bool ShouldSkipPresentingFrame();
561
void ThrottlePresentation();
562
563
static GSVector4i PreRotateClipRect(WindowInfo::PreRotation prerotation, const GSVector2i surface_size,
564
const GSVector4i& v);
565
566
protected:
567
// TODO: Merge WindowInfo into this struct...
568
WindowInfo m_window_info;
569
570
GPUVSyncMode m_vsync_mode = GPUVSyncMode::Disabled;
571
bool m_allow_present_throttle = false;
572
573
u64 m_last_frame_displayed_time = 0;
574
};
575
576
class GPUDevice
577
{
578
public:
579
friend GPUTexture;
580
581
using DrawIndex = u16;
582
583
enum class CreateFlags : u32
584
{
585
None = 0,
586
PreferGLESContext = (1 << 0),
587
EnableDebugDevice = (1 << 1),
588
EnableGPUValidation = (1 << 2),
589
DisableDualSourceBlend = (1 << 3),
590
DisableFeedbackLoops = (1 << 4),
591
DisableFramebufferFetch = (1 << 5),
592
DisableTextureBuffers = (1 << 6),
593
DisableGeometryShaders = (1 << 7),
594
DisableComputeShaders = (1 << 8),
595
DisableTextureCopyToSelf = (1 << 9),
596
DisableMemoryImport = (1 << 10),
597
DisableRasterOrderViews = (1 << 11),
598
DisableCompressedTextures = (1 << 12),
599
};
600
601
enum class DrawBarrier : u32
602
{
603
None,
604
One,
605
Full
606
};
607
608
enum class PresentResult : u32
609
{
610
OK,
611
SkipPresent,
612
ExclusiveFullscreenLost,
613
DeviceLost,
614
};
615
616
struct Features
617
{
618
bool dual_source_blend : 1;
619
bool framebuffer_fetch : 1;
620
bool per_sample_shading : 1;
621
bool noperspective_interpolation : 1;
622
bool texture_copy_to_self : 1;
623
bool texture_buffers : 1;
624
bool texture_buffers_emulated_with_ssbo : 1;
625
bool feedback_loops : 1;
626
bool geometry_shaders : 1;
627
bool compute_shaders : 1;
628
bool partial_msaa_resolve : 1;
629
bool memory_import : 1;
630
bool exclusive_fullscreen : 1;
631
bool explicit_present : 1;
632
bool timed_present : 1;
633
bool gpu_timing : 1;
634
bool shader_cache : 1;
635
bool pipeline_cache : 1;
636
bool prefer_unused_textures : 1;
637
bool raster_order_views : 1;
638
bool dxt_textures : 1;
639
bool bptc_textures : 1;
640
};
641
642
struct Statistics
643
{
644
size_t buffer_streamed;
645
u32 num_draws;
646
u32 num_barriers;
647
u32 num_render_passes;
648
u32 num_copies;
649
u32 num_downloads;
650
u32 num_uploads;
651
};
652
653
// Parameters for exclusive fullscreen.
654
struct ExclusiveFullscreenMode
655
{
656
u32 width;
657
u32 height;
658
float refresh_rate;
659
660
TinyString ToString() const;
661
662
static std::optional<ExclusiveFullscreenMode> Parse(std::string_view str);
663
};
664
665
struct AdapterInfo
666
{
667
std::string name;
668
std::vector<ExclusiveFullscreenMode> fullscreen_modes;
669
u32 max_texture_size;
670
u32 max_multisamples;
671
GPUDriverType driver_type;
672
bool supports_sample_shading;
673
};
674
using AdapterInfoList = std::vector<AdapterInfo>;
675
676
struct PooledTextureDeleter
677
{
678
void operator()(GPUTexture* const tex);
679
};
680
using AutoRecycleTexture = std::unique_ptr<GPUTexture, PooledTextureDeleter>;
681
682
static constexpr u32 MAX_TEXTURE_SAMPLERS = 8;
683
static constexpr u32 MIN_TEXEL_BUFFER_ELEMENTS = 4 * 1024 * 512;
684
static constexpr u32 MAX_RENDER_TARGETS = 4;
685
static constexpr u32 MAX_IMAGE_RENDER_TARGETS = 2;
686
static constexpr u32 DEFAULT_CLEAR_COLOR = 0xFF000000u;
687
static constexpr u32 PIPELINE_CACHE_HASH_SIZE = 20;
688
static_assert(sizeof(GPUPipeline::GraphicsConfig::color_formats) == sizeof(GPUTexture::Format) * MAX_RENDER_TARGETS);
689
690
GPUDevice();
691
virtual ~GPUDevice();
692
693
/// Returns the default/preferred API for the system.
694
static RenderAPI GetPreferredAPI();
695
696
/// Returns a string representing the specified API.
697
static const char* RenderAPIToString(RenderAPI api);
698
699
/// Returns a string representing the specified language.
700
static const char* ShaderLanguageToString(GPUShaderLanguage language);
701
702
/// Returns a string representing the specified vsync mode.
703
static const char* VSyncModeToString(GPUVSyncMode mode);
704
705
/// Returns a new device for the specified API.
706
static std::unique_ptr<GPUDevice> CreateDeviceForAPI(RenderAPI api);
707
708
/// Returns true if the render API is the same (e.g. GLES and GL).
709
static bool IsSameRenderAPI(RenderAPI lhs, RenderAPI rhs);
710
711
/// Returns a list of adapters for the given API.
712
static AdapterInfoList GetAdapterListForAPI(RenderAPI api);
713
714
/// Dumps out a shader that failed compilation.
715
static void DumpBadShader(std::string_view code, std::string_view errors);
716
717
/// Converts a RGBA8 value to 4 floating-point values.
718
static std::array<float, 4> RGBA8ToFloat(u32 rgba);
719
720
/// Returns true if the given device creation flag is present.
721
static constexpr bool HasCreateFlag(CreateFlags flags, CreateFlags flag)
722
{
723
return ((static_cast<u32>(flags) & static_cast<u32>(flag)) != 0);
724
}
725
726
/// Returns the number of texture bindings for a given pipeline layout.
727
static constexpr u32 GetActiveTexturesForLayout(GPUPipeline::Layout layout)
728
{
729
constexpr std::array<u8, static_cast<u8>(GPUPipeline::Layout::MaxCount)> counts = {
730
1, // SingleTextureAndUBO
731
1, // SingleTextureAndPushConstants
732
0, // SingleTextureBufferAndPushConstants
733
MAX_TEXTURE_SAMPLERS, // MultiTextureAndUBO
734
MAX_TEXTURE_SAMPLERS, // MultiTextureAndPushConstants
735
MAX_TEXTURE_SAMPLERS, // ComputeMultiTextureAndUBO
736
MAX_TEXTURE_SAMPLERS, // ComputeMultiTextureAndPushConstants
737
};
738
739
return counts[static_cast<u8>(layout)];
740
}
741
742
/// Returns true if the given pipeline layout is used for compute shaders.
743
static constexpr bool IsComputeLayout(GPUPipeline::Layout layout)
744
{
745
return (layout >= GPUPipeline::Layout::ComputeMultiTextureAndUBO);
746
}
747
748
/// Returns the number of thread groups to dispatch for a given total count and local size.
/// Each component is count / local_size rounded up. The local sizes must be non-zero.
static constexpr std::tuple<u32, u32, u32> GetDispatchCount(u32 count_x, u32 count_y, u32 count_z, u32 local_size_x,
                                                            u32 local_size_y, u32 local_size_z)
{
  // Round up with quotient + "has remainder" instead of (count + local_size - 1) / local_size:
  // the latter wraps around in u32 for counts close to the maximum and yields too few groups.
  return std::make_tuple(count_x / local_size_x + static_cast<u32>((count_x % local_size_x) != 0),
                         count_y / local_size_y + static_cast<u32>((count_y % local_size_y) != 0),
                         count_z / local_size_z + static_cast<u32>((count_z % local_size_z) != 0));
}
755
756
/// Determines the driver type for a given adapter.
757
static GPUDriverType GuessDriverType(u32 pci_vendor_id, std::string_view vendor_name, std::string_view adapter_name);
758
759
ALWAYS_INLINE const Features& GetFeatures() const { return m_features; }
760
ALWAYS_INLINE RenderAPI GetRenderAPI() const { return m_render_api; }
761
ALWAYS_INLINE u32 GetRenderAPIVersion() const { return m_render_api_version; }
762
ALWAYS_INLINE u32 GetMaxTextureSize() const { return m_max_texture_size; }
763
ALWAYS_INLINE u32 GetMaxMultisamples() const { return m_max_multisamples; }
764
765
ALWAYS_INLINE GPUSwapChain* GetMainSwapChain() const { return m_main_swap_chain.get(); }
766
ALWAYS_INLINE bool HasMainSwapChain() const { return static_cast<bool>(m_main_swap_chain); }
767
768
ALWAYS_INLINE GPUTexture* GetEmptyTexture() const { return m_empty_texture.get(); }
769
ALWAYS_INLINE GPUSampler* GetLinearSampler() const { return m_linear_sampler; }
770
ALWAYS_INLINE GPUSampler* GetNearestSampler() const { return m_nearest_sampler; }
771
772
ALWAYS_INLINE bool IsGPUTimingEnabled() const { return m_gpu_timing_enabled; }
773
774
bool Create(std::string_view adapter, CreateFlags create_flags, std::string_view shader_dump_path,
775
std::string_view shader_cache_path, u32 shader_cache_version, const WindowInfo& wi, GPUVSyncMode vsync,
776
bool allow_present_throttle, const ExclusiveFullscreenMode* exclusive_fullscreen_mode,
777
std::optional<bool> exclusive_fullscreen_control, Error* error);
778
void Destroy();
779
780
virtual std::unique_ptr<GPUSwapChain> CreateSwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode,
781
bool allow_present_throttle,
782
const ExclusiveFullscreenMode* exclusive_fullscreen_mode,
783
std::optional<bool> exclusive_fullscreen_control,
784
Error* error) = 0;
785
virtual bool SwitchToSurfacelessRendering(Error* error);
786
787
bool RecreateMainSwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode, bool allow_present_throttle,
788
const ExclusiveFullscreenMode* exclusive_fullscreen_mode,
789
std::optional<bool> exclusive_fullscreen_control, Error* error);
790
void DestroyMainSwapChain();
791
792
virtual std::string GetDriverInfo() const = 0;
793
794
// Flushes current command buffer, but does not wait for completion.
795
virtual void FlushCommands() = 0;
796
797
// Executes current command buffer, waits for its completion, and destroys all pending resources.
798
virtual void WaitForGPUIdle() = 0;
799
800
virtual std::unique_ptr<GPUTexture> CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples,
801
GPUTexture::Type type, GPUTexture::Format format,
802
GPUTexture::Flags flags, const void* data = nullptr,
803
u32 data_stride = 0, Error* error = nullptr) = 0;
804
virtual std::unique_ptr<GPUSampler> CreateSampler(const GPUSampler::Config& config, Error* error = nullptr) = 0;
805
virtual std::unique_ptr<GPUTextureBuffer> CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements,
806
Error* error = nullptr) = 0;
807
808
GPUSampler* GetSampler(const GPUSampler::Config& config, Error* error = nullptr);
809
810
// Texture pooling.
811
std::unique_ptr<GPUTexture> FetchTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples,
812
GPUTexture::Type type, GPUTexture::Format format, GPUTexture::Flags flags,
813
const void* data = nullptr, u32 data_stride = 0, Error* error = nullptr);
814
AutoRecycleTexture FetchAutoRecycleTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples,
815
GPUTexture::Type type, GPUTexture::Format format, GPUTexture::Flags flags,
816
const void* data = nullptr, u32 data_stride = 0, Error* error = nullptr);
817
std::unique_ptr<GPUTexture> FetchAndUploadTextureImage(const Image& image,
818
GPUTexture::Flags flags = GPUTexture::Flags::None,
819
Error* error = nullptr);
820
void RecycleTexture(std::unique_ptr<GPUTexture> texture);
821
void PurgeTexturePool();
822
823
virtual std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format,
824
Error* error = nullptr) = 0;
825
virtual std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format,
826
void* memory, size_t memory_size, u32 memory_stride,
827
Error* error = nullptr) = 0;
828
829
virtual void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src,
830
u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) = 0;
831
virtual void ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
832
GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height) = 0;
833
virtual void ClearRenderTarget(GPUTexture* t, u32 c);
834
virtual void ClearDepth(GPUTexture* t, float d);
835
virtual void InvalidateRenderTarget(GPUTexture* t);
836
837
/// Shader abstraction.
838
std::unique_ptr<GPUShader> CreateShader(GPUShaderStage stage, GPUShaderLanguage language, std::string_view source,
839
Error* error = nullptr, const char* entry_point = "main");
840
virtual std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config,
841
Error* error = nullptr) = 0;
842
virtual std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config,
843
Error* error = nullptr) = 0;
844
845
#ifdef ENABLE_GPU_OBJECT_NAMES
846
/// Debug messaging.
847
virtual void PushDebugGroup(const char* name) = 0;
848
virtual void PopDebugGroup() = 0;
849
virtual void InsertDebugMessage(const char* msg) = 0;
850
851
/// Formatted debug variants.
852
template<typename... T>
853
void PushDebugGroup(fmt::format_string<T...> fmt, T&&... args)
854
{
855
PushDebugGroup(TinyString::from_vformat(fmt, fmt::make_format_args(args...)));
856
}
857
template<typename... T>
858
void InsertDebugMessage(fmt::format_string<T...> fmt, T&&... args)
859
{
860
InsertDebugMessage(TinyString::from_vformat(fmt, fmt::make_format_args(args...)));
861
}
862
#endif
863
864
/// Vertex/index buffer abstraction.
865
virtual void MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
866
u32* map_base_vertex) = 0;
867
virtual void UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) = 0;
868
virtual void MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) = 0;
869
virtual void UnmapIndexBuffer(u32 used_size) = 0;
870
871
void UploadVertexBuffer(const void* vertices, u32 vertex_size, u32 vertex_count, u32* base_vertex);
872
void UploadIndexBuffer(const DrawIndex* indices, u32 index_count, u32* base_index);
873
874
/// Uniform buffer abstraction.
875
virtual void PushUniformBuffer(const void* data, u32 data_size) = 0;
876
virtual void* MapUniformBuffer(u32 size) = 0;
877
virtual void UnmapUniformBuffer(u32 size) = 0;
878
void UploadUniformBuffer(const void* data, u32 data_size);
879
880
/// Drawing setup abstraction.
881
virtual void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
882
GPUPipeline::RenderPassFlag flags = GPUPipeline::NoRenderPassFlags) = 0;
883
virtual void SetPipeline(GPUPipeline* pipeline) = 0;
884
virtual void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) = 0;
885
virtual void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) = 0;
886
virtual void SetViewport(const GSVector4i rc) = 0;
887
virtual void SetScissor(const GSVector4i rc) = 0;
888
void SetRenderTarget(GPUTexture* rt, GPUTexture* ds = nullptr,
889
GPUPipeline::RenderPassFlag flags = GPUPipeline::NoRenderPassFlags);
890
void SetViewport(s32 x, s32 y, s32 width, s32 height);
891
void SetScissor(s32 x, s32 y, s32 width, s32 height);
892
void SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height);
893
void SetViewportAndScissor(const GSVector4i rc);
894
895
// Drawing abstraction.
896
virtual void Draw(u32 vertex_count, u32 base_vertex) = 0;
897
virtual void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) = 0;
898
virtual void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) = 0;
899
virtual void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
900
u32 group_size_z) = 0;
901
902
/// Returns false if the window was completely occluded.
903
virtual PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color = DEFAULT_CLEAR_COLOR) = 0;
904
virtual void EndPresent(GPUSwapChain* swap_chain, bool explicit_submit, u64 submit_time = 0) = 0;
905
virtual void SubmitPresent(GPUSwapChain* swap_chain) = 0;
906
907
ALWAYS_INLINE bool IsDebugDevice() const { return m_debug_device; }
908
ALWAYS_INLINE size_t GetVRAMUsage() const { return s_total_vram_usage; }
909
910
bool UsesLowerLeftOrigin() const;
911
static GSVector4i FlipToLowerLeft(GSVector4i rc, s32 target_height);
912
bool ResizeTexture(std::unique_ptr<GPUTexture>* tex, u32 new_width, u32 new_height, GPUTexture::Type type,
913
GPUTexture::Format format, GPUTexture::Flags flags, bool preserve = true, Error* error = nullptr);
914
bool ResizeTexture(std::unique_ptr<GPUTexture>* tex, u32 new_width, u32 new_height, GPUTexture::Type type,
915
GPUTexture::Format format, GPUTexture::Flags flags, const void* replace_data,
916
u32 replace_data_pitch, Error* error = nullptr);
917
918
virtual bool SupportsTextureFormat(GPUTexture::Format format) const = 0;
919
920
/// Enables/disables GPU frame timing.
921
virtual bool SetGPUTimingEnabled(bool enabled);
922
923
/// Returns the amount of GPU time utilized since the last time this method was called.
924
virtual float GetAndResetAccumulatedGPUTime();
925
926
ALWAYS_INLINE static Statistics& GetStatistics() { return s_stats; }
927
static void ResetStatistics();
928
929
protected:
930
// Backend hook that creates the device together with its main swap chain. Pure virtual and protected, so it is
// only reachable from GPUDevice and its subclasses.
// exclusive_fullscreen_mode is passed by pointer and exclusive_fullscreen_control is an optional<bool>.
// NOTE(review): presumably a null mode / empty optional mean "not requested" / "use the default" - confirm.
virtual bool CreateDeviceAndMainSwapChain(std::string_view adapter, CreateFlags create_flags, const WindowInfo& wi,
931
GPUVSyncMode vsync_mode, bool allow_present_throttle,
932
const ExclusiveFullscreenMode* exclusive_fullscreen_mode,
933
std::optional<bool> exclusive_fullscreen_control, Error* error) = 0;
934
// Backend hook for device teardown. Pure virtual.
virtual void DestroyDevice() = 0;
935
936
// Builds a cache base name for the given `type` string. Non-virtual, defined in the base class.
std::string GetShaderCacheBaseName(std::string_view type) const;
937
// Pipeline cache hooks. All are virtual but not pure, so the base class provides definitions that a backend
// may override. Each reports its result as a bool and takes an Error* for details.
// NOTE(review): presumably true means success - confirm against the definitions.
virtual bool OpenPipelineCache(const std::string& path, Error* error);
938
virtual bool CreatePipelineCache(const std::string& path, Error* error);
939
// Takes the cache blob by value.
virtual bool ReadPipelineCache(DynamicHeapArray<u8> data, Error* error);
940
// Writes the cache blob through the `data` out-pointer.
virtual bool GetPipelineCacheData(DynamicHeapArray<u8>* data, Error* error);
941
virtual bool ClosePipelineCache(const std::string& path, Error* error);
942
943
// Backend hook: builds a shader object for `stage` from an already-compiled binary blob. Pure virtual.
// NOTE(review): presumably returns null and fills `error` on failure - confirm.
virtual std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data,
944
Error* error) = 0;
945
// Backend hook: builds a shader object for `stage` from source text in `language`, using `entry_point`.
// Pure virtual. out_binary is an out-pointer for a binary blob.
// NOTE(review): presumably out_binary may be null when the caller does not want the compiled binary - confirm.
virtual std::unique_ptr<GPUShader> CreateShaderFromSource(GPUShaderStage stage, GPUShaderLanguage language,
946
std::string_view source, const char* entry_point,
947
DynamicHeapArray<u8>* out_binary, Error* error) = 0;
948
949
// Protected, non-virtual helper defined out of line.
// NOTE(review): presumably evicts entries from m_texture_pool/m_target_pool using the MAX_*_POOL_SIZE and
// POOL_PURGE_DELAY limits declared in this class - confirm.
void TrimTexturePool();
950
951
// Shader compilation helpers shared by the backends. All are non-virtual and defined out of line; the comments
// below describe only what the signatures show.
//
// Compiles `source` (in `source_language`) for `stage` and writes the result through `out_binary`.
// `optimization` and `nonsemantic_debug_info` are boolean switches forwarded by the caller.
951
bool CompileGLSLShaderToVulkanSpv(GPUShaderStage stage, GPUShaderLanguage source_language, std::string_view source,
952
const char* entry_point, bool optimization, bool nonsemantic_debug_info,
953
DynamicHeapArray<u8>* out_binary, Error* error);
954
// Translates a SPIR-V blob into `target_language` at `target_version`, writing the text through `output`.
bool TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GPUShaderStage stage,
955
GPUShaderLanguage target_language, u32 target_version, std::string* output,
956
Error* error);
957
// Takes source in `source_language` plus a target language/version and returns a shader object.
// NOTE(review): presumably chains the two helpers above and then one of the CreateShaderFrom*() hooks - confirm.
std::unique_ptr<GPUShader> TranspileAndCreateShaderFromSource(GPUShaderStage stage, GPUShaderLanguage source_language,
958
std::string_view source, const char* entry_point,
959
GPUShaderLanguage target_language, u32 target_version,
960
DynamicHeapArray<u8>* out_binary, Error* error);
961
// Static. Returns an optional blob.
// NOTE(review): presumably an empty optional means optimization failed - confirm.
static std::optional<DynamicHeapArray<u8>> OptimizeVulkanSpv(const std::span<const u8> spirv, Error* error);
962
963
// Protected setter taking a GPUDriverType.
// NOTE(review): presumably stores into m_driver_type and may adjust related state - confirm against the definition.
void SetDriverType(GPUDriverType type);
964
965
// Capability/identity state. Every member has an "empty" default (value-initialized, None, Unknown or 0).
// NOTE(review): presumably a backend fills these in during device creation - confirm.
Features m_features = {};
966
RenderAPI m_render_api = RenderAPI::None;
967
u32 m_render_api_version = 0;
968
u32 m_max_texture_size = 0;
969
GPUDriverType m_driver_type = GPUDriverType::Unknown;
970
u16 m_max_multisamples = 0;
971
972
// Resources owned by the device through unique_ptr.
std::unique_ptr<GPUSwapChain> m_main_swap_chain;
973
std::unique_ptr<GPUTexture> m_empty_texture;
// Raw, non-owning sampler pointers.
// NOTE(review): presumably these point at entries owned by the private sampler map - confirm.
974
GPUSampler* m_nearest_sampler = nullptr;
975
GPUSampler* m_linear_sampler = nullptr;
976
977
// Shader cache held by value.
GPUShaderCache m_shader_cache;
978
979
private:
980
// Limits for the texture recycling pools.
// NOTE(review): presumably MAX_TEXTURE_POOL_SIZE caps m_texture_pool and MAX_TARGET_POOL_SIZE caps m_target_pool
// (entry counts) - confirm against the pool code.
static constexpr u32 MAX_TEXTURE_POOL_SIZE = 125;
981
static constexpr u32 MAX_TARGET_POOL_SIZE = 50;
982
// NOTE(review): unit is not stated here; presumably measured in ticks of m_texture_pool_counter - confirm.
static constexpr u32 POOL_PURGE_DELAY = 300;
983
984
struct TexturePoolKey
985
{
986
u16 width;
987
u16 height;
988
u8 layers;
989
u8 levels;
990
u8 samples;
991
GPUTexture::Type type;
992
GPUTexture::Format format;
993
GPUTexture::Flags flags;
994
995
ALWAYS_INLINE bool operator==(const TexturePoolKey& rhs) const
996
{
997
return std::memcmp(this, &rhs, sizeof(TexturePoolKey)) == 0;
998
}
999
ALWAYS_INLINE bool operator!=(const TexturePoolKey& rhs) const
1000
{
1001
return std::memcmp(this, &rhs, sizeof(TexturePoolKey)) != 0;
1002
}
1003
};
1004
// One slot in a TexturePool: an owned texture plus the key describing it.
struct TexturePoolEntry
1005
{
1006
// The pooled texture; ownership is held by the entry.
std::unique_ptr<GPUTexture> texture;
1007
// No default initializer, so whoever creates the entry must set this.
// NOTE(review): presumably a snapshot of m_texture_pool_counter used to age entries out - confirm.
u32 use_counter;
1008
// Creation parameters of `texture`, used for lookups.
TexturePoolKey key;
1009
};
1010
1011
// Pool container: a deque of entries.
using TexturePool = std::deque<TexturePoolEntry>;
1012
// Samplers keyed by a 64-bit value, each owned through unique_ptr.
// NOTE(review): the key is presumably GPUSampler::Config::key - confirm.
// NOTE(review): <unordered_map> is not among the #includes visible at the top of this header, so it is
// presumably pulled in transitively by another include - confirm, and include it directly if so.
using SamplerMap = std::unordered_map<u64, std::unique_ptr<GPUSampler>>;
1013
1014
// Only compiled on Apple platforms.
#ifdef __APPLE__
1015
// We have to define these in the base class, because they're in Objective C++.
1016
// Factory returning a newly created device.
static std::unique_ptr<GPUDevice> WrapNewMetalDevice();
1017
// Returns the adapter list for the Metal backend.
static AdapterInfoList WrapGetMetalAdapterList();
1018
#endif
1019
1020
// Private lifecycle helpers, all defined out of line.
// NOTE(review): the Open/Close and Create/Destroy pairs are presumably called symmetrically during device
// creation and teardown - confirm against the callers.
void OpenShaderCache(std::string_view base_path, u32 version);
1021
void CloseShaderCache();
1022
bool CreateResources(Error* error);
1023
void DestroyResources();
1024
// Static predicate on a texture type.
// NOTE(review): presumably selects between m_texture_pool and m_target_pool - confirm.
static bool IsTexturePoolType(GPUTexture::Type type);
1025
1026
// Static counter returned by GetVRAMUsage(); one value is shared across all device instances.
static size_t s_total_vram_usage;
1027
1028
// Owns the sampler objects (see the SamplerMap alias).
SamplerMap m_sampler_map;
1029
1030
// Two separate recycling pools plus their bookkeeping.
// NOTE(review): presumably m_target_pool holds render-target style textures and m_texture_pool the rest - confirm.
TexturePool m_texture_pool;
1031
TexturePool m_target_pool;
1032
size_t m_pool_vram_usage = 0;
1033
u32 m_texture_pool_counter = 0;
1034
1035
protected:
1036
// Backing storage for GetStatistics(); static, so shared by all devices.
static Statistics s_stats;
1037
1038
// Both flags default to false. m_debug_device is what IsDebugDevice() returns.
// NOTE(review): m_gpu_timing_enabled is presumably toggled by SetGPUTimingEnabled() - confirm.
bool m_gpu_timing_enabled = false;
1039
bool m_debug_device = false;
1040
};
1041
1042
// Global device instance, defined in another translation unit. The debug macros, GLAutoPop and
// PooledTextureDeleter in this header dereference it without a null check.
extern std::unique_ptr<GPUDevice> g_gpu_device;
1043
1044
// NOTE(review): macro defined elsewhere; going by its name it generates the bitwise operators for the
// CreateFlags enum class - confirm.
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPUDevice::CreateFlags);
1045
1046
/// Deleter for pooled texture handles: rather than destroying the texture, it re-wraps the raw pointer in an
/// owning unique_ptr and hands it to the global device's RecycleTexture().
/// g_gpu_device is dereferenced unconditionally, so the device must still exist when a handle is released.
ALWAYS_INLINE void GPUDevice::PooledTextureDeleter::operator()(GPUTexture* const tex)
{
  std::unique_ptr<GPUTexture> reclaimed(tex);
  g_gpu_device->RecycleTexture(std::move(reclaimed));
}
1050
1051
// C preprocessor workarounds.
1052
// Two-level paste: operands of ## are not macro-expanded, so pasting __LINE__ directly would yield the literal
// token "__LINE__". Going through GL_TOKEN_PASTE2 expands the arguments first, then GL_TOKEN_PASTE joins them.
#define GL_TOKEN_PASTE(x, y) x##y
1053
#define GL_TOKEN_PASTE2(x, y) GL_TOKEN_PASTE(x, y)
1054
1055
// Macros for debug messages.
1056
#ifdef ENABLE_GPU_OBJECT_NAMES
1057
struct GLAutoPop
1058
{
1059
GLAutoPop(const char* name)
1060
{
1061
if (g_gpu_device->IsDebugDevice()) [[unlikely]]
1062
g_gpu_device->PushDebugGroup(name);
1063
}
1064
1065
template<typename... T>
1066
GLAutoPop(fmt::format_string<T...> fmt, T&&... args)
1067
{
1068
if (g_gpu_device->IsDebugDevice()) [[unlikely]]
1069
g_gpu_device->PushDebugGroup(SmallString::from_vformat(fmt, fmt::make_format_args(args...)));
1070
}
1071
1072
~GLAutoPop()
1073
{
1074
if (g_gpu_device->IsDebugDevice()) [[unlikely]]
1075
g_gpu_device->PopDebugGroup();
1076
}
1077
};
1078
1079
// Debug-enabled variants (ENABLE_GPU_OBJECT_NAMES is defined).
//
// GL_SCOPE declares a GLAutoPop local whose name embeds __LINE__, so uses on different source lines do not
// collide. The group is pushed here and popped when the enclosing scope ends.
#define GL_SCOPE(name) GLAutoPop GL_TOKEN_PASTE2(gl_auto_pop_, __LINE__)(name)
1080
// GL_INS inserts a single debug message, only when the device is a debug device. The do/while(0) wrapper makes
// the macro behave as one statement.
#define GL_INS(msg) \
1081
do \
1082
{ \
1083
if (g_gpu_device->IsDebugDevice()) [[unlikely]] \
1084
g_gpu_device->InsertDebugMessage(msg); \
1085
} while (0)
1086
// GL_OBJECT_NAME calls SetDebugName(name) on obj, only when the device is a debug device.
#define GL_OBJECT_NAME(obj, name) \
1087
do \
1088
{ \
1089
if (g_gpu_device->IsDebugDevice()) [[unlikely]] \
1090
(obj)->SetDebugName(name); \
1091
} while (0)
1092
1093
// *_FMT variants forward their whole argument list (format string plus arguments) to the same calls.
#define GL_SCOPE_FMT(...) GLAutoPop GL_TOKEN_PASTE2(gl_auto_pop_, __LINE__)(__VA_ARGS__)
1094
#define GL_INS_FMT(...) \
1095
do \
1096
{ \
1097
if (g_gpu_device->IsDebugDevice()) [[unlikely]] \
1098
g_gpu_device->InsertDebugMessage(__VA_ARGS__); \
1099
} while (0)
1100
#define GL_OBJECT_NAME_FMT(obj, ...) \
1101
do \
1102
{ \
1103
if (g_gpu_device->IsDebugDevice()) [[unlikely]] \
1104
(obj)->SetDebugName(__VA_ARGS__); \
1105
} while (0)
1106
#else
1107
// Disabled variants (ENABLE_GPU_OBJECT_NAMES is not defined): every macro expands to the no-op expression
// (void)0, so the macro arguments are never evaluated.
#define GL_SCOPE(name) (void)0
1108
#define GL_INS(msg) (void)0
1109
#define GL_OBJECT_NAME(obj, name) (void)0
1110
1111
#define GL_SCOPE_FMT(...) (void)0
1112
#define GL_INS_FMT(...) (void)0
1113
#define GL_OBJECT_NAME_FMT(obj, ...) (void)0
1114
#endif
1115
1116