Path: blob/master/drivers/d3d12/rendering_device_driver_d3d12.cpp
9973 views
/**************************************************************************/1/* rendering_device_driver_d3d12.cpp */2/**************************************************************************/3/* This file is part of: */4/* GODOT ENGINE */5/* https://godotengine.org */6/**************************************************************************/7/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */8/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */9/* */10/* Permission is hereby granted, free of charge, to any person obtaining */11/* a copy of this software and associated documentation files (the */12/* "Software"), to deal in the Software without restriction, including */13/* without limitation the rights to use, copy, modify, merge, publish, */14/* distribute, sublicense, and/or sell copies of the Software, and to */15/* permit persons to whom the Software is furnished to do so, subject to */16/* the following conditions: */17/* */18/* The above copyright notice and this permission notice shall be */19/* included in all copies or substantial portions of the Software. */20/* */21/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */22/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */23/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */24/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */25/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */26/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */27/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/28/**************************************************************************/2930#include "rendering_device_driver_d3d12.h"3132#include "d3d12_hooks.h"3334#include "core/config/project_settings.h"35#include "core/io/marshalls.h"36#include "servers/rendering/rendering_device.h"37#include "thirdparty/zlib/zlib.h"3839#include "d3d12_godot_nir_bridge.h"40#include "rendering_context_driver_d3d12.h"4142GODOT_GCC_WARNING_PUSH43GODOT_GCC_WARNING_IGNORE("-Wimplicit-fallthrough")44GODOT_GCC_WARNING_IGNORE("-Wlogical-not-parentheses")45GODOT_GCC_WARNING_IGNORE("-Wmissing-field-initializers")46GODOT_GCC_WARNING_IGNORE("-Wnon-virtual-dtor")47GODOT_GCC_WARNING_IGNORE("-Wshadow")48GODOT_GCC_WARNING_IGNORE("-Wswitch")49GODOT_CLANG_WARNING_PUSH50GODOT_CLANG_WARNING_IGNORE("-Wimplicit-fallthrough")51GODOT_CLANG_WARNING_IGNORE("-Wlogical-not-parentheses")52GODOT_CLANG_WARNING_IGNORE("-Wmissing-field-initializers")53GODOT_CLANG_WARNING_IGNORE("-Wnon-virtual-dtor")54GODOT_CLANG_WARNING_IGNORE("-Wstring-plus-int")55GODOT_CLANG_WARNING_IGNORE("-Wswitch")56GODOT_MSVC_WARNING_PUSH57GODOT_MSVC_WARNING_IGNORE(4200) // "nonstandard extension used: zero-sized array in struct/union".58GODOT_MSVC_WARNING_IGNORE(4806) // "'&': unsafe operation: no value of type 'bool' promoted to type 'uint32_t' can equal the given constant".5960#include <nir_spirv.h>61#include <nir_to_dxil.h>62#include <spirv_to_dxil.h>63extern "C" {64#include <dxil_spirv_nir.h>65}6667GODOT_GCC_WARNING_POP68GODOT_CLANG_WARNING_POP69GODOT_MSVC_WARNING_POP7071#if !defined(_MSC_VER)72#include <guiddef.h>7374#include <dxguids.h>75#endif7677// Mesa may define this.78#ifdef UNUSED79#undef UNUSED80#endif8182#ifdef PIX_ENABLED83#if defined(__GNUC__)84#define _MSC_VER 180085#endif86#define USE_PIX87#include "WinPixEventRuntime/pix3.h"88#if defined(__GNUC__)89#undef _MSC_VER90#endif91#endif9293static const D3D12_RANGE VOID_RANGE = {};9495/*****************/96/**** GENERIC ****/97/*****************/9899// NOTE: RD's packed format names 
are reversed in relation to DXGI's; e.g.:.100// - DATA_FORMAT_A8B8G8R8_UNORM_PACK32 -> DXGI_FORMAT_R8G8B8A8_UNORM (packed; note ABGR vs. RGBA).101// - DATA_FORMAT_B8G8R8A8_UNORM -> DXGI_FORMAT_B8G8R8A8_UNORM (not packed; note BGRA order matches).102// TODO: Add YUV formats properly, which would require better support for planes in the RD API.103104const RenderingDeviceDriverD3D12::D3D12Format RenderingDeviceDriverD3D12::RD_TO_D3D12_FORMAT[RDD::DATA_FORMAT_MAX] = {105/* DATA_FORMAT_R4G4_UNORM_PACK8 */ {},106/* DATA_FORMAT_R4G4B4A4_UNORM_PACK16 */ { DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_B4G4R4A4_UNORM, D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(1, 2, 3, 0) },107/* DATA_FORMAT_B4G4R4A4_UNORM_PACK16 */ { DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_B4G4R4A4_UNORM, D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(3, 2, 1, 0) },108/* DATA_FORMAT_R5G6B5_UNORM_PACK16 */ { DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G6R5_UNORM },109/* DATA_FORMAT_B5G6R5_UNORM_PACK16 */ { DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G6R5_UNORM, D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(2, 1, 0, 3) },110/* DATA_FORMAT_R5G5B5A1_UNORM_PACK16 */ { DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G5R5A1_UNORM, D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(1, 2, 3, 0) },111/* DATA_FORMAT_B5G5R5A1_UNORM_PACK16 */ { DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G5R5A1_UNORM, D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(3, 2, 1, 0) },112/* DATA_FORMAT_A1R5G5B5_UNORM_PACK16 */ { DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G5R5A1_UNORM },113/* DATA_FORMAT_R8_UNORM */ { DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM },114/* DATA_FORMAT_R8_SNORM */ { DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_SNORM },115/* DATA_FORMAT_R8_USCALED */ { DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UINT },116/* DATA_FORMAT_R8_SSCALED */ { DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_SINT },117/* DATA_FORMAT_R8_UINT */ { DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UINT },118/* DATA_FORMAT_R8_SINT */ { DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_SINT },119/* DATA_FORMAT_R8_SRGB */ {},120/* 
DATA_FORMAT_R8G8_UNORM */ { DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_UNORM },121/* DATA_FORMAT_R8G8_SNORM */ { DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_SNORM },122/* DATA_FORMAT_R8G8_USCALED */ { DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_UINT },123/* DATA_FORMAT_R8G8_SSCALED */ { DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_SINT },124/* DATA_FORMAT_R8G8_UINT */ { DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_UINT },125/* DATA_FORMAT_R8G8_SINT */ { DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_SINT },126/* DATA_FORMAT_R8G8_SRGB */ {},127/* DATA_FORMAT_R8G8B8_UNORM */ {},128/* DATA_FORMAT_R8G8B8_SNORM */ {},129/* DATA_FORMAT_R8G8B8_USCALED */ {},130/* DATA_FORMAT_R8G8B8_SSCALED */ {},131/* DATA_FORMAT_R8G8B8_UINT */ {},132/* DATA_FORMAT_R8G8B8_SINT */ {},133/* DATA_FORMAT_R8G8B8_SRGB */ {},134/* DATA_FORMAT_B8G8R8_UNORM */ {},135/* DATA_FORMAT_B8G8R8_SNORM */ {},136/* DATA_FORMAT_B8G8R8_USCALED */ {},137/* DATA_FORMAT_B8G8R8_SSCALED */ {},138/* DATA_FORMAT_B8G8R8_UINT */ {},139/* DATA_FORMAT_B8G8R8_SINT */ {},140/* DATA_FORMAT_B8G8R8_SRGB */ {},141/* DATA_FORMAT_R8G8B8A8_UNORM */ { DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM },142/* DATA_FORMAT_R8G8B8A8_SNORM */ { DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_SNORM },143/* DATA_FORMAT_R8G8B8A8_USCALED */ { DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UINT },144/* DATA_FORMAT_R8G8B8A8_SSCALED */ { DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_SINT },145/* DATA_FORMAT_R8G8B8A8_UINT */ { DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UINT },146/* DATA_FORMAT_R8G8B8A8_SINT */ { DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_SINT },147/* DATA_FORMAT_R8G8B8A8_SRGB */ { DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM_SRGB },148/* DATA_FORMAT_B8G8R8A8_UNORM */ { DXGI_FORMAT_B8G8R8A8_TYPELESS, DXGI_FORMAT_B8G8R8A8_UNORM },149/* DATA_FORMAT_B8G8R8A8_SNORM */ { DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_SNORM },150/* DATA_FORMAT_B8G8R8A8_USCALED */ { 
DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UINT },151/* DATA_FORMAT_B8G8R8A8_SSCALED */ { DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_SINT },152/* DATA_FORMAT_B8G8R8A8_UINT */ { DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UINT },153/* DATA_FORMAT_B8G8R8A8_SINT */ { DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_SINT },154/* DATA_FORMAT_B8G8R8A8_SRGB */ { DXGI_FORMAT_B8G8R8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM_SRGB },155/* DATA_FORMAT_A8B8G8R8_UNORM_PACK32 */ { DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM },156/* DATA_FORMAT_A8B8G8R8_SNORM_PACK32 */ { DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_SNORM },157/* DATA_FORMAT_A8B8G8R8_USCALED_PACK32 */ { DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UINT },158/* DATA_FORMAT_A8B8G8R8_SSCALED_PACK32 */ { DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_SINT },159/* DATA_FORMAT_A8B8G8R8_UINT_PACK32 */ { DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UINT },160/* DATA_FORMAT_A8B8G8R8_SINT_PACK32 */ { DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_SINT },161/* DATA_FORMAT_A8B8G8R8_SRGB_PACK32 */ { DXGI_FORMAT_B8G8R8A8_TYPELESS, DXGI_FORMAT_B8G8R8A8_UNORM_SRGB },162/* DATA_FORMAT_A2R10G10B10_UNORM_PACK32 */ { DXGI_FORMAT_R10G10B10A2_TYPELESS, DXGI_FORMAT_R10G10B10A2_UNORM, D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(2, 1, 0, 3) },163/* DATA_FORMAT_A2R10G10B10_SNORM_PACK32 */ {},164/* DATA_FORMAT_A2R10G10B10_USCALED_PACK32 */ { DXGI_FORMAT_R10G10B10A2_TYPELESS, DXGI_FORMAT_R10G10B10A2_UINT, D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(2, 1, 0, 3) },165/* DATA_FORMAT_A2R10G10B10_SSCALED_PACK32 */ {},166/* DATA_FORMAT_A2R10G10B10_UINT_PACK32 */ { DXGI_FORMAT_R10G10B10A2_TYPELESS, DXGI_FORMAT_R10G10B10A2_UINT, D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(2, 1, 0, 3) },167/* DATA_FORMAT_A2R10G10B10_SINT_PACK32 */ {},168/* DATA_FORMAT_A2B10G10R10_UNORM_PACK32 */ { DXGI_FORMAT_R10G10B10A2_TYPELESS, DXGI_FORMAT_R10G10B10A2_UNORM },169/* DATA_FORMAT_A2B10G10R10_SNORM_PACK32 */ 
{},170/* DATA_FORMAT_A2B10G10R10_USCALED_PACK32 */ { DXGI_FORMAT_R10G10B10A2_TYPELESS, DXGI_FORMAT_R10G10B10A2_UINT },171/* DATA_FORMAT_A2B10G10R10_SSCALED_PACK32 */ {},172/* DATA_FORMAT_A2B10G10R10_UINT_PACK32 */ { DXGI_FORMAT_R10G10B10A2_TYPELESS, DXGI_FORMAT_R10G10B10A2_UINT },173/* DATA_FORMAT_A2B10G10R10_SINT_PACK32 */ {},174/* DATA_FORMAT_R16_UNORM */ { DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_UNORM },175/* DATA_FORMAT_R16_SNORM */ { DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_SNORM },176/* DATA_FORMAT_R16_USCALED */ { DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_UINT },177/* DATA_FORMAT_R16_SSCALED */ { DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_SINT },178/* DATA_FORMAT_R16_UINT */ { DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_UINT },179/* DATA_FORMAT_R16_SINT */ { DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_SINT },180/* DATA_FORMAT_R16_SFLOAT */ { DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_FLOAT },181/* DATA_FORMAT_R16G16_UNORM */ { DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_UNORM },182/* DATA_FORMAT_R16G16_SNORM */ { DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_SNORM },183/* DATA_FORMAT_R16G16_USCALED */ { DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_UINT },184/* DATA_FORMAT_R16G16_SSCALED */ { DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_SINT },185/* DATA_FORMAT_R16G16_UINT */ { DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_UINT },186/* DATA_FORMAT_R16G16_SINT */ { DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_SINT },187/* DATA_FORMAT_R16G16_SFLOAT */ { DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_FLOAT },188/* DATA_FORMAT_R16G16B16_UNORM */ {},189/* DATA_FORMAT_R16G16B16_SNORM */ {},190/* DATA_FORMAT_R16G16B16_USCALED */ {},191/* DATA_FORMAT_R16G16B16_SSCALED */ {},192/* DATA_FORMAT_R16G16B16_UINT */ {},193/* DATA_FORMAT_R16G16B16_SINT */ {},194/* DATA_FORMAT_R16G16B16_SFLOAT */ {},195/* DATA_FORMAT_R16G16B16A16_UNORM */ { DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM },196/* DATA_FORMAT_R16G16B16A16_SNORM */ { 
DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_SNORM },197/* DATA_FORMAT_R16G16B16A16_USCALED */ { DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UINT },198/* DATA_FORMAT_R16G16B16A16_SSCALED */ { DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_SINT },199/* DATA_FORMAT_R16G16B16A16_UINT */ { DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UINT },200/* DATA_FORMAT_R16G16B16A16_SINT */ { DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_SINT },201/* DATA_FORMAT_R16G16B16A16_SFLOAT */ { DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_FLOAT },202/* DATA_FORMAT_R32_UINT */ { DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_R32_UINT },203/* DATA_FORMAT_R32_SINT */ { DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_R32_SINT },204/* DATA_FORMAT_R32_SFLOAT */ { DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_R32_FLOAT },205/* DATA_FORMAT_R32G32_UINT */ { DXGI_FORMAT_R32G32_TYPELESS, DXGI_FORMAT_R32G32_UINT },206/* DATA_FORMAT_R32G32_SINT */ { DXGI_FORMAT_R32G32_TYPELESS, DXGI_FORMAT_R32G32_SINT },207/* DATA_FORMAT_R32G32_SFLOAT */ { DXGI_FORMAT_R32G32_TYPELESS, DXGI_FORMAT_R32G32_FLOAT },208/* DATA_FORMAT_R32G32B32_UINT */ { DXGI_FORMAT_R32G32B32_TYPELESS, DXGI_FORMAT_R32G32B32_UINT },209/* DATA_FORMAT_R32G32B32_SINT */ { DXGI_FORMAT_R32G32B32_TYPELESS, DXGI_FORMAT_R32G32B32_SINT },210/* DATA_FORMAT_R32G32B32_SFLOAT */ { DXGI_FORMAT_R32G32B32_TYPELESS, DXGI_FORMAT_R32G32B32_FLOAT },211/* DATA_FORMAT_R32G32B32A32_UINT */ { DXGI_FORMAT_R32G32B32A32_TYPELESS, DXGI_FORMAT_R32G32B32A32_UINT },212/* DATA_FORMAT_R32G32B32A32_SINT */ { DXGI_FORMAT_R32G32B32A32_TYPELESS, DXGI_FORMAT_R32G32B32A32_SINT },213/* DATA_FORMAT_R32G32B32A32_SFLOAT */ { DXGI_FORMAT_R32G32B32A32_TYPELESS, DXGI_FORMAT_R32G32B32A32_FLOAT },214/* DATA_FORMAT_R64_UINT */ {},215/* DATA_FORMAT_R64_SINT */ {},216/* DATA_FORMAT_R64_SFLOAT */ {},217/* DATA_FORMAT_R64G64_UINT */ {},218/* DATA_FORMAT_R64G64_SINT */ {},219/* DATA_FORMAT_R64G64_SFLOAT */ {},220/* 
DATA_FORMAT_R64G64B64_UINT */ {},221/* DATA_FORMAT_R64G64B64_SINT */ {},222/* DATA_FORMAT_R64G64B64_SFLOAT */ {},223/* DATA_FORMAT_R64G64B64A64_UINT */ {},224/* DATA_FORMAT_R64G64B64A64_SINT */ {},225/* DATA_FORMAT_R64G64B64A64_SFLOAT */ {},226/* DATA_FORMAT_B10G11R11_UFLOAT_PACK32 */ { DXGI_FORMAT_R11G11B10_FLOAT, DXGI_FORMAT_R11G11B10_FLOAT },227/* DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32 */ { DXGI_FORMAT_R9G9B9E5_SHAREDEXP, DXGI_FORMAT_R9G9B9E5_SHAREDEXP },228/* DATA_FORMAT_D16_UNORM */ { DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_UNORM, 0, DXGI_FORMAT_D16_UNORM },229/* DATA_FORMAT_X8_D24_UNORM_PACK32 */ { DXGI_FORMAT_R24G8_TYPELESS, DXGI_FORMAT_UNKNOWN, 0, DXGI_FORMAT_D24_UNORM_S8_UINT },230/* DATA_FORMAT_D32_SFLOAT */ { DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_R32_FLOAT, D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, DXGI_FORMAT_D32_FLOAT },231/* DATA_FORMAT_S8_UINT */ {},232/* DATA_FORMAT_D16_UNORM_S8_UINT */ {},233/* DATA_FORMAT_D24_UNORM_S8_UINT */ { DXGI_FORMAT_R24G8_TYPELESS, DXGI_FORMAT_UNKNOWN, 0, DXGI_FORMAT_D24_UNORM_S8_UINT },234/* DATA_FORMAT_D32_SFLOAT_S8_UINT */ { DXGI_FORMAT_R32G8X24_TYPELESS, DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, DXGI_FORMAT_D32_FLOAT_S8X24_UINT },235/* DATA_FORMAT_BC1_RGB_UNORM_BLOCK */ { DXGI_FORMAT_BC1_TYPELESS, DXGI_FORMAT_BC1_UNORM, D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(0, 1, 2, D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1) },236/* DATA_FORMAT_BC1_RGB_SRGB_BLOCK */ { DXGI_FORMAT_BC1_TYPELESS, DXGI_FORMAT_BC1_UNORM_SRGB, D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(0, 1, 2, D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1) },237/* DATA_FORMAT_BC1_RGBA_UNORM_BLOCK */ { DXGI_FORMAT_BC1_TYPELESS, DXGI_FORMAT_BC1_UNORM },238/* DATA_FORMAT_BC1_RGBA_SRGB_BLOCK */ { DXGI_FORMAT_BC1_TYPELESS, DXGI_FORMAT_BC1_UNORM_SRGB },239/* DATA_FORMAT_BC2_UNORM_BLOCK */ { DXGI_FORMAT_BC2_TYPELESS, DXGI_FORMAT_BC2_UNORM },240/* DATA_FORMAT_BC2_SRGB_BLOCK */ { DXGI_FORMAT_BC2_TYPELESS, DXGI_FORMAT_BC2_UNORM_SRGB },241/* 
DATA_FORMAT_BC3_UNORM_BLOCK */ { DXGI_FORMAT_BC3_TYPELESS, DXGI_FORMAT_BC3_UNORM },242/* DATA_FORMAT_BC3_SRGB_BLOCK */ { DXGI_FORMAT_BC3_TYPELESS, DXGI_FORMAT_BC3_UNORM_SRGB },243/* DATA_FORMAT_BC4_UNORM_BLOCK */ { DXGI_FORMAT_BC4_TYPELESS, DXGI_FORMAT_BC4_UNORM },244/* DATA_FORMAT_BC4_SNORM_BLOCK */ { DXGI_FORMAT_BC4_TYPELESS, DXGI_FORMAT_BC4_SNORM },245/* DATA_FORMAT_BC5_UNORM_BLOCK */ { DXGI_FORMAT_BC5_TYPELESS, DXGI_FORMAT_BC5_UNORM },246/* DATA_FORMAT_BC5_SNORM_BLOCK */ { DXGI_FORMAT_BC5_TYPELESS, DXGI_FORMAT_BC5_SNORM },247/* DATA_FORMAT_BC6H_UFLOAT_BLOCK */ { DXGI_FORMAT_BC6H_TYPELESS, DXGI_FORMAT_BC6H_UF16 },248/* DATA_FORMAT_BC6H_SFLOAT_BLOCK */ { DXGI_FORMAT_BC6H_TYPELESS, DXGI_FORMAT_BC6H_SF16 },249/* DATA_FORMAT_BC7_UNORM_BLOCK */ { DXGI_FORMAT_BC7_TYPELESS, DXGI_FORMAT_BC7_UNORM },250/* DATA_FORMAT_BC7_SRGB_BLOCK */ { DXGI_FORMAT_BC7_TYPELESS, DXGI_FORMAT_BC7_UNORM_SRGB },251/* DATA_FORMAT_ETC2_R8G8B8_UNORM_BLOCK */ {},252/* DATA_FORMAT_ETC2_R8G8B8_SRGB_BLOCK */ {},253/* DATA_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK */ {},254/* DATA_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK */ {},255/* DATA_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK */ {},256/* DATA_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK */ {},257/* DATA_FORMAT_EAC_R11_UNORM_BLOCK */ {},258/* DATA_FORMAT_EAC_R11_SNORM_BLOCK */ {},259/* DATA_FORMAT_EAC_R11G11_UNORM_BLOCK */ {},260/* DATA_FORMAT_EAC_R11G11_SNORM_BLOCK */ {},261/* DATA_FORMAT_ASTC_4x4_UNORM_BLOCK */ {},262/* DATA_FORMAT_ASTC_4x4_SRGB_BLOCK */ {},263/* DATA_FORMAT_ASTC_5x4_UNORM_BLOCK */ {},264/* DATA_FORMAT_ASTC_5x4_SRGB_BLOCK */ {},265/* DATA_FORMAT_ASTC_5x5_UNORM_BLOCK */ {},266/* DATA_FORMAT_ASTC_5x5_SRGB_BLOCK */ {},267/* DATA_FORMAT_ASTC_6x5_UNORM_BLOCK */ {},268/* DATA_FORMAT_ASTC_6x5_SRGB_BLOCK */ {},269/* DATA_FORMAT_ASTC_6x6_UNORM_BLOCK */ {},270/* DATA_FORMAT_ASTC_6x6_SRGB_BLOCK */ {},271/* DATA_FORMAT_ASTC_8x5_UNORM_BLOCK */ {},272/* DATA_FORMAT_ASTC_8x5_SRGB_BLOCK */ {},273/* DATA_FORMAT_ASTC_8x6_UNORM_BLOCK */ {},274/* DATA_FORMAT_ASTC_8x6_SRGB_BLOCK */ 
{},275/* DATA_FORMAT_ASTC_8x8_UNORM_BLOCK */ {},276/* DATA_FORMAT_ASTC_8x8_SRGB_BLOCK */ {},277/* DATA_FORMAT_ASTC_10x5_UNORM_BLOCK */ {},278/* DATA_FORMAT_ASTC_10x5_SRGB_BLOCK */ {},279/* DATA_FORMAT_ASTC_10x6_UNORM_BLOCK */ {},280/* DATA_FORMAT_ASTC_10x6_SRGB_BLOCK */ {},281/* DATA_FORMAT_ASTC_10x8_UNORM_BLOCK */ {},282/* DATA_FORMAT_ASTC_10x8_SRGB_BLOCK */ {},283/* DATA_FORMAT_ASTC_10x10_UNORM_BLOCK */ {},284/* DATA_FORMAT_ASTC_10x10_SRGB_BLOCK */ {},285/* DATA_FORMAT_ASTC_12x10_UNORM_BLOCK */ {},286/* DATA_FORMAT_ASTC_12x10_SRGB_BLOCK */ {},287/* DATA_FORMAT_ASTC_12x12_UNORM_BLOCK */ {},288/* DATA_FORMAT_ASTC_12x12_SRGB_BLOCK */ {},289/* DATA_FORMAT_G8B8G8R8_422_UNORM */ {},290/* DATA_FORMAT_B8G8R8G8_422_UNORM */ {},291/* DATA_FORMAT_G8_B8_R8_3PLANE_420_UNORM */ {},292/* DATA_FORMAT_G8_B8R8_2PLANE_420_UNORM */ {},293/* DATA_FORMAT_G8_B8_R8_3PLANE_422_UNORM */ {},294/* DATA_FORMAT_G8_B8R8_2PLANE_422_UNORM */ {},295/* DATA_FORMAT_G8_B8_R8_3PLANE_444_UNORM */ {},296/* DATA_FORMAT_R10X6_UNORM_PACK16 */ {},297/* DATA_FORMAT_R10X6G10X6_UNORM_2PACK16 */ {},298/* DATA_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16 */ {},299/* DATA_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16 */ {},300/* DATA_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16 */ {},301/* DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16 */ {},302/* DATA_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16 */ {},303/* DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16 */ {},304/* DATA_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16 */ {},305/* DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16 */ {},306/* DATA_FORMAT_R12X4_UNORM_PACK16 */ {},307/* DATA_FORMAT_R12X4G12X4_UNORM_2PACK16 */ {},308/* DATA_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16 */ {},309/* DATA_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16 */ {},310/* DATA_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16 */ {},311/* DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16 */ {},312/* DATA_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16 */ 
{},313/* DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16 */ {},314/* DATA_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16 */ {},315/* DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16 */ {},316/* DATA_FORMAT_G16B16G16R16_422_UNORM */ {},317/* DATA_FORMAT_B16G16R16G16_422_UNORM */ {},318/* DATA_FORMAT_G16_B16_R16_3PLANE_420_UNORM */ {},319/* DATA_FORMAT_G16_B16R16_2PLANE_420_UNORM */ {},320/* DATA_FORMAT_G16_B16_R16_3PLANE_422_UNORM */ {},321/* DATA_FORMAT_G16_B16R16_2PLANE_422_UNORM */ {},322/* DATA_FORMAT_G16_B16_R16_3PLANE_444_UNORM */ {},323/* DATA_FORMAT_ASTC_4x4_SFLOAT_BLOCK */ {},324/* DATA_FORMAT_ASTC_5x4_SFLOAT_BLOCK */ {},325/* DATA_FORMAT_ASTC_5x5_SFLOAT_BLOCK */ {},326/* DATA_FORMAT_ASTC_6x5_SFLOAT_BLOCK */ {},327/* DATA_FORMAT_ASTC_6x6_SFLOAT_BLOCK */ {},328/* DATA_FORMAT_ASTC_8x5_SFLOAT_BLOCK */ {},329/* DATA_FORMAT_ASTC_8x6_SFLOAT_BLOCK */ {},330/* DATA_FORMAT_ASTC_8x8_SFLOAT_BLOCK */ {},331/* DATA_FORMAT_ASTC_10x5_SFLOAT_BLOCK*/ {},332/* DATA_FORMAT_ASTC_10x6_SFLOAT_BLOCK */ {},333/* DATA_FORMAT_ASTC_10x8_SFLOAT_BLOCK */ {},334/* DATA_FORMAT_ASTC_10x10_SFLOAT_BLOCK */ {},335/* DATA_FORMAT_ASTC_12x10_SFLOAT_BLOCK */ {},336/* DATA_FORMAT_ASTC_12x12_SFLOAT_BLOCK */ {},337};338339Error RenderingDeviceDriverD3D12::DescriptorsHeap::allocate(ID3D12Device *p_device, D3D12_DESCRIPTOR_HEAP_TYPE p_type, uint32_t p_descriptor_count, bool p_for_gpu) {340ERR_FAIL_COND_V(heap, ERR_ALREADY_EXISTS);341ERR_FAIL_COND_V(p_descriptor_count == 0, ERR_INVALID_PARAMETER);342343handle_size = p_device->GetDescriptorHandleIncrementSize(p_type);344345desc.Type = p_type;346desc.NumDescriptors = p_descriptor_count;347desc.Flags = p_for_gpu ? 
D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE : D3D12_DESCRIPTOR_HEAP_FLAG_NONE;348HRESULT res = p_device->CreateDescriptorHeap(&desc, IID_PPV_ARGS(heap.GetAddressOf()));349ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "CreateDescriptorHeap failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");350351return OK;352}353354RenderingDeviceDriverD3D12::DescriptorsHeap::Walker RenderingDeviceDriverD3D12::DescriptorsHeap::make_walker() const {355Walker walker;356walker.handle_size = handle_size;357walker.handle_count = desc.NumDescriptors;358if (heap) {359#if defined(_MSC_VER) || !defined(_WIN32)360walker.first_cpu_handle = heap->GetCPUDescriptorHandleForHeapStart();361if ((desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)) {362walker.first_gpu_handle = heap->GetGPUDescriptorHandleForHeapStart();363}364#else365heap->GetCPUDescriptorHandleForHeapStart(&walker.first_cpu_handle);366if ((desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)) {367heap->GetGPUDescriptorHandleForHeapStart(&walker.first_gpu_handle);368}369#endif370}371return walker;372}373374void RenderingDeviceDriverD3D12::DescriptorsHeap::Walker::advance(uint32_t p_count) {375ERR_FAIL_COND_MSG(handle_index + p_count > handle_count, "Would advance past EOF.");376handle_index += p_count;377}378379D3D12_CPU_DESCRIPTOR_HANDLE RenderingDeviceDriverD3D12::DescriptorsHeap::Walker::get_curr_cpu_handle() {380ERR_FAIL_COND_V_MSG(is_at_eof(), D3D12_CPU_DESCRIPTOR_HANDLE(), "Heap walker is at EOF.");381return D3D12_CPU_DESCRIPTOR_HANDLE{ first_cpu_handle.ptr + handle_index * handle_size };382}383384D3D12_GPU_DESCRIPTOR_HANDLE RenderingDeviceDriverD3D12::DescriptorsHeap::Walker::get_curr_gpu_handle() {385ERR_FAIL_COND_V_MSG(!first_gpu_handle.ptr, D3D12_GPU_DESCRIPTOR_HANDLE(), "Can't provide a GPU handle from a non-GPU descriptors heap.");386ERR_FAIL_COND_V_MSG(is_at_eof(), D3D12_GPU_DESCRIPTOR_HANDLE(), "Heap walker is at EOF.");387return D3D12_GPU_DESCRIPTOR_HANDLE{ first_gpu_handle.ptr + handle_index 
* handle_size };388}389390static const D3D12_COMPARISON_FUNC RD_TO_D3D12_COMPARE_OP[RD::COMPARE_OP_MAX] = {391D3D12_COMPARISON_FUNC_NEVER,392D3D12_COMPARISON_FUNC_LESS,393D3D12_COMPARISON_FUNC_EQUAL,394D3D12_COMPARISON_FUNC_LESS_EQUAL,395D3D12_COMPARISON_FUNC_GREATER,396D3D12_COMPARISON_FUNC_NOT_EQUAL,397D3D12_COMPARISON_FUNC_GREATER_EQUAL,398D3D12_COMPARISON_FUNC_ALWAYS,399};400401uint32_t RenderingDeviceDriverD3D12::SubgroupCapabilities::supported_stages_flags_rd() const {402// If there's a way to check exactly which are supported, I have yet to find it.403return (404RenderingDevice::ShaderStage::SHADER_STAGE_FRAGMENT_BIT |405RenderingDevice::ShaderStage::SHADER_STAGE_COMPUTE_BIT);406}407408uint32_t RenderingDeviceDriverD3D12::SubgroupCapabilities::supported_operations_flags_rd() const {409if (!wave_ops_supported) {410return 0;411} else {412return (413RenderingDevice::SubgroupOperations::SUBGROUP_BASIC_BIT |414RenderingDevice::SubgroupOperations::SUBGROUP_BALLOT_BIT |415RenderingDevice::SubgroupOperations::SUBGROUP_VOTE_BIT |416RenderingDevice::SubgroupOperations::SUBGROUP_SHUFFLE_BIT |417RenderingDevice::SubgroupOperations::SUBGROUP_SHUFFLE_RELATIVE_BIT |418RenderingDevice::SubgroupOperations::SUBGROUP_QUAD_BIT |419RenderingDevice::SubgroupOperations::SUBGROUP_ARITHMETIC_BIT |420RenderingDevice::SubgroupOperations::SUBGROUP_CLUSTERED_BIT);421}422}423424void RenderingDeviceDriverD3D12::_debug_message_func(D3D12_MESSAGE_CATEGORY p_category, D3D12_MESSAGE_SEVERITY p_severity, D3D12_MESSAGE_ID p_id, LPCSTR p_description, void *p_context) {425String type_string;426switch (p_category) {427case D3D12_MESSAGE_CATEGORY_APPLICATION_DEFINED:428type_string = "APPLICATION_DEFINED";429break;430case D3D12_MESSAGE_CATEGORY_MISCELLANEOUS:431type_string = "MISCELLANEOUS";432break;433case D3D12_MESSAGE_CATEGORY_INITIALIZATION:434type_string = "INITIALIZATION";435break;436case D3D12_MESSAGE_CATEGORY_CLEANUP:437type_string = "CLEANUP";438break;439case 
D3D12_MESSAGE_CATEGORY_COMPILATION:440type_string = "COMPILATION";441break;442case D3D12_MESSAGE_CATEGORY_STATE_CREATION:443type_string = "STATE_CREATION";444break;445case D3D12_MESSAGE_CATEGORY_STATE_SETTING:446type_string = "STATE_SETTING";447break;448case D3D12_MESSAGE_CATEGORY_STATE_GETTING:449type_string = "STATE_GETTING";450break;451case D3D12_MESSAGE_CATEGORY_RESOURCE_MANIPULATION:452type_string = "RESOURCE_MANIPULATION";453break;454case D3D12_MESSAGE_CATEGORY_EXECUTION:455type_string = "EXECUTION";456break;457case D3D12_MESSAGE_CATEGORY_SHADER:458type_string = "SHADER";459break;460}461462String error_message(type_string +463" - Message Id Number: " + String::num_int64(p_id) +464"\n\t" + p_description);465466// Convert D3D12 severity to our own log macros.467switch (p_severity) {468case D3D12_MESSAGE_SEVERITY_MESSAGE:469print_verbose(error_message);470break;471case D3D12_MESSAGE_SEVERITY_INFO:472print_line(error_message);473break;474case D3D12_MESSAGE_SEVERITY_WARNING:475WARN_PRINT(error_message);476break;477case D3D12_MESSAGE_SEVERITY_ERROR:478case D3D12_MESSAGE_SEVERITY_CORRUPTION:479ERR_PRINT(error_message);480CRASH_COND_MSG(Engine::get_singleton()->is_abort_on_gpu_errors_enabled(),481"Crashing, because abort on GPU errors is enabled.");482break;483}484}485486/******************/487/**** RESOURCE ****/488/******************/489490static const D3D12_RESOURCE_DIMENSION RD_TEXTURE_TYPE_TO_D3D12_RESOURCE_DIMENSION[RD::TEXTURE_TYPE_MAX] = {491D3D12_RESOURCE_DIMENSION_TEXTURE1D,492D3D12_RESOURCE_DIMENSION_TEXTURE2D,493D3D12_RESOURCE_DIMENSION_TEXTURE3D,494D3D12_RESOURCE_DIMENSION_TEXTURE2D,495D3D12_RESOURCE_DIMENSION_TEXTURE1D,496D3D12_RESOURCE_DIMENSION_TEXTURE2D,497D3D12_RESOURCE_DIMENSION_TEXTURE2D,498};499500void RenderingDeviceDriverD3D12::_resource_transition_batch(CommandBufferInfo *p_command_buffer, ResourceInfo *p_resource, uint32_t p_subresource, uint32_t p_num_planes, D3D12_RESOURCE_STATES p_new_state) {501DEV_ASSERT(p_subresource != UINT32_MAX); // 
We don't support an "all-resources" command here.502503ResourceInfo::States *res_states = p_resource->states_ptr;504D3D12_RESOURCE_STATES *curr_state = &res_states->subresource_states[p_subresource];505506// Transitions can be considered redundant if the current state has all the bits of the new state.507// This check does not apply to the common state however, which must resort to checking if the state is the same (0).508bool any_state_is_common = *curr_state == D3D12_RESOURCE_STATE_COMMON || p_new_state == D3D12_RESOURCE_STATE_COMMON;509bool redundant_transition = any_state_is_common ? *curr_state == p_new_state : ((*curr_state) & p_new_state) == p_new_state;510if (redundant_transition) {511bool just_written = *curr_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS;512bool needs_uav_barrier = just_written && res_states->last_batch_with_uav_barrier != p_command_buffer->res_barriers_batch;513if (needs_uav_barrier) {514if (p_command_buffer->res_barriers.size() < p_command_buffer->res_barriers_count + 1) {515p_command_buffer->res_barriers.resize(p_command_buffer->res_barriers_count + 1);516}517p_command_buffer->res_barriers[p_command_buffer->res_barriers_count] = CD3DX12_RESOURCE_BARRIER::UAV(p_resource->resource);518p_command_buffer->res_barriers_count++;519res_states->last_batch_with_uav_barrier = p_command_buffer->res_barriers_batch;520}521} else {522uint64_t subres_mask_piece = ((uint64_t)1 << (p_subresource & 0b111111));523uint8_t subres_qword = p_subresource >> 6;524525if (p_command_buffer->res_barriers_requests.has(res_states)) {526BarrierRequest &br = p_command_buffer->res_barriers_requests.get(res_states);527DEV_ASSERT(br.dx_resource == p_resource->resource);528DEV_ASSERT(br.subres_mask_qwords == STEPIFY(res_states->subresource_states.size(), 64) / 64);529DEV_ASSERT(br.planes == p_num_planes);530531// First, find if the subresource already has a barrier scheduled.532uint8_t curr_group_idx = 0;533bool same_transition_scheduled = false;534for (curr_group_idx = 0; 
curr_group_idx < br.groups_count; curr_group_idx++) {535if (unlikely(br.groups[curr_group_idx].states == BarrierRequest::DELETED_GROUP)) {536continue;537}538if ((br.groups[curr_group_idx].subres_mask[subres_qword] & subres_mask_piece)) {539uint32_t state_mask = br.groups[curr_group_idx].states;540same_transition_scheduled = (state_mask & (uint32_t)p_new_state) == (uint32_t)p_new_state;541break;542}543}544if (!same_transition_scheduled) {545bool subres_already_there = curr_group_idx != br.groups_count;546D3D12_RESOURCE_STATES final_states = {};547if (subres_already_there) {548final_states = br.groups[curr_group_idx].states;549final_states |= p_new_state;550bool subres_alone = true;551for (uint8_t i = 0; i < br.subres_mask_qwords; i++) {552if (i == subres_qword) {553if (br.groups[curr_group_idx].subres_mask[i] != subres_mask_piece) {554subres_alone = false;555break;556}557} else {558if (br.groups[curr_group_idx].subres_mask[i] != 0) {559subres_alone = false;560break;561}562}563}564bool relocated = false;565if (subres_alone) {566// Subresource is there by itself.567for (uint8_t i = 0; i < br.groups_count; i++) {568if (unlikely(i == curr_group_idx)) {569continue;570}571if (unlikely(br.groups[i].states == BarrierRequest::DELETED_GROUP)) {572continue;573}574// There's another group with the final states; relocate to it.575if (br.groups[i].states == final_states) {576br.groups[curr_group_idx].subres_mask[subres_qword] &= ~subres_mask_piece;577relocated = true;578break;579}580}581if (relocated) {582// Let's delete the group where it used to be by itself.583if (curr_group_idx == br.groups_count - 1) {584br.groups_count--;585} else {586br.groups[curr_group_idx].states = BarrierRequest::DELETED_GROUP;587}588} else {589// Its current group, where it's alone, can extend its states.590br.groups[curr_group_idx].states = final_states;591}592} else {593// Already there, but not by itself and the state mask is different, so it now belongs to a different 
group.594br.groups[curr_group_idx].subres_mask[subres_qword] &= ~subres_mask_piece;595subres_already_there = false;596}597} else {598final_states = p_new_state;599}600if (!subres_already_there) {601// See if it fits exactly the states of some of the groups to fit it there.602for (uint8_t i = 0; i < br.groups_count; i++) {603if (unlikely(i == curr_group_idx)) {604continue;605}606if (unlikely(br.groups[i].states == BarrierRequest::DELETED_GROUP)) {607continue;608}609if (br.groups[i].states == final_states) {610br.groups[i].subres_mask[subres_qword] |= subres_mask_piece;611subres_already_there = true;612break;613}614}615if (!subres_already_there) {616// Add a new group to accommodate this subresource.617uint8_t group_to_fill = 0;618if (br.groups_count < BarrierRequest::MAX_GROUPS) {619// There are still free groups.620group_to_fill = br.groups_count;621br.groups_count++;622} else {623// Let's try to take over a deleted one.624for (; group_to_fill < br.groups_count; group_to_fill++) {625if (unlikely(br.groups[group_to_fill].states == BarrierRequest::DELETED_GROUP)) {626break;627}628}629CRASH_COND(group_to_fill == br.groups_count);630}631632br.groups[group_to_fill].states = final_states;633for (uint8_t i = 0; i < br.subres_mask_qwords; i++) {634if (unlikely(i == subres_qword)) {635br.groups[group_to_fill].subres_mask[i] = subres_mask_piece;636} else {637br.groups[group_to_fill].subres_mask[i] = 0;638}639}640}641}642}643} else {644BarrierRequest &br = p_command_buffer->res_barriers_requests[res_states];645br.dx_resource = p_resource->resource;646br.subres_mask_qwords = STEPIFY(p_resource->states_ptr->subresource_states.size(), 64) / 64;647CRASH_COND(p_resource->states_ptr->subresource_states.size() > BarrierRequest::MAX_SUBRESOURCES);648br.planes = p_num_planes;649br.groups[0].states = p_new_state;650for (uint8_t i = 0; i < br.subres_mask_qwords; i++) {651if (unlikely(i == subres_qword)) {652br.groups[0].subres_mask[i] = subres_mask_piece;653} else 
// Converts every pending barrier request of the command buffer into D3D12 resource barriers,
// records them on the command list and updates the tracked per-subresource states.
//
// For each requested resource, either a single all-subresources transition is emitted (when the
// request has exactly one group covering every subresource and all of them share the same source
// state) or one transition per subresource and plane is emitted.
//
// p_command_buffer: command buffer whose `res_barriers_requests` are consumed; its scratch
// `res_barriers` array is grown on demand and `res_barriers_batch` is advanced on every call.
void RenderingDeviceDriverD3D12::_resource_transitions_flush(CommandBufferInfo *p_command_buffer) {
	for (const KeyValue<ResourceInfo::States *, BarrierRequest> &E : p_command_buffer->res_barriers_requests) {
		ResourceInfo::States *res_states = E.key;
		const BarrierRequest &br = E.value;

		uint32_t num_subresources = res_states->subresource_states.size();

		// When there's not a lot of subresources, the empirical finding is that it's better
		// to avoid attempting the single-barrier optimization.
		// Note that, despite its name, this constant is used as a lower bound (see the `>=` below).
		static const uint32_t SINGLE_BARRIER_ATTEMPT_MAX_NUM_SUBRESOURCES = 48;

		bool may_do_single_barrier = br.groups_count == 1 && num_subresources * br.planes >= SINGLE_BARRIER_ATTEMPT_MAX_NUM_SUBRESOURCES;
		if (may_do_single_barrier) {
			// A single group means we may be able to do a single all-subresources barrier.

			{
				// First requisite is that all subresources are involved.

				// Every fully used 64-bit mask word must have all of its bits set.
				uint8_t subres_mask_full_qwords = num_subresources / 64;
				for (uint32_t i = 0; i < subres_mask_full_qwords; i++) {
					if (br.groups[0].subres_mask[i] != UINT64_MAX) {
						may_do_single_barrier = false;
						break;
					}
				}
				if (may_do_single_barrier) {
					// The trailing, partially used mask word only needs its low (num_subresources % 64) bits set.
					if (num_subresources % 64) {
						DEV_ASSERT(br.subres_mask_qwords == subres_mask_full_qwords + 1);
						uint64_t mask_tail_qword = 0;
						for (uint8_t i = 0; i < num_subresources % 64; i++) {
							mask_tail_qword |= ((uint64_t)1 << i);
						}
						if ((br.groups[0].subres_mask[subres_mask_full_qwords] & mask_tail_qword) != mask_tail_qword) {
							may_do_single_barrier = false;
						}
					}
				}
			}

			if (may_do_single_barrier) {
				// Second requisite is that the source state is the same for all.

				for (uint32_t i = 1; i < num_subresources; i++) {
					if (res_states->subresource_states[i] != res_states->subresource_states[0]) {
						may_do_single_barrier = false;
						break;
					}
				}

				if (may_do_single_barrier) {
					// Hurray!, we can do a single barrier (plus maybe a UAV one, too).

					// A UAV barrier is added at most once per batch for a resource currently in the UAV state.
					bool just_written = res_states->subresource_states[0] == D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
					bool needs_uav_barrier = just_written && res_states->last_batch_with_uav_barrier != p_command_buffer->res_barriers_batch;

					// Make sure the scratch array can hold the barriers that may be written below.
					uint32_t needed_barriers = (needs_uav_barrier ? 1 : 0) + 1;
					if (p_command_buffer->res_barriers.size() < p_command_buffer->res_barriers_count + needed_barriers) {
						p_command_buffer->res_barriers.resize(p_command_buffer->res_barriers_count + needed_barriers);
					}

					if (needs_uav_barrier) {
						p_command_buffer->res_barriers[p_command_buffer->res_barriers_count] = CD3DX12_RESOURCE_BARRIER::UAV(br.dx_resource);
						p_command_buffer->res_barriers_count++;
						res_states->last_batch_with_uav_barrier = p_command_buffer->res_barriers_batch;
					}

					// Only emit the transition if the state actually changes.
					if (res_states->subresource_states[0] != br.groups[0].states) {
						p_command_buffer->res_barriers[p_command_buffer->res_barriers_count] = CD3DX12_RESOURCE_BARRIER::Transition(br.dx_resource, res_states->subresource_states[0], br.groups[0].states, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES);
						p_command_buffer->res_barriers_count++;
					}

					// Record the new state for every subresource.
					for (uint32_t i = 0; i < num_subresources; i++) {
						res_states->subresource_states[i] = br.groups[0].states;
					}
				}
			}
		}

		if (!may_do_single_barrier) {
			// Fallback: walk every group and emit barriers subresource by subresource.
			for (uint8_t i = 0; i < br.groups_count; i++) {
				const BarrierRequest::Group &g = E.value.groups[i];

				if (unlikely(g.states == BarrierRequest::DELETED_GROUP)) {
					continue;
				}

				uint32_t subresource = 0;
				do {
					uint64_t subres_mask_piece = ((uint64_t)1 << (subresource % 64));
					uint8_t subres_qword = subresource / 64;

					// Nothing requested in this whole 64-subresource word; jump over it.
					// (`continue` inside a do-while goes to the loop condition.)
					if (likely(g.subres_mask[subres_qword] == 0)) {
						subresource += 64;
						continue;
					}

					// This particular subresource is not part of the group.
					if (likely(!(g.subres_mask[subres_qword] & subres_mask_piece))) {
						subresource++;
						continue;
					}

					D3D12_RESOURCE_STATES *curr_state = &res_states->subresource_states[subresource];

					// A UAV barrier is added at most once per batch for a resource currently in the UAV state.
					bool just_written = *curr_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
					bool needs_uav_barrier = just_written && res_states->last_batch_with_uav_barrier != p_command_buffer->res_barriers_batch;

					// One transition per plane may be needed, plus the optional UAV barrier.
					uint32_t needed_barriers = (needs_uav_barrier ? 1 : 0) + br.planes;
					if (p_command_buffer->res_barriers.size() < p_command_buffer->res_barriers_count + needed_barriers) {
						p_command_buffer->res_barriers.resize(p_command_buffer->res_barriers_count + needed_barriers);
					}

					if (needs_uav_barrier) {
						p_command_buffer->res_barriers[p_command_buffer->res_barriers_count] = CD3DX12_RESOURCE_BARRIER::UAV(br.dx_resource);
						p_command_buffer->res_barriers_count++;
						res_states->last_batch_with_uav_barrier = p_command_buffer->res_barriers_batch;
					}

					if (*curr_state != g.states) {
						// The same subresource of plane k is addressed at an offset of k * num_subresources.
						for (uint8_t k = 0; k < br.planes; k++) {
							p_command_buffer->res_barriers[p_command_buffer->res_barriers_count] = CD3DX12_RESOURCE_BARRIER::Transition(br.dx_resource, *curr_state, g.states, subresource + k * num_subresources);
							p_command_buffer->res_barriers_count++;
						}
					}

					*curr_state = g.states;

					subresource++;
				} while (subresource < num_subresources);
			}
		}
	}

	// The recorded barriers are submitted in one call. Note that the pending requests are
	// cleared only when at least one barrier was recorded.
	if (p_command_buffer->res_barriers_count) {
		p_command_buffer->cmd_list->ResourceBarrier(p_command_buffer->res_barriers_count, p_command_buffer->res_barriers.ptr());
		p_command_buffer->res_barriers_requests.clear();
	}

	// Start a new batch; the batch number is what limits UAV barriers to one per resource per flush.
	p_command_buffer->res_barriers_count = 0;
	p_command_buffer->res_barriers_batch++;
}
Therefore, it seems the only way is to create it with a811// rounded size.812CD3DX12_RESOURCE_DESC1 resource_desc = CD3DX12_RESOURCE_DESC1::Buffer(STEPIFY(p_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT));813if (p_usage.has_flag(RDD::BUFFER_USAGE_STORAGE_BIT)) {814resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;815} else {816resource_desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;817}818819D3D12MA::ALLOCATION_DESC allocation_desc = {};820allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT;821D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COMMON;822switch (p_allocation_type) {823case MEMORY_ALLOCATION_TYPE_CPU: {824bool is_src = p_usage.has_flag(BUFFER_USAGE_TRANSFER_FROM_BIT);825bool is_dst = p_usage.has_flag(BUFFER_USAGE_TRANSFER_TO_BIT);826if (is_src && !is_dst) {827// Looks like a staging buffer: CPU maps, writes sequentially, then GPU copies to VRAM.828allocation_desc.HeapType = D3D12_HEAP_TYPE_UPLOAD;829initial_state = D3D12_RESOURCE_STATE_GENERIC_READ;830}831if (is_dst && !is_src) {832// Looks like a readback buffer: GPU copies from VRAM, then CPU maps and reads.833allocation_desc.HeapType = D3D12_HEAP_TYPE_READBACK;834initial_state = D3D12_RESOURCE_STATE_COPY_DEST;835}836} break;837case MEMORY_ALLOCATION_TYPE_GPU: {838// Use default parameters.839} break;840}841842ComPtr<ID3D12Resource> buffer;843ComPtr<D3D12MA::Allocation> allocation;844HRESULT res;845if (barrier_capabilities.enhanced_barriers_supported) {846res = allocator->CreateResource3(847&allocation_desc,848&resource_desc,849D3D12_BARRIER_LAYOUT_UNDEFINED,850nullptr,8510,852nullptr,853allocation.GetAddressOf(),854IID_PPV_ARGS(buffer.GetAddressOf()));855} else {856res = allocator->CreateResource(857&allocation_desc,858reinterpret_cast<const D3D12_RESOURCE_DESC *>(&resource_desc),859initial_state,860nullptr,861allocation.GetAddressOf(),862IID_PPV_ARGS(buffer.GetAddressOf()));863}864865ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), BufferID(), "Can't create buffer of 
size: " + itos(p_size) + ", error " + vformat("0x%08ux", (uint64_t)res) + ".");866867// Bookkeep.868869BufferInfo *buf_info = VersatileResource::allocate<BufferInfo>(resources_allocator);870buf_info->resource = buffer.Get();871buf_info->owner_info.resource = buffer;872buf_info->owner_info.allocation = allocation;873buf_info->owner_info.states.subresource_states.push_back(initial_state);874buf_info->states_ptr = &buf_info->owner_info.states;875buf_info->size = p_size;876buf_info->flags.usable_as_uav = (resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);877878return BufferID(buf_info);879}880881bool RenderingDeviceDriverD3D12::buffer_set_texel_format(BufferID p_buffer, DataFormat p_format) {882BufferInfo *buf_info = (BufferInfo *)p_buffer.id;883buf_info->texel_format = p_format;884return true;885}886887void RenderingDeviceDriverD3D12::buffer_free(BufferID p_buffer) {888BufferInfo *buf_info = (BufferInfo *)p_buffer.id;889VersatileResource::free(resources_allocator, buf_info);890}891892uint64_t RenderingDeviceDriverD3D12::buffer_get_allocation_size(BufferID p_buffer) {893const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;894return buf_info->owner_info.allocation ? 
buf_info->owner_info.allocation->GetSize() : 0;895}896897uint8_t *RenderingDeviceDriverD3D12::buffer_map(BufferID p_buffer) {898const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;899void *data_ptr = nullptr;900HRESULT res = buf_info->resource->Map(0, &VOID_RANGE, &data_ptr);901ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), nullptr, "Map failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");902return (uint8_t *)data_ptr;903}904905void RenderingDeviceDriverD3D12::buffer_unmap(BufferID p_buffer) {906const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;907buf_info->resource->Unmap(0, &VOID_RANGE);908}909910uint64_t RenderingDeviceDriverD3D12::buffer_get_device_address(BufferID p_buffer) {911const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;912return buf_info->resource->GetGPUVirtualAddress();913}914915/*****************/916/**** TEXTURE ****/917/*****************/918919static const D3D12_SRV_DIMENSION RD_TEXTURE_TYPE_TO_D3D12_VIEW_DIMENSION_FOR_SRV[RD::TEXTURE_TYPE_MAX] = {920D3D12_SRV_DIMENSION_TEXTURE1D,921D3D12_SRV_DIMENSION_TEXTURE2D,922D3D12_SRV_DIMENSION_TEXTURE3D,923D3D12_SRV_DIMENSION_TEXTURECUBE,924D3D12_SRV_DIMENSION_TEXTURE1DARRAY,925D3D12_SRV_DIMENSION_TEXTURE2DARRAY,926D3D12_SRV_DIMENSION_TEXTURECUBEARRAY,927};928929static const D3D12_SRV_DIMENSION RD_TEXTURE_TYPE_TO_D3D12_VIEW_DIMENSION_FOR_SRV_MS[RD::TEXTURE_TYPE_MAX] = {930D3D12_SRV_DIMENSION_UNKNOWN,931D3D12_SRV_DIMENSION_TEXTURE2DMS,932D3D12_SRV_DIMENSION_UNKNOWN,933D3D12_SRV_DIMENSION_UNKNOWN,934D3D12_SRV_DIMENSION_UNKNOWN,935D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY,936D3D12_SRV_DIMENSION_UNKNOWN,937};938939static const D3D12_UAV_DIMENSION RD_TEXTURE_TYPE_TO_D3D12_VIEW_DIMENSION_FOR_UAV[RD::TEXTURE_TYPE_MAX] = {940D3D12_UAV_DIMENSION_TEXTURE1D,941D3D12_UAV_DIMENSION_TEXTURE2D,942D3D12_UAV_DIMENSION_TEXTURE3D,943D3D12_UAV_DIMENSION_TEXTURE2DARRAY,944D3D12_UAV_DIMENSION_TEXTURE1DARRAY,945D3D12_UAV_DIMENSION_TEXTURE2DARRAY,946D3D12_UAV_DIMENSION_TEXTURE2DARRAY,947};948949uint32_t 
RenderingDeviceDriverD3D12::_find_max_common_supported_sample_count(VectorView<DXGI_FORMAT> p_formats) {950uint32_t common = UINT32_MAX;951952MutexLock lock(format_sample_counts_mask_cache_mutex);953for (uint32_t i = 0; i < p_formats.size(); i++) {954if (format_sample_counts_mask_cache.has(p_formats[i])) {955common &= format_sample_counts_mask_cache[p_formats[i]];956} else {957D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS msql = {};958msql.Format = p_formats[i];959uint32_t mask = 0;960for (int samples = 1 << (TEXTURE_SAMPLES_MAX - 1); samples >= 1; samples /= 2) {961msql.SampleCount = (UINT)samples;962HRESULT res = device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &msql, sizeof(msql));963if (SUCCEEDED(res) && msql.NumQualityLevels) {964int bit = get_shift_from_power_of_2((uint32_t)samples);965ERR_FAIL_COND_V(bit == -1, 1);966mask |= (uint32_t)(1 << bit);967}968}969format_sample_counts_mask_cache.insert(p_formats[i], mask);970common &= mask;971}972}973if (common == UINT32_MAX) {974return 1;975} else {976return ((uint32_t)1 << nearest_shift(common));977}978}979980UINT RenderingDeviceDriverD3D12::_compute_component_mapping(const RDD::TextureView &p_view) {981UINT base_swizzle = RD_TO_D3D12_FORMAT[p_view.format].swizzle;982983D3D12_SHADER_COMPONENT_MAPPING component_swizzles[TEXTURE_SWIZZLE_MAX] = {984D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, // Unused.985D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,986D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1,987// These will be D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_*.988D3D12_DECODE_SHADER_4_COMPONENT_MAPPING(0, base_swizzle),989D3D12_DECODE_SHADER_4_COMPONENT_MAPPING(1, base_swizzle),990D3D12_DECODE_SHADER_4_COMPONENT_MAPPING(2, base_swizzle),991D3D12_DECODE_SHADER_4_COMPONENT_MAPPING(3, base_swizzle),992};993994return D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(995p_view.swizzle_r == TEXTURE_SWIZZLE_IDENTITY ? 
component_swizzles[TEXTURE_SWIZZLE_R] : component_swizzles[p_view.swizzle_r],996p_view.swizzle_g == TEXTURE_SWIZZLE_IDENTITY ? component_swizzles[TEXTURE_SWIZZLE_G] : component_swizzles[p_view.swizzle_g],997p_view.swizzle_b == TEXTURE_SWIZZLE_IDENTITY ? component_swizzles[TEXTURE_SWIZZLE_B] : component_swizzles[p_view.swizzle_b],998p_view.swizzle_a == TEXTURE_SWIZZLE_IDENTITY ? component_swizzles[TEXTURE_SWIZZLE_A] : component_swizzles[p_view.swizzle_a]);999}10001001UINT RenderingDeviceDriverD3D12::_compute_plane_slice(DataFormat p_format, BitField<TextureAspectBits> p_aspect_bits) {1002TextureAspect aspect = TEXTURE_ASPECT_MAX;10031004if (p_aspect_bits.has_flag(TEXTURE_ASPECT_COLOR_BIT)) {1005DEV_ASSERT(aspect == TEXTURE_ASPECT_MAX);1006aspect = TEXTURE_ASPECT_COLOR;1007}1008if (p_aspect_bits.has_flag(TEXTURE_ASPECT_DEPTH_BIT)) {1009DEV_ASSERT(aspect == TEXTURE_ASPECT_MAX);1010aspect = TEXTURE_ASPECT_DEPTH;1011} else if (p_aspect_bits.has_flag(TEXTURE_ASPECT_STENCIL_BIT)) {1012DEV_ASSERT(aspect == TEXTURE_ASPECT_MAX);1013aspect = TEXTURE_ASPECT_STENCIL;1014}10151016DEV_ASSERT(aspect != TEXTURE_ASPECT_MAX);10171018return _compute_plane_slice(p_format, aspect);1019}10201021UINT RenderingDeviceDriverD3D12::_compute_plane_slice(DataFormat p_format, TextureAspect p_aspect) {1022switch (p_aspect) {1023case TEXTURE_ASPECT_COLOR:1024// The plane must be 0 for the color aspect (assuming the format is a regular color one, which must be the case).1025return 0;1026case TEXTURE_ASPECT_DEPTH:1027// The plane must be 0 for the color or depth aspect1028return 0;1029case TEXTURE_ASPECT_STENCIL:1030// The plane may be 0 for the stencil aspect (if the format is stencil-only), or 1 (if the format is depth-stencil; other cases are ill).1031return format_get_plane_count(p_format) == 2 ? 
1 : 0;1032default:1033DEV_ASSERT(false);1034return 0;1035}1036}10371038UINT RenderingDeviceDriverD3D12::_compute_subresource_from_layers(TextureInfo *p_texture, const TextureSubresourceLayers &p_layers, uint32_t p_layer_offset) {1039return D3D12CalcSubresource(p_layers.mipmap, p_layers.base_layer + p_layer_offset, _compute_plane_slice(p_texture->format, p_layers.aspect), p_texture->desc.MipLevels, p_texture->desc.ArraySize());1040}10411042void RenderingDeviceDriverD3D12::_discard_texture_subresources(const TextureInfo *p_tex_info, const CommandBufferInfo *p_cmd_buf_info) {1043uint32_t planes = 1;1044if ((p_tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) {1045planes = format_get_plane_count(p_tex_info->format);1046}1047D3D12_DISCARD_REGION dr = {};1048dr.NumRects = p_cmd_buf_info->render_pass_state.region_is_all ? 0 : 1;1049dr.pRects = p_cmd_buf_info->render_pass_state.region_is_all ? nullptr : &p_cmd_buf_info->render_pass_state.region_rect;1050dr.FirstSubresource = UINT_MAX;1051dr.NumSubresources = 0;1052for (uint32_t u = 0; u < planes; u++) {1053for (uint32_t v = 0; v < p_tex_info->layers; v++) {1054for (uint32_t w = 0; w < p_tex_info->mipmaps; w++) {1055UINT subresource = D3D12CalcSubresource(1056p_tex_info->base_mip + w,1057p_tex_info->base_layer + v,1058u,1059p_tex_info->desc.MipLevels,1060p_tex_info->desc.ArraySize());1061if (dr.NumSubresources == 0) {1062dr.FirstSubresource = subresource;1063dr.NumSubresources = 1;1064} else if (dr.FirstSubresource + dr.NumSubresources == subresource) {1065dr.NumSubresources++;1066} else {1067p_cmd_buf_info->cmd_list->DiscardResource(p_tex_info->resource, &dr);1068dr.FirstSubresource = subresource;1069dr.NumSubresources = 1;1070}1071}1072}1073}1074if (dr.NumSubresources) {1075p_cmd_buf_info->cmd_list->DiscardResource(p_tex_info->resource, &dr);1076}1077}10781079bool RenderingDeviceDriverD3D12::_unordered_access_supported_by_format(DataFormat p_format) {1080switch (p_format) {1081case 
DATA_FORMAT_R4G4_UNORM_PACK8:1082case DATA_FORMAT_R4G4B4A4_UNORM_PACK16:1083case DATA_FORMAT_B4G4R4A4_UNORM_PACK16:1084case DATA_FORMAT_R5G6B5_UNORM_PACK16:1085case DATA_FORMAT_B5G6R5_UNORM_PACK16:1086case DATA_FORMAT_R5G5B5A1_UNORM_PACK16:1087case DATA_FORMAT_B5G5R5A1_UNORM_PACK16:1088case DATA_FORMAT_A1R5G5B5_UNORM_PACK16:1089case DATA_FORMAT_A8B8G8R8_UNORM_PACK32:1090case DATA_FORMAT_A8B8G8R8_SNORM_PACK32:1091case DATA_FORMAT_A8B8G8R8_USCALED_PACK32:1092case DATA_FORMAT_A8B8G8R8_SSCALED_PACK32:1093case DATA_FORMAT_A8B8G8R8_UINT_PACK32:1094case DATA_FORMAT_A8B8G8R8_SINT_PACK32:1095case DATA_FORMAT_A8B8G8R8_SRGB_PACK32:1096case DATA_FORMAT_A2R10G10B10_UNORM_PACK32:1097case DATA_FORMAT_A2R10G10B10_SNORM_PACK32:1098case DATA_FORMAT_A2R10G10B10_USCALED_PACK32:1099case DATA_FORMAT_A2R10G10B10_SSCALED_PACK32:1100case DATA_FORMAT_A2R10G10B10_UINT_PACK32:1101case DATA_FORMAT_A2R10G10B10_SINT_PACK32:1102case DATA_FORMAT_A2B10G10R10_UNORM_PACK32:1103case DATA_FORMAT_A2B10G10R10_SNORM_PACK32:1104case DATA_FORMAT_A2B10G10R10_USCALED_PACK32:1105case DATA_FORMAT_A2B10G10R10_SSCALED_PACK32:1106case DATA_FORMAT_A2B10G10R10_UINT_PACK32:1107case DATA_FORMAT_A2B10G10R10_SINT_PACK32:1108case DATA_FORMAT_B10G11R11_UFLOAT_PACK32:1109case DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32:1110case DATA_FORMAT_X8_D24_UNORM_PACK32:1111case DATA_FORMAT_R10X6_UNORM_PACK16:1112case DATA_FORMAT_R10X6G10X6_UNORM_2PACK16:1113case DATA_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16:1114case DATA_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:1115case DATA_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:1116case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:1117case DATA_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:1118case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:1119case DATA_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:1120case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:1121case DATA_FORMAT_R12X4_UNORM_PACK16:1122case DATA_FORMAT_R12X4G12X4_UNORM_2PACK16:1123case 
DATA_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16:1124case DATA_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:1125case DATA_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:1126case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:1127case DATA_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:1128case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:1129case DATA_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:1130case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:1131return false;1132default:1133return true;1134}1135}11361137RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p_format, const TextureView &p_view) {1138// Using D3D12_RESOURCE_DESC1. Thanks to the layout, it's sliceable down to D3D12_RESOURCE_DESC if needed.1139CD3DX12_RESOURCE_DESC1 resource_desc = {};1140resource_desc.Dimension = RD_TEXTURE_TYPE_TO_D3D12_RESOURCE_DIMENSION[p_format.texture_type];1141resource_desc.Alignment = 0; // D3D12MA will override this to use a smaller alignment than the default if possible.11421143resource_desc.Width = p_format.width;1144resource_desc.Height = p_format.height;1145resource_desc.DepthOrArraySize = p_format.depth * p_format.array_layers;1146resource_desc.MipLevels = p_format.mipmaps;11471148// Format.1149bool cross_family_sharing = false;1150bool relaxed_casting_available = false;1151DXGI_FORMAT *relaxed_casting_formats = nullptr;1152uint32_t relaxed_casting_format_count = 0;1153{1154resource_desc.Format = RD_TO_D3D12_FORMAT[p_format.format].family;11551156// If views of different families are wanted, special setup is needed for proper sharing among them.1157// If the driver reports relaxed casting is, leverage its new extended resource creation API (via D3D12MA).1158if (p_format.shareable_formats.size() && format_capabilities.relaxed_casting_supported) {1159relaxed_casting_available = true;1160relaxed_casting_formats = ALLOCA_ARRAY(DXGI_FORMAT, p_format.shareable_formats.size() + 1);1161relaxed_casting_formats[0] = 
RD_TO_D3D12_FORMAT[p_format.format].general_format;1162relaxed_casting_format_count++;1163}11641165HashMap<DataFormat, D3D12_RESOURCE_FLAGS> aliases_forbidden_flags;1166for (int i = 0; i < p_format.shareable_formats.size(); i++) {1167DataFormat curr_format = p_format.shareable_formats[i];1168String format_text = "'" + String(FORMAT_NAMES[p_format.format]) + "'";11691170ERR_FAIL_COND_V_MSG(RD_TO_D3D12_FORMAT[curr_format].family == DXGI_FORMAT_UNKNOWN, TextureID(), "Format " + format_text + " is not supported.");11711172if (RD_TO_D3D12_FORMAT[curr_format].family != RD_TO_D3D12_FORMAT[p_format.format].family) {1173cross_family_sharing = true;1174}11751176if (relaxed_casting_available) {1177relaxed_casting_formats[relaxed_casting_format_count] = RD_TO_D3D12_FORMAT[curr_format].general_format;1178relaxed_casting_format_count++;1179}1180}11811182if (cross_family_sharing && !relaxed_casting_available) {1183// Per https://docs.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_texture_layout.1184if (p_format.texture_type == TEXTURE_TYPE_1D) {1185ERR_FAIL_V_MSG(TextureID(), "This texture's views require aliasing, but that's not supported for a 1D texture.");1186}1187if (p_format.samples != TEXTURE_SAMPLES_1) {1188ERR_FAIL_V_MSG(TextureID(), "This texture's views require aliasing, but that's not supported for a multi-sample texture.");1189}1190if ((p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) {1191ERR_FAIL_V_MSG(TextureID(), "This texture's views require aliasing, but that's not supported for a depth-stencil texture.");1192}1193if (RD_TO_D3D12_FORMAT[p_format.format].family == DXGI_FORMAT_R32G32B32_TYPELESS) {1194ERR_FAIL_V_MSG(TextureID(), "This texture's views require aliasing, but that's not supported for an R32G32B32 texture.");1195}1196}1197}11981199// Usage.1200if ((p_format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT)) {1201resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;1202} else {1203if ((p_format.usage_bits & 
TEXTURE_USAGE_CAN_COPY_TO_BIT) && _unordered_access_supported_by_format(p_format.format)) {1204resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; // For clearing via UAV.1205}1206}1207if ((p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) {1208resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;1209}1210if ((p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT)) {1211resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;1212}1213if ((p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) && (p_format.usage_bits & TEXTURE_USAGE_VRS_FRAGMENT_SHADING_RATE_BIT)) {1214// For VRS images we can't use the typeless format.1215resource_desc.Format = DXGI_FORMAT_R8_UINT;1216}12171218resource_desc.SampleDesc = {};1219DXGI_FORMAT format_to_test = (resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) ? RD_TO_D3D12_FORMAT[p_format.format].dsv_format : RD_TO_D3D12_FORMAT[p_format.format].general_format;1220if (!(resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS)) {1221resource_desc.SampleDesc.Count = MIN(1222_find_max_common_supported_sample_count(format_to_test),1223TEXTURE_SAMPLES_COUNT[p_format.samples]);1224} else {1225// No MSAA in D3D12 if storage. May have become possible recently where supported, though.1226resource_desc.SampleDesc.Count = 1;1227}1228resource_desc.SampleDesc.Quality = resource_desc.SampleDesc.Count == 1 ? 0 : DXGI_STANDARD_MULTISAMPLE_QUALITY_PATTERN;12291230// Create.12311232D3D12MA::ALLOCATION_DESC allocation_desc = {};1233allocation_desc.HeapType = (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) ? 
D3D12_HEAP_TYPE_READBACK : D3D12_HEAP_TYPE_DEFAULT;1234if ((resource_desc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))) {1235allocation_desc.ExtraHeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES;1236} else {1237allocation_desc.ExtraHeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES;1238}1239if ((resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS)) {1240allocation_desc.ExtraHeapFlags |= D3D12_HEAP_FLAG_ALLOW_SHADER_ATOMICS;1241}12421243D3D12_RESOURCE_STATES initial_state = {};1244ID3D12Resource *texture = nullptr;1245ComPtr<ID3D12Resource> main_texture;1246ComPtr<D3D12MA::Allocation> allocation;1247static const FLOAT black[4] = {};1248D3D12_CLEAR_VALUE clear_value = CD3DX12_CLEAR_VALUE(RD_TO_D3D12_FORMAT[p_format.format].general_format, black);1249D3D12_CLEAR_VALUE *clear_value_ptr = (resource_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ? &clear_value : nullptr;1250{1251HRESULT res = E_FAIL;1252if (barrier_capabilities.enhanced_barriers_supported || (cross_family_sharing && relaxed_casting_available)) {1253// Create with undefined layout if enhanced barriers are supported. Leave as common otherwise for interop with legacy barriers.1254D3D12_BARRIER_LAYOUT initial_layout = barrier_capabilities.enhanced_barriers_supported ? 
D3D12_BARRIER_LAYOUT_UNDEFINED : D3D12_BARRIER_LAYOUT_COMMON;1255res = allocator->CreateResource3(1256&allocation_desc,1257&resource_desc,1258initial_layout,1259clear_value_ptr,1260relaxed_casting_format_count,1261relaxed_casting_formats,1262allocation.GetAddressOf(),1263IID_PPV_ARGS(main_texture.GetAddressOf()));1264initial_state = D3D12_RESOURCE_STATE_COMMON;1265} else {1266res = allocator->CreateResource(1267&allocation_desc,1268(D3D12_RESOURCE_DESC *)&resource_desc,1269D3D12_RESOURCE_STATE_COPY_DEST,1270clear_value_ptr,1271allocation.GetAddressOf(),1272IID_PPV_ARGS(main_texture.GetAddressOf()));1273initial_state = D3D12_RESOURCE_STATE_COPY_DEST;1274}1275ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), TextureID(), "CreateResource failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");1276texture = main_texture.Get();1277}12781279// Describe views.12801281D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {};1282{1283srv_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].general_format;1284srv_desc.ViewDimension = p_format.samples == TEXTURE_SAMPLES_1 ? 
RD_TEXTURE_TYPE_TO_D3D12_VIEW_DIMENSION_FOR_SRV[p_format.texture_type] : RD_TEXTURE_TYPE_TO_D3D12_VIEW_DIMENSION_FOR_SRV_MS[p_format.texture_type];1285srv_desc.Shader4ComponentMapping = _compute_component_mapping(p_view);12861287switch (srv_desc.ViewDimension) {1288case D3D12_SRV_DIMENSION_TEXTURE1D: {1289srv_desc.Texture1D.MipLevels = p_format.mipmaps;1290} break;1291case D3D12_SRV_DIMENSION_TEXTURE1DARRAY: {1292srv_desc.Texture1DArray.MipLevels = p_format.mipmaps;1293srv_desc.Texture1DArray.ArraySize = p_format.array_layers;1294} break;1295case D3D12_SRV_DIMENSION_TEXTURE2D: {1296srv_desc.Texture2D.MipLevels = p_format.mipmaps;1297} break;1298case D3D12_SRV_DIMENSION_TEXTURE2DMS: {1299} break;1300case D3D12_SRV_DIMENSION_TEXTURE2DARRAY: {1301srv_desc.Texture2DArray.MipLevels = p_format.mipmaps;1302srv_desc.Texture2DArray.ArraySize = p_format.array_layers;1303} break;1304case D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY: {1305srv_desc.Texture2DMSArray.ArraySize = p_format.array_layers;1306} break;1307case D3D12_SRV_DIMENSION_TEXTURECUBEARRAY: {1308srv_desc.TextureCubeArray.MipLevels = p_format.mipmaps;1309srv_desc.TextureCubeArray.NumCubes = p_format.array_layers / 6;1310} break;1311case D3D12_SRV_DIMENSION_TEXTURE3D: {1312srv_desc.Texture3D.MipLevels = p_format.mipmaps;1313} break;1314case D3D12_SRV_DIMENSION_TEXTURECUBE: {1315srv_desc.TextureCube.MipLevels = p_format.mipmaps;1316} break;1317default: {1318}1319}1320}13211322D3D12_UNORDERED_ACCESS_VIEW_DESC main_uav_desc = {};1323{1324main_uav_desc.Format = RD_TO_D3D12_FORMAT[p_format.format].general_format;1325main_uav_desc.ViewDimension = p_format.samples == TEXTURE_SAMPLES_1 ? 
RD_TEXTURE_TYPE_TO_D3D12_VIEW_DIMENSION_FOR_UAV[p_format.texture_type] : D3D12_UAV_DIMENSION_UNKNOWN;13261327switch (main_uav_desc.ViewDimension) {1328case D3D12_UAV_DIMENSION_TEXTURE1DARRAY: {1329main_uav_desc.Texture1DArray.ArraySize = p_format.array_layers;1330} break;1331case D3D12_UAV_DIMENSION_TEXTURE2DARRAY: {1332// Either for an actual 2D texture array, cubemap or cubemap array.1333main_uav_desc.Texture2DArray.ArraySize = p_format.array_layers;1334} break;1335case D3D12_UAV_DIMENSION_TEXTURE3D: {1336main_uav_desc.Texture3D.WSize = p_format.depth;1337} break;1338default: {1339}1340}1341}13421343D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = main_uav_desc;1344uav_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].general_format;13451346// Bookkeep.13471348TextureInfo *tex_info = VersatileResource::allocate<TextureInfo>(resources_allocator);1349tex_info->resource = texture;1350tex_info->owner_info.resource = main_texture;1351tex_info->owner_info.allocation = allocation;1352tex_info->owner_info.states.subresource_states.resize(p_format.mipmaps * p_format.array_layers);1353for (uint32_t i = 0; i < tex_info->owner_info.states.subresource_states.size(); i++) {1354tex_info->owner_info.states.subresource_states[i] = initial_state;1355}1356tex_info->states_ptr = &tex_info->owner_info.states;1357tex_info->format = p_format.format;1358GODOT_GCC_WARNING_PUSH_AND_IGNORE("-Wstrict-aliasing")1359tex_info->desc = *(CD3DX12_RESOURCE_DESC *)&resource_desc;1360GODOT_GCC_WARNING_POP1361tex_info->base_layer = 0;1362tex_info->layers = resource_desc.ArraySize();1363tex_info->base_mip = 0;1364tex_info->mipmaps = resource_desc.MipLevels;1365tex_info->view_descs.srv = srv_desc;1366tex_info->view_descs.uav = uav_desc;13671368if (!barrier_capabilities.enhanced_barriers_supported && (p_format.usage_bits & (TEXTURE_USAGE_STORAGE_BIT | TEXTURE_USAGE_COLOR_ATTACHMENT_BIT))) {1369// Fallback to clear resources when they're first used in a uniform set. 
Not necessary if enhanced barriers1370// are supported, as the discard flag will be used instead when transitioning from an undefined layout.1371textures_pending_clear.add(&tex_info->pending_clear);1372}13731374return TextureID(tex_info);1375}13761377RDD::TextureID RenderingDeviceDriverD3D12::texture_create_from_extension(uint64_t p_native_texture, TextureType p_type, DataFormat p_format, uint32_t p_array_layers, bool p_depth_stencil, uint32_t p_mipmaps) {1378ID3D12Resource *texture = (ID3D12Resource *)p_native_texture;13791380#if defined(_MSC_VER) || !defined(_WIN32)1381const D3D12_RESOURCE_DESC base_resource_desc = texture->GetDesc();1382#else1383D3D12_RESOURCE_DESC base_resource_desc;1384texture->GetDesc(&base_resource_desc);1385#endif1386CD3DX12_RESOURCE_DESC resource_desc(base_resource_desc);1387D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {};1388{1389srv_desc.Format = RD_TO_D3D12_FORMAT[p_format].general_format;1390srv_desc.ViewDimension = resource_desc.SampleDesc.Count == 1 ? RD_TEXTURE_TYPE_TO_D3D12_VIEW_DIMENSION_FOR_SRV[p_type] : RD_TEXTURE_TYPE_TO_D3D12_VIEW_DIMENSION_FOR_SRV_MS[p_type];1391srv_desc.Shader4ComponentMapping = _compute_component_mapping(TextureView{ p_format });13921393switch (srv_desc.ViewDimension) {1394case D3D12_SRV_DIMENSION_TEXTURE1D: {1395srv_desc.Texture1D.MipLevels = resource_desc.MipLevels;1396} break;1397case D3D12_SRV_DIMENSION_TEXTURE1DARRAY: {1398srv_desc.Texture1DArray.MipLevels = resource_desc.MipLevels;1399srv_desc.Texture1DArray.ArraySize = p_array_layers;1400} break;1401case D3D12_SRV_DIMENSION_TEXTURE2D: {1402srv_desc.Texture2D.MipLevels = resource_desc.MipLevels;1403} break;1404case D3D12_SRV_DIMENSION_TEXTURE2DMS: {1405} break;1406case D3D12_SRV_DIMENSION_TEXTURE2DARRAY: {1407srv_desc.Texture2DArray.MipLevels = resource_desc.MipLevels;1408srv_desc.Texture2DArray.ArraySize = p_array_layers;1409} break;1410case D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY: {1411srv_desc.Texture2DMSArray.ArraySize = p_array_layers;1412} 
break;1413case D3D12_SRV_DIMENSION_TEXTURECUBEARRAY: {1414srv_desc.TextureCubeArray.MipLevels = resource_desc.MipLevels;1415srv_desc.TextureCubeArray.NumCubes = p_array_layers / 6;1416} break;1417case D3D12_SRV_DIMENSION_TEXTURE3D: {1418srv_desc.Texture3D.MipLevels = resource_desc.MipLevels;1419} break;1420case D3D12_SRV_DIMENSION_TEXTURECUBE: {1421srv_desc.TextureCube.MipLevels = resource_desc.MipLevels;1422} break;1423default: {1424}1425}1426}14271428D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {};1429{1430uav_desc.Format = RD_TO_D3D12_FORMAT[p_format].general_format;1431uav_desc.ViewDimension = resource_desc.SampleDesc.Count == 1 ? RD_TEXTURE_TYPE_TO_D3D12_VIEW_DIMENSION_FOR_UAV[p_type] : D3D12_UAV_DIMENSION_UNKNOWN;14321433switch (uav_desc.ViewDimension) {1434case D3D12_UAV_DIMENSION_TEXTURE1DARRAY: {1435uav_desc.Texture1DArray.ArraySize = p_array_layers;1436} break;1437case D3D12_UAV_DIMENSION_TEXTURE2DARRAY: {1438// Either for an actual 2D texture array, cubemap or cubemap array.1439uav_desc.Texture2DArray.ArraySize = p_array_layers;1440} break;1441case D3D12_UAV_DIMENSION_TEXTURE3D: {1442uav_desc.Texture3D.WSize = resource_desc.Depth();1443} break;1444default: {1445}1446}1447}14481449TextureInfo *tex_info = VersatileResource::allocate<TextureInfo>(resources_allocator);1450tex_info->resource = texture;1451tex_info->owner_info.resource = nullptr; // Not allocated by us.1452tex_info->owner_info.allocation = nullptr; // Not allocated by us.1453tex_info->owner_info.states.subresource_states.resize(resource_desc.MipLevels * p_array_layers);1454for (uint32_t i = 0; i < tex_info->owner_info.states.subresource_states.size(); i++) {1455tex_info->owner_info.states.subresource_states[i] = !p_depth_stencil ? 
D3D12_RESOURCE_STATE_RENDER_TARGET : D3D12_RESOURCE_STATE_DEPTH_WRITE;1456}1457tex_info->states_ptr = &tex_info->owner_info.states;1458tex_info->format = p_format;1459#if defined(__GNUC__) && !defined(__clang__)1460#pragma GCC diagnostic push1461#pragma GCC diagnostic ignored "-Wstrict-aliasing"1462#endif1463tex_info->desc = *(CD3DX12_RESOURCE_DESC *)&resource_desc;1464#if defined(__GNUC__) && !defined(__clang__)1465#pragma GCC diagnostic pop1466#endif1467tex_info->base_layer = 0;1468tex_info->layers = p_array_layers;1469tex_info->base_mip = 0;1470tex_info->mipmaps = resource_desc.MipLevels;1471tex_info->view_descs.srv = srv_desc;1472tex_info->view_descs.uav = uav_desc;1473#ifdef DEBUG_ENABLED1474tex_info->created_from_extension = true;1475#endif1476return TextureID(tex_info);1477}14781479RDD::TextureID RenderingDeviceDriverD3D12::texture_create_shared(TextureID p_original_texture, const TextureView &p_view) {1480return _texture_create_shared_from_slice(p_original_texture, p_view, (TextureSliceType)-1, 0, 0, 0, 0);1481}14821483RDD::TextureID RenderingDeviceDriverD3D12::texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps) {1484return _texture_create_shared_from_slice(p_original_texture, p_view, p_slice_type, p_layer, p_layers, p_mipmap, p_mipmaps);1485}14861487RDD::TextureID RenderingDeviceDriverD3D12::_texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps) {1488TextureInfo *owner_tex_info = (TextureInfo *)p_original_texture.id;1489#ifdef DEBUG_ENABLED1490ERR_FAIL_COND_V(!owner_tex_info->owner_info.allocation, TextureID());1491#endif14921493ComPtr<ID3D12Resource> new_texture;1494ComPtr<D3D12MA::Allocation> new_allocation;1495ID3D12Resource *resource = 
owner_tex_info->resource;1496CD3DX12_RESOURCE_DESC new_tex_resource_desc = owner_tex_info->desc;14971498// Describe views.14991500D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = owner_tex_info->view_descs.srv;1501{1502srv_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].general_format;1503srv_desc.Shader4ComponentMapping = _compute_component_mapping(p_view);1504}15051506D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = owner_tex_info->view_descs.uav;1507{1508uav_desc.Format = RD_TO_D3D12_FORMAT[p_view.format].general_format;1509}15101511if (p_slice_type != (TextureSliceType)-1) {1512// Complete description with slicing.15131514switch (p_slice_type) {1515case TEXTURE_SLICE_2D: {1516if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2D && p_layer == 0) {1517srv_desc.Texture2D.MostDetailedMip = p_mipmap;1518srv_desc.Texture2D.MipLevels = p_mipmaps;15191520DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2D);1521uav_desc.Texture1D.MipSlice = p_mipmap;1522} else if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DMS && p_layer == 0) {1523DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_UNKNOWN);1524} else if ((srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DARRAY || (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2D && p_layer)) || srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE || srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY) {1525srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;1526srv_desc.Texture2DArray.MostDetailedMip = p_mipmap;1527srv_desc.Texture2DArray.MipLevels = p_mipmaps;1528srv_desc.Texture2DArray.FirstArraySlice = p_layer;1529srv_desc.Texture2DArray.ArraySize = 1;1530srv_desc.Texture2DArray.PlaneSlice = 0;1531srv_desc.Texture2DArray.ResourceMinLODClamp = 0.0f;15321533uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY;1534uav_desc.Texture2DArray.MipSlice = p_mipmap;1535uav_desc.Texture2DArray.FirstArraySlice = p_layer;1536uav_desc.Texture2DArray.ArraySize = 
1;1537uav_desc.Texture2DArray.PlaneSlice = 0;1538} else if ((srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY || (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DMS && p_layer))) {1539srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;1540srv_desc.Texture2DMSArray.FirstArraySlice = p_layer;1541srv_desc.Texture2DMSArray.ArraySize = 1;15421543uav_desc.ViewDimension = D3D12_UAV_DIMENSION_UNKNOWN;1544} else {1545DEV_ASSERT(false);1546}1547} break;1548case TEXTURE_SLICE_CUBEMAP: {1549if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE || p_layer == 0) {1550srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE;1551srv_desc.TextureCube.MostDetailedMip = p_mipmap;1552srv_desc.TextureCube.MipLevels = p_mipmaps;15531554DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2DARRAY);1555uav_desc.Texture2DArray.MipSlice = p_mipmap;1556uav_desc.Texture2DArray.FirstArraySlice = p_layer;1557uav_desc.Texture2DArray.ArraySize = 6;1558uav_desc.Texture2DArray.PlaneSlice = 0;1559} else if (srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY || p_layer != 0) {1560srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY;1561srv_desc.TextureCubeArray.MostDetailedMip = p_mipmap;1562srv_desc.TextureCubeArray.MipLevels = p_mipmaps;1563srv_desc.TextureCubeArray.First2DArrayFace = p_layer;1564srv_desc.TextureCubeArray.NumCubes = 1;1565srv_desc.TextureCubeArray.ResourceMinLODClamp = 0.0f;15661567DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2DARRAY);1568uav_desc.Texture2DArray.MipSlice = p_mipmap;1569uav_desc.Texture2DArray.FirstArraySlice = p_layer;1570uav_desc.Texture2DArray.ArraySize = 6;1571uav_desc.Texture2DArray.PlaneSlice = 0;1572} else {1573DEV_ASSERT(false);1574}1575} break;1576case TEXTURE_SLICE_3D: {1577DEV_ASSERT(srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE3D);1578srv_desc.Texture3D.MostDetailedMip = p_mipmap;1579srv_desc.Texture3D.MipLevels = 
p_mipmaps;15801581DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE3D);1582uav_desc.Texture3D.MipSlice = p_mipmap;1583uav_desc.Texture3D.WSize = -1;1584} break;1585case TEXTURE_SLICE_2D_ARRAY: {1586DEV_ASSERT(srv_desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE2DARRAY);1587srv_desc.Texture2DArray.MostDetailedMip = p_mipmap;1588srv_desc.Texture2DArray.MipLevels = p_mipmaps;1589srv_desc.Texture2DArray.FirstArraySlice = p_layer;1590srv_desc.Texture2DArray.ArraySize = p_layers;15911592DEV_ASSERT(uav_desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE2DARRAY);1593uav_desc.Texture2DArray.MipSlice = p_mipmap;1594uav_desc.Texture2DArray.FirstArraySlice = p_layer;1595uav_desc.Texture2DArray.ArraySize = p_layers;1596} break;1597default:1598break;1599}1600}16011602// Bookkeep.16031604TextureInfo *tex_info = VersatileResource::allocate<TextureInfo>(resources_allocator);1605tex_info->resource = resource;1606tex_info->states_ptr = owner_tex_info->states_ptr;1607tex_info->format = p_view.format;1608tex_info->desc = new_tex_resource_desc;1609if (p_slice_type == (TextureSliceType)-1) {1610tex_info->base_layer = owner_tex_info->base_layer;1611tex_info->layers = owner_tex_info->layers;1612tex_info->base_mip = owner_tex_info->base_mip;1613tex_info->mipmaps = owner_tex_info->mipmaps;1614} else {1615tex_info->base_layer = p_layer;1616tex_info->layers = p_layers;1617tex_info->base_mip = p_mipmap;1618tex_info->mipmaps = p_mipmaps;1619}1620tex_info->view_descs.srv = srv_desc;1621tex_info->view_descs.uav = uav_desc;1622tex_info->main_texture = owner_tex_info;16231624return TextureID(tex_info);1625}16261627void RenderingDeviceDriverD3D12::texture_free(TextureID p_texture) {1628TextureInfo *tex_info = (TextureInfo *)p_texture.id;1629VersatileResource::free(resources_allocator, tex_info);1630}16311632uint64_t RenderingDeviceDriverD3D12::texture_get_allocation_size(TextureID p_texture) {1633const TextureInfo *tex_info = (const TextureInfo *)p_texture.id;1634return 
tex_info->owner_info.allocation ? tex_info->owner_info.allocation->GetSize() : 0;1635}16361637void RenderingDeviceDriverD3D12::texture_get_copyable_layout(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) {1638TextureInfo *tex_info = (TextureInfo *)p_texture.id;16391640UINT subresource = tex_info->desc.CalcSubresource(p_subresource.mipmap, p_subresource.layer, 0);16411642D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = {};1643UINT64 subresource_total_size = 0;1644device->GetCopyableFootprints(1645&tex_info->desc,1646subresource,16471,16480,1649&footprint,1650nullptr,1651nullptr,1652&subresource_total_size);16531654*r_layout = {};1655r_layout->offset = footprint.Offset;1656r_layout->size = subresource_total_size;1657r_layout->row_pitch = footprint.Footprint.RowPitch;1658r_layout->depth_pitch = subresource_total_size / tex_info->desc.Depth();1659r_layout->layer_pitch = subresource_total_size / tex_info->desc.ArraySize();1660}16611662uint8_t *RenderingDeviceDriverD3D12::texture_map(TextureID p_texture, const TextureSubresource &p_subresource) {1663TextureInfo *tex_info = (TextureInfo *)p_texture.id;1664#ifdef DEBUG_ENABLED1665ERR_FAIL_COND_V(tex_info->mapped_subresource != UINT_MAX, nullptr);1666#endif16671668UINT plane = _compute_plane_slice(tex_info->format, p_subresource.aspect);1669UINT subresource = tex_info->desc.CalcSubresource(p_subresource.mipmap, p_subresource.layer, plane);16701671void *data_ptr = nullptr;1672HRESULT res = tex_info->resource->Map(subresource, &VOID_RANGE, &data_ptr);1673ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), nullptr, "Map failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");1674tex_info->mapped_subresource = subresource;1675return (uint8_t *)data_ptr;1676}16771678void RenderingDeviceDriverD3D12::texture_unmap(TextureID p_texture) {1679TextureInfo *tex_info = (TextureInfo *)p_texture.id;1680#ifdef DEBUG_ENABLED1681ERR_FAIL_COND(tex_info->mapped_subresource == 
UINT_MAX);1682#endif1683tex_info->resource->Unmap(tex_info->mapped_subresource, &VOID_RANGE);1684tex_info->mapped_subresource = UINT_MAX;1685}16861687BitField<RDD::TextureUsageBits> RenderingDeviceDriverD3D12::texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) {1688D3D12_FEATURE_DATA_FORMAT_SUPPORT srv_rtv_support = {};1689srv_rtv_support.Format = RD_TO_D3D12_FORMAT[p_format].general_format;1690if (srv_rtv_support.Format != DXGI_FORMAT_UNKNOWN) { // Some implementations (i.e., vkd3d-proton) error out instead of returning empty.1691HRESULT res = device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &srv_rtv_support, sizeof(srv_rtv_support));1692ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), false, "CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");1693}16941695D3D12_FEATURE_DATA_FORMAT_SUPPORT &uav_support = srv_rtv_support; // Fine for now.16961697D3D12_FEATURE_DATA_FORMAT_SUPPORT dsv_support = {};1698dsv_support.Format = RD_TO_D3D12_FORMAT[p_format].dsv_format;1699if (dsv_support.Format != DXGI_FORMAT_UNKNOWN) { // See above.1700HRESULT res = device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &dsv_support, sizeof(dsv_support));1701ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), false, "CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");1702}17031704// Everything supported by default makes an all-or-nothing check easier for the caller.1705BitField<RDD::TextureUsageBits> supported = INT64_MAX;17061707// Per https://docs.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_format_support1,1708// as long as the resource can be used as a texture, Sample() will work with point filter at least.1709// However, we've empirically found that checking for at least D3D12_FORMAT_SUPPORT1_SHADER_LOAD is needed.1710// That's almost good for integer formats. 
The problem is that theoretically there may be1711// float formats that support LOAD but not SAMPLE fully, so this check will not detect1712// such a flaw in the format. Linearly interpolated sampling would just not work on them.1713if (!(srv_rtv_support.Support1 & (D3D12_FORMAT_SUPPORT1_SHADER_LOAD | D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE)) ||1714RD_TO_D3D12_FORMAT[p_format].general_format == DXGI_FORMAT_UNKNOWN) {1715supported.clear_flag(TEXTURE_USAGE_SAMPLING_BIT);1716}17171718if (!(srv_rtv_support.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET)) {1719supported.clear_flag(TEXTURE_USAGE_COLOR_ATTACHMENT_BIT);1720}1721if (!(dsv_support.Support1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL)) {1722supported.clear_flag(TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);1723}1724if (!(uav_support.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW)) { // Maybe check LOAD/STORE, too?1725supported.clear_flag(TEXTURE_USAGE_STORAGE_BIT);1726}1727if (!(uav_support.Support2 & D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_ADD)) { // Check a basic atomic at least.1728supported.clear_flag(TEXTURE_USAGE_STORAGE_ATOMIC_BIT);1729}1730if (RD_TO_D3D12_FORMAT[p_format].general_format != DXGI_FORMAT_R8_UINT) {1731supported.clear_flag(TEXTURE_USAGE_VRS_ATTACHMENT_BIT);1732}17331734return supported;1735}17361737bool RenderingDeviceDriverD3D12::texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) {1738r_raw_reinterpretation = false;17391740if (format_capabilities.relaxed_casting_supported) {1741// Relaxed casting is supported, there should be no need to check for format family compatibility.1742return true;1743} else {1744TextureInfo *tex_info = (TextureInfo *)p_texture.id;1745if (tex_info->format == DATA_FORMAT_R16_UINT && p_format == DATA_FORMAT_R4G4B4A4_UNORM_PACK16) {1746// Specific cases that require buffer reinterpretation.1747r_raw_reinterpretation = true;1748return false;1749} else if (RD_TO_D3D12_FORMAT[tex_info->format].family != 
RD_TO_D3D12_FORMAT[p_format].family) {1750// Format family is different but copying resources directly is possible.1751return false;1752} else {1753// Format family is the same and the view can just cast the format.1754return true;1755}1756}1757}17581759/*****************/1760/**** SAMPLER ****/1761/*****************/17621763static const D3D12_TEXTURE_ADDRESS_MODE RD_REPEAT_MODE_TO_D3D12_ADDRESS_MODE[RDD::SAMPLER_REPEAT_MODE_MAX] = {1764D3D12_TEXTURE_ADDRESS_MODE_WRAP,1765D3D12_TEXTURE_ADDRESS_MODE_MIRROR,1766D3D12_TEXTURE_ADDRESS_MODE_CLAMP,1767D3D12_TEXTURE_ADDRESS_MODE_BORDER,1768D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE,1769};17701771static const FLOAT RD_TO_D3D12_SAMPLER_BORDER_COLOR[RDD::SAMPLER_BORDER_COLOR_MAX][4] = {1772{ 0, 0, 0, 0 },1773{ 0, 0, 0, 0 },1774{ 0, 0, 0, 1 },1775{ 0, 0, 0, 1 },1776{ 1, 1, 1, 1 },1777{ 1, 1, 1, 1 },1778};17791780RDD::SamplerID RenderingDeviceDriverD3D12::sampler_create(const SamplerState &p_state) {1781uint32_t slot = UINT32_MAX;17821783if (samplers.is_empty()) {1784// Adding a seemigly busy slot 0 makes things easier elsewhere.1785samplers.push_back({});1786samplers.push_back({});1787slot = 1;1788} else {1789for (uint32_t i = 1; i < samplers.size(); i++) {1790if ((int)samplers[i].Filter == INT_MAX) {1791slot = i;1792break;1793}1794}1795if (slot == UINT32_MAX) {1796slot = samplers.size();1797samplers.push_back({});1798}1799}18001801D3D12_SAMPLER_DESC &sampler_desc = samplers[slot];18021803if (p_state.use_anisotropy) {1804sampler_desc.Filter = D3D12_ENCODE_ANISOTROPIC_FILTER(D3D12_FILTER_REDUCTION_TYPE_STANDARD);1805sampler_desc.MaxAnisotropy = p_state.anisotropy_max;1806} else {1807static const D3D12_FILTER_TYPE RD_FILTER_TYPE_TO_D3D12[] = {1808D3D12_FILTER_TYPE_POINT, // SAMPLER_FILTER_NEAREST.1809D3D12_FILTER_TYPE_LINEAR, // SAMPLER_FILTER_LINEAR.1810};1811sampler_desc.Filter = 
D3D12_ENCODE_BASIC_FILTER(1812RD_FILTER_TYPE_TO_D3D12[p_state.min_filter],1813RD_FILTER_TYPE_TO_D3D12[p_state.mag_filter],1814RD_FILTER_TYPE_TO_D3D12[p_state.mip_filter],1815p_state.enable_compare ? D3D12_FILTER_REDUCTION_TYPE_COMPARISON : D3D12_FILTER_REDUCTION_TYPE_STANDARD);1816}18171818sampler_desc.AddressU = RD_REPEAT_MODE_TO_D3D12_ADDRESS_MODE[p_state.repeat_u];1819sampler_desc.AddressV = RD_REPEAT_MODE_TO_D3D12_ADDRESS_MODE[p_state.repeat_v];1820sampler_desc.AddressW = RD_REPEAT_MODE_TO_D3D12_ADDRESS_MODE[p_state.repeat_w];18211822for (int i = 0; i < 4; i++) {1823sampler_desc.BorderColor[i] = RD_TO_D3D12_SAMPLER_BORDER_COLOR[p_state.border_color][i];1824}18251826sampler_desc.MinLOD = p_state.min_lod;1827sampler_desc.MaxLOD = p_state.max_lod;1828sampler_desc.MipLODBias = p_state.lod_bias;18291830sampler_desc.ComparisonFunc = p_state.enable_compare ? RD_TO_D3D12_COMPARE_OP[p_state.compare_op] : D3D12_COMPARISON_FUNC_NEVER;18311832// TODO: Emulate somehow?1833if (p_state.unnormalized_uvw) {1834WARN_PRINT("Creating a sampler with unnormalized UVW, which is not supported.");1835}18361837return SamplerID(slot);1838}18391840void RenderingDeviceDriverD3D12::sampler_free(SamplerID p_sampler) {1841samplers[p_sampler.id].Filter = (D3D12_FILTER)INT_MAX;1842}18431844bool RenderingDeviceDriverD3D12::sampler_is_format_supported_for_filter(DataFormat p_format, SamplerFilter p_filter) {1845D3D12_FEATURE_DATA_FORMAT_SUPPORT srv_rtv_support = {};1846srv_rtv_support.Format = RD_TO_D3D12_FORMAT[p_format].general_format;1847HRESULT res = device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &srv_rtv_support, sizeof(srv_rtv_support));1848ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), false, "CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");1849return (srv_rtv_support.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE);1850}18511852/**********************/1853/**** VERTEX ARRAY ****/1854/**********************/18551856RDD::VertexFormatID 
RenderingDeviceDriverD3D12::vertex_format_create(VectorView<VertexAttribute> p_vertex_attribs) {1857VertexFormatInfo *vf_info = VersatileResource::allocate<VertexFormatInfo>(resources_allocator);18581859vf_info->input_elem_descs.resize(p_vertex_attribs.size());1860vf_info->vertex_buffer_strides.resize(p_vertex_attribs.size());1861for (uint32_t i = 0; i < p_vertex_attribs.size(); i++) {1862vf_info->input_elem_descs[i] = {};1863vf_info->input_elem_descs[i].SemanticName = "TEXCOORD";1864vf_info->input_elem_descs[i].SemanticIndex = p_vertex_attribs[i].location;1865vf_info->input_elem_descs[i].Format = RD_TO_D3D12_FORMAT[p_vertex_attribs[i].format].general_format;1866vf_info->input_elem_descs[i].InputSlot = i; // TODO: Can the same slot be used if data comes from the same buffer (regardless format)?1867vf_info->input_elem_descs[i].AlignedByteOffset = p_vertex_attribs[i].offset;1868if (p_vertex_attribs[i].frequency == VERTEX_FREQUENCY_INSTANCE) {1869vf_info->input_elem_descs[i].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;1870vf_info->input_elem_descs[i].InstanceDataStepRate = 1;1871} else {1872vf_info->input_elem_descs[i].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;1873vf_info->input_elem_descs[i].InstanceDataStepRate = 0;1874}18751876vf_info->vertex_buffer_strides[i] = p_vertex_attribs[i].stride;1877}18781879return VertexFormatID(vf_info);1880}18811882void RenderingDeviceDriverD3D12::vertex_format_free(VertexFormatID p_vertex_format) {1883VertexFormatInfo *vf_info = (VertexFormatInfo *)p_vertex_format.id;1884VersatileResource::free(resources_allocator, vf_info);1885}18861887/******************/1888/**** BARRIERS ****/1889/******************/18901891static D3D12_BARRIER_ACCESS _rd_texture_layout_access_mask(RDD::TextureLayout p_texture_layout) {1892switch (p_texture_layout) {1893case RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL:1894return D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;1895case RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:1896return 
D3D12_BARRIER_ACCESS_RENDER_TARGET;1897case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:1898return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ | D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;1899case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:1900return D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ;1901case RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:1902return D3D12_BARRIER_ACCESS_SHADER_RESOURCE;1903case RDD::TEXTURE_LAYOUT_COPY_SRC_OPTIMAL:1904return D3D12_BARRIER_ACCESS_COPY_SOURCE;1905case RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL:1906return D3D12_BARRIER_ACCESS_COPY_DEST;1907case RDD::TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL:1908return D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;1909case RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL:1910return D3D12_BARRIER_ACCESS_RESOLVE_DEST;1911case RDD::TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL:1912return D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;1913case RDD::TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL:1914DEV_ASSERT(false && "Fragment density maps are not supported in D3D12.");1915return D3D12_BARRIER_ACCESS_NO_ACCESS;1916default:1917return D3D12_BARRIER_ACCESS_NO_ACCESS;1918}1919}19201921static void _rd_access_to_d3d12_and_mask(BitField<RDD::BarrierAccessBits> p_access, RDD::TextureLayout p_texture_layout, D3D12_BARRIER_ACCESS &r_access, D3D12_BARRIER_SYNC &r_sync_mask) {1922r_access = D3D12_BARRIER_ACCESS_COMMON;1923r_sync_mask = D3D12_BARRIER_SYNC_NONE;19241925if (p_access.has_flag(RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT)) {1926r_access |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT;1927r_sync_mask |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT;1928}19291930if (p_access.has_flag(RDD::BARRIER_ACCESS_INDEX_READ_BIT)) {1931r_access |= D3D12_BARRIER_ACCESS_INDEX_BUFFER;1932r_sync_mask |= D3D12_BARRIER_SYNC_INDEX_INPUT | D3D12_BARRIER_SYNC_DRAW;1933}19341935if (p_access.has_flag(RDD::BARRIER_ACCESS_VERTEX_ATTRIBUTE_READ_BIT)) {1936r_access |= D3D12_BARRIER_ACCESS_VERTEX_BUFFER;1937r_sync_mask |= 
D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING;1938}19391940if (p_access.has_flag(RDD::BARRIER_ACCESS_UNIFORM_READ_BIT)) {1941r_access |= D3D12_BARRIER_ACCESS_CONSTANT_BUFFER;1942r_sync_mask |= D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_COMPUTE_SHADING |1943D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING;1944}19451946if (p_access.has_flag(RDD::BARRIER_ACCESS_INPUT_ATTACHMENT_READ_BIT)) {1947r_access |= D3D12_BARRIER_ACCESS_RENDER_TARGET;1948r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_RENDER_TARGET;1949}19501951if (p_access.has_flag(RDD::BARRIER_ACCESS_COPY_READ_BIT)) {1952r_access |= D3D12_BARRIER_ACCESS_COPY_SOURCE;1953r_sync_mask |= D3D12_BARRIER_SYNC_COPY;1954}19551956if (p_access.has_flag(RDD::BARRIER_ACCESS_COPY_WRITE_BIT)) {1957r_access |= D3D12_BARRIER_ACCESS_COPY_DEST;1958r_sync_mask |= D3D12_BARRIER_SYNC_COPY;1959}19601961if (p_access.has_flag(RDD::BARRIER_ACCESS_RESOLVE_READ_BIT)) {1962r_access |= D3D12_BARRIER_ACCESS_RESOLVE_SOURCE;1963r_sync_mask |= D3D12_BARRIER_SYNC_RESOLVE;1964}19651966if (p_access.has_flag(RDD::BARRIER_ACCESS_RESOLVE_WRITE_BIT)) {1967r_access |= D3D12_BARRIER_ACCESS_RESOLVE_DEST;1968r_sync_mask |= D3D12_BARRIER_SYNC_RESOLVE;1969}19701971if (p_access.has_flag(RDD::BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT)) {1972r_access |= D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE;1973r_sync_mask |= D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_ALL_SHADING;1974}19751976const D3D12_BARRIER_SYNC unordered_access_mask = D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_COMPUTE_SHADING |1977D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING | D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW;19781979if (p_access.has_flag(RDD::BARRIER_ACCESS_STORAGE_CLEAR_BIT)) {1980r_access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;1981r_sync_mask |= 
unordered_access_mask;1982}19831984// These access bits only have compatibility with certain layouts unlike in Vulkan where they imply specific operations in the same layout.1985if (p_access.has_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT)) {1986r_access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;1987r_sync_mask |= unordered_access_mask;1988} else if (p_access.has_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT)) {1989if (p_texture_layout == RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL) {1990// Unordered access must be enforced if the texture is using the storage layout.1991r_access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;1992r_sync_mask |= unordered_access_mask;1993} else {1994r_access |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE;1995r_sync_mask |= D3D12_BARRIER_SYNC_VERTEX_SHADING | D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_COMPUTE_SHADING | D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_ALL_SHADING;1996}1997}19981999if (p_access.has_flag(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT) || p_access.has_flag(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_READ_BIT)) {2000r_access |= D3D12_BARRIER_ACCESS_RENDER_TARGET;2001r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_RENDER_TARGET;2002}20032004if (p_access.has_flag(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) {2005r_access |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;2006r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_DEPTH_STENCIL;2007} else if (p_access.has_flag(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT)) {2008r_access |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ;2009r_sync_mask |= D3D12_BARRIER_SYNC_DRAW | D3D12_BARRIER_SYNC_DEPTH_STENCIL;2010}2011}20122013static void _rd_stages_to_d3d12(BitField<RDD::PipelineStageBits> p_stages, D3D12_BARRIER_SYNC &r_sync) {2014if (p_stages.has_flag(RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT)) {2015r_sync = D3D12_BARRIER_SYNC_ALL;2016} else {2017if (p_stages.has_flag(RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT)) {2018r_sync |= 
D3D12_BARRIER_SYNC_EXECUTE_INDIRECT;2019}20202021if (p_stages.has_flag(RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT)) {2022r_sync |= D3D12_BARRIER_SYNC_INDEX_INPUT;2023}20242025if (p_stages.has_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT)) {2026r_sync |= D3D12_BARRIER_SYNC_VERTEX_SHADING;2027}20282029if (p_stages.has_flag(RDD::PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_GEOMETRY_SHADER_BIT)) {2030// There's no granularity for tessellation or geometry stages. The specification defines it as part of vertex shading.2031r_sync |= D3D12_BARRIER_SYNC_VERTEX_SHADING;2032}20332034if (p_stages.has_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT)) {2035r_sync |= D3D12_BARRIER_SYNC_PIXEL_SHADING;2036}20372038if (p_stages.has_flag(RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT)) {2039// Covers both read and write operations for depth stencil.2040r_sync |= D3D12_BARRIER_SYNC_DEPTH_STENCIL;2041}20422043if (p_stages.has_flag(RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {2044r_sync |= D3D12_BARRIER_SYNC_RENDER_TARGET;2045}20462047if (p_stages.has_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT)) {2048r_sync |= D3D12_BARRIER_SYNC_COMPUTE_SHADING;2049}20502051if (p_stages.has_flag(RDD::PIPELINE_STAGE_COPY_BIT)) {2052r_sync |= D3D12_BARRIER_SYNC_COPY;2053}20542055if (p_stages.has_flag(RDD::PIPELINE_STAGE_RESOLVE_BIT)) {2056r_sync |= D3D12_BARRIER_SYNC_RESOLVE;2057}20582059if (p_stages.has_flag(RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT)) {2060r_sync |= D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW;2061}20622063if (p_stages.has_flag(RDD::PIPELINE_STAGE_ALL_GRAPHICS_BIT)) {2064r_sync |= D3D12_BARRIER_SYNC_DRAW;2065}2066}2067}20682069static void _rd_stages_and_access_to_d3d12(BitField<RDD::PipelineStageBits> p_stages, RDD::TextureLayout 
p_texture_layout, BitField<RDD::BarrierAccessBits> p_access, D3D12_BARRIER_SYNC &r_sync, D3D12_BARRIER_ACCESS &r_access) {2070D3D12_BARRIER_SYNC sync_mask;2071r_sync = D3D12_BARRIER_SYNC_NONE;20722073if (p_texture_layout == RDD::TEXTURE_LAYOUT_UNDEFINED) {2074// Undefined texture layouts are a special case where no access bits or synchronization scopes are allowed.2075r_access = D3D12_BARRIER_ACCESS_NO_ACCESS;2076return;2077}20782079// Convert access bits to the D3D12 barrier access bits.2080_rd_access_to_d3d12_and_mask(p_access, p_texture_layout, r_access, sync_mask);20812082if (p_texture_layout != RDD::TEXTURE_LAYOUT_MAX) {2083// Only allow the access bits compatible with the texture layout.2084r_access &= _rd_texture_layout_access_mask(p_texture_layout);2085}20862087// Convert stage bits to the D3D12 synchronization scope bits.2088_rd_stages_to_d3d12(p_stages, r_sync);20892090// Only enable synchronization stages compatible with the access bits that were used.2091r_sync &= sync_mask;20922093if (r_sync == D3D12_BARRIER_SYNC_NONE) {2094if (p_access.is_empty()) {2095// No valid synchronization scope was defined and no access in particular is required.2096r_access = D3D12_BARRIER_ACCESS_NO_ACCESS;2097} else {2098// Access is required but the synchronization scope wasn't compatible. 
We fall back to the global synchronization scope and access.2099r_sync = D3D12_BARRIER_SYNC_ALL;2100r_access = D3D12_BARRIER_ACCESS_COMMON;2101}2102}2103}21042105static D3D12_BARRIER_LAYOUT _rd_texture_layout_to_d3d12_barrier_layout(RDD::TextureLayout p_texture_layout) {2106switch (p_texture_layout) {2107case RDD::TEXTURE_LAYOUT_UNDEFINED:2108return D3D12_BARRIER_LAYOUT_UNDEFINED;2109case RDD::TEXTURE_LAYOUT_GENERAL:2110return D3D12_BARRIER_LAYOUT_COMMON;2111case RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL:2112return D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS;2113case RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:2114return D3D12_BARRIER_LAYOUT_RENDER_TARGET;2115case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:2116return D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE;2117case RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:2118return D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ;2119case RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:2120return D3D12_BARRIER_LAYOUT_SHADER_RESOURCE;2121case RDD::TEXTURE_LAYOUT_COPY_SRC_OPTIMAL:2122return D3D12_BARRIER_LAYOUT_COPY_SOURCE;2123case RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL:2124return D3D12_BARRIER_LAYOUT_COPY_DEST;2125case RDD::TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL:2126return D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE;2127case RDD::TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL:2128return D3D12_BARRIER_LAYOUT_RESOLVE_DEST;2129case RDD::TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL:2130return D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE;2131case RDD::TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL:2132DEV_ASSERT(false && "Fragment density maps are not supported in D3D12.");2133return D3D12_BARRIER_LAYOUT_UNDEFINED;2134default:2135DEV_ASSERT(false && "Unknown texture layout.");2136return D3D12_BARRIER_LAYOUT_UNDEFINED;2137}2138}21392140void RenderingDeviceDriverD3D12::command_pipeline_barrier(CommandBufferID p_cmd_buffer,2141BitField<PipelineStageBits> p_src_stages,2142BitField<PipelineStageBits> p_dst_stages,2143VectorView<RDD::MemoryBarrier> 
p_memory_barriers,2144VectorView<RDD::BufferBarrier> p_buffer_barriers,2145VectorView<RDD::TextureBarrier> p_texture_barriers) {2146if (!barrier_capabilities.enhanced_barriers_supported) {2147// Enhanced barriers are a requirement for this function.2148return;2149}21502151if (p_memory_barriers.size() == 0 && p_buffer_barriers.size() == 0 && p_texture_barriers.size() == 0) {2152// At least one barrier must be present in the arguments.2153return;2154}21552156// The command list must support the required interface.2157const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)(p_cmd_buffer.id);2158ID3D12GraphicsCommandList7 *cmd_list_7 = nullptr;2159HRESULT res = cmd_buf_info->cmd_list->QueryInterface(IID_PPV_ARGS(&cmd_list_7));2160ERR_FAIL_COND(FAILED(res));21612162// Convert the RDD barriers to D3D12 enhanced barriers.2163thread_local LocalVector<D3D12_GLOBAL_BARRIER> global_barriers;2164thread_local LocalVector<D3D12_BUFFER_BARRIER> buffer_barriers;2165thread_local LocalVector<D3D12_TEXTURE_BARRIER> texture_barriers;2166global_barriers.clear();2167buffer_barriers.clear();2168texture_barriers.clear();21692170D3D12_GLOBAL_BARRIER global_barrier = {};2171for (uint32_t i = 0; i < p_memory_barriers.size(); i++) {2172const MemoryBarrier &memory_barrier = p_memory_barriers[i];2173_rd_stages_and_access_to_d3d12(p_src_stages, RDD::TEXTURE_LAYOUT_MAX, memory_barrier.src_access, global_barrier.SyncBefore, global_barrier.AccessBefore);2174_rd_stages_and_access_to_d3d12(p_dst_stages, RDD::TEXTURE_LAYOUT_MAX, memory_barrier.dst_access, global_barrier.SyncAfter, global_barrier.AccessAfter);2175global_barriers.push_back(global_barrier);2176}21772178D3D12_BUFFER_BARRIER buffer_barrier_d3d12 = {};2179buffer_barrier_d3d12.Offset = 0;2180buffer_barrier_d3d12.Size = UINT64_MAX; // The specification says this must be the size of the buffer barrier.2181for (uint32_t i = 0; i < p_buffer_barriers.size(); i++) {2182const BufferBarrier &buffer_barrier_rd = 
p_buffer_barriers[i];2183const BufferInfo *buffer_info = (const BufferInfo *)(buffer_barrier_rd.buffer.id);2184_rd_stages_and_access_to_d3d12(p_src_stages, RDD::TEXTURE_LAYOUT_MAX, buffer_barrier_rd.src_access, buffer_barrier_d3d12.SyncBefore, buffer_barrier_d3d12.AccessBefore);2185_rd_stages_and_access_to_d3d12(p_dst_stages, RDD::TEXTURE_LAYOUT_MAX, buffer_barrier_rd.dst_access, buffer_barrier_d3d12.SyncAfter, buffer_barrier_d3d12.AccessAfter);2186buffer_barrier_d3d12.pResource = buffer_info->resource;2187buffer_barriers.push_back(buffer_barrier_d3d12);2188}21892190D3D12_TEXTURE_BARRIER texture_barrier_d3d12 = {};2191for (uint32_t i = 0; i < p_texture_barriers.size(); i++) {2192const TextureBarrier &texture_barrier_rd = p_texture_barriers[i];2193const TextureInfo *texture_info = (const TextureInfo *)(texture_barrier_rd.texture.id);2194if (texture_info->main_texture) {2195texture_info = texture_info->main_texture;2196}2197// Textures created for simultaneous access do not need explicit transitions.2198if (texture_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS) {2199continue;2200}2201_rd_stages_and_access_to_d3d12(p_src_stages, texture_barrier_rd.prev_layout, texture_barrier_rd.src_access, texture_barrier_d3d12.SyncBefore, texture_barrier_d3d12.AccessBefore);2202_rd_stages_and_access_to_d3d12(p_dst_stages, texture_barrier_rd.next_layout, texture_barrier_rd.dst_access, texture_barrier_d3d12.SyncAfter, texture_barrier_d3d12.AccessAfter);2203texture_barrier_d3d12.LayoutBefore = _rd_texture_layout_to_d3d12_barrier_layout(texture_barrier_rd.prev_layout);2204texture_barrier_d3d12.LayoutAfter = _rd_texture_layout_to_d3d12_barrier_layout(texture_barrier_rd.next_layout);2205texture_barrier_d3d12.pResource = texture_info->resource;2206if (texture_barrier_rd.subresources.mipmap_count == texture_info->mipmaps && texture_barrier_rd.subresources.layer_count == texture_info->layers) {2207// So, all resources. 
Then, let's be explicit about it so D3D12 doesn't think2208// we are dealing with a subset of subresources.2209texture_barrier_d3d12.Subresources.IndexOrFirstMipLevel = 0xffffffff;2210texture_barrier_d3d12.Subresources.NumMipLevels = 0;2211// Because NumMipLevels == 0, all the other fields are ignored by D3D12.2212} else {2213texture_barrier_d3d12.Subresources.IndexOrFirstMipLevel = texture_barrier_rd.subresources.base_mipmap;2214texture_barrier_d3d12.Subresources.NumMipLevels = texture_barrier_rd.subresources.mipmap_count;2215texture_barrier_d3d12.Subresources.FirstArraySlice = texture_barrier_rd.subresources.base_layer;2216texture_barrier_d3d12.Subresources.NumArraySlices = texture_barrier_rd.subresources.layer_count;2217texture_barrier_d3d12.Subresources.FirstPlane = _compute_plane_slice(texture_info->format, texture_barrier_rd.subresources.aspect);2218texture_barrier_d3d12.Subresources.NumPlanes = format_get_plane_count(texture_info->format);2219}2220texture_barrier_d3d12.Flags = (texture_barrier_rd.prev_layout == RDD::TEXTURE_LAYOUT_UNDEFINED) ? 
D3D12_TEXTURE_BARRIER_FLAG_DISCARD : D3D12_TEXTURE_BARRIER_FLAG_NONE;2221texture_barriers.push_back(texture_barrier_d3d12);2222}22232224// Define the barrier groups and execute.22252226D3D12_BARRIER_GROUP barrier_groups[3] = {};2227uint32_t barrier_groups_count = 0;22282229if (!global_barriers.is_empty()) {2230D3D12_BARRIER_GROUP &barrier_group = barrier_groups[barrier_groups_count++];2231barrier_group.Type = D3D12_BARRIER_TYPE_GLOBAL;2232barrier_group.NumBarriers = global_barriers.size();2233barrier_group.pGlobalBarriers = global_barriers.ptr();2234}22352236if (!buffer_barriers.is_empty()) {2237D3D12_BARRIER_GROUP &barrier_group = barrier_groups[barrier_groups_count++];2238barrier_group.Type = D3D12_BARRIER_TYPE_BUFFER;2239barrier_group.NumBarriers = buffer_barriers.size();2240barrier_group.pBufferBarriers = buffer_barriers.ptr();2241}22422243if (!texture_barriers.is_empty()) {2244D3D12_BARRIER_GROUP &barrier_group = barrier_groups[barrier_groups_count++];2245barrier_group.Type = D3D12_BARRIER_TYPE_TEXTURE;2246barrier_group.NumBarriers = texture_barriers.size();2247barrier_group.pTextureBarriers = texture_barriers.ptr();2248}22492250if (barrier_groups_count) {2251cmd_list_7->Barrier(barrier_groups_count, barrier_groups);2252}2253}22542255/****************/2256/**** FENCES ****/2257/****************/22582259RDD::FenceID RenderingDeviceDriverD3D12::fence_create() {2260ComPtr<ID3D12Fence> d3d_fence;2261HRESULT res = device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(d3d_fence.GetAddressOf()));2262ERR_FAIL_COND_V(!SUCCEEDED(res), FenceID());22632264HANDLE event_handle = CreateEvent(nullptr, FALSE, FALSE, nullptr);2265ERR_FAIL_NULL_V(event_handle, FenceID());22662267FenceInfo *fence = memnew(FenceInfo);2268fence->d3d_fence = d3d_fence;2269fence->event_handle = event_handle;2270return FenceID(fence);2271}22722273Error RenderingDeviceDriverD3D12::fence_wait(FenceID p_fence) {2274FenceInfo *fence = (FenceInfo 
*)(p_fence.id);2275fence->d3d_fence->SetEventOnCompletion(fence->fence_value, fence->event_handle);2276DWORD res = WaitForSingleObjectEx(fence->event_handle, INFINITE, FALSE);2277#ifdef PIX_ENABLED2278PIXNotifyWakeFromFenceSignal(fence->event_handle);2279#endif22802281return (res == WAIT_FAILED) ? FAILED : OK;2282}22832284void RenderingDeviceDriverD3D12::fence_free(FenceID p_fence) {2285FenceInfo *fence = (FenceInfo *)(p_fence.id);2286CloseHandle(fence->event_handle);2287memdelete(fence);2288}22892290/********************/2291/**** SEMAPHORES ****/2292/********************/22932294RDD::SemaphoreID RenderingDeviceDriverD3D12::semaphore_create() {2295ComPtr<ID3D12Fence> d3d_fence;2296HRESULT res = device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(d3d_fence.GetAddressOf()));2297ERR_FAIL_COND_V(!SUCCEEDED(res), SemaphoreID());22982299SemaphoreInfo *semaphore = memnew(SemaphoreInfo);2300semaphore->d3d_fence = d3d_fence;2301return SemaphoreID(semaphore);2302}23032304void RenderingDeviceDriverD3D12::semaphore_free(SemaphoreID p_semaphore) {2305SemaphoreInfo *semaphore = (SemaphoreInfo *)(p_semaphore.id);2306memdelete(semaphore);2307}23082309/******************/2310/**** COMMANDS ****/2311/******************/23122313// ----- QUEUE FAMILY -----23142315RDD::CommandQueueFamilyID RenderingDeviceDriverD3D12::command_queue_family_get(BitField<CommandQueueFamilyBits> p_cmd_queue_family_bits, RenderingContextDriver::SurfaceID p_surface) {2316// Return the command list type encoded plus one so zero is an invalid value.2317// The only ones that support presenting to a surface are direct queues.2318if (p_cmd_queue_family_bits.has_flag(COMMAND_QUEUE_FAMILY_GRAPHICS_BIT) || (p_surface != 0)) {2319return CommandQueueFamilyID(D3D12_COMMAND_LIST_TYPE_DIRECT + 1);2320} else if (p_cmd_queue_family_bits.has_flag(COMMAND_QUEUE_FAMILY_COMPUTE_BIT)) {2321return CommandQueueFamilyID(D3D12_COMMAND_LIST_TYPE_COMPUTE + 1);2322} else if 
(p_cmd_queue_family_bits.has_flag(COMMAND_QUEUE_FAMILY_TRANSFER_BIT)) {2323return CommandQueueFamilyID(D3D12_COMMAND_LIST_TYPE_COPY + 1);2324} else {2325return CommandQueueFamilyID();2326}2327}23282329// ----- QUEUE -----23302331RDD::CommandQueueID RenderingDeviceDriverD3D12::command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue) {2332ComPtr<ID3D12CommandQueue> d3d_queue;2333D3D12_COMMAND_QUEUE_DESC queue_desc = {};2334queue_desc.Type = (D3D12_COMMAND_LIST_TYPE)(p_cmd_queue_family.id - 1);2335HRESULT res = device->CreateCommandQueue(&queue_desc, IID_PPV_ARGS(d3d_queue.GetAddressOf()));2336ERR_FAIL_COND_V(!SUCCEEDED(res), CommandQueueID());23372338if (p_identify_as_main_queue && D3D12Hooks::get_singleton() != nullptr) {2339D3D12Hooks::get_singleton()->set_command_queue(d3d_queue.Get());2340}23412342CommandQueueInfo *command_queue = memnew(CommandQueueInfo);2343command_queue->d3d_queue = d3d_queue;2344return CommandQueueID(command_queue);2345}23462347Error RenderingDeviceDriverD3D12::command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView<SemaphoreID> p_wait_semaphores, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_cmd_semaphores, FenceID p_cmd_fence, VectorView<SwapChainID> p_swap_chains) {2348CommandQueueInfo *command_queue = (CommandQueueInfo *)(p_cmd_queue.id);2349for (uint32_t i = 0; i < p_wait_semaphores.size(); i++) {2350const SemaphoreInfo *semaphore = (const SemaphoreInfo *)(p_wait_semaphores[i].id);2351command_queue->d3d_queue->Wait(semaphore->d3d_fence.Get(), semaphore->fence_value);2352}23532354if (p_cmd_buffers.size() > 0) {2355thread_local LocalVector<ID3D12CommandList *> command_lists;2356command_lists.resize(p_cmd_buffers.size());2357for (uint32_t i = 0; i < p_cmd_buffers.size(); i++) {2358const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)(p_cmd_buffers[i].id);2359command_lists[i] = 
cmd_buf_info->cmd_list.Get();2360}23612362command_queue->d3d_queue->ExecuteCommandLists(command_lists.size(), command_lists.ptr());23632364for (uint32_t i = 0; i < p_cmd_semaphores.size(); i++) {2365SemaphoreInfo *semaphore = (SemaphoreInfo *)(p_cmd_semaphores[i].id);2366semaphore->fence_value++;2367command_queue->d3d_queue->Signal(semaphore->d3d_fence.Get(), semaphore->fence_value);2368}23692370if (p_cmd_fence) {2371FenceInfo *fence = (FenceInfo *)(p_cmd_fence.id);2372fence->fence_value++;2373command_queue->d3d_queue->Signal(fence->d3d_fence.Get(), fence->fence_value);2374}2375}23762377HRESULT res;2378bool any_present_failed = false;2379for (uint32_t i = 0; i < p_swap_chains.size(); i++) {2380SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id);2381res = swap_chain->d3d_swap_chain->Present(swap_chain->sync_interval, swap_chain->present_flags);2382if (!SUCCEEDED(res)) {2383print_verbose(vformat("D3D12: Presenting swapchain failed with error 0x%08ux.", (uint64_t)res));2384any_present_failed = true;2385}2386}23872388return any_present_failed ? 
FAILED : OK;2389}23902391void RenderingDeviceDriverD3D12::command_queue_free(CommandQueueID p_cmd_queue) {2392CommandQueueInfo *command_queue = (CommandQueueInfo *)(p_cmd_queue.id);2393memdelete(command_queue);2394}23952396// ----- POOL -----23972398RDD::CommandPoolID RenderingDeviceDriverD3D12::command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) {2399CommandPoolInfo *command_pool = memnew(CommandPoolInfo);2400command_pool->queue_family = p_cmd_queue_family;2401command_pool->buffer_type = p_cmd_buffer_type;2402return CommandPoolID(command_pool);2403}24042405bool RenderingDeviceDriverD3D12::command_pool_reset(CommandPoolID p_cmd_pool) {2406return true;2407}24082409void RenderingDeviceDriverD3D12::command_pool_free(CommandPoolID p_cmd_pool) {2410CommandPoolInfo *command_pool = (CommandPoolInfo *)(p_cmd_pool.id);24112412// Destroy all command buffers associated with this command pool, mirroring Vulkan's behavior.2413SelfList<CommandBufferInfo> *cmd_buf_elem = command_pool->command_buffers.first();2414while (cmd_buf_elem != nullptr) {2415CommandBufferInfo *cmd_buf_info = cmd_buf_elem->self();2416cmd_buf_elem = cmd_buf_elem->next();24172418cmd_buf_info->cmd_list.Reset();2419cmd_buf_info->cmd_allocator.Reset();24202421VersatileResource::free(resources_allocator, cmd_buf_info);2422}24232424memdelete(command_pool);2425}24262427// ----- BUFFER -----24282429RDD::CommandBufferID RenderingDeviceDriverD3D12::command_buffer_create(CommandPoolID p_cmd_pool) {2430DEV_ASSERT(p_cmd_pool);24312432CommandPoolInfo *command_pool = (CommandPoolInfo *)(p_cmd_pool.id);2433D3D12_COMMAND_LIST_TYPE list_type;2434if (command_pool->buffer_type == COMMAND_BUFFER_TYPE_SECONDARY) {2435list_type = D3D12_COMMAND_LIST_TYPE_BUNDLE;2436} else {2437list_type = D3D12_COMMAND_LIST_TYPE(command_pool->queue_family.id - 1);2438}24392440ID3D12CommandAllocator *cmd_allocator = nullptr;2441{2442HRESULT res = device->CreateCommandAllocator(list_type, 
IID_PPV_ARGS(&cmd_allocator));2443ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), CommandBufferID(), "CreateCommandAllocator failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");2444}24452446ID3D12GraphicsCommandList *cmd_list = nullptr;2447{2448ComPtr<ID3D12Device4> device_4;2449device->QueryInterface(device_4.GetAddressOf());2450HRESULT res = E_FAIL;2451if (device_4) {2452res = device_4->CreateCommandList1(0, list_type, D3D12_COMMAND_LIST_FLAG_NONE, IID_PPV_ARGS(&cmd_list));2453} else {2454res = device->CreateCommandList(0, list_type, cmd_allocator, nullptr, IID_PPV_ARGS(&cmd_list));2455}2456ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), CommandBufferID(), "CreateCommandList failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");2457if (!device_4) {2458cmd_list->Close();2459}2460}24612462// Bookkeep24632464CommandBufferInfo *cmd_buf_info = VersatileResource::allocate<CommandBufferInfo>(resources_allocator);2465cmd_buf_info->cmd_allocator = cmd_allocator;2466cmd_buf_info->cmd_list = cmd_list;24672468// Add this command buffer to the command pool's list of command buffers.2469command_pool->command_buffers.add(&cmd_buf_info->command_buffer_info_elem);24702471return CommandBufferID(cmd_buf_info);2472}24732474bool RenderingDeviceDriverD3D12::command_buffer_begin(CommandBufferID p_cmd_buffer) {2475const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;2476HRESULT res = cmd_buf_info->cmd_allocator->Reset();2477ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), false, "Reset failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");2478res = cmd_buf_info->cmd_list->Reset(cmd_buf_info->cmd_allocator.Get(), nullptr);2479ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), false, "Reset failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");2480return true;2481}24822483bool RenderingDeviceDriverD3D12::command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) {2484const CommandBufferInfo 
*cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;2485HRESULT res = cmd_buf_info->cmd_allocator->Reset();2486ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), false, "Reset failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");2487res = cmd_buf_info->cmd_list->Reset(cmd_buf_info->cmd_allocator.Get(), nullptr);2488ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), false, "Reset failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");2489return true;2490}24912492void RenderingDeviceDriverD3D12::command_buffer_end(CommandBufferID p_cmd_buffer) {2493CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;2494HRESULT res = cmd_buf_info->cmd_list->Close();24952496ERR_FAIL_COND_MSG(!SUCCEEDED(res), "Close failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");2497cmd_buf_info->graphics_pso = nullptr;2498cmd_buf_info->graphics_root_signature_crc = 0;2499cmd_buf_info->compute_pso = nullptr;2500cmd_buf_info->compute_root_signature_crc = 0;2501cmd_buf_info->descriptor_heaps_set = false;2502}25032504void RenderingDeviceDriverD3D12::command_buffer_execute_secondary(CommandBufferID p_cmd_buffer, VectorView<CommandBufferID> p_secondary_cmd_buffers) {2505const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;2506for (uint32_t i = 0; i < p_secondary_cmd_buffers.size(); i++) {2507const CommandBufferInfo *secondary_cb_info = (const CommandBufferInfo *)p_secondary_cmd_buffers[i].id;2508cmd_buf_info->cmd_list->ExecuteBundle(secondary_cb_info->cmd_list.Get());2509}2510}25112512/********************/2513/**** SWAP CHAIN ****/2514/********************/25152516void RenderingDeviceDriverD3D12::_swap_chain_release(SwapChain *p_swap_chain) {2517_swap_chain_release_buffers(p_swap_chain);25182519p_swap_chain->d3d_swap_chain.Reset();2520}25212522void RenderingDeviceDriverD3D12::_swap_chain_release_buffers(SwapChain *p_swap_chain) {2523for (ID3D12Resource *render_target : p_swap_chain->render_targets) 
{2524render_target->Release();2525}25262527p_swap_chain->render_targets.clear();2528p_swap_chain->render_targets_info.clear();25292530for (RDD::FramebufferID framebuffer : p_swap_chain->framebuffers) {2531framebuffer_free(framebuffer);2532}25332534p_swap_chain->framebuffers.clear();2535}25362537RDD::SwapChainID RenderingDeviceDriverD3D12::swap_chain_create(RenderingContextDriver::SurfaceID p_surface) {2538// Create the render pass that will be used to draw to the swap chain's framebuffers.2539RDD::Attachment attachment;2540attachment.format = DATA_FORMAT_R8G8B8A8_UNORM;2541attachment.samples = RDD::TEXTURE_SAMPLES_1;2542attachment.load_op = RDD::ATTACHMENT_LOAD_OP_CLEAR;2543attachment.store_op = RDD::ATTACHMENT_STORE_OP_STORE;25442545RDD::Subpass subpass;2546RDD::AttachmentReference color_ref;2547color_ref.attachment = 0;2548color_ref.aspect.set_flag(RDD::TEXTURE_ASPECT_COLOR_BIT);2549subpass.color_references.push_back(color_ref);25502551RenderPassID render_pass = render_pass_create(attachment, subpass, {}, 1, AttachmentReference());2552ERR_FAIL_COND_V(!render_pass, SwapChainID());25532554// Create the empty swap chain until it is resized.2555SwapChain *swap_chain = memnew(SwapChain);2556swap_chain->surface = p_surface;2557swap_chain->data_format = attachment.format;2558swap_chain->render_pass = render_pass;2559return SwapChainID(swap_chain);2560}25612562Error RenderingDeviceDriverD3D12::swap_chain_resize(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, uint32_t p_desired_framebuffer_count) {2563DEV_ASSERT(p_cmd_queue.id != 0);2564DEV_ASSERT(p_swap_chain.id != 0);25652566CommandQueueInfo *command_queue = (CommandQueueInfo *)(p_cmd_queue.id);2567SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id);2568RenderingContextDriverD3D12::Surface *surface = (RenderingContextDriverD3D12::Surface *)(swap_chain->surface);2569if (surface->width == 0 || surface->height == 0) {2570// Very likely the window is minimized, don't create a swap chain.2571return 
ERR_SKIP;2572}25732574HRESULT res;2575const bool is_tearing_supported = context_driver->get_tearing_supported();2576UINT sync_interval = 0;2577UINT present_flags = 0;2578UINT creation_flags = 0;2579switch (surface->vsync_mode) {2580case DisplayServer::VSYNC_MAILBOX: {2581sync_interval = 1;2582present_flags = DXGI_PRESENT_RESTART;2583} break;2584case DisplayServer::VSYNC_ENABLED: {2585sync_interval = 1;2586present_flags = 0;2587} break;2588case DisplayServer::VSYNC_DISABLED: {2589sync_interval = 0;2590present_flags = is_tearing_supported ? DXGI_PRESENT_ALLOW_TEARING : 0;2591creation_flags = is_tearing_supported ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0;2592} break;2593case DisplayServer::VSYNC_ADAPTIVE: // Unsupported.2594default:2595sync_interval = 1;2596present_flags = 0;2597break;2598}25992600if (swap_chain->d3d_swap_chain != nullptr && creation_flags != swap_chain->creation_flags) {2601// The swap chain must be recreated if the creation flags are different.2602_swap_chain_release(swap_chain);2603}26042605DXGI_SWAP_CHAIN_DESC1 swap_chain_desc = {};2606if (swap_chain->d3d_swap_chain != nullptr) {2607_swap_chain_release_buffers(swap_chain);2608res = swap_chain->d3d_swap_chain->ResizeBuffers(p_desired_framebuffer_count, surface->width, surface->height, DXGI_FORMAT_UNKNOWN, creation_flags);2609ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_UNAVAILABLE);2610} else {2611swap_chain_desc.BufferCount = p_desired_framebuffer_count;2612swap_chain_desc.Format = RD_TO_D3D12_FORMAT[swap_chain->data_format].general_format;2613swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;2614swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;2615swap_chain_desc.SampleDesc.Count = 1;2616swap_chain_desc.Flags = creation_flags;2617swap_chain_desc.Scaling = DXGI_SCALING_STRETCH;2618if (OS::get_singleton()->is_layered_allowed()) {2619swap_chain_desc.AlphaMode = DXGI_ALPHA_MODE_PREMULTIPLIED;2620has_comp_alpha[(uint64_t)p_cmd_queue.id] = true;2621} else {2622swap_chain_desc.AlphaMode = 
DXGI_ALPHA_MODE_IGNORE;2623has_comp_alpha[(uint64_t)p_cmd_queue.id] = false;2624}2625swap_chain_desc.Width = surface->width;2626swap_chain_desc.Height = surface->height;26272628ComPtr<IDXGISwapChain1> swap_chain_1;2629#ifdef DCOMP_ENABLED2630res = context_driver->dxgi_factory_get()->CreateSwapChainForComposition(command_queue->d3d_queue.Get(), &swap_chain_desc, nullptr, swap_chain_1.GetAddressOf());2631#else2632res = context_driver->dxgi_factory_get()->CreateSwapChainForHwnd(command_queue->d3d_queue.Get(), surface->hwnd, &swap_chain_desc, nullptr, nullptr, swap_chain_1.GetAddressOf());2633if (!SUCCEEDED(res) && swap_chain_desc.AlphaMode != DXGI_ALPHA_MODE_IGNORE) {2634swap_chain_desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE;2635has_comp_alpha[(uint64_t)p_cmd_queue.id] = false;2636res = context_driver->dxgi_factory_get()->CreateSwapChainForHwnd(command_queue->d3d_queue.Get(), surface->hwnd, &swap_chain_desc, nullptr, nullptr, swap_chain_1.GetAddressOf());2637}2638#endif2639ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);26402641swap_chain_1.As(&swap_chain->d3d_swap_chain);2642ERR_FAIL_NULL_V(swap_chain->d3d_swap_chain, ERR_CANT_CREATE);26432644res = context_driver->dxgi_factory_get()->MakeWindowAssociation(surface->hwnd, DXGI_MWA_NO_ALT_ENTER | DXGI_MWA_NO_WINDOW_CHANGES);2645ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);2646}26472648#ifdef DCOMP_ENABLED2649if (surface->composition_device.Get() == nullptr) {2650using PFN_DCompositionCreateDevice = HRESULT(WINAPI *)(IDXGIDevice *, REFIID, void **);2651PFN_DCompositionCreateDevice pfn_DCompositionCreateDevice = (PFN_DCompositionCreateDevice)(void *)GetProcAddress(context_driver->lib_dcomp, "DCompositionCreateDevice");2652ERR_FAIL_NULL_V(pfn_DCompositionCreateDevice, ERR_CANT_CREATE);26532654res = pfn_DCompositionCreateDevice(nullptr, IID_PPV_ARGS(surface->composition_device.GetAddressOf()));2655ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);26562657res = 
surface->composition_device->CreateTargetForHwnd(surface->hwnd, TRUE, surface->composition_target.GetAddressOf());2658ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);26592660res = surface->composition_device->CreateVisual(surface->composition_visual.GetAddressOf());2661ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);26622663res = surface->composition_visual->SetContent(swap_chain->d3d_swap_chain.Get());2664ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);26652666res = surface->composition_target->SetRoot(surface->composition_visual.Get());2667ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);26682669res = surface->composition_device->Commit();2670ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);2671} else {2672res = surface->composition_visual->SetContent(swap_chain->d3d_swap_chain.Get());2673ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);26742675res = surface->composition_device->Commit();2676ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);2677}2678#endif26792680res = swap_chain->d3d_swap_chain->GetDesc1(&swap_chain_desc);2681ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);2682ERR_FAIL_COND_V(swap_chain_desc.BufferCount == 0, ERR_CANT_CREATE);26832684surface->width = swap_chain_desc.Width;2685surface->height = swap_chain_desc.Height;26862687swap_chain->creation_flags = creation_flags;2688swap_chain->sync_interval = sync_interval;2689swap_chain->present_flags = present_flags;26902691// Retrieve the render targets associated to the swap chain and recreate the framebuffers. 
The following code2692// relies on the address of the elements remaining static when new elements are inserted, so the container must2693// follow this restriction when reserving the right amount of elements beforehand.2694swap_chain->render_targets.reserve(swap_chain_desc.BufferCount);2695swap_chain->render_targets_info.reserve(swap_chain_desc.BufferCount);2696swap_chain->framebuffers.reserve(swap_chain_desc.BufferCount);26972698for (uint32_t i = 0; i < swap_chain_desc.BufferCount; i++) {2699// Retrieve the resource corresponding to the swap chain's buffer.2700ID3D12Resource *render_target = nullptr;2701res = swap_chain->d3d_swap_chain->GetBuffer(i, IID_PPV_ARGS(&render_target));2702ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);2703swap_chain->render_targets.push_back(render_target);27042705// Create texture information for the framebuffer to reference the resource. Since the states pointer must2706// reference an address of the element itself, we must insert it first and then modify it.2707swap_chain->render_targets_info.push_back(TextureInfo());2708TextureInfo &texture_info = swap_chain->render_targets_info[i];2709texture_info.owner_info.states.subresource_states.push_back(D3D12_RESOURCE_STATE_PRESENT);2710texture_info.states_ptr = &texture_info.owner_info.states;2711texture_info.format = swap_chain->data_format;2712#if defined(_MSC_VER) || !defined(_WIN32)2713texture_info.desc = CD3DX12_RESOURCE_DESC(render_target->GetDesc());2714#else2715render_target->GetDesc(&texture_info.desc);2716#endif2717texture_info.layers = 1;2718texture_info.mipmaps = 1;2719texture_info.resource = render_target;2720texture_info.view_descs.srv.Format = texture_info.desc.Format;2721texture_info.view_descs.srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;27222723// Create the framebuffer for this buffer.2724FramebufferID framebuffer = _framebuffer_create(swap_chain->render_pass, TextureID(&swap_chain->render_targets_info[i]), swap_chain_desc.Width, swap_chain_desc.Height, 
true);2725ERR_FAIL_COND_V(!framebuffer, ERR_CANT_CREATE);2726swap_chain->framebuffers.push_back(framebuffer);2727}27282729// Once everything's been created correctly, indicate the surface no longer needs to be resized.2730context_driver->surface_set_needs_resize(swap_chain->surface, false);27312732return OK;2733}27342735RDD::FramebufferID RenderingDeviceDriverD3D12::swap_chain_acquire_framebuffer(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, bool &r_resize_required) {2736DEV_ASSERT(p_swap_chain.id != 0);27372738const SwapChain *swap_chain = (const SwapChain *)(p_swap_chain.id);2739if (context_driver->surface_get_needs_resize(swap_chain->surface)) {2740r_resize_required = true;2741return FramebufferID();2742}27432744const uint32_t buffer_index = swap_chain->d3d_swap_chain->GetCurrentBackBufferIndex();2745DEV_ASSERT(buffer_index < swap_chain->framebuffers.size());2746return swap_chain->framebuffers[buffer_index];2747}27482749RDD::RenderPassID RenderingDeviceDriverD3D12::swap_chain_get_render_pass(SwapChainID p_swap_chain) {2750const SwapChain *swap_chain = (const SwapChain *)(p_swap_chain.id);2751return swap_chain->render_pass;2752}27532754RDD::DataFormat RenderingDeviceDriverD3D12::swap_chain_get_format(SwapChainID p_swap_chain) {2755const SwapChain *swap_chain = (const SwapChain *)(p_swap_chain.id);2756return swap_chain->data_format;2757}27582759void RenderingDeviceDriverD3D12::swap_chain_free(SwapChainID p_swap_chain) {2760SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id);2761_swap_chain_release(swap_chain);2762render_pass_free(swap_chain->render_pass);2763memdelete(swap_chain);2764}27652766/*********************/2767/**** FRAMEBUFFER ****/2768/*********************/27692770D3D12_RENDER_TARGET_VIEW_DESC RenderingDeviceDriverD3D12::_make_rtv_for_texture(const TextureInfo *p_texture_info, uint32_t p_mipmap_offset, uint32_t p_layer_offset, uint32_t p_layers, bool p_add_bases) {2771D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = {};2772rtv_desc.Format = 
p_texture_info->view_descs.srv.Format;27732774switch (p_texture_info->view_descs.srv.ViewDimension) {2775case D3D12_SRV_DIMENSION_TEXTURE1D: {2776rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1D;2777rtv_desc.Texture1D.MipSlice = p_texture_info->base_mip + p_mipmap_offset;2778} break;2779case D3D12_SRV_DIMENSION_TEXTURE1DARRAY: {2780rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1DARRAY;2781rtv_desc.Texture1DArray.MipSlice = (p_add_bases ? p_texture_info->base_mip : 0) + p_mipmap_offset;2782rtv_desc.Texture1DArray.FirstArraySlice = (p_add_bases ? p_texture_info->base_layer : 0) + p_layer_offset;2783rtv_desc.Texture1DArray.ArraySize = p_layers == UINT32_MAX ? p_texture_info->view_descs.srv.Texture1DArray.ArraySize : p_layers;2784} break;2785case D3D12_SRV_DIMENSION_TEXTURE2D: {2786rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;2787rtv_desc.Texture2D.MipSlice = (p_add_bases ? p_texture_info->base_mip : 0) + p_mipmap_offset;2788rtv_desc.Texture2D.PlaneSlice = p_texture_info->view_descs.srv.Texture2D.PlaneSlice;2789} break;2790case D3D12_SRV_DIMENSION_TEXTURE2DARRAY: {2791rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY;2792rtv_desc.Texture2DArray.MipSlice = (p_add_bases ? p_texture_info->base_mip : 0) + p_mipmap_offset;2793rtv_desc.Texture2DArray.FirstArraySlice = (p_add_bases ? p_texture_info->base_layer : 0) + p_layer_offset;2794rtv_desc.Texture2DArray.ArraySize = p_layers == UINT32_MAX ? p_texture_info->view_descs.srv.Texture2DArray.ArraySize : p_layers;2795rtv_desc.Texture2DArray.PlaneSlice = p_texture_info->view_descs.srv.Texture2DArray.PlaneSlice;2796} break;2797case D3D12_SRV_DIMENSION_TEXTURE2DMS: {2798rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMS;2799} break;2800case D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY: {2801rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY;2802rtv_desc.Texture2DMSArray.FirstArraySlice = (p_add_bases ? 
p_texture_info->base_layer : 0) + p_layer_offset;2803rtv_desc.Texture2DMSArray.ArraySize = p_layers == UINT32_MAX ? p_texture_info->view_descs.srv.Texture2DMSArray.ArraySize : p_layers;2804} break;2805case D3D12_SRV_DIMENSION_TEXTURE3D: {2806rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D;2807rtv_desc.Texture3D.MipSlice = p_texture_info->view_descs.srv.Texture3D.MostDetailedMip + p_mipmap_offset;2808rtv_desc.Texture3D.FirstWSlice = 0;2809rtv_desc.Texture3D.WSize = -1;2810} break;2811case D3D12_SRV_DIMENSION_TEXTURECUBE:2812case D3D12_SRV_DIMENSION_TEXTURECUBEARRAY: {2813rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY;2814rtv_desc.Texture2DArray.MipSlice = (p_add_bases ? p_texture_info->base_mip : 0) + p_mipmap_offset;2815rtv_desc.Texture2DArray.FirstArraySlice = (p_add_bases ? p_texture_info->base_layer : 0) + p_layer_offset;2816rtv_desc.Texture2DArray.ArraySize = p_layers == UINT32_MAX ? p_texture_info->layers : p_layers;2817rtv_desc.Texture2DArray.PlaneSlice = 0;2818} break;2819default: {2820DEV_ASSERT(false);2821}2822}28232824return rtv_desc;2825}28262827D3D12_UNORDERED_ACCESS_VIEW_DESC RenderingDeviceDriverD3D12::_make_ranged_uav_for_texture(const TextureInfo *p_texture_info, uint32_t p_mipmap_offset, uint32_t p_layer_offset, uint32_t p_layers, bool p_add_bases) {2828D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = p_texture_info->view_descs.uav;28292830uint32_t mip = (p_add_bases ? 
p_texture_info->base_mip : 0) + p_mipmap_offset;2831switch (p_texture_info->view_descs.uav.ViewDimension) {2832case D3D12_UAV_DIMENSION_TEXTURE1D: {2833uav_desc.Texture1DArray.MipSlice = mip;2834} break;2835case D3D12_UAV_DIMENSION_TEXTURE1DARRAY: {2836uav_desc.Texture1DArray.MipSlice = mip;2837uav_desc.Texture1DArray.FirstArraySlice = mip;2838uav_desc.Texture1DArray.ArraySize = p_layers;2839} break;2840case D3D12_UAV_DIMENSION_TEXTURE2D: {2841uav_desc.Texture2D.MipSlice = mip;2842} break;2843case D3D12_UAV_DIMENSION_TEXTURE2DARRAY: {2844uav_desc.Texture2DArray.MipSlice = mip;2845uav_desc.Texture2DArray.FirstArraySlice = (p_add_bases ? p_texture_info->base_layer : 0) + p_layer_offset;2846uav_desc.Texture2DArray.ArraySize = p_layers;2847} break;2848case D3D12_UAV_DIMENSION_TEXTURE3D: {2849uav_desc.Texture3D.MipSlice = mip;2850uav_desc.Texture3D.WSize >>= p_mipmap_offset;2851} break;2852default:2853break;2854}28552856return uav_desc;2857}28582859D3D12_DEPTH_STENCIL_VIEW_DESC RenderingDeviceDriverD3D12::_make_dsv_for_texture(const TextureInfo *p_texture_info) {2860D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc = {};2861dsv_desc.Format = RD_TO_D3D12_FORMAT[p_texture_info->format].dsv_format;2862dsv_desc.Flags = D3D12_DSV_FLAG_NONE;28632864switch (p_texture_info->view_descs.srv.ViewDimension) {2865case D3D12_SRV_DIMENSION_TEXTURE1D: {2866dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE1D;2867dsv_desc.Texture1D.MipSlice = p_texture_info->base_mip;2868} break;2869case D3D12_SRV_DIMENSION_TEXTURE1DARRAY: {2870dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE1DARRAY;2871dsv_desc.Texture1DArray.MipSlice = p_texture_info->base_mip;2872dsv_desc.Texture1DArray.FirstArraySlice = p_texture_info->base_layer;2873dsv_desc.Texture1DArray.ArraySize = p_texture_info->view_descs.srv.Texture1DArray.ArraySize;2874} break;2875case D3D12_SRV_DIMENSION_TEXTURE2D: {2876dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;2877dsv_desc.Texture2D.MipSlice = 
p_texture_info->view_descs.srv.Texture2D.MostDetailedMip;2878} break;2879case D3D12_SRV_DIMENSION_TEXTURE2DARRAY: {2880dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DARRAY;2881dsv_desc.Texture2DArray.MipSlice = p_texture_info->base_mip;2882dsv_desc.Texture2DArray.FirstArraySlice = p_texture_info->base_layer;2883dsv_desc.Texture2DArray.ArraySize = p_texture_info->view_descs.srv.Texture2DArray.ArraySize;2884} break;2885case D3D12_SRV_DIMENSION_TEXTURE2DMS: {2886dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMS;2887dsv_desc.Texture2DMS.UnusedField_NothingToDefine = p_texture_info->view_descs.srv.Texture2DMS.UnusedField_NothingToDefine;2888} break;2889case D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY: {2890dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY;2891dsv_desc.Texture2DMSArray.FirstArraySlice = p_texture_info->base_layer;2892dsv_desc.Texture2DMSArray.ArraySize = p_texture_info->view_descs.srv.Texture2DMSArray.ArraySize;2893} break;2894default: {2895DEV_ASSERT(false);2896}2897}28982899return dsv_desc;2900}29012902RDD::FramebufferID RenderingDeviceDriverD3D12::_framebuffer_create(RenderPassID p_render_pass, VectorView<TextureID> p_attachments, uint32_t p_width, uint32_t p_height, bool p_is_screen) {2903// Pre-bookkeep.2904FramebufferInfo *fb_info = VersatileResource::allocate<FramebufferInfo>(resources_allocator);2905fb_info->is_screen = p_is_screen;29062907const RenderPassInfo *pass_info = (const RenderPassInfo *)p_render_pass.id;29082909uint32_t num_color = 0;2910uint32_t num_depth_stencil = 0;2911for (uint32_t i = 0; i < p_attachments.size(); i++) {2912const TextureInfo *tex_info = (const TextureInfo *)p_attachments[i].id;2913if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) {2914num_color++;2915} else if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) {2916num_depth_stencil++;2917}2918}29192920uint32_t vrs_index = UINT32_MAX;2921for (const Subpass &E : pass_info->subpasses) {2922if 
(E.fragment_shading_rate_reference.attachment != AttachmentReference::UNUSED) {2923vrs_index = E.fragment_shading_rate_reference.attachment;2924}2925}29262927if (num_color) {2928Error err = fb_info->rtv_heap.allocate(device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_RTV, num_color, false);2929if (err) {2930VersatileResource::free(resources_allocator, fb_info);2931ERR_FAIL_V(FramebufferID());2932}2933}2934DescriptorsHeap::Walker rtv_heap_walker = fb_info->rtv_heap.make_walker();29352936if (num_depth_stencil) {2937Error err = fb_info->dsv_heap.allocate(device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_DSV, num_depth_stencil, false);2938if (err) {2939VersatileResource::free(resources_allocator, fb_info);2940ERR_FAIL_V(FramebufferID());2941}2942}2943DescriptorsHeap::Walker dsv_heap_walker = fb_info->dsv_heap.make_walker();29442945fb_info->attachments_handle_inds.resize(p_attachments.size());2946fb_info->attachments.reserve(num_color + num_depth_stencil);29472948uint32_t color_idx = 0;2949uint32_t depth_stencil_idx = 0;2950for (uint32_t i = 0; i < p_attachments.size(); i++) {2951const TextureInfo *tex_info = (const TextureInfo *)p_attachments[i].id;29522953if (fb_info->size.x == 0) {2954fb_info->size = Size2i(tex_info->desc.Width, tex_info->desc.Height);2955}29562957if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) {2958D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = _make_rtv_for_texture(tex_info, 0, 0, UINT32_MAX);2959device->CreateRenderTargetView(tex_info->resource, &rtv_desc, rtv_heap_walker.get_curr_cpu_handle());2960rtv_heap_walker.advance();29612962fb_info->attachments_handle_inds[i] = color_idx;2963fb_info->attachments.push_back(p_attachments[i]);2964color_idx++;2965} else if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) {2966D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc = _make_dsv_for_texture(tex_info);2967device->CreateDepthStencilView(tex_info->resource, &dsv_desc, 
dsv_heap_walker.get_curr_cpu_handle());2968dsv_heap_walker.advance();29692970fb_info->attachments_handle_inds[i] = depth_stencil_idx;2971fb_info->attachments.push_back(p_attachments[i]);2972depth_stencil_idx++;2973} else if (i == vrs_index) {2974fb_info->vrs_attachment = p_attachments[i];2975} else {2976DEV_ASSERT(false);2977}2978}29792980DEV_ASSERT(fb_info->attachments.size() == color_idx + depth_stencil_idx);2981DEV_ASSERT((fb_info->vrs_attachment.id != 0) == (vrs_index != UINT32_MAX));29822983DEV_ASSERT(rtv_heap_walker.is_at_eof());2984DEV_ASSERT(dsv_heap_walker.is_at_eof());29852986return FramebufferID(fb_info);2987}29882989RDD::FramebufferID RenderingDeviceDriverD3D12::framebuffer_create(RenderPassID p_render_pass, VectorView<TextureID> p_attachments, uint32_t p_width, uint32_t p_height) {2990return _framebuffer_create(p_render_pass, p_attachments, p_width, p_height, false);2991}29922993void RenderingDeviceDriverD3D12::framebuffer_free(FramebufferID p_framebuffer) {2994FramebufferInfo *fb_info = (FramebufferInfo *)p_framebuffer.id;2995VersatileResource::free(resources_allocator, fb_info);2996}29972998/****************/2999/**** SHADER ****/3000/****************/30013002bool RenderingDeviceDriverD3D12::_shader_apply_specialization_constants(3003const ShaderInfo *p_shader_info,3004VectorView<PipelineSpecializationConstant> p_specialization_constants,3005HashMap<ShaderStage, Vector<uint8_t>> &r_final_stages_bytecode) {3006// If something needs to be patched, COW will do the trick.3007r_final_stages_bytecode = p_shader_info->stages_bytecode;3008uint32_t stages_re_sign_mask = 0;3009for (uint32_t i = 0; i < p_specialization_constants.size(); i++) {3010const PipelineSpecializationConstant &psc = p_specialization_constants[i];3011if (!(p_shader_info->spirv_specialization_constants_ids_mask & (1 << psc.constant_id))) {3012// This SC wasn't even in the original SPIR-V shader.3013continue;3014}3015for (const ShaderInfo::SpecializationConstant &sc : 
p_shader_info->specialization_constants) {3016if (psc.constant_id == sc.constant_id) {3017if (psc.int_value != sc.int_value) {3018stages_re_sign_mask |= RenderingDXIL::patch_specialization_constant(psc.type, &psc.int_value, sc.stages_bit_offsets, r_final_stages_bytecode, false);3019}3020break;3021}3022}3023}3024// Re-sign patched stages.3025for (KeyValue<ShaderStage, Vector<uint8_t>> &E : r_final_stages_bytecode) {3026ShaderStage stage = E.key;3027if ((stages_re_sign_mask & (1 << stage))) {3028Vector<uint8_t> &bytecode = E.value;3029RenderingDXIL::sign_bytecode(stage, bytecode);3030}3031}30323033return true;3034}30353036RDD::ShaderID RenderingDeviceDriverD3D12::shader_create_from_container(const Ref<RenderingShaderContainer> &p_shader_container, const Vector<ImmutableSampler> &p_immutable_samplers) {3037ShaderReflection shader_refl = p_shader_container->get_shader_reflection();3038ShaderInfo shader_info_in;3039const RenderingShaderContainerD3D12 *shader_container_d3d12 = Object::cast_to<RenderingShaderContainerD3D12>(p_shader_container.ptr());3040ERR_FAIL_NULL_V_MSG(shader_container_d3d12, ShaderID(), "Shader container is not a recognized format.");30413042RenderingShaderContainerD3D12::ShaderReflectionD3D12 shader_refl_d3d12 = shader_container_d3d12->get_shader_reflection_d3d12();3043if (shader_refl_d3d12.dxil_push_constant_stages != 0) {3044shader_info_in.dxil_push_constant_size = shader_refl.push_constant_size;3045}30463047shader_info_in.spirv_specialization_constants_ids_mask = shader_refl_d3d12.spirv_specialization_constants_ids_mask;3048shader_info_in.nir_runtime_data_root_param_idx = shader_refl_d3d12.nir_runtime_data_root_param_idx;3049shader_info_in.is_compute = shader_refl.is_compute;30503051shader_info_in.sets.resize(shader_refl.uniform_sets.size());3052for (uint32_t i = 0; i < shader_info_in.sets.size(); i++) {3053shader_info_in.sets[i].bindings.resize(shader_refl.uniform_sets[i].size());3054for (uint32_t j = 0; j < 
shader_info_in.sets[i].bindings.size(); j++) {3055const ShaderUniform &uniform = shader_refl.uniform_sets[i][j];3056const RenderingShaderContainerD3D12::ReflectionBindingDataD3D12 &uniform_d3d12 = shader_refl_d3d12.reflection_binding_set_uniforms_d3d12[i][j];3057ShaderInfo::UniformBindingInfo &binding = shader_info_in.sets[i].bindings[j];3058binding.stages = uniform_d3d12.dxil_stages;3059binding.res_class = (ResourceClass)(uniform_d3d12.resource_class);3060binding.type = UniformType(uniform.type);3061binding.length = uniform.length;3062#ifdef DEV_ENABLED3063binding.writable = uniform.writable;3064#endif30653066static_assert(sizeof(ShaderInfo::UniformBindingInfo::root_sig_locations) == sizeof(RenderingShaderContainerD3D12::ReflectionBindingDataD3D12::root_signature_locations));3067memcpy((void *)&binding.root_sig_locations, (void *)&uniform_d3d12.root_signature_locations, sizeof(ShaderInfo::UniformBindingInfo::root_sig_locations));30683069if (binding.root_sig_locations.resource.root_param_idx != UINT32_MAX) {3070shader_info_in.sets[i].num_root_params.resources++;3071}3072if (binding.root_sig_locations.sampler.root_param_idx != UINT32_MAX) {3073shader_info_in.sets[i].num_root_params.samplers++;3074}3075}3076}30773078shader_info_in.specialization_constants.resize(shader_refl.specialization_constants.size());3079for (uint32_t i = 0; i < shader_info_in.specialization_constants.size(); i++) {3080ShaderInfo::SpecializationConstant &sc = shader_info_in.specialization_constants[i];3081const ShaderSpecializationConstant &src_sc = shader_refl.specialization_constants[i];3082const RenderingShaderContainerD3D12::ReflectionSpecializationDataD3D12 &src_sc_d3d12 = shader_refl_d3d12.reflection_specialization_data_d3d12[i];3083sc.constant_id = src_sc.constant_id;3084sc.int_value = src_sc.int_value;3085memcpy(sc.stages_bit_offsets, src_sc_d3d12.stages_bit_offsets, sizeof(sc.stages_bit_offsets));3086}30873088Vector<uint8_t> decompressed_code;3089for (uint32_t i = 0; i < 
shader_refl.stages_vector.size(); i++) {3090const RenderingShaderContainer::Shader &shader = p_shader_container->shaders[i];3091bool requires_decompression = (shader.code_decompressed_size > 0);3092if (requires_decompression) {3093decompressed_code.resize(shader.code_decompressed_size);3094bool decompressed = p_shader_container->decompress_code(shader.code_compressed_bytes.ptr(), shader.code_compressed_bytes.size(), shader.code_compression_flags, decompressed_code.ptrw(), decompressed_code.size());3095ERR_FAIL_COND_V_MSG(!decompressed, ShaderID(), vformat("Failed to decompress code on shader stage %s.", String(SHADER_STAGE_NAMES[shader_refl.stages_vector[i]])));3096}30973098if (requires_decompression) {3099shader_info_in.stages_bytecode[shader.shader_stage] = decompressed_code;3100} else {3101shader_info_in.stages_bytecode[shader.shader_stage] = shader.code_compressed_bytes;3102}3103}31043105PFN_D3D12_CREATE_ROOT_SIGNATURE_DESERIALIZER d3d_D3D12CreateRootSignatureDeserializer = (PFN_D3D12_CREATE_ROOT_SIGNATURE_DESERIALIZER)(void *)GetProcAddress(context_driver->lib_d3d12, "D3D12CreateRootSignatureDeserializer");3106ERR_FAIL_NULL_V(d3d_D3D12CreateRootSignatureDeserializer, ShaderID());31073108HRESULT res = d3d_D3D12CreateRootSignatureDeserializer(shader_refl_d3d12.root_signature_bytes.ptr(), shader_refl_d3d12.root_signature_bytes.size(), IID_PPV_ARGS(shader_info_in.root_signature_deserializer.GetAddressOf()));3109ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ShaderID(), "D3D12CreateRootSignatureDeserializer failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");31103111ComPtr<ID3D12RootSignature> root_signature;3112res = device->CreateRootSignature(0, shader_refl_d3d12.root_signature_bytes.ptr(), shader_refl_d3d12.root_signature_bytes.size(), IID_PPV_ARGS(shader_info_in.root_signature.GetAddressOf()));3113ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ShaderID(), "CreateRootSignature failed with error " + vformat("0x%08ux", (uint64_t)res) + 
".");31143115shader_info_in.root_signature_desc = shader_info_in.root_signature_deserializer->GetRootSignatureDesc();3116shader_info_in.root_signature_crc = shader_refl_d3d12.root_signature_crc;31173118// Bookkeep.3119ShaderInfo *shader_info_ptr = VersatileResource::allocate<ShaderInfo>(resources_allocator);3120*shader_info_ptr = shader_info_in;3121return ShaderID(shader_info_ptr);3122}31233124uint32_t RenderingDeviceDriverD3D12::shader_get_layout_hash(ShaderID p_shader) {3125const ShaderInfo *shader_info_in = (const ShaderInfo *)p_shader.id;3126return shader_info_in->root_signature_crc;3127}31283129void RenderingDeviceDriverD3D12::shader_free(ShaderID p_shader) {3130ShaderInfo *shader_info_in = (ShaderInfo *)p_shader.id;3131VersatileResource::free(resources_allocator, shader_info_in);3132}31333134void RenderingDeviceDriverD3D12::shader_destroy_modules(ShaderID p_shader) {3135ShaderInfo *shader_info_in = (ShaderInfo *)p_shader.id;3136shader_info_in->stages_bytecode.clear();3137}31383139/*********************/3140/**** UNIFORM SET ****/3141/*********************/31423143static void _add_descriptor_count_for_uniform(RenderingDevice::UniformType p_type, uint32_t p_binding_length, bool p_double_srv_uav_ambiguous, uint32_t &r_num_resources, uint32_t &r_num_samplers, bool &r_srv_uav_ambiguity) {3144r_srv_uav_ambiguity = false;31453146// Some resource types can be SRV or UAV, depending on what NIR-DXIL decided for a specific shader variant.3147// The goal is to generate both SRV and UAV for the descriptor sets' heaps and copy only the relevant one3148// to the frame descriptor heap at binding time.3149// [[SRV_UAV_AMBIGUITY]]31503151switch (p_type) {3152case RenderingDevice::UNIFORM_TYPE_SAMPLER: {3153r_num_samplers += p_binding_length;3154} break;3155case RenderingDevice::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE:3156case RenderingDevice::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: {3157r_num_resources += p_binding_length;3158r_num_samplers += p_binding_length;3159} break;3160case 
RenderingDevice::UNIFORM_TYPE_UNIFORM_BUFFER: {3161r_num_resources += 1;3162} break;3163case RenderingDevice::UNIFORM_TYPE_STORAGE_BUFFER: {3164r_num_resources += p_double_srv_uav_ambiguous ? 2 : 1;3165r_srv_uav_ambiguity = true;3166} break;3167case RenderingDevice::UNIFORM_TYPE_IMAGE: {3168r_num_resources += p_binding_length * (p_double_srv_uav_ambiguous ? 2 : 1);3169r_srv_uav_ambiguity = true;3170} break;3171default: {3172r_num_resources += p_binding_length;3173}3174}3175}31763177RDD::UniformSetID RenderingDeviceDriverD3D12::uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index, int p_linear_pool_index) {3178//p_linear_pool_index = -1; // TODO:? Linear pools not implemented or not supported by API backend.31793180// Pre-bookkeep.3181UniformSetInfo *uniform_set_info = VersatileResource::allocate<UniformSetInfo>(resources_allocator);31823183// Do a first pass to count resources and samplers.3184uint32_t num_resource_descs = 0;3185uint32_t num_sampler_descs = 0;3186for (uint32_t i = 0; i < p_uniforms.size(); i++) {3187const BoundUniform &uniform = p_uniforms[i];31883189// Since the uniform set may be created for a shader different than the one that will be actually bound,3190// which may have a different set of uniforms optimized out, the stages mask we can check now is not reliable.3191// Therefore, we can't make any assumptions here about descriptors that we may not need to create,3192// pixel or vertex-only shader resource states, etc.31933194bool srv_uav_ambiguity = false;3195uint32_t binding_length = uniform.ids.size();3196if (uniform.type == UNIFORM_TYPE_SAMPLER_WITH_TEXTURE || uniform.type == UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER) {3197binding_length /= 2;3198}3199_add_descriptor_count_for_uniform(uniform.type, binding_length, true, num_resource_descs, num_sampler_descs, srv_uav_ambiguity);3200}3201#ifdef DEV_ENABLED3202uniform_set_info->resources_desc_info.reserve(num_resource_descs);3203#endif32043205if 
(num_resource_descs) {3206Error err = uniform_set_info->desc_heaps.resources.allocate(device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, num_resource_descs, false);3207if (err) {3208VersatileResource::free(resources_allocator, uniform_set_info);3209ERR_FAIL_V(UniformSetID());3210}3211}3212if (num_sampler_descs) {3213Error err = uniform_set_info->desc_heaps.samplers.allocate(device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, num_sampler_descs, false);3214if (err) {3215VersatileResource::free(resources_allocator, uniform_set_info);3216ERR_FAIL_V(UniformSetID());3217}3218}3219struct {3220DescriptorsHeap::Walker resources;3221DescriptorsHeap::Walker samplers;3222} desc_heap_walkers;3223desc_heap_walkers.resources = uniform_set_info->desc_heaps.resources.make_walker();3224desc_heap_walkers.samplers = uniform_set_info->desc_heaps.samplers.make_walker();32253226struct NeededState {3227bool is_buffer = false;3228uint64_t shader_uniform_idx_mask = 0;3229D3D12_RESOURCE_STATES states = {};3230};3231HashMap<ResourceInfo *, NeededState> resource_states;32323233for (uint32_t i = 0; i < p_uniforms.size(); i++) {3234const BoundUniform &uniform = p_uniforms[i];32353236#ifdef DEV_ENABLED3237const ShaderInfo *shader_info_in = (const ShaderInfo *)p_shader.id;3238const ShaderInfo::UniformBindingInfo &shader_uniform = shader_info_in->sets[p_set_index].bindings[i];3239bool is_compute = shader_info_in->stages_bytecode.has(SHADER_STAGE_COMPUTE);3240DEV_ASSERT(!(is_compute && (shader_uniform.stages & (SHADER_STAGE_VERTEX_BIT | SHADER_STAGE_FRAGMENT_BIT))));3241DEV_ASSERT(!(!is_compute && (shader_uniform.stages & SHADER_STAGE_COMPUTE_BIT)));3242#endif32433244switch (uniform.type) {3245case UNIFORM_TYPE_SAMPLER: {3246for (uint32_t j = 0; j < uniform.ids.size(); j++) {3247const D3D12_SAMPLER_DESC &sampler_desc = samplers[uniform.ids[j].id];3248device->CreateSampler(&sampler_desc, desc_heap_walkers.samplers.get_curr_cpu_handle());3249desc_heap_walkers.samplers.advance();3250}3251} 
break;3252case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {3253for (uint32_t j = 0; j < uniform.ids.size(); j += 2) {3254const D3D12_SAMPLER_DESC &sampler_desc = samplers[uniform.ids[j].id];3255TextureInfo *texture_info = (TextureInfo *)uniform.ids[j + 1].id;32563257device->CreateSampler(&sampler_desc, desc_heap_walkers.samplers.get_curr_cpu_handle());3258desc_heap_walkers.samplers.advance();3259device->CreateShaderResourceView(texture_info->resource, &texture_info->view_descs.srv, desc_heap_walkers.resources.get_curr_cpu_handle());3260#ifdef DEV_ENABLED3261uniform_set_info->resources_desc_info.push_back({ D3D12_DESCRIPTOR_RANGE_TYPE_SRV, texture_info->view_descs.srv.ViewDimension });3262#endif3263desc_heap_walkers.resources.advance();32643265NeededState &ns = resource_states[texture_info];3266ns.shader_uniform_idx_mask |= ((uint64_t)1 << i);3267ns.states |= D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE;3268}3269} break;3270case UNIFORM_TYPE_TEXTURE: {3271for (uint32_t j = 0; j < uniform.ids.size(); j++) {3272TextureInfo *texture_info = (TextureInfo *)uniform.ids[j].id;3273device->CreateShaderResourceView(texture_info->resource, &texture_info->view_descs.srv, desc_heap_walkers.resources.get_curr_cpu_handle());3274#ifdef DEV_ENABLED3275uniform_set_info->resources_desc_info.push_back({ D3D12_DESCRIPTOR_RANGE_TYPE_SRV, texture_info->view_descs.srv.ViewDimension });3276#endif3277desc_heap_walkers.resources.advance();32783279NeededState &ns = resource_states[texture_info];3280ns.shader_uniform_idx_mask |= ((uint64_t)1 << i);3281ns.states |= D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE;3282}3283} break;3284case UNIFORM_TYPE_IMAGE: {3285for (uint32_t j = 0; j < uniform.ids.size(); j++) {3286TextureInfo *texture_info = (TextureInfo *)uniform.ids[j].id;32873288NeededState &ns = resource_states[texture_info];3289ns.shader_uniform_idx_mask |= ((uint64_t)1 << i);3290ns.states |= (D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_UNORDERED_ACCESS);3291}32923293// SRVs first. 
[[SRV_UAV_AMBIGUITY]]3294for (uint32_t j = 0; j < uniform.ids.size(); j++) {3295TextureInfo *texture_info = (TextureInfo *)uniform.ids[j].id;32963297device->CreateShaderResourceView(texture_info->resource, &texture_info->view_descs.srv, desc_heap_walkers.resources.get_curr_cpu_handle());3298#ifdef DEV_ENABLED3299uniform_set_info->resources_desc_info.push_back({ D3D12_DESCRIPTOR_RANGE_TYPE_SRV, texture_info->view_descs.srv.ViewDimension });3300#endif3301desc_heap_walkers.resources.advance();3302}33033304// UAVs then. [[SRV_UAV_AMBIGUITY]]3305for (uint32_t j = 0; j < uniform.ids.size(); j++) {3306TextureInfo *texture_info = (TextureInfo *)uniform.ids[j].id;33073308device->CreateUnorderedAccessView(texture_info->resource, nullptr, &texture_info->view_descs.uav, desc_heap_walkers.resources.get_curr_cpu_handle());3309#ifdef DEV_ENABLED3310uniform_set_info->resources_desc_info.push_back({ D3D12_DESCRIPTOR_RANGE_TYPE_UAV, {} });3311#endif3312desc_heap_walkers.resources.advance();3313}3314} break;3315case UNIFORM_TYPE_TEXTURE_BUFFER:3316case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: {3317CRASH_NOW_MSG("Unimplemented!");3318} break;3319case UNIFORM_TYPE_IMAGE_BUFFER: {3320CRASH_NOW_MSG("Unimplemented!");3321} break;3322case UNIFORM_TYPE_UNIFORM_BUFFER: {3323BufferInfo *buf_info = (BufferInfo *)uniform.ids[0].id;33243325D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc = {};3326cbv_desc.BufferLocation = buf_info->resource->GetGPUVirtualAddress();3327cbv_desc.SizeInBytes = STEPIFY(buf_info->size, 256);3328device->CreateConstantBufferView(&cbv_desc, desc_heap_walkers.resources.get_curr_cpu_handle());3329desc_heap_walkers.resources.advance();3330#ifdef DEV_ENABLED3331uniform_set_info->resources_desc_info.push_back({ D3D12_DESCRIPTOR_RANGE_TYPE_CBV, {} });3332#endif33333334NeededState &ns = resource_states[buf_info];3335ns.is_buffer = true;3336ns.shader_uniform_idx_mask |= ((uint64_t)1 << i);3337ns.states |= D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER;3338} break;3339case 
UNIFORM_TYPE_STORAGE_BUFFER: {3340BufferInfo *buf_info = (BufferInfo *)uniform.ids[0].id;33413342// SRV first. [[SRV_UAV_AMBIGUITY]]3343{3344D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {};3345srv_desc.Format = DXGI_FORMAT_R32_TYPELESS;3346srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;3347srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;3348srv_desc.Buffer.FirstElement = 0;3349srv_desc.Buffer.NumElements = (buf_info->size + 3) / 4;3350srv_desc.Buffer.StructureByteStride = 0;3351srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;3352device->CreateShaderResourceView(buf_info->resource, &srv_desc, desc_heap_walkers.resources.get_curr_cpu_handle());3353#ifdef DEV_ENABLED3354uniform_set_info->resources_desc_info.push_back({ D3D12_DESCRIPTOR_RANGE_TYPE_SRV, srv_desc.ViewDimension });3355#endif3356desc_heap_walkers.resources.advance();3357}33583359// UAV then. [[SRV_UAV_AMBIGUITY]]3360{3361if (buf_info->flags.usable_as_uav) {3362D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {};3363uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;3364uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;3365uav_desc.Buffer.FirstElement = 0;3366uav_desc.Buffer.NumElements = (buf_info->size + 3) / 4;3367uav_desc.Buffer.StructureByteStride = 0;3368uav_desc.Buffer.CounterOffsetInBytes = 0;3369uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;3370device->CreateUnorderedAccessView(buf_info->resource, nullptr, &uav_desc, desc_heap_walkers.resources.get_curr_cpu_handle());3371#ifdef DEV_ENABLED3372uniform_set_info->resources_desc_info.push_back({ D3D12_DESCRIPTOR_RANGE_TYPE_UAV, {} });3373#endif3374} else {3375// If can't transition to UAV, leave this one empty since it won't be3376// used, and trying to create an UAV view would trigger a validation error.3377}33783379desc_heap_walkers.resources.advance();3380}33813382NeededState &ns = resource_states[buf_info];3383ns.shader_uniform_idx_mask |= ((uint64_t)1 << i);3384ns.is_buffer = true;3385ns.states |= 
(D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_UNORDERED_ACCESS);3386} break;3387case UNIFORM_TYPE_INPUT_ATTACHMENT: {3388for (uint32_t j = 0; j < uniform.ids.size(); j++) {3389TextureInfo *texture_info = (TextureInfo *)uniform.ids[j].id;33903391device->CreateShaderResourceView(texture_info->resource, &texture_info->view_descs.srv, desc_heap_walkers.resources.get_curr_cpu_handle());3392#ifdef DEV_ENABLED3393uniform_set_info->resources_desc_info.push_back({ D3D12_DESCRIPTOR_RANGE_TYPE_SRV, texture_info->view_descs.srv.ViewDimension });3394#endif3395desc_heap_walkers.resources.advance();33963397NeededState &ns = resource_states[texture_info];3398ns.shader_uniform_idx_mask |= ((uint64_t)1 << i);3399ns.states |= D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;3400}3401} break;3402default: {3403DEV_ASSERT(false);3404}3405}3406}34073408DEV_ASSERT(desc_heap_walkers.resources.is_at_eof());3409DEV_ASSERT(desc_heap_walkers.samplers.is_at_eof());34103411{3412uniform_set_info->resource_states.reserve(resource_states.size());3413for (const KeyValue<ResourceInfo *, NeededState> &E : resource_states) {3414UniformSetInfo::StateRequirement sr;3415sr.resource = E.key;3416sr.is_buffer = E.value.is_buffer;3417sr.states = E.value.states;3418sr.shader_uniform_idx_mask = E.value.shader_uniform_idx_mask;3419uniform_set_info->resource_states.push_back(sr);3420}3421}34223423return UniformSetID(uniform_set_info);3424}34253426void RenderingDeviceDriverD3D12::uniform_set_free(UniformSetID p_uniform_set) {3427UniformSetInfo *uniform_set_info = (UniformSetInfo *)p_uniform_set.id;3428VersatileResource::free(resources_allocator, uniform_set_info);3429}34303431// ----- COMMANDS -----34323433void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {3434if (barrier_capabilities.enhanced_barriers_supported) {3435return;3436}34373438// Perform pending 
blackouts.3439{3440SelfList<TextureInfo> *E = textures_pending_clear.first();3441while (E) {3442TextureSubresourceRange subresources;3443subresources.layer_count = E->self()->layers;3444subresources.mipmap_count = E->self()->mipmaps;3445command_clear_color_texture(p_cmd_buffer, TextureID(E->self()), TEXTURE_LAYOUT_UNDEFINED, Color(), subresources);34463447SelfList<TextureInfo> *next = E->next();3448E->remove_from_list();3449E = next;3450}3451}34523453CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;3454const UniformSetInfo *uniform_set_info = (const UniformSetInfo *)p_uniform_set.id;3455const ShaderInfo *shader_info_in = (const ShaderInfo *)p_shader.id;3456const ShaderInfo::UniformSet &shader_set = shader_info_in->sets[p_set_index];34573458for (const UniformSetInfo::StateRequirement &sr : uniform_set_info->resource_states) {3459#ifdef DEV_ENABLED3460{3461uint32_t stages = 0;3462D3D12_RESOURCE_STATES wanted_state = {};3463bool writable = false;3464// Doing the full loop for debugging since the real one below may break early,3465// but we want an exhaustive check3466uint64_t inv_uniforms_mask = ~sr.shader_uniform_idx_mask; // Inverting the mask saves operations.3467for (uint8_t bit = 0; inv_uniforms_mask != UINT64_MAX; bit++) {3468uint64_t bit_mask = ((uint64_t)1 << bit);3469if (likely((inv_uniforms_mask & bit_mask))) {3470continue;3471}3472inv_uniforms_mask |= bit_mask;34733474const ShaderInfo::UniformBindingInfo &binding = shader_set.bindings[bit];3475if (unlikely(!binding.stages)) {3476continue;3477}34783479D3D12_RESOURCE_STATES required_states = sr.states;34803481// Resolve a case of SRV/UAV ambiguity now. 
[[SRV_UAV_AMBIGUITY]]3482if ((required_states & D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE) && (required_states & D3D12_RESOURCE_STATE_UNORDERED_ACCESS)) {3483if (binding.res_class == RES_CLASS_SRV) {3484required_states &= ~D3D12_RESOURCE_STATE_UNORDERED_ACCESS;3485} else {3486required_states = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;3487}3488}34893490if (stages) { // Second occurrence at least?3491CRASH_COND_MSG(binding.writable != writable, "A resource is used in the same uniform set both as R/O and R/W. That's not supported and shouldn't happen.");3492CRASH_COND_MSG(required_states != wanted_state, "A resource is used in the same uniform set with different resource states. The code needs to be enhanced to support that.");3493} else {3494wanted_state = required_states;3495stages |= binding.stages;3496writable = binding.writable;3497}34983499DEV_ASSERT((wanted_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) == (bool)(wanted_state & D3D12_RESOURCE_STATE_UNORDERED_ACCESS));3500}3501}3502#endif35033504// We may have assumed D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE for a resource,3505// because at uniform set creation time we couldn't know for sure which stages3506// it would be used in (due to the fact that a set can be created against a different,3507// albeit compatible, shader, which may make a different usage in the end).3508// However, now we know and can exclude up to one unneeded states.35093510// TODO: If subresources involved already in the needed states, or scheduled for it,3511// maybe it's more optimal not to do anything here35123513uint32_t stages = 0;3514D3D12_RESOURCE_STATES wanted_state = {};3515uint64_t inv_uniforms_mask = ~sr.shader_uniform_idx_mask; // Inverting the mask saves operations.3516for (uint8_t bit = 0; inv_uniforms_mask != UINT64_MAX; bit++) {3517uint64_t bit_mask = ((uint64_t)1 << bit);3518if (likely((inv_uniforms_mask & bit_mask))) {3519continue;3520}3521inv_uniforms_mask |= bit_mask;35223523const ShaderInfo::UniformBindingInfo &binding = 
shader_set.bindings[bit];3524if (unlikely(!binding.stages)) {3525continue;3526}35273528if (!stages) {3529D3D12_RESOURCE_STATES required_states = sr.states;35303531// Resolve a case of SRV/UAV ambiguity now. [[SRV_UAV_AMBIGUITY]]3532if ((required_states & D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE) && (required_states & D3D12_RESOURCE_STATE_UNORDERED_ACCESS)) {3533if (binding.res_class == RES_CLASS_SRV) {3534required_states &= ~D3D12_RESOURCE_STATE_UNORDERED_ACCESS;3535} else {3536required_states = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;3537}3538}35393540wanted_state = required_states;35413542if (!(wanted_state & D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE)) {3543// By now, we already know the resource is used, and with no PS/NON_PS disjuntive; no need to check further.3544break;3545}3546}35473548stages |= binding.stages;35493550if (stages == (SHADER_STAGE_VERTEX_BIT | SHADER_STAGE_FRAGMENT_BIT) || stages == SHADER_STAGE_COMPUTE_BIT) {3551// By now, we already know the resource is used, and as both PS/NON_PS; no need to check further.3552break;3553}3554}35553556if (likely(wanted_state)) {3557if ((wanted_state & D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE)) {3558if (stages == SHADER_STAGE_VERTEX_BIT || stages == SHADER_STAGE_COMPUTE_BIT) {3559D3D12_RESOURCE_STATES unneeded_states = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;3560wanted_state &= ~unneeded_states;3561} else if (stages == SHADER_STAGE_FRAGMENT_BIT) {3562D3D12_RESOURCE_STATES unneeded_states = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;3563wanted_state &= ~unneeded_states;3564}3565}35663567if (likely(wanted_state)) {3568if (sr.is_buffer) {3569_resource_transition_batch(cmd_buf_info, sr.resource, 0, 1, wanted_state);3570} else {3571TextureInfo *tex_info = (TextureInfo *)sr.resource;3572uint32_t planes = 1;3573if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) {3574planes = format_get_plane_count(tex_info->format);3575}3576for (uint32_t i = 0; i < tex_info->layers; i++) {3577for (uint32_t j = 
0; j < tex_info->mipmaps; j++) {3578uint32_t subresource = D3D12CalcSubresource(tex_info->base_mip + j, tex_info->base_layer + i, 0, tex_info->desc.MipLevels, tex_info->desc.ArraySize());3579_resource_transition_batch(cmd_buf_info, tex_info, subresource, planes, wanted_state);3580}3581}3582}3583}3584}3585}35863587if (p_set_index == shader_info_in->sets.size() - 1) {3588_resource_transitions_flush(cmd_buf_info);3589}3590}35913592void RenderingDeviceDriverD3D12::_command_check_descriptor_sets(CommandBufferID p_cmd_buffer) {3593DEV_ASSERT(segment_begun && "Unable to use commands that rely on descriptors because a segment was never begun.");35943595CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;3596if (!cmd_buf_info->descriptor_heaps_set) {3597// Set descriptor heaps for the command buffer if they haven't been set yet.3598ID3D12DescriptorHeap *heaps[] = {3599frames[frame_idx].desc_heaps.resources.get_heap(),3600frames[frame_idx].desc_heaps.samplers.get_heap(),3601};36023603cmd_buf_info->cmd_list->SetDescriptorHeaps(2, heaps);3604cmd_buf_info->descriptor_heaps_set = true;3605}3606}36073608void RenderingDeviceDriverD3D12::_command_bind_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index, bool p_for_compute) {3609_command_check_descriptor_sets(p_cmd_buffer);36103611UniformSetInfo *uniform_set_info = (UniformSetInfo *)p_uniform_set.id;3612const ShaderInfo *shader_info_in = (const ShaderInfo *)p_shader.id;3613const ShaderInfo::UniformSet &shader_set = shader_info_in->sets[p_set_index];3614const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;36153616using SetRootDescriptorTableFn = void (STDMETHODCALLTYPE ID3D12GraphicsCommandList::*)(UINT, D3D12_GPU_DESCRIPTOR_HANDLE);3617SetRootDescriptorTableFn set_root_desc_table_fn = p_for_compute ? 
&ID3D12GraphicsCommandList::SetComputeRootDescriptorTable : &ID3D12GraphicsCommandList1::SetGraphicsRootDescriptorTable;36183619// If this set's descriptors have already been set for the current execution and a compatible root signature, reuse!3620uint32_t root_sig_crc = p_for_compute ? cmd_buf_info->compute_root_signature_crc : cmd_buf_info->graphics_root_signature_crc;3621UniformSetInfo::RecentBind *last_bind = nullptr;3622for (int i = 0; i < (int)ARRAY_SIZE(uniform_set_info->recent_binds); i++) {3623if (uniform_set_info->recent_binds[i].segment_serial == frames[frame_idx].segment_serial) {3624if (uniform_set_info->recent_binds[i].root_signature_crc == root_sig_crc) {3625for (const RootDescriptorTable &table : uniform_set_info->recent_binds[i].root_tables.resources) {3626(cmd_buf_info->cmd_list.Get()->*set_root_desc_table_fn)(table.root_param_idx, table.start_gpu_handle);3627}3628for (const RootDescriptorTable &table : uniform_set_info->recent_binds[i].root_tables.samplers) {3629(cmd_buf_info->cmd_list.Get()->*set_root_desc_table_fn)(table.root_param_idx, table.start_gpu_handle);3630}3631#ifdef DEV_ENABLED3632uniform_set_info->recent_binds[i].uses++;3633frames[frame_idx].uniform_set_reused++;3634#endif3635return;3636} else {3637if (!last_bind || uniform_set_info->recent_binds[i].uses < last_bind->uses) {3638// Prefer this one since it's been used less or we still haven't a better option.3639last_bind = &uniform_set_info->recent_binds[i];3640}3641}3642} else {3643// Prefer this one since it's unused.3644last_bind = &uniform_set_info->recent_binds[i];3645last_bind->uses = 0;3646}3647}36483649struct {3650DescriptorsHeap::Walker *resources = nullptr;3651DescriptorsHeap::Walker *samplers = nullptr;3652} frame_heap_walkers;3653frame_heap_walkers.resources = &frames[frame_idx].desc_heap_walkers.resources;3654frame_heap_walkers.samplers = &frames[frame_idx].desc_heap_walkers.samplers;36553656struct {3657DescriptorsHeap::Walker resources;3658DescriptorsHeap::Walker 
samplers;3659} set_heap_walkers;3660set_heap_walkers.resources = uniform_set_info->desc_heaps.resources.make_walker();3661set_heap_walkers.samplers = uniform_set_info->desc_heaps.samplers.make_walker();36623663#ifdef DEV_ENABLED3664// Whether we have stages where the uniform is actually used should match3665// whether we have any root signature locations for it.3666for (uint32_t i = 0; i < shader_set.bindings.size(); i++) {3667bool has_rs_locations = false;3668if (shader_set.bindings[i].root_sig_locations.resource.root_param_idx != UINT32_MAX ||3669shader_set.bindings[i].root_sig_locations.sampler.root_param_idx != UINT32_MAX) {3670has_rs_locations = true;3671break;3672}36733674bool has_stages = shader_set.bindings[i].stages;36753676DEV_ASSERT(has_rs_locations == has_stages);3677}3678#endif36793680last_bind->root_tables.resources.clear();3681last_bind->root_tables.samplers.clear();3682last_bind->root_tables.resources.reserve(shader_set.num_root_params.resources);3683last_bind->root_tables.samplers.reserve(shader_set.num_root_params.samplers);3684last_bind->uses++;36853686struct {3687RootDescriptorTable *resources = nullptr;3688RootDescriptorTable *samplers = nullptr;3689} tables;3690for (uint32_t i = 0; i < shader_set.bindings.size(); i++) {3691const ShaderInfo::UniformBindingInfo &binding = shader_set.bindings[i];36923693uint32_t num_resource_descs = 0;3694uint32_t num_sampler_descs = 0;3695bool srv_uav_ambiguity = false;3696_add_descriptor_count_for_uniform(binding.type, binding.length, false, num_resource_descs, num_sampler_descs, srv_uav_ambiguity);36973698bool resource_used = false;3699if (shader_set.bindings[i].stages) {3700{3701const ShaderInfo::UniformBindingInfo::RootSignatureLocation &rs_loc_resource = shader_set.bindings[i].root_sig_locations.resource;3702if (rs_loc_resource.root_param_idx != UINT32_MAX) { // Location used?3703DEV_ASSERT(num_resource_descs);3704DEV_ASSERT(!(srv_uav_ambiguity && (shader_set.bindings[i].res_class != RES_CLASS_SRV && 
shader_set.bindings[i].res_class != RES_CLASS_UAV))); // [[SRV_UAV_AMBIGUITY]]37053706bool must_flush_table = tables.resources && rs_loc_resource.root_param_idx != tables.resources->root_param_idx;3707if (must_flush_table) {3708// Check the root signature data has been filled ordered.3709DEV_ASSERT(rs_loc_resource.root_param_idx > tables.resources->root_param_idx);37103711(cmd_buf_info->cmd_list.Get()->*set_root_desc_table_fn)(tables.resources->root_param_idx, tables.resources->start_gpu_handle);3712tables.resources = nullptr;3713}37143715if (unlikely(frame_heap_walkers.resources->get_free_handles() < num_resource_descs)) {3716if (!frames[frame_idx].desc_heaps_exhausted_reported.resources) {3717frames[frame_idx].desc_heaps_exhausted_reported.resources = true;3718ERR_FAIL_MSG("Cannot bind uniform set because there's not enough room in the current frame's RESOURCES descriptor heap.\n"3719"Please increase the value of the rendering/rendering_device/d3d12/max_resource_descriptors_per_frame project setting.");3720} else {3721return;3722}3723}37243725if (!tables.resources) {3726DEV_ASSERT(last_bind->root_tables.resources.size() < last_bind->root_tables.resources.get_capacity());3727last_bind->root_tables.resources.resize(last_bind->root_tables.resources.size() + 1);3728tables.resources = &last_bind->root_tables.resources[last_bind->root_tables.resources.size() - 1];3729tables.resources->root_param_idx = rs_loc_resource.root_param_idx;3730tables.resources->start_gpu_handle = frame_heap_walkers.resources->get_curr_gpu_handle();3731}37323733// If there is ambiguity and it didn't clarify as SRVs, skip them, which come first. 
[[SRV_UAV_AMBIGUITY]]3734if (srv_uav_ambiguity && shader_set.bindings[i].res_class != RES_CLASS_SRV) {3735set_heap_walkers.resources.advance(num_resource_descs);3736}37373738// TODO: Batch to avoid multiple calls where possible (in any case, flush before setting root descriptor tables, or even batch that as well).3739device->CopyDescriptorsSimple(3740num_resource_descs,3741frame_heap_walkers.resources->get_curr_cpu_handle(),3742set_heap_walkers.resources.get_curr_cpu_handle(),3743D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);3744frame_heap_walkers.resources->advance(num_resource_descs);37453746// If there is ambiguity and it didn't clarify as UAVs, skip them, which come later. [[SRV_UAV_AMBIGUITY]]3747if (srv_uav_ambiguity && shader_set.bindings[i].res_class != RES_CLASS_UAV) {3748set_heap_walkers.resources.advance(num_resource_descs);3749}37503751resource_used = true;3752}3753}37543755{3756const ShaderInfo::UniformBindingInfo::RootSignatureLocation &rs_loc_sampler = shader_set.bindings[i].root_sig_locations.sampler;3757if (rs_loc_sampler.root_param_idx != UINT32_MAX) { // Location used?3758DEV_ASSERT(num_sampler_descs);3759DEV_ASSERT(!srv_uav_ambiguity); // [[SRV_UAV_AMBIGUITY]]37603761bool must_flush_table = tables.samplers && rs_loc_sampler.root_param_idx != tables.samplers->root_param_idx;3762if (must_flush_table) {3763// Check the root signature data has been filled ordered.3764DEV_ASSERT(rs_loc_sampler.root_param_idx > tables.samplers->root_param_idx);37653766(cmd_buf_info->cmd_list.Get()->*set_root_desc_table_fn)(tables.samplers->root_param_idx, tables.samplers->start_gpu_handle);3767tables.samplers = nullptr;3768}37693770if (unlikely(frame_heap_walkers.samplers->get_free_handles() < num_sampler_descs)) {3771if (!frames[frame_idx].desc_heaps_exhausted_reported.samplers) {3772frames[frame_idx].desc_heaps_exhausted_reported.samplers = true;3773ERR_FAIL_MSG("Cannot bind uniform set because there's not enough room in the current frame's SAMPLERS descriptors 
heap.\n"3774"Please increase the value of the rendering/rendering_device/d3d12/max_sampler_descriptors_per_frame project setting.");3775} else {3776return;3777}3778}37793780if (!tables.samplers) {3781DEV_ASSERT(last_bind->root_tables.samplers.size() < last_bind->root_tables.samplers.get_capacity());3782last_bind->root_tables.samplers.resize(last_bind->root_tables.samplers.size() + 1);3783tables.samplers = &last_bind->root_tables.samplers[last_bind->root_tables.samplers.size() - 1];3784tables.samplers->root_param_idx = rs_loc_sampler.root_param_idx;3785tables.samplers->start_gpu_handle = frame_heap_walkers.samplers->get_curr_gpu_handle();3786}37873788// TODO: Batch to avoid multiple calls where possible (in any case, flush before setting root descriptor tables, or even batch that as well).3789device->CopyDescriptorsSimple(3790num_sampler_descs,3791frame_heap_walkers.samplers->get_curr_cpu_handle(),3792set_heap_walkers.samplers.get_curr_cpu_handle(),3793D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);3794frame_heap_walkers.samplers->advance(num_sampler_descs);3795}3796}3797}37983799// Uniform set descriptor heaps are always full (descriptors are created for every uniform in them) despite3800// the shader variant a given set is created upon may not need all of them due to DXC optimizations.3801// Therefore, at this point we have to advance through the descriptor set descriptor's heap unconditionally.38023803set_heap_walkers.resources.advance(num_resource_descs);3804if (srv_uav_ambiguity) {3805DEV_ASSERT(num_resource_descs);3806if (!resource_used) {3807set_heap_walkers.resources.advance(num_resource_descs); // Additional skip, since both SRVs and UAVs have to be bypassed.3808}3809}38103811set_heap_walkers.samplers.advance(num_sampler_descs);3812}38133814DEV_ASSERT(set_heap_walkers.resources.is_at_eof());3815DEV_ASSERT(set_heap_walkers.samplers.is_at_eof());38163817{3818bool must_flush_table = tables.resources;3819if (must_flush_table) 
{3820(cmd_buf_info->cmd_list.Get()->*set_root_desc_table_fn)(tables.resources->root_param_idx, tables.resources->start_gpu_handle);3821}3822}3823{3824bool must_flush_table = tables.samplers;3825if (must_flush_table) {3826(cmd_buf_info->cmd_list.Get()->*set_root_desc_table_fn)(tables.samplers->root_param_idx, tables.samplers->start_gpu_handle);3827}3828}38293830last_bind->root_signature_crc = root_sig_crc;3831last_bind->segment_serial = frames[frame_idx].segment_serial;3832}38333834/******************/3835/**** TRANSFER ****/3836/******************/38373838void RenderingDeviceDriverD3D12::command_clear_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, uint64_t p_offset, uint64_t p_size) {3839_command_check_descriptor_sets(p_cmd_buffer);38403841CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;3842BufferInfo *buf_info = (BufferInfo *)p_buffer.id;38433844if (frames[frame_idx].desc_heap_walkers.resources.is_at_eof()) {3845if (!frames[frame_idx].desc_heaps_exhausted_reported.resources) {3846frames[frame_idx].desc_heaps_exhausted_reported.resources = true;3847ERR_FAIL_MSG(3848"Cannot clear buffer because there's not enough room in the current frame's RESOURCE descriptors heap.\n"3849"Please increase the value of the rendering/rendering_device/d3d12/max_resource_descriptors_per_frame project setting.");3850} else {3851return;3852}3853}3854if (frames[frame_idx].desc_heap_walkers.aux.is_at_eof()) {3855if (!frames[frame_idx].desc_heaps_exhausted_reported.aux) {3856frames[frame_idx].desc_heaps_exhausted_reported.aux = true;3857ERR_FAIL_MSG(3858"Cannot clear buffer because there's not enough room in the current frame's AUX descriptors heap.\n"3859"Please increase the value of the rendering/rendering_device/d3d12/max_misc_descriptors_per_frame project setting.");3860} else {3861return;3862}3863}38643865if (!barrier_capabilities.enhanced_barriers_supported) {3866_resource_transition_batch(cmd_buf_info, buf_info, 0, 1, 
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);3867_resource_transitions_flush(cmd_buf_info);3868}38693870D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {};3871uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;3872uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;3873uav_desc.Buffer.FirstElement = 0;3874uav_desc.Buffer.NumElements = (buf_info->size + 3) / 4;3875uav_desc.Buffer.StructureByteStride = 0;3876uav_desc.Buffer.CounterOffsetInBytes = 0;3877uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;3878device->CreateUnorderedAccessView(3879buf_info->resource,3880nullptr,3881&uav_desc,3882frames[frame_idx].desc_heap_walkers.aux.get_curr_cpu_handle());38833884device->CopyDescriptorsSimple(38851,3886frames[frame_idx].desc_heap_walkers.resources.get_curr_cpu_handle(),3887frames[frame_idx].desc_heap_walkers.aux.get_curr_cpu_handle(),3888D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);38893890static const UINT values[4] = {};3891cmd_buf_info->cmd_list->ClearUnorderedAccessViewUint(3892frames[frame_idx].desc_heap_walkers.resources.get_curr_gpu_handle(),3893frames[frame_idx].desc_heap_walkers.aux.get_curr_cpu_handle(),3894buf_info->resource,3895values,38960,3897nullptr);38983899frames[frame_idx].desc_heap_walkers.resources.advance();3900frames[frame_idx].desc_heap_walkers.aux.advance();3901}39023903void RenderingDeviceDriverD3D12::command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_buf_locfer, VectorView<BufferCopyRegion> p_regions) {3904CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;3905BufferInfo *src_buf_info = (BufferInfo *)p_src_buffer.id;3906BufferInfo *buf_loc_info = (BufferInfo *)p_buf_locfer.id;39073908if (!barrier_capabilities.enhanced_barriers_supported) {3909_resource_transition_batch(cmd_buf_info, src_buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE);3910_resource_transition_batch(cmd_buf_info, buf_loc_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST);3911_resource_transitions_flush(cmd_buf_info);3912}39133914for (uint32_t i = 0; i < 
p_regions.size(); i++) {3915cmd_buf_info->cmd_list->CopyBufferRegion(buf_loc_info->resource, p_regions[i].dst_offset, src_buf_info->resource, p_regions[i].src_offset, p_regions[i].size);3916}3917}39183919void RenderingDeviceDriverD3D12::command_copy_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<TextureCopyRegion> p_regions) {3920CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;3921TextureInfo *src_tex_info = (TextureInfo *)p_src_texture.id;3922TextureInfo *dst_tex_info = (TextureInfo *)p_dst_texture.id;39233924if (!barrier_capabilities.enhanced_barriers_supported) {3925// Batch all barrier transitions for the textures before performing the copies.3926for (uint32_t i = 0; i < p_regions.size(); i++) {3927uint32_t layer_count = MIN(p_regions[i].src_subresources.layer_count, p_regions[i].dst_subresources.layer_count);3928for (uint32_t j = 0; j < layer_count; j++) {3929UINT src_subresource = _compute_subresource_from_layers(src_tex_info, p_regions[i].src_subresources, j);3930UINT dst_subresource = _compute_subresource_from_layers(dst_tex_info, p_regions[i].dst_subresources, j);3931_resource_transition_batch(cmd_buf_info, src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE);3932_resource_transition_batch(cmd_buf_info, dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST);3933}3934}39353936_resource_transitions_flush(cmd_buf_info);3937}39383939CD3DX12_BOX src_box;3940for (uint32_t i = 0; i < p_regions.size(); i++) {3941uint32_t layer_count = MIN(p_regions[i].src_subresources.layer_count, p_regions[i].dst_subresources.layer_count);3942for (uint32_t j = 0; j < layer_count; j++) {3943UINT src_subresource = _compute_subresource_from_layers(src_tex_info, p_regions[i].src_subresources, j);3944UINT dst_subresource = _compute_subresource_from_layers(dst_tex_info, p_regions[i].dst_subresources, 
j);3945CD3DX12_TEXTURE_COPY_LOCATION src_location(src_tex_info->resource, src_subresource);3946CD3DX12_TEXTURE_COPY_LOCATION dst_location(dst_tex_info->resource, dst_subresource);3947src_box.left = p_regions[i].src_offset.x;3948src_box.top = p_regions[i].src_offset.y;3949src_box.front = p_regions[i].src_offset.z;3950src_box.right = p_regions[i].src_offset.x + p_regions[i].size.x;3951src_box.bottom = p_regions[i].src_offset.y + p_regions[i].size.y;3952src_box.back = p_regions[i].src_offset.z + p_regions[i].size.z;3953cmd_buf_info->cmd_list->CopyTextureRegion(&dst_location, p_regions[i].dst_offset.x, p_regions[i].dst_offset.y, p_regions[i].dst_offset.z, &src_location, &src_box);3954}3955}3956}39573958void RenderingDeviceDriverD3D12::command_resolve_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) {3959CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;3960TextureInfo *src_tex_info = (TextureInfo *)p_src_texture.id;3961TextureInfo *dst_tex_info = (TextureInfo *)p_dst_texture.id;39623963UINT src_subresource = D3D12CalcSubresource(p_src_mipmap, p_src_layer, 0, src_tex_info->desc.MipLevels, src_tex_info->desc.ArraySize());3964UINT dst_subresource = D3D12CalcSubresource(p_dst_mipmap, p_dst_layer, 0, dst_tex_info->desc.MipLevels, dst_tex_info->desc.ArraySize());3965if (!barrier_capabilities.enhanced_barriers_supported) {3966_resource_transition_batch(cmd_buf_info, src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_SOURCE);3967_resource_transition_batch(cmd_buf_info, dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_DEST);3968_resource_transitions_flush(cmd_buf_info);3969}39703971cmd_buf_info->cmd_list->ResolveSubresource(dst_tex_info->resource, dst_subresource, src_tex_info->resource, src_subresource, 
RD_TO_D3D12_FORMAT[src_tex_info->format].general_format);3972}39733974void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) {3975CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;3976TextureInfo *tex_info = (TextureInfo *)p_texture.id;3977if (tex_info->main_texture) {3978tex_info = tex_info->main_texture;3979}39803981auto _transition_subresources = [&](D3D12_RESOURCE_STATES p_new_state) {3982for (uint32_t i = 0; i < p_subresources.layer_count; i++) {3983for (uint32_t j = 0; j < p_subresources.mipmap_count; j++) {3984UINT subresource = D3D12CalcSubresource(3985p_subresources.base_mipmap + j,3986p_subresources.base_layer + i,39870,3988tex_info->desc.MipLevels,3989tex_info->desc.ArraySize());3990_resource_transition_batch(cmd_buf_info, tex_info, subresource, 1, p_new_state);3991}3992}3993_resource_transitions_flush(cmd_buf_info);3994};39953996if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) {3997// Clear via RTV.39983999if (frames[frame_idx].desc_heap_walkers.rtv.get_free_handles() < p_subresources.mipmap_count) {4000if (!frames[frame_idx].desc_heaps_exhausted_reported.rtv) {4001frames[frame_idx].desc_heaps_exhausted_reported.rtv = true;4002ERR_FAIL_MSG(4003"Cannot clear texture because there's not enough room in the current frame's RENDER TARGET descriptors heap.\n"4004"Please increase the value of the rendering/rendering_device/d3d12/max_misc_descriptors_per_frame project setting.");4005} else {4006return;4007}4008}40094010if (!barrier_capabilities.enhanced_barriers_supported) {4011_transition_subresources(D3D12_RESOURCE_STATE_RENDER_TARGET);4012}40134014for (uint32_t i = 0; i < p_subresources.mipmap_count; i++) {4015D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = _make_rtv_for_texture(tex_info, p_subresources.base_mipmap + i, p_subresources.base_layer, 
p_subresources.layer_count, false);4016rtv_desc.Format = tex_info->view_descs.uav.Format;4017device->CreateRenderTargetView(4018tex_info->resource,4019&rtv_desc,4020frames[frame_idx].desc_heap_walkers.rtv.get_curr_cpu_handle());40214022cmd_buf_info->cmd_list->ClearRenderTargetView(4023frames[frame_idx].desc_heap_walkers.rtv.get_curr_cpu_handle(),4024p_color.components,40250,4026nullptr);40274028frames[frame_idx].desc_heap_walkers.rtv.advance();4029}4030} else if (tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) {4031// Clear via UAV.4032_command_check_descriptor_sets(p_cmd_buffer);40334034if (frames[frame_idx].desc_heap_walkers.resources.get_free_handles() < p_subresources.mipmap_count) {4035if (!frames[frame_idx].desc_heaps_exhausted_reported.resources) {4036frames[frame_idx].desc_heaps_exhausted_reported.resources = true;4037ERR_FAIL_MSG(4038"Cannot clear texture because there's not enough room in the current frame's RESOURCE descriptors heap.\n"4039"Please increase the value of the rendering/rendering_device/d3d12/max_resource_descriptors_per_frame project setting.");4040} else {4041return;4042}4043}4044if (frames[frame_idx].desc_heap_walkers.aux.get_free_handles() < p_subresources.mipmap_count) {4045if (!frames[frame_idx].desc_heaps_exhausted_reported.aux) {4046frames[frame_idx].desc_heaps_exhausted_reported.aux = true;4047ERR_FAIL_MSG(4048"Cannot clear texture because there's not enough room in the current frame's AUX descriptors heap.\n"4049"Please increase the value of the rendering/rendering_device/d3d12/max_misc_descriptors_per_frame project setting.");4050} else {4051return;4052}4053}40544055if (!barrier_capabilities.enhanced_barriers_supported) {4056_transition_subresources(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);4057}40584059for (uint32_t i = 0; i < p_subresources.mipmap_count; i++) {4060D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = _make_ranged_uav_for_texture(tex_info, p_subresources.base_mipmap + i, p_subresources.base_layer, 
p_subresources.layer_count, false);4061device->CreateUnorderedAccessView(4062tex_info->resource,4063nullptr,4064&uav_desc,4065frames[frame_idx].desc_heap_walkers.aux.get_curr_cpu_handle());4066device->CopyDescriptorsSimple(40671,4068frames[frame_idx].desc_heap_walkers.resources.get_curr_cpu_handle(),4069frames[frame_idx].desc_heap_walkers.aux.get_curr_cpu_handle(),4070D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);40714072UINT values[4] = {4073(UINT)p_color.get_r8(),4074(UINT)p_color.get_g8(),4075(UINT)p_color.get_b8(),4076(UINT)p_color.get_a8(),4077};40784079cmd_buf_info->cmd_list->ClearUnorderedAccessViewUint(4080frames[frame_idx].desc_heap_walkers.resources.get_curr_gpu_handle(),4081frames[frame_idx].desc_heap_walkers.aux.get_curr_cpu_handle(),4082tex_info->resource,4083values,40840,4085nullptr);40864087frames[frame_idx].desc_heap_walkers.resources.advance();4088frames[frame_idx].desc_heap_walkers.aux.advance();4089}4090} else {4091ERR_FAIL_MSG("Cannot clear texture because its format does not support UAV writes. 
You'll need to update its contents through another method.");4092}4093}40944095void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<BufferTextureCopyRegion> p_regions) {4096CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;4097BufferInfo *buf_info = (BufferInfo *)p_src_buffer.id;4098TextureInfo *tex_info = (TextureInfo *)p_dst_texture.id;4099if (!barrier_capabilities.enhanced_barriers_supported) {4100_resource_transition_batch(cmd_buf_info, buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE);4101}41024103uint32_t pixel_size = get_image_format_pixel_size(tex_info->format);4104uint32_t block_w = 0, block_h = 0;4105get_compressed_image_format_block_dimensions(tex_info->format, block_w, block_h);41064107for (uint32_t i = 0; i < p_regions.size(); i++) {4108uint32_t region_pitch = (p_regions[i].texture_region_size.x * pixel_size * block_w) >> get_compressed_image_format_pixel_rshift(tex_info->format);4109region_pitch = STEPIFY(region_pitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);41104111D3D12_PLACED_SUBRESOURCE_FOOTPRINT src_footprint = {};4112src_footprint.Offset = p_regions[i].buffer_offset;4113src_footprint.Footprint = CD3DX12_SUBRESOURCE_FOOTPRINT(4114RD_TO_D3D12_FORMAT[tex_info->format].family,4115STEPIFY(p_regions[i].texture_region_size.x, block_w),4116STEPIFY(p_regions[i].texture_region_size.y, block_h),4117p_regions[i].texture_region_size.z,4118region_pitch);4119CD3DX12_TEXTURE_COPY_LOCATION copy_src(buf_info->resource, src_footprint);41204121CD3DX12_BOX src_box(41220, 0, 0,4123STEPIFY(p_regions[i].texture_region_size.x, block_w),4124STEPIFY(p_regions[i].texture_region_size.y, block_h),4125p_regions[i].texture_region_size.z);41264127if (!barrier_capabilities.enhanced_barriers_supported) {4128for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) {4129UINT dst_subresource = 
D3D12CalcSubresource(4130p_regions[i].texture_subresources.mipmap,4131p_regions[i].texture_subresources.base_layer + j,4132_compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect),4133tex_info->desc.MipLevels,4134tex_info->desc.ArraySize());4135CD3DX12_TEXTURE_COPY_LOCATION copy_dst(tex_info->resource, dst_subresource);41364137_resource_transition_batch(cmd_buf_info, tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST);4138}41394140_resource_transitions_flush(cmd_buf_info);4141}41424143for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) {4144UINT dst_subresource = D3D12CalcSubresource(4145p_regions[i].texture_subresources.mipmap,4146p_regions[i].texture_subresources.base_layer + j,4147_compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect),4148tex_info->desc.MipLevels,4149tex_info->desc.ArraySize());4150CD3DX12_TEXTURE_COPY_LOCATION copy_dst(tex_info->resource, dst_subresource);41514152cmd_buf_info->cmd_list->CopyTextureRegion(4153©_dst,4154p_regions[i].texture_offset.x,4155p_regions[i].texture_offset.y,4156p_regions[i].texture_offset.z,4157©_src,4158&src_box);4159}4160}4161}41624163void RenderingDeviceDriverD3D12::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_buf_locfer, VectorView<BufferTextureCopyRegion> p_regions) {4164CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;4165TextureInfo *tex_info = (TextureInfo *)p_src_texture.id;4166BufferInfo *buf_info = (BufferInfo *)p_buf_locfer.id;41674168if (!barrier_capabilities.enhanced_barriers_supported) {4169_resource_transition_batch(cmd_buf_info, buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST);4170}41714172uint32_t block_w = 0, block_h = 0;4173get_compressed_image_format_block_dimensions(tex_info->format, block_w, block_h);41744175for (uint32_t i = 0; i < p_regions.size(); i++) {4176if 
(!barrier_capabilities.enhanced_barriers_supported) {4177for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) {4178UINT src_subresource = D3D12CalcSubresource(4179p_regions[i].texture_subresources.mipmap,4180p_regions[i].texture_subresources.base_layer + j,4181_compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect),4182tex_info->desc.MipLevels,4183tex_info->desc.ArraySize());41844185_resource_transition_batch(cmd_buf_info, tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE);4186}41874188_resource_transitions_flush(cmd_buf_info);4189}41904191for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) {4192UINT src_subresource = D3D12CalcSubresource(4193p_regions[i].texture_subresources.mipmap,4194p_regions[i].texture_subresources.base_layer + j,4195_compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect),4196tex_info->desc.MipLevels,4197tex_info->desc.ArraySize());41984199CD3DX12_TEXTURE_COPY_LOCATION copy_src(tex_info->resource, src_subresource);42004201uint32_t computed_d = MAX(1, tex_info->desc.DepthOrArraySize >> p_regions[i].texture_subresources.mipmap);4202uint32_t image_size = get_image_format_required_size(4203tex_info->format,4204MAX(1u, tex_info->desc.Width >> p_regions[i].texture_subresources.mipmap),4205MAX(1u, tex_info->desc.Height >> p_regions[i].texture_subresources.mipmap),4206computed_d,42071);4208uint32_t row_pitch = image_size / (p_regions[i].texture_region_size.y * computed_d) * block_h;4209row_pitch = STEPIFY(row_pitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);42104211D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_footprint = {};4212dst_footprint.Offset = p_regions[i].buffer_offset;4213dst_footprint.Footprint.Width = STEPIFY(p_regions[i].texture_region_size.x, block_w);4214dst_footprint.Footprint.Height = STEPIFY(p_regions[i].texture_region_size.y, block_h);4215dst_footprint.Footprint.Depth = 
p_regions[i].texture_region_size.z;4216dst_footprint.Footprint.RowPitch = row_pitch;4217dst_footprint.Footprint.Format = RD_TO_D3D12_FORMAT[tex_info->format].family;42184219CD3DX12_TEXTURE_COPY_LOCATION copy_dst(buf_info->resource, dst_footprint);42204221cmd_buf_info->cmd_list->CopyTextureRegion(©_dst, 0, 0, 0, ©_src, nullptr);4222}4223}4224}42254226/******************/4227/**** PIPELINE ****/4228/******************/42294230void RenderingDeviceDriverD3D12::pipeline_free(PipelineID p_pipeline) {4231PipelineInfo *pipeline_info = (PipelineInfo *)(p_pipeline.id);4232pipeline_info->pso->Release();4233memdelete(pipeline_info);4234}42354236// ----- BINDING -----42374238void RenderingDeviceDriverD3D12::command_bind_push_constants(CommandBufferID p_cmd_buffer, ShaderID p_shader, uint32_t p_dst_first_index, VectorView<uint32_t> p_data) {4239const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;4240const ShaderInfo *shader_info_in = (const ShaderInfo *)p_shader.id;4241if (!shader_info_in->dxil_push_constant_size) {4242return;4243}4244if (shader_info_in->is_compute) {4245cmd_buf_info->cmd_list->SetComputeRoot32BitConstants(0, p_data.size(), p_data.ptr(), p_dst_first_index);4246} else {4247cmd_buf_info->cmd_list->SetGraphicsRoot32BitConstants(0, p_data.size(), p_data.ptr(), p_dst_first_index);4248}4249}42504251// ----- CACHE -----42524253bool RenderingDeviceDriverD3D12::pipeline_cache_create(const Vector<uint8_t> &p_data) {4254WARN_PRINT("PSO caching is not implemented yet in the Direct3D 12 driver.");4255return false;4256}42574258void RenderingDeviceDriverD3D12::pipeline_cache_free() {4259ERR_FAIL_MSG("Not implemented.");4260}42614262size_t RenderingDeviceDriverD3D12::pipeline_cache_query_size() {4263ERR_FAIL_V_MSG(0, "Not implemented.");4264}42654266Vector<uint8_t> RenderingDeviceDriverD3D12::pipeline_cache_serialize() {4267ERR_FAIL_V_MSG(Vector<uint8_t>(), "Not implemented.");4268}42694270/*******************/4271/**** RENDERING 
****/4272/*******************/42734274// ----- SUBPASS -----42754276RDD::RenderPassID RenderingDeviceDriverD3D12::render_pass_create(VectorView<Attachment> p_attachments, VectorView<Subpass> p_subpasses, VectorView<SubpassDependency> p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) {4277ERR_FAIL_COND_V_MSG(p_fragment_density_map_attachment.attachment != AttachmentReference::UNUSED, RenderPassID(), "Fragment density maps are not supported in D3D12.");42784279// Pre-bookkeep.4280RenderPassInfo *pass_info = VersatileResource::allocate<RenderPassInfo>(resources_allocator);42814282pass_info->attachments.resize(p_attachments.size());4283for (uint32_t i = 0; i < p_attachments.size(); i++) {4284pass_info->attachments[i] = p_attachments[i];4285}42864287pass_info->subpasses.resize(p_subpasses.size());4288for (uint32_t i = 0; i < p_subpasses.size(); i++) {4289pass_info->subpasses[i] = p_subpasses[i];4290}42914292pass_info->view_count = p_view_count;42934294DXGI_FORMAT *formats = ALLOCA_ARRAY(DXGI_FORMAT, p_attachments.size());4295for (uint32_t i = 0; i < p_attachments.size(); i++) {4296const D3D12Format &format = RD_TO_D3D12_FORMAT[p_attachments[i].format];4297if (format.dsv_format != DXGI_FORMAT_UNKNOWN) {4298formats[i] = format.dsv_format;4299} else {4300formats[i] = format.general_format;4301}4302}4303pass_info->max_supported_sample_count = _find_max_common_supported_sample_count(VectorView(formats, p_attachments.size()));43044305return RenderPassID(pass_info);4306}43074308void RenderingDeviceDriverD3D12::render_pass_free(RenderPassID p_render_pass) {4309RenderPassInfo *pass_info = (RenderPassInfo *)p_render_pass.id;4310VersatileResource::free(resources_allocator, pass_info);4311}43124313// ----- COMMANDS -----43144315void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, FramebufferID p_framebuffer, CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, 
VectorView<RenderPassClearValue> p_attachment_clears) {4316CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;4317const RenderPassInfo *pass_info = (const RenderPassInfo *)p_render_pass.id;4318const FramebufferInfo *fb_info = (const FramebufferInfo *)p_framebuffer.id;43194320DEV_ASSERT(cmd_buf_info->render_pass_state.current_subpass == UINT32_MAX);43214322auto _transition_subresources = [&](TextureInfo *p_texture_info, D3D12_RESOURCE_STATES p_states) {4323uint32_t planes = 1;4324if ((p_texture_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) {4325planes = format_get_plane_count(p_texture_info->format);4326}4327for (uint32_t i = 0; i < p_texture_info->layers; i++) {4328for (uint32_t j = 0; j < p_texture_info->mipmaps; j++) {4329uint32_t subresource = D3D12CalcSubresource(4330p_texture_info->base_mip + j,4331p_texture_info->base_layer + i,43320,4333p_texture_info->desc.MipLevels,4334p_texture_info->desc.ArraySize());43354336_resource_transition_batch(cmd_buf_info, p_texture_info, subresource, planes, p_states);4337}4338}4339};43404341if (fb_info->is_screen || !barrier_capabilities.enhanced_barriers_supported) {4342// Screen framebuffers must perform this transition even if enhanced barriers are supported.4343for (uint32_t i = 0; i < fb_info->attachments.size(); i++) {4344TextureInfo *tex_info = (TextureInfo *)fb_info->attachments[i].id;4345if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) {4346_transition_subresources(tex_info, D3D12_RESOURCE_STATE_RENDER_TARGET);4347} else if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) {4348_transition_subresources(tex_info, D3D12_RESOURCE_STATE_DEPTH_WRITE);4349} else {4350DEV_ASSERT(false);4351}4352}4353if (fb_info->vrs_attachment) {4354TextureInfo *tex_info = (TextureInfo *)fb_info->vrs_attachment.id;4355_transition_subresources(tex_info, 
D3D12_RESOURCE_STATE_SHADING_RATE_SOURCE);4356}43574358_resource_transitions_flush(cmd_buf_info);4359}43604361cmd_buf_info->render_pass_state.region_rect = CD3DX12_RECT(4362p_rect.position.x,4363p_rect.position.y,4364p_rect.position.x + p_rect.size.x,4365p_rect.position.y + p_rect.size.y);4366cmd_buf_info->render_pass_state.region_is_all = !(4367cmd_buf_info->render_pass_state.region_rect.left == 0 &&4368cmd_buf_info->render_pass_state.region_rect.top == 0 &&4369cmd_buf_info->render_pass_state.region_rect.right == fb_info->size.x &&4370cmd_buf_info->render_pass_state.region_rect.bottom == fb_info->size.y);43714372for (uint32_t i = 0; i < pass_info->attachments.size(); i++) {4373if (pass_info->attachments[i].load_op == ATTACHMENT_LOAD_OP_DONT_CARE) {4374const TextureInfo *tex_info = (const TextureInfo *)fb_info->attachments[i].id;4375_discard_texture_subresources(tex_info, cmd_buf_info);4376}4377}43784379if (fb_info->vrs_attachment && fsr_capabilities.attachment_supported) {4380ComPtr<ID3D12GraphicsCommandList5> cmd_list_5;4381cmd_buf_info->cmd_list->QueryInterface(cmd_list_5.GetAddressOf());4382if (cmd_list_5) {4383static const D3D12_SHADING_RATE_COMBINER COMBINERS[D3D12_RS_SET_SHADING_RATE_COMBINER_COUNT] = {4384D3D12_SHADING_RATE_COMBINER_PASSTHROUGH,4385D3D12_SHADING_RATE_COMBINER_OVERRIDE,4386};4387cmd_list_5->RSSetShadingRate(D3D12_SHADING_RATE_1X1, COMBINERS);4388}4389}43904391cmd_buf_info->render_pass_state.current_subpass = UINT32_MAX;4392cmd_buf_info->render_pass_state.fb_info = fb_info;4393cmd_buf_info->render_pass_state.pass_info = pass_info;4394command_next_render_subpass(p_cmd_buffer, p_cmd_buffer_type);43954396AttachmentClear *clears = ALLOCA_ARRAY(AttachmentClear, pass_info->attachments.size());4397Rect2i *clear_rects = ALLOCA_ARRAY(Rect2i, pass_info->attachments.size());4398uint32_t num_clears = 0;43994400for (uint32_t i = 0; i < pass_info->attachments.size(); i++) {4401TextureInfo *tex_info = (TextureInfo *)fb_info->attachments[i].id;4402if 
(!tex_info) {4403continue;4404}44054406AttachmentClear clear;4407if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) {4408if (pass_info->attachments[i].load_op == ATTACHMENT_LOAD_OP_CLEAR) {4409clear.aspect.set_flag(TEXTURE_ASPECT_COLOR_BIT);4410clear.color_attachment = i;4411tex_info->pending_clear.remove_from_list();4412}4413} else if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) {4414if (pass_info->attachments[i].stencil_load_op == ATTACHMENT_LOAD_OP_CLEAR) {4415clear.aspect.set_flag(TEXTURE_ASPECT_DEPTH_BIT);4416}4417}4418if (!clear.aspect.is_empty()) {4419clear.value = p_attachment_clears[i];4420clears[num_clears] = clear;4421clear_rects[num_clears] = p_rect;4422num_clears++;4423}4424}44254426if (num_clears) {4427command_render_clear_attachments(p_cmd_buffer, VectorView(clears, num_clears), VectorView(clear_rects, num_clears));4428}4429}44304431void RenderingDeviceDriverD3D12::_end_render_pass(CommandBufferID p_cmd_buffer) {4432CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;44334434DEV_ASSERT(cmd_buf_info->render_pass_state.current_subpass != UINT32_MAX);44354436const FramebufferInfo *fb_info = cmd_buf_info->render_pass_state.fb_info;4437const RenderPassInfo *pass_info = cmd_buf_info->render_pass_state.pass_info;4438const Subpass &subpass = pass_info->subpasses[cmd_buf_info->render_pass_state.current_subpass];44394440if (fb_info->is_screen) {4441// Screen framebuffers must transition back to present state when the render pass is finished.4442for (uint32_t i = 0; i < fb_info->attachments.size(); i++) {4443TextureInfo *src_tex_info = (TextureInfo *)(fb_info->attachments[i].id);4444uint32_t src_subresource = D3D12CalcSubresource(src_tex_info->base_mip, src_tex_info->base_layer, 0, src_tex_info->desc.MipLevels, src_tex_info->desc.ArraySize());4445_resource_transition_batch(cmd_buf_info, src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_PRESENT);4446}4447}44484449struct Resolve {4450ID3D12Resource 
*src_res = nullptr;4451uint32_t src_subres = 0;4452ID3D12Resource *dst_res = nullptr;4453uint32_t dst_subres = 0;4454DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN;4455};4456Resolve *resolves = ALLOCA_ARRAY(Resolve, subpass.resolve_references.size());4457uint32_t num_resolves = 0;44584459for (uint32_t i = 0; i < subpass.resolve_references.size(); i++) {4460uint32_t color_index = subpass.color_references[i].attachment;4461uint32_t resolve_index = subpass.resolve_references[i].attachment;4462DEV_ASSERT((color_index == AttachmentReference::UNUSED) == (resolve_index == AttachmentReference::UNUSED));4463if (color_index == AttachmentReference::UNUSED || !fb_info->attachments[color_index]) {4464continue;4465}44664467TextureInfo *src_tex_info = (TextureInfo *)fb_info->attachments[color_index].id;4468uint32_t src_subresource = D3D12CalcSubresource(src_tex_info->base_mip, src_tex_info->base_layer, 0, src_tex_info->desc.MipLevels, src_tex_info->desc.ArraySize());4469_resource_transition_batch(cmd_buf_info, src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_SOURCE);44704471TextureInfo *dst_tex_info = (TextureInfo *)fb_info->attachments[resolve_index].id;4472uint32_t dst_subresource = D3D12CalcSubresource(dst_tex_info->base_mip, dst_tex_info->base_layer, 0, dst_tex_info->desc.MipLevels, dst_tex_info->desc.ArraySize());4473_resource_transition_batch(cmd_buf_info, dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_DEST);44744475resolves[num_resolves].src_res = src_tex_info->resource;4476resolves[num_resolves].src_subres = src_subresource;4477resolves[num_resolves].dst_res = dst_tex_info->resource;4478resolves[num_resolves].dst_subres = dst_subresource;4479resolves[num_resolves].format = RD_TO_D3D12_FORMAT[src_tex_info->format].general_format;4480num_resolves++;4481}44824483_resource_transitions_flush(cmd_buf_info);44844485for (uint32_t i = 0; i < num_resolves; i++) {4486cmd_buf_info->cmd_list->ResolveSubresource(resolves[i].dst_res, resolves[i].dst_subres, 
resolves[i].src_res, resolves[i].src_subres, resolves[i].format);4487}4488}44894490void RenderingDeviceDriverD3D12::command_end_render_pass(CommandBufferID p_cmd_buffer) {4491_end_render_pass(p_cmd_buffer);44924493CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;4494DEV_ASSERT(cmd_buf_info->render_pass_state.current_subpass != UINT32_MAX);44954496const FramebufferInfo *fb_info = cmd_buf_info->render_pass_state.fb_info;4497const RenderPassInfo *pass_info = cmd_buf_info->render_pass_state.pass_info;44984499if (fsr_capabilities.attachment_supported) {4500ComPtr<ID3D12GraphicsCommandList5> cmd_list_5;4501cmd_buf_info->cmd_list->QueryInterface(cmd_list_5.GetAddressOf());4502if (cmd_list_5) {4503cmd_list_5->RSSetShadingRateImage(nullptr);4504}4505}45064507for (uint32_t i = 0; i < pass_info->attachments.size(); i++) {4508if (pass_info->attachments[i].store_op == ATTACHMENT_STORE_OP_DONT_CARE) {4509const TextureInfo *tex_info = (const TextureInfo *)fb_info->attachments[i].id;4510_discard_texture_subresources(tex_info, cmd_buf_info);4511}4512}45134514cmd_buf_info->render_pass_state.current_subpass = UINT32_MAX;4515}45164517void RenderingDeviceDriverD3D12::command_next_render_subpass(CommandBufferID p_cmd_buffer, CommandBufferType p_cmd_buffer_type) {4518CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;45194520if (cmd_buf_info->render_pass_state.current_subpass == UINT32_MAX) {4521cmd_buf_info->render_pass_state.current_subpass = 0;4522} else {4523_end_render_pass(p_cmd_buffer);4524cmd_buf_info->render_pass_state.current_subpass++;4525}45264527const FramebufferInfo *fb_info = cmd_buf_info->render_pass_state.fb_info;4528const RenderPassInfo *pass_info = cmd_buf_info->render_pass_state.pass_info;4529const Subpass &subpass = pass_info->subpasses[cmd_buf_info->render_pass_state.current_subpass];45304531D3D12_CPU_DESCRIPTOR_HANDLE *rtv_handles = ALLOCA_ARRAY(D3D12_CPU_DESCRIPTOR_HANDLE, 
subpass.color_references.size());4532DescriptorsHeap::Walker rtv_heap_walker = fb_info->rtv_heap.make_walker();4533for (uint32_t i = 0; i < subpass.color_references.size(); i++) {4534uint32_t attachment = subpass.color_references[i].attachment;4535if (attachment == AttachmentReference::UNUSED) {4536if (!frames[frame_idx].null_rtv_handle.ptr) {4537// No null descriptor-handle created for this frame yet.45384539if (frames[frame_idx].desc_heap_walkers.rtv.is_at_eof()) {4540if (!frames[frame_idx].desc_heaps_exhausted_reported.rtv) {4541frames[frame_idx].desc_heaps_exhausted_reported.rtv = true;4542ERR_FAIL_MSG("Cannot begin subpass because there's not enough room in the current frame's RENDER TARGET descriptors heap.\n"4543"Please increase the value of the rendering/rendering_device/d3d12/max_misc_descriptors_per_frame project setting.");4544} else {4545return;4546}4547}45484549D3D12_RENDER_TARGET_VIEW_DESC rtv_desc_null = {};4550rtv_desc_null.Format = DXGI_FORMAT_R8_UINT;4551rtv_desc_null.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;4552frames[frame_idx].null_rtv_handle = frames[frame_idx].desc_heap_walkers.rtv.get_curr_cpu_handle();4553device->CreateRenderTargetView(nullptr, &rtv_desc_null, frames[frame_idx].null_rtv_handle);4554frames[frame_idx].desc_heap_walkers.rtv.advance();4555}4556rtv_handles[i] = frames[frame_idx].null_rtv_handle;4557} else {4558uint32_t rt_index = fb_info->attachments_handle_inds[attachment];4559rtv_heap_walker.rewind();4560rtv_heap_walker.advance(rt_index);4561rtv_handles[i] = rtv_heap_walker.get_curr_cpu_handle();4562}4563}45644565D3D12_CPU_DESCRIPTOR_HANDLE dsv_handle = {};4566{4567DescriptorsHeap::Walker dsv_heap_walker = fb_info->dsv_heap.make_walker();4568if (subpass.depth_stencil_reference.attachment != AttachmentReference::UNUSED) {4569uint32_t ds_index = fb_info->attachments_handle_inds[subpass.depth_stencil_reference.attachment];4570dsv_heap_walker.rewind();4571dsv_heap_walker.advance(ds_index);4572dsv_handle = 
dsv_heap_walker.get_curr_cpu_handle();4573}4574}45754576cmd_buf_info->cmd_list->OMSetRenderTargets(subpass.color_references.size(), rtv_handles, false, dsv_handle.ptr ? &dsv_handle : nullptr);4577}45784579void RenderingDeviceDriverD3D12::command_render_set_viewport(CommandBufferID p_cmd_buffer, VectorView<Rect2i> p_viewports) {4580const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;45814582D3D12_VIEWPORT *d3d12_viewports = ALLOCA_ARRAY(D3D12_VIEWPORT, p_viewports.size());4583for (uint32_t i = 0; i < p_viewports.size(); i++) {4584d3d12_viewports[i] = CD3DX12_VIEWPORT(4585p_viewports[i].position.x,4586p_viewports[i].position.y,4587p_viewports[i].size.x,4588p_viewports[i].size.y);4589}45904591cmd_buf_info->cmd_list->RSSetViewports(p_viewports.size(), d3d12_viewports);4592}45934594void RenderingDeviceDriverD3D12::command_render_set_scissor(CommandBufferID p_cmd_buffer, VectorView<Rect2i> p_scissors) {4595const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;45964597D3D12_RECT *d3d12_scissors = ALLOCA_ARRAY(D3D12_RECT, p_scissors.size());4598for (uint32_t i = 0; i < p_scissors.size(); i++) {4599d3d12_scissors[i] = CD3DX12_RECT(4600p_scissors[i].position.x,4601p_scissors[i].position.y,4602p_scissors[i].position.x + p_scissors[i].size.x,4603p_scissors[i].position.y + p_scissors[i].size.y);4604}46054606cmd_buf_info->cmd_list->RSSetScissorRects(p_scissors.size(), d3d12_scissors);4607}46084609void RenderingDeviceDriverD3D12::command_render_clear_attachments(CommandBufferID p_cmd_buffer, VectorView<AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects) {4610const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;46114612DEV_ASSERT(cmd_buf_info->render_pass_state.current_subpass != UINT32_MAX);4613const FramebufferInfo *fb_info = cmd_buf_info->render_pass_state.fb_info;4614const RenderPassInfo *pass_info = cmd_buf_info->render_pass_state.pass_info;46154616DescriptorsHeap::Walker 
rtv_heap_walker = fb_info->rtv_heap.make_walker();4617DescriptorsHeap::Walker dsv_heap_walker = fb_info->dsv_heap.make_walker();46184619for (uint32_t i = 0; i < p_attachment_clears.size(); i++) {4620uint32_t attachment = UINT32_MAX;4621bool is_render_target = false;4622if (p_attachment_clears[i].aspect.has_flag(TEXTURE_ASPECT_COLOR_BIT)) {4623attachment = p_attachment_clears[i].color_attachment;4624is_render_target = true;4625} else {4626attachment = pass_info->subpasses[cmd_buf_info->render_pass_state.current_subpass].depth_stencil_reference.attachment;4627}46284629for (uint32_t j = 0; j < p_rects.size(); j++) {4630D3D12_RECT rect = CD3DX12_RECT(4631p_rects[j].position.x,4632p_rects[j].position.y,4633p_rects[j].position.x + p_rects[j].size.x,4634p_rects[j].position.y + p_rects[j].size.y);4635const D3D12_RECT *rect_ptr = cmd_buf_info->render_pass_state.region_is_all ? nullptr : ▭46364637if (is_render_target) {4638uint32_t color_idx = fb_info->attachments_handle_inds[attachment];4639rtv_heap_walker.rewind();4640rtv_heap_walker.advance(color_idx);4641cmd_buf_info->cmd_list->ClearRenderTargetView(4642rtv_heap_walker.get_curr_cpu_handle(),4643p_attachment_clears[i].value.color.components,4644rect_ptr ? 1 : 0,4645rect_ptr);4646} else {4647uint32_t depth_stencil_idx = fb_info->attachments_handle_inds[attachment];4648dsv_heap_walker.rewind();4649dsv_heap_walker.advance(depth_stencil_idx);4650D3D12_CLEAR_FLAGS flags = {};4651if (p_attachment_clears[i].aspect.has_flag(TEXTURE_ASPECT_DEPTH_BIT)) {4652flags |= D3D12_CLEAR_FLAG_DEPTH;4653}4654if (p_attachment_clears[i].aspect.has_flag(TEXTURE_ASPECT_STENCIL_BIT)) {4655flags |= D3D12_CLEAR_FLAG_STENCIL;4656}4657cmd_buf_info->cmd_list->ClearDepthStencilView(4658dsv_heap_walker.get_curr_cpu_handle(),4659flags,4660p_attachment_clears[i].value.depth,4661p_attachment_clears[i].value.stencil,4662rect_ptr ? 
1 : 0,4663rect_ptr);4664}4665}4666}4667}46684669void RenderingDeviceDriverD3D12::command_bind_render_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) {4670CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;4671const PipelineInfo *pipeline_info = (const PipelineInfo *)p_pipeline.id;46724673if (cmd_buf_info->graphics_pso == pipeline_info->pso) {4674return;4675}46764677const ShaderInfo *shader_info_in = pipeline_info->shader_info;4678const RenderPipelineInfo &render_info = pipeline_info->render_info;46794680cmd_buf_info->cmd_list->SetPipelineState(pipeline_info->pso);4681if (cmd_buf_info->graphics_root_signature_crc != shader_info_in->root_signature_crc) {4682cmd_buf_info->cmd_list->SetGraphicsRootSignature(shader_info_in->root_signature.Get());4683cmd_buf_info->graphics_root_signature_crc = shader_info_in->root_signature_crc;4684}46854686cmd_buf_info->cmd_list->IASetPrimitiveTopology(render_info.dyn_params.primitive_topology);4687cmd_buf_info->cmd_list->OMSetBlendFactor(render_info.dyn_params.blend_constant.components);4688cmd_buf_info->cmd_list->OMSetStencilRef(render_info.dyn_params.stencil_reference);46894690if (misc_features_support.depth_bounds_supported) {4691ComPtr<ID3D12GraphicsCommandList1> command_list_1;4692cmd_buf_info->cmd_list->QueryInterface(command_list_1.GetAddressOf());4693if (command_list_1) {4694command_list_1->OMSetDepthBounds(render_info.dyn_params.depth_bounds_min, render_info.dyn_params.depth_bounds_max);4695}4696}46974698cmd_buf_info->render_pass_state.vf_info = render_info.vf_info;46994700cmd_buf_info->graphics_pso = pipeline_info->pso;4701cmd_buf_info->compute_pso = nullptr;4702}47034704void RenderingDeviceDriverD3D12::command_bind_render_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {4705_command_bind_uniform_set(p_cmd_buffer, p_uniform_set, p_shader, p_set_index, false);4706}47074708void 
RenderingDeviceDriverD3D12::command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {4709for (uint32_t i = 0u; i < p_set_count; ++i) {4710// TODO: _command_bind_uniform_set() does WAAAAY too much stuff. A lot of it should be already cached in UniformSetID when uniform_set_create() was called. Binding is supposed to be a cheap operation, ideally a memcpy.4711_command_bind_uniform_set(p_cmd_buffer, p_uniform_sets[i], p_shader, p_first_set_index + i, false);4712}4713}47144715void RenderingDeviceDriverD3D12::command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) {4716CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;4717_bind_vertex_buffers(cmd_buf_info);4718cmd_buf_info->cmd_list->DrawInstanced(p_vertex_count, p_instance_count, p_base_vertex, p_first_instance);4719}47204721void RenderingDeviceDriverD3D12::command_render_draw_indexed(CommandBufferID p_cmd_buffer, uint32_t p_index_count, uint32_t p_instance_count, uint32_t p_first_index, int32_t p_vertex_offset, uint32_t p_first_instance) {4722CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;4723_bind_vertex_buffers(cmd_buf_info);4724cmd_buf_info->cmd_list->DrawIndexedInstanced(p_index_count, p_instance_count, p_first_index, p_vertex_offset, p_first_instance);4725}47264727void RenderingDeviceDriverD3D12::command_render_draw_indexed_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {4728CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;4729_bind_vertex_buffers(cmd_buf_info);4730BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id;4731if (!barrier_capabilities.enhanced_barriers_supported) {4732_resource_transition_batch(cmd_buf_info, indirect_buf_info, 0, 1, 
D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);4733_resource_transitions_flush(cmd_buf_info);4734}47354736cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw_indexed.Get(), p_draw_count, indirect_buf_info->resource, p_offset, nullptr, 0);4737}47384739void RenderingDeviceDriverD3D12::command_render_draw_indexed_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) {4740CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;4741_bind_vertex_buffers(cmd_buf_info);4742BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id;4743BufferInfo *count_buf_info = (BufferInfo *)p_count_buffer.id;4744if (!barrier_capabilities.enhanced_barriers_supported) {4745_resource_transition_batch(cmd_buf_info, indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);4746_resource_transition_batch(cmd_buf_info, count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);4747_resource_transitions_flush(cmd_buf_info);4748}47494750cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw_indexed.Get(), p_max_draw_count, indirect_buf_info->resource, p_offset, count_buf_info->resource, p_count_buffer_offset);4751}47524753void RenderingDeviceDriverD3D12::command_render_draw_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {4754CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;4755_bind_vertex_buffers(cmd_buf_info);4756BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id;4757if (!barrier_capabilities.enhanced_barriers_supported) {4758_resource_transition_batch(cmd_buf_info, indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);4759_resource_transitions_flush(cmd_buf_info);4760}47614762cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw.Get(), p_draw_count, 
indirect_buf_info->resource, p_offset, nullptr, 0);4763}47644765void RenderingDeviceDriverD3D12::command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) {4766CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;4767_bind_vertex_buffers(cmd_buf_info);4768BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id;4769BufferInfo *count_buf_info = (BufferInfo *)p_count_buffer.id;4770if (!barrier_capabilities.enhanced_barriers_supported) {4771_resource_transition_batch(cmd_buf_info, indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);4772_resource_transition_batch(cmd_buf_info, count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);4773_resource_transitions_flush(cmd_buf_info);4774}47754776cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw.Get(), p_max_draw_count, indirect_buf_info->resource, p_offset, count_buf_info->resource, p_count_buffer_offset);4777}47784779void RenderingDeviceDriverD3D12::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) {4780CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;47814782DEV_ASSERT(cmd_buf_info->render_pass_state.current_subpass != UINT32_MAX);47834784// Vertex buffer views are set deferredly, to be sure we already know the strides by then,4785// which is only true once the pipeline has been bound. Otherwise, we'd need that the pipeline4786// is always bound first, which would be not kind of us. 
[[DEFERRED_VERTEX_BUFFERS]]4787DEV_ASSERT(p_binding_count <= ARRAY_SIZE(cmd_buf_info->render_pass_state.vertex_buffer_views));4788for (uint32_t i = 0; i < p_binding_count; i++) {4789BufferInfo *buffer_info = (BufferInfo *)p_buffers[i].id;47904791cmd_buf_info->render_pass_state.vertex_buffer_views[i] = {};4792cmd_buf_info->render_pass_state.vertex_buffer_views[i].BufferLocation = buffer_info->resource->GetGPUVirtualAddress() + p_offsets[i];4793cmd_buf_info->render_pass_state.vertex_buffer_views[i].SizeInBytes = buffer_info->size - p_offsets[i];4794if (!barrier_capabilities.enhanced_barriers_supported) {4795_resource_transition_batch(cmd_buf_info, buffer_info, 0, 1, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER);4796}4797}47984799if (!barrier_capabilities.enhanced_barriers_supported) {4800_resource_transitions_flush(cmd_buf_info);4801}48024803cmd_buf_info->render_pass_state.vertex_buffer_count = p_binding_count;4804}48054806void RenderingDeviceDriverD3D12::command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) {4807CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;4808BufferInfo *buffer_info = (BufferInfo *)p_buffer.id;48094810D3D12_INDEX_BUFFER_VIEW d3d12_ib_view = {};4811d3d12_ib_view.BufferLocation = buffer_info->resource->GetGPUVirtualAddress() + p_offset;4812d3d12_ib_view.SizeInBytes = buffer_info->size - p_offset;4813d3d12_ib_view.Format = p_format == INDEX_BUFFER_FORMAT_UINT16 ? 
DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT;48144815if (!barrier_capabilities.enhanced_barriers_supported) {4816_resource_transition_batch(cmd_buf_info, buffer_info, 0, 1, D3D12_RESOURCE_STATE_INDEX_BUFFER);4817_resource_transitions_flush(cmd_buf_info);4818}48194820cmd_buf_info->cmd_list->IASetIndexBuffer(&d3d12_ib_view);4821}48224823// [[DEFERRED_VERTEX_BUFFERS]]4824void RenderingDeviceDriverD3D12::_bind_vertex_buffers(CommandBufferInfo *p_cmd_buf_info) {4825RenderPassState &render_pass_state = p_cmd_buf_info->render_pass_state;4826if (render_pass_state.vertex_buffer_count && render_pass_state.vf_info) {4827for (uint32_t i = 0; i < render_pass_state.vertex_buffer_count; i++) {4828render_pass_state.vertex_buffer_views[i].StrideInBytes = render_pass_state.vf_info->vertex_buffer_strides[i];4829}4830p_cmd_buf_info->cmd_list->IASetVertexBuffers(0, render_pass_state.vertex_buffer_count, render_pass_state.vertex_buffer_views);4831render_pass_state.vertex_buffer_count = 0;4832}4833}48344835void RenderingDeviceDriverD3D12::command_render_set_blend_constants(CommandBufferID p_cmd_buffer, const Color &p_constants) {4836const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;4837cmd_buf_info->cmd_list->OMSetBlendFactor(p_constants.components);4838}48394840void RenderingDeviceDriverD3D12::command_render_set_line_width(CommandBufferID p_cmd_buffer, float p_width) {4841if (!Math::is_equal_approx(p_width, 1.0f)) {4842ERR_FAIL_MSG("Setting line widths other than 1.0 is not supported by the Direct3D 12 rendering driver.");4843}4844}48454846// ----- PIPELINE -----48474848static const D3D12_PRIMITIVE_TOPOLOGY_TYPE RD_PRIMITIVE_TO_D3D12_TOPOLOGY_TYPE[RDD::RENDER_PRIMITIVE_MAX] = 
{4849D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT,4850D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE,4851D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE,4852D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE,4853D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE,4854D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE,4855D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE,4856D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE,4857D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE,4858D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE,4859D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH,4860};48614862static const D3D12_PRIMITIVE_TOPOLOGY RD_PRIMITIVE_TO_D3D12_TOPOLOGY[RDD::RENDER_PRIMITIVE_MAX] = {4863D3D_PRIMITIVE_TOPOLOGY_POINTLIST,4864D3D_PRIMITIVE_TOPOLOGY_LINELIST,4865D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,4866D3D_PRIMITIVE_TOPOLOGY_LINESTRIP,4867D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ,4868D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST,4869D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ,4870D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP,4871D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ,4872D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP,4873D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST,4874};48754876static const D3D12_CULL_MODE RD_POLYGON_CULL_TO_D3D12_CULL_MODE[RDD::POLYGON_CULL_MAX] = {4877D3D12_CULL_MODE_NONE,4878D3D12_CULL_MODE_FRONT,4879D3D12_CULL_MODE_BACK,4880};48814882static const D3D12_STENCIL_OP RD_TO_D3D12_STENCIL_OP[RDD::STENCIL_OP_MAX] = {4883D3D12_STENCIL_OP_KEEP,4884D3D12_STENCIL_OP_ZERO,4885D3D12_STENCIL_OP_REPLACE,4886D3D12_STENCIL_OP_INCR_SAT,4887D3D12_STENCIL_OP_DECR_SAT,4888D3D12_STENCIL_OP_INVERT,4889D3D12_STENCIL_OP_INCR,4890D3D12_STENCIL_OP_DECR,4891};48924893static const D3D12_LOGIC_OP RD_TO_D3D12_LOGIC_OP[RDD::LOGIC_OP_MAX] = 
{4894D3D12_LOGIC_OP_CLEAR,4895D3D12_LOGIC_OP_AND,4896D3D12_LOGIC_OP_AND_REVERSE,4897D3D12_LOGIC_OP_COPY,4898D3D12_LOGIC_OP_AND_INVERTED,4899D3D12_LOGIC_OP_NOOP,4900D3D12_LOGIC_OP_XOR,4901D3D12_LOGIC_OP_OR,4902D3D12_LOGIC_OP_NOR,4903D3D12_LOGIC_OP_EQUIV,4904D3D12_LOGIC_OP_INVERT,4905D3D12_LOGIC_OP_OR_REVERSE,4906D3D12_LOGIC_OP_COPY_INVERTED,4907D3D12_LOGIC_OP_OR_INVERTED,4908D3D12_LOGIC_OP_NAND,4909D3D12_LOGIC_OP_SET,4910};49114912static const D3D12_BLEND RD_TO_D3D12_BLEND_FACTOR[RDD::BLEND_FACTOR_MAX] = {4913D3D12_BLEND_ZERO,4914D3D12_BLEND_ONE,4915D3D12_BLEND_SRC_COLOR,4916D3D12_BLEND_INV_SRC_COLOR,4917D3D12_BLEND_DEST_COLOR,4918D3D12_BLEND_INV_DEST_COLOR,4919D3D12_BLEND_SRC_ALPHA,4920D3D12_BLEND_INV_SRC_ALPHA,4921D3D12_BLEND_DEST_ALPHA,4922D3D12_BLEND_INV_DEST_ALPHA,4923D3D12_BLEND_BLEND_FACTOR,4924D3D12_BLEND_INV_BLEND_FACTOR,4925D3D12_BLEND_BLEND_FACTOR,4926D3D12_BLEND_INV_BLEND_FACTOR,4927D3D12_BLEND_SRC_ALPHA_SAT,4928D3D12_BLEND_SRC1_COLOR,4929D3D12_BLEND_INV_SRC1_COLOR,4930D3D12_BLEND_SRC1_ALPHA,4931D3D12_BLEND_INV_SRC1_ALPHA,4932};49334934static const D3D12_BLEND_OP RD_TO_D3D12_BLEND_OP[RDD::BLEND_OP_MAX] = {4935D3D12_BLEND_OP_ADD,4936D3D12_BLEND_OP_SUBTRACT,4937D3D12_BLEND_OP_REV_SUBTRACT,4938D3D12_BLEND_OP_MIN,4939D3D12_BLEND_OP_MAX,4940};49414942RDD::PipelineID RenderingDeviceDriverD3D12::render_pipeline_create(4943ShaderID p_shader,4944VertexFormatID p_vertex_format,4945RenderPrimitive p_render_primitive,4946PipelineRasterizationState p_rasterization_state,4947PipelineMultisampleState p_multisample_state,4948PipelineDepthStencilState p_depth_stencil_state,4949PipelineColorBlendState p_blend_state,4950VectorView<int32_t> p_color_attachments,4951BitField<PipelineDynamicStateFlags> p_dynamic_state,4952RenderPassID p_render_pass,4953uint32_t p_render_subpass,4954VectorView<PipelineSpecializationConstant> p_specialization_constants) {4955const ShaderInfo *shader_info_in = (const ShaderInfo *)p_shader.id;49564957CD3DX12_PIPELINE_STATE_STREAM1 pipeline_desc = 
{};49584959const RenderPassInfo *pass_info = (const RenderPassInfo *)p_render_pass.id;4960RenderPipelineInfo render_info;49614962// Attachments.4963LocalVector<uint32_t> color_attachments;4964{4965const Subpass &subpass = pass_info->subpasses[p_render_subpass];49664967for (uint32_t i = 0; i < ARRAY_SIZE((&pipeline_desc.RTVFormats)->RTFormats); i++) {4968(&pipeline_desc.RTVFormats)->RTFormats[i] = DXGI_FORMAT_UNKNOWN;4969}49704971for (uint32_t i = 0; i < subpass.color_references.size(); i++) {4972const AttachmentReference &ref = subpass.color_references[i];4973if (ref.attachment != AttachmentReference::UNUSED) {4974const Attachment &attachment = pass_info->attachments[ref.attachment];4975DEV_ASSERT((&pipeline_desc.RTVFormats)->RTFormats[i] == DXGI_FORMAT_UNKNOWN);4976(&pipeline_desc.RTVFormats)->RTFormats[i] = RD_TO_D3D12_FORMAT[attachment.format].general_format;4977}4978}4979(&pipeline_desc.RTVFormats)->NumRenderTargets = p_color_attachments.size();49804981if (subpass.depth_stencil_reference.attachment != AttachmentReference::UNUSED) {4982const Attachment &attachment = pass_info->attachments[subpass.depth_stencil_reference.attachment];4983pipeline_desc.DSVFormat = RD_TO_D3D12_FORMAT[attachment.format].dsv_format;4984} else {4985pipeline_desc.DSVFormat = DXGI_FORMAT_UNKNOWN;4986}4987}49884989// Vertex.4990if (p_vertex_format) {4991const VertexFormatInfo *vf_info = (const VertexFormatInfo *)p_vertex_format.id;4992(&pipeline_desc.InputLayout)->pInputElementDescs = vf_info->input_elem_descs.ptr();4993(&pipeline_desc.InputLayout)->NumElements = vf_info->input_elem_descs.size();4994render_info.vf_info = vf_info;4995}49964997// Input assembly & tessellation.49984999pipeline_desc.PrimitiveTopologyType = RD_PRIMITIVE_TO_D3D12_TOPOLOGY_TYPE[p_render_primitive];5000if (p_render_primitive == RENDER_PRIMITIVE_TESSELATION_PATCH) {5001// Is there any way to get the true point count limit?5002ERR_FAIL_COND_V(p_rasterization_state.patch_control_points < 1 || 
p_rasterization_state.patch_control_points > 32, PipelineID());5003render_info.dyn_params.primitive_topology = (D3D12_PRIMITIVE_TOPOLOGY)((int)D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + p_rasterization_state.patch_control_points);5004} else {5005render_info.dyn_params.primitive_topology = RD_PRIMITIVE_TO_D3D12_TOPOLOGY[p_render_primitive];5006}5007if (p_render_primitive == RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_RESTART_INDEX) {5008// TODO: This is right for 16-bit indices; for 32-bit there's a different enum value to set, but we don't know at this point.5009pipeline_desc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF;5010} else {5011pipeline_desc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;5012}50135014// Rasterization.5015(&pipeline_desc.RasterizerState)->DepthClipEnable = !p_rasterization_state.enable_depth_clamp;5016// In D3D12, discard can be supported with some extra effort (empty pixel shader + disable depth/stencil test); that said, unsupported by now.5017ERR_FAIL_COND_V(p_rasterization_state.discard_primitives, PipelineID());5018(&pipeline_desc.RasterizerState)->FillMode = p_rasterization_state.wireframe ? 
D3D12_FILL_MODE_WIREFRAME : D3D12_FILL_MODE_SOLID;5019(&pipeline_desc.RasterizerState)->CullMode = RD_POLYGON_CULL_TO_D3D12_CULL_MODE[p_rasterization_state.cull_mode];5020(&pipeline_desc.RasterizerState)->FrontCounterClockwise = p_rasterization_state.front_face == POLYGON_FRONT_FACE_COUNTER_CLOCKWISE;5021// In D3D12, there's still a point in setting up depth bias with no depth buffer, but just zeroing (disabling) it all in such case is closer to Vulkan.5022if (p_rasterization_state.depth_bias_enabled && pipeline_desc.DSVFormat != DXGI_FORMAT_UNKNOWN) {5023(&pipeline_desc.RasterizerState)->DepthBias = p_rasterization_state.depth_bias_constant_factor;5024(&pipeline_desc.RasterizerState)->DepthBiasClamp = p_rasterization_state.depth_bias_clamp;5025(&pipeline_desc.RasterizerState)->SlopeScaledDepthBias = p_rasterization_state.depth_bias_slope_factor;5026(&pipeline_desc.RasterizerState)->DepthBias = 0;5027(&pipeline_desc.RasterizerState)->DepthBiasClamp = 0.0f;5028(&pipeline_desc.RasterizerState)->SlopeScaledDepthBias = 0.0f;5029}5030(&pipeline_desc.RasterizerState)->ForcedSampleCount = 0;5031(&pipeline_desc.RasterizerState)->ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF;5032(&pipeline_desc.RasterizerState)->MultisampleEnable = TEXTURE_SAMPLES_COUNT[p_multisample_state.sample_count] != 1;5033(&pipeline_desc.RasterizerState)->AntialiasedLineEnable = true;50345035// In D3D12, there's no line width.5036ERR_FAIL_COND_V(!Math::is_equal_approx(p_rasterization_state.line_width, 1.0f), PipelineID());50375038// Multisample.5039ERR_FAIL_COND_V(p_multisample_state.enable_sample_shading, PipelineID()); // How one enables this in D3D12?5040if ((&pipeline_desc.RTVFormats)->NumRenderTargets || pipeline_desc.DSVFormat != DXGI_FORMAT_UNKNOWN) {5041uint32_t sample_count = MIN(5042pass_info->max_supported_sample_count,5043TEXTURE_SAMPLES_COUNT[p_multisample_state.sample_count]);5044(&pipeline_desc.SampleDesc)->Count = sample_count;5045} else 
{5046(&pipeline_desc.SampleDesc)->Count = 1;5047}5048if ((&pipeline_desc.SampleDesc)->Count > 1) {5049(&pipeline_desc.SampleDesc)->Quality = DXGI_STANDARD_MULTISAMPLE_QUALITY_PATTERN;5050} else {5051(&pipeline_desc.SampleDesc)->Quality = 0;5052}5053if (p_multisample_state.sample_mask.size()) {5054for (int i = 1; i < p_multisample_state.sample_mask.size(); i++) {5055// In D3D12 there's a single sample mask for every pixel.5056ERR_FAIL_COND_V(p_multisample_state.sample_mask[i] != p_multisample_state.sample_mask[0], PipelineID());5057}5058pipeline_desc.SampleMask = p_multisample_state.sample_mask[0];5059} else {5060pipeline_desc.SampleMask = 0xffffffff;5061}50625063// Depth stencil.50645065if (pipeline_desc.DSVFormat == DXGI_FORMAT_UNKNOWN) {5066(&pipeline_desc.DepthStencilState)->DepthEnable = false;5067(&pipeline_desc.DepthStencilState)->StencilEnable = false;5068} else {5069(&pipeline_desc.DepthStencilState)->DepthEnable = p_depth_stencil_state.enable_depth_test;5070(&pipeline_desc.DepthStencilState)->DepthWriteMask = p_depth_stencil_state.enable_depth_write ? 
D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;5071(&pipeline_desc.DepthStencilState)->DepthFunc = RD_TO_D3D12_COMPARE_OP[p_depth_stencil_state.depth_compare_operator];5072(&pipeline_desc.DepthStencilState)->DepthBoundsTestEnable = p_depth_stencil_state.enable_depth_range;5073(&pipeline_desc.DepthStencilState)->StencilEnable = p_depth_stencil_state.enable_stencil;50745075// In D3D12 some elements can't be different across front and back.5076ERR_FAIL_COND_V(p_depth_stencil_state.front_op.compare_mask != p_depth_stencil_state.back_op.compare_mask, PipelineID());5077ERR_FAIL_COND_V(p_depth_stencil_state.front_op.write_mask != p_depth_stencil_state.back_op.write_mask, PipelineID());5078ERR_FAIL_COND_V(p_depth_stencil_state.front_op.reference != p_depth_stencil_state.back_op.reference, PipelineID());5079(&pipeline_desc.DepthStencilState)->StencilReadMask = p_depth_stencil_state.front_op.compare_mask;5080(&pipeline_desc.DepthStencilState)->StencilWriteMask = p_depth_stencil_state.front_op.write_mask;50815082(&pipeline_desc.DepthStencilState)->FrontFace.StencilFailOp = RD_TO_D3D12_STENCIL_OP[p_depth_stencil_state.front_op.fail];5083(&pipeline_desc.DepthStencilState)->FrontFace.StencilPassOp = RD_TO_D3D12_STENCIL_OP[p_depth_stencil_state.front_op.pass];5084(&pipeline_desc.DepthStencilState)->FrontFace.StencilDepthFailOp = RD_TO_D3D12_STENCIL_OP[p_depth_stencil_state.front_op.depth_fail];5085(&pipeline_desc.DepthStencilState)->FrontFace.StencilFunc = RD_TO_D3D12_COMPARE_OP[p_depth_stencil_state.front_op.compare];50865087(&pipeline_desc.DepthStencilState)->BackFace.StencilFailOp = RD_TO_D3D12_STENCIL_OP[p_depth_stencil_state.back_op.fail];5088(&pipeline_desc.DepthStencilState)->BackFace.StencilPassOp = RD_TO_D3D12_STENCIL_OP[p_depth_stencil_state.back_op.pass];5089(&pipeline_desc.DepthStencilState)->BackFace.StencilDepthFailOp = RD_TO_D3D12_STENCIL_OP[p_depth_stencil_state.back_op.depth_fail];5090(&pipeline_desc.DepthStencilState)->BackFace.StencilFunc = 
RD_TO_D3D12_COMPARE_OP[p_depth_stencil_state.back_op.compare];50915092if (misc_features_support.depth_bounds_supported) {5093render_info.dyn_params.depth_bounds_min = p_depth_stencil_state.enable_depth_range ? p_depth_stencil_state.depth_range_min : 0.0f;5094render_info.dyn_params.depth_bounds_max = p_depth_stencil_state.enable_depth_range ? p_depth_stencil_state.depth_range_max : 1.0f;5095} else {5096if (p_depth_stencil_state.enable_depth_range) {5097WARN_PRINT_ONCE("Depth bounds test is not supported by the GPU driver.");5098}5099}51005101render_info.dyn_params.stencil_reference = p_depth_stencil_state.front_op.reference;5102}51035104// Blend states.5105(&pipeline_desc.BlendState)->AlphaToCoverageEnable = p_multisample_state.enable_alpha_to_coverage;5106{5107bool all_attachments_same_blend = true;5108for (int i = 0; i < p_blend_state.attachments.size(); i++) {5109const PipelineColorBlendState::Attachment &bs = p_blend_state.attachments[i];5110D3D12_RENDER_TARGET_BLEND_DESC &bd = (&pipeline_desc.BlendState)->RenderTarget[i];51115112bd.BlendEnable = bs.enable_blend;5113bd.LogicOpEnable = p_blend_state.enable_logic_op;5114bd.LogicOp = RD_TO_D3D12_LOGIC_OP[p_blend_state.logic_op];51155116bd.SrcBlend = RD_TO_D3D12_BLEND_FACTOR[bs.src_color_blend_factor];5117bd.DestBlend = RD_TO_D3D12_BLEND_FACTOR[bs.dst_color_blend_factor];5118bd.BlendOp = RD_TO_D3D12_BLEND_OP[bs.color_blend_op];51195120bd.SrcBlendAlpha = RD_TO_D3D12_BLEND_FACTOR[bs.src_alpha_blend_factor];5121bd.DestBlendAlpha = RD_TO_D3D12_BLEND_FACTOR[bs.dst_alpha_blend_factor];5122bd.BlendOpAlpha = RD_TO_D3D12_BLEND_OP[bs.alpha_blend_op];51235124if (bs.write_r) {5125bd.RenderTargetWriteMask |= D3D12_COLOR_WRITE_ENABLE_RED;5126}5127if (bs.write_g) {5128bd.RenderTargetWriteMask |= D3D12_COLOR_WRITE_ENABLE_GREEN;5129}5130if (bs.write_b) {5131bd.RenderTargetWriteMask |= D3D12_COLOR_WRITE_ENABLE_BLUE;5132}5133if (bs.write_a) {5134bd.RenderTargetWriteMask |= D3D12_COLOR_WRITE_ENABLE_ALPHA;5135}51365137if (i > 0 && 
all_attachments_same_blend) {5138all_attachments_same_blend = &(&pipeline_desc.BlendState)->RenderTarget[i] == &(&pipeline_desc.BlendState)->RenderTarget[0];5139}5140}51415142// Per D3D12 docs, if logic op used, independent blending is not supported.5143ERR_FAIL_COND_V(p_blend_state.enable_logic_op && !all_attachments_same_blend, PipelineID());51445145(&pipeline_desc.BlendState)->IndependentBlendEnable = !all_attachments_same_blend;5146}51475148render_info.dyn_params.blend_constant = p_blend_state.blend_constant;51495150// Multiview5151// We are using render target slices for each view.5152const D3D12_VIEW_INSTANCE_LOCATION viewInstanceLocations[D3D12_MAX_VIEW_INSTANCE_COUNT] = { { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 } };5153if (pass_info->view_count > 1) {5154(&pipeline_desc.ViewInstancingDesc)->ViewInstanceCount = pass_info->view_count;5155(&pipeline_desc.ViewInstancingDesc)->Flags = D3D12_VIEW_INSTANCING_FLAG_NONE;5156(&pipeline_desc.ViewInstancingDesc)->pViewInstanceLocations = viewInstanceLocations;5157}51585159// Stages bytecodes + specialization constants.51605161pipeline_desc.pRootSignature = shader_info_in->root_signature.Get();51625163HashMap<ShaderStage, Vector<uint8_t>> final_stages_bytecode;5164bool ok = _shader_apply_specialization_constants(shader_info_in, p_specialization_constants, final_stages_bytecode);5165ERR_FAIL_COND_V(!ok, PipelineID());51665167pipeline_desc.VS = D3D12_SHADER_BYTECODE{5168final_stages_bytecode[SHADER_STAGE_VERTEX].ptr(),5169(SIZE_T)final_stages_bytecode[SHADER_STAGE_VERTEX].size()5170};5171pipeline_desc.PS = D3D12_SHADER_BYTECODE{5172final_stages_bytecode[SHADER_STAGE_FRAGMENT].ptr(),5173(SIZE_T)final_stages_bytecode[SHADER_STAGE_FRAGMENT].size()5174};51755176ComPtr<ID3D12Device2> device_2;5177device->QueryInterface(device_2.GetAddressOf());5178ID3D12PipelineState *pso = nullptr;5179HRESULT res = E_FAIL;5180if (device_2) {5181D3D12_PIPELINE_STATE_STREAM_DESC pssd = {};5182pssd.pPipelineStateSubobjectStream = 
&pipeline_desc;5183pssd.SizeInBytes = sizeof(pipeline_desc);5184res = device_2->CreatePipelineState(&pssd, IID_PPV_ARGS(&pso));5185} else {5186D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = pipeline_desc.GraphicsDescV0();5187res = device->CreateGraphicsPipelineState(&desc, IID_PPV_ARGS(&pso));5188}5189ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), PipelineID(), "Create(Graphics)PipelineState failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");51905191PipelineInfo *pipeline_info = memnew(PipelineInfo);5192pipeline_info->pso = pso;5193pipeline_info->shader_info = shader_info_in;5194pipeline_info->render_info = render_info;51955196return PipelineID(pipeline_info);5197}51985199/*****************/5200/**** COMPUTE ****/5201/*****************/52025203// ----- COMMANDS -----52045205void RenderingDeviceDriverD3D12::command_bind_compute_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) {5206CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;5207const PipelineInfo *pipeline_info = (const PipelineInfo *)p_pipeline.id;52085209if (cmd_buf_info->compute_pso == pipeline_info->pso) {5210return;5211}52125213const ShaderInfo *shader_info_in = pipeline_info->shader_info;5214cmd_buf_info->cmd_list->SetPipelineState(pipeline_info->pso);5215if (cmd_buf_info->compute_root_signature_crc != shader_info_in->root_signature_crc) {5216cmd_buf_info->cmd_list->SetComputeRootSignature(shader_info_in->root_signature.Get());5217cmd_buf_info->compute_root_signature_crc = shader_info_in->root_signature_crc;5218}52195220cmd_buf_info->compute_pso = pipeline_info->pso;5221cmd_buf_info->graphics_pso = nullptr;5222}52235224void RenderingDeviceDriverD3D12::command_bind_compute_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {5225_command_bind_uniform_set(p_cmd_buffer, p_uniform_set, p_shader, p_set_index, true);5226}52275228void RenderingDeviceDriverD3D12::command_bind_compute_uniform_sets(CommandBufferID 
p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {5229for (uint32_t i = 0u; i < p_set_count; ++i) {5230// TODO: _command_bind_uniform_set() does WAAAAY too much stuff. A lot of it should be already cached in UniformSetID when uniform_set_create() was called. Binding is supposed to be a cheap operation, ideally a memcpy.5231_command_bind_uniform_set(p_cmd_buffer, p_uniform_sets[i], p_shader, p_first_set_index + i, true);5232}5233}52345235void RenderingDeviceDriverD3D12::command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {5236CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;5237if (!barrier_capabilities.enhanced_barriers_supported) {5238_resource_transitions_flush(cmd_buf_info);5239}52405241cmd_buf_info->cmd_list->Dispatch(p_x_groups, p_y_groups, p_z_groups);5242}52435244void RenderingDeviceDriverD3D12::command_compute_dispatch_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset) {5245CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;5246BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id;5247if (!barrier_capabilities.enhanced_barriers_supported) {5248_resource_transition_batch(cmd_buf_info, indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);5249_resource_transitions_flush(cmd_buf_info);5250}52515252cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.dispatch.Get(), 1, indirect_buf_info->resource, p_offset, nullptr, 0);5253}52545255// ----- PIPELINE -----52565257RDD::PipelineID RenderingDeviceDriverD3D12::compute_pipeline_create(ShaderID p_shader, VectorView<PipelineSpecializationConstant> p_specialization_constants) {5258const ShaderInfo *shader_info_in = (const ShaderInfo *)p_shader.id;52595260CD3DX12_PIPELINE_STATE_STREAM pipeline_desc = {};52615262// Stages bytecodes + specialization 
constants.52635264pipeline_desc.pRootSignature = shader_info_in->root_signature.Get();52655266HashMap<ShaderStage, Vector<uint8_t>> final_stages_bytecode;5267bool ok = _shader_apply_specialization_constants(shader_info_in, p_specialization_constants, final_stages_bytecode);5268ERR_FAIL_COND_V(!ok, PipelineID());52695270pipeline_desc.CS = D3D12_SHADER_BYTECODE{5271final_stages_bytecode[SHADER_STAGE_COMPUTE].ptr(),5272(SIZE_T)final_stages_bytecode[SHADER_STAGE_COMPUTE].size()5273};52745275ComPtr<ID3D12Device2> device_2;5276device->QueryInterface(device_2.GetAddressOf());5277ID3D12PipelineState *pso = nullptr;5278HRESULT res = E_FAIL;5279if (device_2) {5280D3D12_PIPELINE_STATE_STREAM_DESC pssd = {};5281pssd.pPipelineStateSubobjectStream = &pipeline_desc;5282pssd.SizeInBytes = sizeof(pipeline_desc);5283res = device_2->CreatePipelineState(&pssd, IID_PPV_ARGS(&pso));5284} else {5285D3D12_COMPUTE_PIPELINE_STATE_DESC desc = pipeline_desc.ComputeDescV0();5286res = device->CreateComputePipelineState(&desc, IID_PPV_ARGS(&pso));5287}5288ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), PipelineID(), "Create(Compute)PipelineState failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");52895290PipelineInfo *pipeline_info = memnew(PipelineInfo);5291pipeline_info->pso = pso;5292pipeline_info->shader_info = shader_info_in;52935294return PipelineID(pipeline_info);5295}52965297/*****************/5298/**** QUERIES ****/5299/*****************/53005301// ----- TIMESTAMP -----53025303RDD::QueryPoolID RenderingDeviceDriverD3D12::timestamp_query_pool_create(uint32_t p_query_count) {5304ComPtr<ID3D12QueryHeap> query_heap;5305{5306D3D12_QUERY_HEAP_DESC qh_desc = {};5307qh_desc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;5308qh_desc.Count = p_query_count;5309qh_desc.NodeMask = 0;5310HRESULT res = device->CreateQueryHeap(&qh_desc, IID_PPV_ARGS(query_heap.GetAddressOf()));5311ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), QueryPoolID(), "CreateQueryHeap failed with error " + vformat("0x%08ux", (uint64_t)res) + 
".");5312}53135314ComPtr<D3D12MA::Allocation> results_buffer_allocation;5315{5316D3D12MA::ALLOCATION_DESC allocation_desc = {};5317allocation_desc.HeapType = D3D12_HEAP_TYPE_READBACK;53185319CD3DX12_RESOURCE_DESC resource_desc = CD3DX12_RESOURCE_DESC::Buffer(sizeof(uint64_t) * p_query_count);53205321ComPtr<ID3D12Resource> results_buffer;5322HRESULT res = allocator->CreateResource(5323&allocation_desc,5324&resource_desc,5325D3D12_RESOURCE_STATE_COPY_DEST,5326nullptr,5327results_buffer_allocation.GetAddressOf(),5328IID_PPV_ARGS(results_buffer.GetAddressOf()));5329ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), QueryPoolID(), "D3D12MA::CreateResource failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");5330}53315332// Bookkeep.53335334TimestampQueryPoolInfo *tqp_info = VersatileResource::allocate<TimestampQueryPoolInfo>(resources_allocator);5335tqp_info->query_heap = query_heap;5336tqp_info->query_count = p_query_count;5337tqp_info->results_buffer_allocation = results_buffer_allocation;53385339return RDD::QueryPoolID(tqp_info);5340}53415342void RenderingDeviceDriverD3D12::timestamp_query_pool_free(QueryPoolID p_pool_id) {5343TimestampQueryPoolInfo *tqp_info = (TimestampQueryPoolInfo *)p_pool_id.id;5344VersatileResource::free(resources_allocator, tqp_info);5345}53465347void RenderingDeviceDriverD3D12::timestamp_query_pool_get_results(QueryPoolID p_pool_id, uint32_t p_query_count, uint64_t *r_results) {5348TimestampQueryPoolInfo *tqp_info = (TimestampQueryPoolInfo *)p_pool_id.id;53495350ID3D12Resource *results_buffer = tqp_info->results_buffer_allocation->GetResource();53515352void *results_buffer_data = nullptr;5353results_buffer->Map(0, &VOID_RANGE, &results_buffer_data);5354memcpy(r_results, results_buffer_data, sizeof(uint64_t) * p_query_count);5355results_buffer->Unmap(0, &VOID_RANGE);5356}53575358uint64_t RenderingDeviceDriverD3D12::timestamp_query_result_to_time(uint64_t p_result) {5359return p_result / (double)device_limits.timestamp_frequency * 
1000000000.0;5360}53615362void RenderingDeviceDriverD3D12::command_timestamp_query_pool_reset(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_query_count) {5363}53645365void RenderingDeviceDriverD3D12::command_timestamp_write(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_index) {5366const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;5367TimestampQueryPoolInfo *tqp_info = (TimestampQueryPoolInfo *)p_pool_id.id;5368ID3D12Resource *results_buffer = tqp_info->results_buffer_allocation->GetResource();5369cmd_buf_info->cmd_list->EndQuery(tqp_info->query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, p_index);5370cmd_buf_info->cmd_list->ResolveQueryData(tqp_info->query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, p_index, 1, results_buffer, p_index * sizeof(uint64_t));5371}53725373void RenderingDeviceDriverD3D12::command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) {5374#ifdef PIX_ENABLED5375const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;5376PIXBeginEvent(cmd_buf_info->cmd_list.Get(), p_color.to_argb32(), p_label_name);5377#endif5378}53795380void RenderingDeviceDriverD3D12::command_end_label(CommandBufferID p_cmd_buffer) {5381#ifdef PIX_ENABLED5382const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id;5383PIXEndEvent(cmd_buf_info->cmd_list.Get());5384#endif5385}53865387void RenderingDeviceDriverD3D12::command_insert_breadcrumb(CommandBufferID p_cmd_buffer, uint32_t p_data) {5388// TODO: Implement via DRED.5389}53905391/********************/5392/**** SUBMISSION ****/5393/********************/53945395void RenderingDeviceDriverD3D12::begin_segment(uint32_t p_frame_index, uint32_t p_frames_drawn) {5396frame_idx = p_frame_index;53975398frames_drawn = 
p_frames_drawn;5399allocator->SetCurrentFrameIndex(p_frames_drawn);54005401frames[frame_idx].desc_heap_walkers.resources.rewind();5402frames[frame_idx].desc_heap_walkers.samplers.rewind();5403frames[frame_idx].desc_heap_walkers.aux.rewind();5404frames[frame_idx].desc_heap_walkers.rtv.rewind();5405frames[frame_idx].desc_heaps_exhausted_reported = {};5406frames[frame_idx].null_rtv_handle = CD3DX12_CPU_DESCRIPTOR_HANDLE{};5407frames[frame_idx].segment_serial = segment_serial;54085409segment_begun = true;5410}54115412void RenderingDeviceDriverD3D12::end_segment() {5413segment_serial++;5414segment_begun = false;5415}54165417/**************/5418/**** MISC ****/5419/**************/54205421void RenderingDeviceDriverD3D12::_set_object_name(ID3D12Object *p_object, String p_object_name) {5422ERR_FAIL_NULL(p_object);5423int name_len = p_object_name.size();5424WCHAR *name_w = (WCHAR *)alloca(sizeof(WCHAR) * (name_len + 1));5425MultiByteToWideChar(CP_UTF8, 0, p_object_name.utf8().get_data(), -1, name_w, name_len);5426p_object->SetName(name_w);5427}54285429void RenderingDeviceDriverD3D12::set_object_name(ObjectType p_type, ID p_driver_id, const String &p_name) {5430switch (p_type) {5431case OBJECT_TYPE_TEXTURE: {5432const TextureInfo *tex_info = (const TextureInfo *)p_driver_id.id;5433if (tex_info->owner_info.allocation) {5434_set_object_name(tex_info->resource, p_name);5435}5436} break;5437case OBJECT_TYPE_SAMPLER: {5438} break;5439case OBJECT_TYPE_BUFFER: {5440const BufferInfo *buf_info = (const BufferInfo *)p_driver_id.id;5441_set_object_name(buf_info->resource, p_name);5442} break;5443case OBJECT_TYPE_SHADER: {5444const ShaderInfo *shader_info_in = (const ShaderInfo *)p_driver_id.id;5445_set_object_name(shader_info_in->root_signature.Get(), p_name);5446} break;5447case OBJECT_TYPE_UNIFORM_SET: {5448const UniformSetInfo *uniform_set_info = (const UniformSetInfo *)p_driver_id.id;5449if (uniform_set_info->desc_heaps.resources.get_heap()) 
{5450_set_object_name(uniform_set_info->desc_heaps.resources.get_heap(), p_name + " resources heap");5451}5452if (uniform_set_info->desc_heaps.samplers.get_heap()) {5453_set_object_name(uniform_set_info->desc_heaps.samplers.get_heap(), p_name + " samplers heap");5454}5455} break;5456case OBJECT_TYPE_PIPELINE: {5457const PipelineInfo *pipeline_info = (const PipelineInfo *)p_driver_id.id;5458_set_object_name(pipeline_info->pso, p_name);5459} break;5460default: {5461DEV_ASSERT(false);5462}5463}5464}54655466uint64_t RenderingDeviceDriverD3D12::get_resource_native_handle(DriverResource p_type, ID p_driver_id) {5467switch (p_type) {5468case DRIVER_RESOURCE_LOGICAL_DEVICE: {5469return (uint64_t)device.Get();5470}5471case DRIVER_RESOURCE_PHYSICAL_DEVICE: {5472return (uint64_t)adapter.Get();5473}5474case DRIVER_RESOURCE_TOPMOST_OBJECT: {5475return 0;5476}5477case DRIVER_RESOURCE_COMMAND_QUEUE: {5478return (uint64_t)p_driver_id.id;5479}5480case DRIVER_RESOURCE_QUEUE_FAMILY: {5481return 0;5482}5483case DRIVER_RESOURCE_TEXTURE: {5484const TextureInfo *tex_info = (const TextureInfo *)p_driver_id.id;5485return (uint64_t)tex_info->main_texture;5486} break;5487case DRIVER_RESOURCE_TEXTURE_VIEW: {5488const TextureInfo *tex_info = (const TextureInfo *)p_driver_id.id;5489return (uint64_t)tex_info->resource;5490}5491case DRIVER_RESOURCE_TEXTURE_DATA_FORMAT: {5492const TextureInfo *tex_info = (const TextureInfo *)p_driver_id.id;5493return (uint64_t)tex_info->desc.Format;5494}5495case DRIVER_RESOURCE_SAMPLER:5496case DRIVER_RESOURCE_UNIFORM_SET:5497return 0;5498case DRIVER_RESOURCE_BUFFER: {5499const TextureInfo *tex_info = (const TextureInfo *)p_driver_id.id;5500return (uint64_t)tex_info->resource;5501} break;5502case DRIVER_RESOURCE_COMPUTE_PIPELINE:5503case DRIVER_RESOURCE_RENDER_PIPELINE: {5504return p_driver_id.id;5505}5506default: {5507return 0;5508}5509}5510}55115512uint64_t RenderingDeviceDriverD3D12::get_total_memory_used() {5513D3D12MA::TotalStatistics 
stats;5514allocator->CalculateStatistics(&stats);5515return stats.Total.Stats.BlockBytes;5516}55175518uint64_t RenderingDeviceDriverD3D12::get_lazily_memory_used() {5519return 0;5520}55215522uint64_t RenderingDeviceDriverD3D12::limit_get(Limit p_limit) {5523uint64_t safe_unbounded = ((uint64_t)1 << 30);5524switch (p_limit) {5525case LIMIT_MAX_BOUND_UNIFORM_SETS:5526return safe_unbounded;5527case LIMIT_MAX_TEXTURE_ARRAY_LAYERS:5528return D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION;5529case LIMIT_MAX_TEXTURE_SIZE_1D:5530return D3D12_REQ_TEXTURE1D_U_DIMENSION;5531case LIMIT_MAX_TEXTURE_SIZE_2D:5532return D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION;5533case LIMIT_MAX_TEXTURE_SIZE_3D:5534return D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION;5535case LIMIT_MAX_TEXTURE_SIZE_CUBE:5536return D3D12_REQ_TEXTURECUBE_DIMENSION;5537case LIMIT_MAX_TEXTURES_PER_SHADER_STAGE:5538return device_limits.max_srvs_per_shader_stage;5539case LIMIT_MAX_UNIFORM_BUFFER_SIZE:5540return 65536;5541case LIMIT_MAX_VIEWPORT_DIMENSIONS_X:5542case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y:5543return 16384; // Based on max. texture size. Maybe not correct.5544case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_X:5545return D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION;5546case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Y:5547return D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION;5548case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Z:5549return D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION;5550case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_X:5551return D3D12_CS_THREAD_GROUP_MAX_X;5552case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Y:5553return D3D12_CS_THREAD_GROUP_MAX_Y;5554case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z:5555return D3D12_CS_THREAD_GROUP_MAX_Z;5556case LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE:5557return D3D12_CS_TGSM_REGISTER_COUNT * sizeof(float);5558case LIMIT_SUBGROUP_SIZE:5559// Note in min/max. 
Shader model 6.6 supports it (see https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_WaveSize.html),5560// but at this time I don't know the implications on the transpilation to DXIL, etc.5561case LIMIT_SUBGROUP_MIN_SIZE:5562case LIMIT_SUBGROUP_MAX_SIZE:5563return subgroup_capabilities.size;5564case LIMIT_SUBGROUP_IN_SHADERS:5565return subgroup_capabilities.supported_stages_flags_rd();5566case LIMIT_SUBGROUP_OPERATIONS:5567return subgroup_capabilities.supported_operations_flags_rd();5568case LIMIT_MAX_SHADER_VARYINGS:5569return MIN(D3D12_VS_OUTPUT_REGISTER_COUNT, D3D12_PS_INPUT_REGISTER_COUNT);5570default: {5571#ifdef DEV_ENABLED5572WARN_PRINT("Returning maximum value for unknown limit " + itos(p_limit) + ".");5573#endif5574return safe_unbounded;5575}5576}5577}55785579uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) {5580switch (p_trait) {5581case API_TRAIT_HONORS_PIPELINE_BARRIERS:5582return barrier_capabilities.enhanced_barriers_supported;5583case API_TRAIT_SHADER_CHANGE_INVALIDATION:5584return (uint64_t)SHADER_CHANGE_INVALIDATION_ALL_OR_NONE_ACCORDING_TO_LAYOUT_HASH;5585case API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT:5586return D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT;5587case API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP:5588return D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;5589case API_TRAIT_SECONDARY_VIEWPORT_SCISSOR:5590return false;5591case API_TRAIT_CLEARS_WITH_COPY_ENGINE:5592return false;5593case API_TRAIT_USE_GENERAL_IN_COPY_QUEUES:5594return true;5595case API_TRAIT_BUFFERS_REQUIRE_TRANSITIONS:5596return !barrier_capabilities.enhanced_barriers_supported;5597default:5598return RenderingDeviceDriver::api_trait_get(p_trait);5599}5600}56015602bool RenderingDeviceDriverD3D12::has_feature(Features p_feature) {5603switch (p_feature) {5604case SUPPORTS_HALF_FLOAT:5605return shader_capabilities.native_16bit_ops && storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported;5606case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS:5607return 
true;5608case SUPPORTS_BUFFER_DEVICE_ADDRESS:5609return true;5610case SUPPORTS_IMAGE_ATOMIC_32_BIT:5611return true;5612case SUPPORTS_VULKAN_MEMORY_MODEL:5613return false;5614default:5615return false;5616}5617}56185619const RDD::MultiviewCapabilities &RenderingDeviceDriverD3D12::get_multiview_capabilities() {5620return multiview_capabilities;5621}56225623const RDD::FragmentShadingRateCapabilities &RenderingDeviceDriverD3D12::get_fragment_shading_rate_capabilities() {5624return fsr_capabilities;5625}56265627const RDD::FragmentDensityMapCapabilities &RenderingDeviceDriverD3D12::get_fragment_density_map_capabilities() {5628return fdm_capabilities;5629}56305631String RenderingDeviceDriverD3D12::get_api_name() const {5632return "D3D12";5633}56345635String RenderingDeviceDriverD3D12::get_api_version() const {5636return vformat("%d_%d", feature_level / 10, feature_level % 10);5637}56385639String RenderingDeviceDriverD3D12::get_pipeline_cache_uuid() const {5640return pipeline_cache_id;5641}56425643const RDD::Capabilities &RenderingDeviceDriverD3D12::get_capabilities() const {5644return device_capabilities;5645}56465647const RenderingShaderContainerFormat &RenderingDeviceDriverD3D12::get_shader_container_format() const {5648return shader_container_format;5649}56505651bool RenderingDeviceDriverD3D12::is_composite_alpha_supported(CommandQueueID p_queue) const {5652if (has_comp_alpha.has((uint64_t)p_queue.id)) {5653return has_comp_alpha[(uint64_t)p_queue.id];5654}5655return false;5656}56575658/******************/56595660RenderingDeviceDriverD3D12::RenderingDeviceDriverD3D12(RenderingContextDriverD3D12 *p_context_driver) {5661DEV_ASSERT(p_context_driver != nullptr);56625663this->context_driver = p_context_driver;5664}56655666RenderingDeviceDriverD3D12::~RenderingDeviceDriverD3D12() {5667if (D3D12Hooks::get_singleton() != nullptr) {5668D3D12Hooks::get_singleton()->cleanup_device();5669}5670glsl_type_singleton_decref();5671}56725673bool 
RenderingDeviceDriverD3D12::is_in_developer_mode() {5674HKEY hkey = nullptr;5675LSTATUS result = RegOpenKeyExW(HKEY_LOCAL_MACHINE, L"SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\AppModelUnlock", 0, KEY_READ, &hkey);5676if (result != ERROR_SUCCESS) {5677return false;5678}56795680DWORD value = 0;5681DWORD dword_size = sizeof(DWORD);5682result = RegQueryValueExW(hkey, L"AllowDevelopmentWithoutDevLicense", nullptr, nullptr, (PBYTE)&value, &dword_size);5683RegCloseKey(hkey);56845685if (result != ERROR_SUCCESS) {5686return false;5687}56885689return (value != 0);5690}56915692Error RenderingDeviceDriverD3D12::_initialize_device() {5693HRESULT res;56945695if (is_in_developer_mode()) {5696typedef HRESULT(WINAPI * PFN_D3D12_ENABLE_EXPERIMENTAL_FEATURES)(_In_ UINT, _In_count_(NumFeatures) const IID *, _In_opt_count_(NumFeatures) void *, _In_opt_count_(NumFeatures) UINT *);5697PFN_D3D12_ENABLE_EXPERIMENTAL_FEATURES d3d_D3D12EnableExperimentalFeatures = (PFN_D3D12_ENABLE_EXPERIMENTAL_FEATURES)(void *)GetProcAddress(context_driver->lib_d3d12, "D3D12EnableExperimentalFeatures");5698ERR_FAIL_NULL_V(d3d_D3D12EnableExperimentalFeatures, ERR_CANT_CREATE);56995700UUID experimental_features[] = { D3D12ExperimentalShaderModels };5701d3d_D3D12EnableExperimentalFeatures(1, experimental_features, nullptr, nullptr);5702}57035704D3D_FEATURE_LEVEL requested_feature_level = D3D_FEATURE_LEVEL_11_0;5705// Override the adapter and feature level if needed by the XR backend.5706if (D3D12Hooks::get_singleton() != nullptr) {5707const LUID adapter_luid = D3D12Hooks::get_singleton()->get_adapter_luid();5708requested_feature_level = D3D12Hooks::get_singleton()->get_feature_level();5709ComPtr<IDXGIAdapter1> desired_adapter;5710for (UINT adapter_index = 0;; adapter_index++) {5711// EnumAdapters1 will fail with DXGI_ERROR_NOT_FOUND when there are no more adapters to5712// enumerate.5713if (context_driver->dxgi_factory_get()->EnumAdapters1(adapter_index, desired_adapter.ReleaseAndGetAddressOf()) == 
DXGI_ERROR_NOT_FOUND) {5714break;5715}5716DXGI_ADAPTER_DESC1 desc;5717desired_adapter->GetDesc1(&desc);5718if (!memcmp(&desc.AdapterLuid, &adapter_luid, sizeof(LUID))) {5719break;5720}5721}5722ERR_FAIL_NULL_V(desired_adapter, ERR_CANT_CREATE);5723adapter = desired_adapter;5724}57255726ID3D12DeviceFactory *device_factory = context_driver->device_factory_get();5727if (device_factory != nullptr) {5728res = device_factory->CreateDevice(adapter.Get(), requested_feature_level, IID_PPV_ARGS(device.GetAddressOf()));5729} else {5730PFN_D3D12_CREATE_DEVICE d3d_D3D12CreateDevice = (PFN_D3D12_CREATE_DEVICE)(void *)GetProcAddress(context_driver->lib_d3d12, "D3D12CreateDevice");5731ERR_FAIL_NULL_V(d3d_D3D12CreateDevice, ERR_CANT_CREATE);57325733res = d3d_D3D12CreateDevice(adapter.Get(), requested_feature_level, IID_PPV_ARGS(device.GetAddressOf()));5734}5735ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "D3D12CreateDevice failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");57365737if (D3D12Hooks::get_singleton() != nullptr) {5738D3D12Hooks::get_singleton()->set_device(device.Get());5739}57405741if (context_driver->use_validation_layers()) {5742ComPtr<ID3D12InfoQueue> info_queue;5743res = device.As(&info_queue);5744ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);57455746#if CUSTOM_INFO_QUEUE_ENABLED5747ComPtr<ID3D12InfoQueue1> info_queue_1;5748device.As(&info_queue_1);5749if (info_queue_1) {5750// Custom printing supported (added in Windows 10 Release Preview build 20236). 
Even if the callback cookie is unused, it seems the5751// argument is not optional and the function will fail if it's not specified.5752DWORD callback_cookie;5753info_queue_1->SetMuteDebugOutput(TRUE);5754res = info_queue_1->RegisterMessageCallback(&_debug_message_func, D3D12_MESSAGE_CALLBACK_IGNORE_FILTERS, nullptr, &callback_cookie);5755ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);5756} else5757#endif5758{5759// Rely on D3D12's own debug printing.5760if (Engine::get_singleton()->is_abort_on_gpu_errors_enabled()) {5761res = info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE);5762ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);5763res = info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE);5764ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);5765res = info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, TRUE);5766ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);5767}5768}57695770D3D12_MESSAGE_SEVERITY severities_to_mute[] = {5771D3D12_MESSAGE_SEVERITY_INFO,5772};57735774D3D12_MESSAGE_ID messages_to_mute[] = {5775D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE,5776D3D12_MESSAGE_ID_CLEARDEPTHSTENCILVIEW_MISMATCHINGCLEARVALUE,5777// These happen due to how D3D12MA manages buffers; seems benign.5778D3D12_MESSAGE_ID_HEAP_ADDRESS_RANGE_HAS_NO_RESOURCE,5779D3D12_MESSAGE_ID_HEAP_ADDRESS_RANGE_INTERSECTS_MULTIPLE_BUFFERS,5780// Seemingly a false positive.5781D3D12_MESSAGE_ID_DATA_STATIC_WHILE_SET_AT_EXECUTE_DESCRIPTOR_INVALID_DATA_CHANGE,5782};57835784D3D12_INFO_QUEUE_FILTER filter = {};5785filter.DenyList.NumSeverities = ARRAY_SIZE(severities_to_mute);5786filter.DenyList.pSeverityList = severities_to_mute;5787filter.DenyList.NumIDs = ARRAY_SIZE(messages_to_mute);5788filter.DenyList.pIDList = messages_to_mute;57895790res = info_queue->PushStorageFilter(&filter);5791ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);5792}57935794return OK;5795}57965797Error RenderingDeviceDriverD3D12::_check_capabilities() 
{5798// Check feature levels.5799const D3D_FEATURE_LEVEL FEATURE_LEVELS[] = {5800D3D_FEATURE_LEVEL_11_0,5801D3D_FEATURE_LEVEL_11_1,5802D3D_FEATURE_LEVEL_12_0,5803D3D_FEATURE_LEVEL_12_1,5804D3D_FEATURE_LEVEL_12_2,5805};58065807D3D12_FEATURE_DATA_FEATURE_LEVELS feat_levels = {};5808feat_levels.NumFeatureLevels = ARRAY_SIZE(FEATURE_LEVELS);5809feat_levels.pFeatureLevelsRequested = FEATURE_LEVELS;58105811HRESULT res = device->CheckFeatureSupport(D3D12_FEATURE_FEATURE_LEVELS, &feat_levels, sizeof(feat_levels));5812ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_UNAVAILABLE, "CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");58135814// Example: D3D_FEATURE_LEVEL_12_1 = 0xc100.5815uint32_t feat_level_major = feat_levels.MaxSupportedFeatureLevel >> 12;5816uint32_t feat_level_minor = (feat_levels.MaxSupportedFeatureLevel >> 16) & 0xff;5817feature_level = feat_level_major * 10 + feat_level_minor;58185819// Fill device capabilities.5820device_capabilities.device_family = DEVICE_DIRECTX;5821device_capabilities.version_major = feature_level / 10;5822device_capabilities.version_minor = feature_level % 10;58235824// Assume not supported until proven otherwise.5825multiview_capabilities.is_supported = false;5826multiview_capabilities.geometry_shader_is_supported = false;5827multiview_capabilities.tessellation_shader_is_supported = false;5828multiview_capabilities.max_view_count = 0;5829multiview_capabilities.max_instance_count = 0;5830multiview_capabilities.is_supported = false;5831subgroup_capabilities.size = 0;5832subgroup_capabilities.wave_ops_supported = false;5833shader_capabilities.shader_model = (D3D_SHADER_MODEL)0;5834shader_capabilities.native_16bit_ops = false;5835storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported = false;5836format_capabilities.relaxed_casting_supported = false;58375838{5839static const D3D_SHADER_MODEL SMS_TO_CHECK[] = 
{5840D3D_SHADER_MODEL_6_6,5841D3D_SHADER_MODEL_6_5,5842D3D_SHADER_MODEL_6_4,5843D3D_SHADER_MODEL_6_3,5844D3D_SHADER_MODEL_6_2,5845D3D_SHADER_MODEL_6_1,5846D3D_SHADER_MODEL_6_0, // Determined by NIR (dxil_min_shader_model).5847};58485849D3D12_FEATURE_DATA_SHADER_MODEL shader_model = {};5850for (uint32_t i = 0; i < ARRAY_SIZE(SMS_TO_CHECK); i++) {5851shader_model.HighestShaderModel = SMS_TO_CHECK[i];5852res = device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &shader_model, sizeof(shader_model));5853if (SUCCEEDED(res)) {5854shader_capabilities.shader_model = shader_model.HighestShaderModel;5855break;5856}5857if (res == E_INVALIDARG) {5858continue; // Must assume the device doesn't know about the SM just checked.5859}5860ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");5861}58625863#define D3D_SHADER_MODEL_TO_STRING(m_sm) vformat("%d.%d", (m_sm >> 4), (m_sm & 0xf))58645865ERR_FAIL_COND_V_MSG(shader_capabilities.shader_model < SMS_TO_CHECK[ARRAY_SIZE(SMS_TO_CHECK) - 1], ERR_UNAVAILABLE,5866vformat("No support for any of the suitable shader models (%s-%s) has been found.", D3D_SHADER_MODEL_TO_STRING(SMS_TO_CHECK[ARRAY_SIZE(SMS_TO_CHECK) - 1]), D3D_SHADER_MODEL_TO_STRING(SMS_TO_CHECK[0])));58675868print_verbose("- Shader:");5869print_verbose(" model: " + D3D_SHADER_MODEL_TO_STRING(shader_capabilities.shader_model));5870}58715872shader_container_format.set_lib_d3d12(context_driver->lib_d3d12);58735874D3D12_FEATURE_DATA_D3D12_OPTIONS options = {};5875res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options));5876if (SUCCEEDED(res)) {5877storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported = options.TypedUAVLoadAdditionalFormats;5878}58795880D3D12_FEATURE_DATA_D3D12_OPTIONS1 options1 = {};5881res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS1, &options1, sizeof(options1));5882if (SUCCEEDED(res)) 
{5883subgroup_capabilities.size = options1.WaveLaneCountMin;5884subgroup_capabilities.wave_ops_supported = options1.WaveOps;5885}58865887D3D12_FEATURE_DATA_D3D12_OPTIONS2 options2 = {};5888res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS2, &options2, sizeof(options2));5889if (SUCCEEDED(res)) {5890misc_features_support.depth_bounds_supported = options2.DepthBoundsTestSupported;5891}58925893D3D12_FEATURE_DATA_D3D12_OPTIONS3 options3 = {};5894res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &options3, sizeof(options3));5895if (SUCCEEDED(res)) {5896// https://docs.microsoft.com/en-us/windows/win32/api/d3d12/ne-d3d12-d3d12_view_instancing_tier5897// https://microsoft.github.io/DirectX-Specs/d3d/ViewInstancing.html#sv_viewid5898if (options3.ViewInstancingTier >= D3D12_VIEW_INSTANCING_TIER_1) {5899multiview_capabilities.is_supported = true;5900multiview_capabilities.geometry_shader_is_supported = options3.ViewInstancingTier >= D3D12_VIEW_INSTANCING_TIER_3;5901multiview_capabilities.tessellation_shader_is_supported = options3.ViewInstancingTier >= D3D12_VIEW_INSTANCING_TIER_3;5902multiview_capabilities.max_view_count = D3D12_MAX_VIEW_INSTANCE_COUNT;5903multiview_capabilities.max_instance_count = UINT32_MAX;5904}5905}59065907D3D12_FEATURE_DATA_D3D12_OPTIONS4 options4 = {};5908res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS4, &options4, sizeof(options4));5909if (SUCCEEDED(res)) {5910shader_capabilities.native_16bit_ops = options4.Native16BitShaderOpsSupported;5911}59125913D3D12_FEATURE_DATA_D3D12_OPTIONS6 options6 = {};5914res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS6, &options6, sizeof(options6));5915if (SUCCEEDED(res)) {5916if (options6.VariableShadingRateTier >= D3D12_VARIABLE_SHADING_RATE_TIER_1) {5917fsr_capabilities.pipeline_supported = true;5918if (options6.VariableShadingRateTier >= D3D12_VARIABLE_SHADING_RATE_TIER_2) {5919fsr_capabilities.primitive_supported = true;5920fsr_capabilities.attachment_supported = 
true;5921fsr_capabilities.min_texel_size = Size2i(options6.ShadingRateImageTileSize, options6.ShadingRateImageTileSize);5922fsr_capabilities.max_texel_size = Size2i(8, 8);5923}5924}5925}59265927D3D12_FEATURE_DATA_D3D12_OPTIONS12 options12 = {};5928res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS12, &options12, sizeof(options12));5929if (SUCCEEDED(res)) {5930format_capabilities.relaxed_casting_supported = options12.RelaxedFormatCastingSupported;5931barrier_capabilities.enhanced_barriers_supported = options12.EnhancedBarriersSupported;5932}59335934if (fsr_capabilities.pipeline_supported || fsr_capabilities.primitive_supported || fsr_capabilities.attachment_supported) {5935print_verbose("- D3D12 Variable Rate Shading supported:");5936if (fsr_capabilities.pipeline_supported) {5937print_verbose(" Draw call");5938}5939if (fsr_capabilities.primitive_supported) {5940print_verbose(" Primitive");5941}5942if (fsr_capabilities.attachment_supported) {5943print_verbose(String(" Screen-space image (tile size: ") + itos(fsr_capabilities.min_texel_size.x) + ")");5944}5945} else {5946print_verbose("- D3D12 Variable Rate Shading not supported");5947}59485949if (multiview_capabilities.is_supported) {5950print_verbose("- D3D12 multiview supported:");5951print_verbose(" max view count: " + itos(multiview_capabilities.max_view_count));5952//print_verbose(" max instances: " + itos(multiview_capabilities.max_instance_count)); // Hardcoded; not very useful at the moment.5953} else {5954print_verbose("- D3D12 multiview not supported");5955}59565957if (format_capabilities.relaxed_casting_supported) {5958#if 05959print_verbose("- Relaxed casting supported");5960#else5961// Certain configurations (Windows 11 with an updated NVIDIA driver) crash when using relaxed casting.5962// Therefore, we disable it temporarily until we can assure that it's reliable.5963// There are fallbacks in place that work in every case, if less efficient.5964format_capabilities.relaxed_casting_supported = 
false;5965print_verbose("- Relaxed casting supported (but disabled for now)");5966#endif5967} else {5968print_verbose("- Relaxed casting not supported");5969}59705971print_verbose(String("- D3D12 16-bit ops supported: ") + (shader_capabilities.native_16bit_ops ? "yes" : "no"));59725973if (misc_features_support.depth_bounds_supported) {5974print_verbose("- Depth bounds test supported");5975} else {5976print_verbose("- Depth bounds test not supported");5977}59785979return OK;5980}59815982Error RenderingDeviceDriverD3D12::_get_device_limits() {5983D3D12_FEATURE_DATA_D3D12_OPTIONS options = {};5984HRESULT res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options));5985ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_UNAVAILABLE, "CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");59865987// https://docs.microsoft.com/en-us/windows/win32/direct3d12/hardware-support5988device_limits.max_srvs_per_shader_stage = options.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1 ? 128 : UINT64_MAX;5989device_limits.max_cbvs_per_shader_stage = options.ResourceBindingTier <= D3D12_RESOURCE_BINDING_TIER_2 ? 14 : UINT64_MAX;5990device_limits.max_samplers_across_all_stages = options.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1 ? 16 : 2048;5991if (options.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1) {5992device_limits.max_uavs_across_all_stages = feature_level <= 110 ? 
8 : 64;5993} else if (options.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_2) {5994device_limits.max_uavs_across_all_stages = 64;5995} else {5996device_limits.max_uavs_across_all_stages = UINT64_MAX;5997}59985999// Retrieving the timestamp frequency requires creating a command queue that will be discarded immediately.6000ComPtr<ID3D12CommandQueue> unused_command_queue;6001D3D12_COMMAND_QUEUE_DESC queue_desc = {};6002queue_desc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;6003res = device->CreateCommandQueue(&queue_desc, IID_PPV_ARGS(unused_command_queue.GetAddressOf()));6004ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);60056006res = unused_command_queue->GetTimestampFrequency(&device_limits.timestamp_frequency);6007if (!SUCCEEDED(res)) {6008print_verbose("D3D12: GetTimestampFrequency failed with error " + vformat("0x%08ux", (uint64_t)res) + ". Timestamps will be inaccurate.");6009}60106011return OK;6012}60136014Error RenderingDeviceDriverD3D12::_initialize_allocator() {6015D3D12MA::ALLOCATOR_DESC allocator_desc = {};6016allocator_desc.pDevice = device.Get();6017allocator_desc.pAdapter = adapter.Get();6018allocator_desc.Flags = D3D12MA::ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED;60196020HRESULT res = D3D12MA::CreateAllocator(&allocator_desc, &allocator);6021ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "D3D12MA::CreateAllocator failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");60226023return OK;6024}60256026static Error create_command_signature(ID3D12Device *device, D3D12_INDIRECT_ARGUMENT_TYPE p_type, uint32_t p_stride, ComPtr<ID3D12CommandSignature> *r_cmd_sig) {6027D3D12_INDIRECT_ARGUMENT_DESC iarg_desc = {};6028iarg_desc.Type = p_type;6029D3D12_COMMAND_SIGNATURE_DESC cs_desc = {};6030cs_desc.ByteStride = p_stride;6031cs_desc.NumArgumentDescs = 1;6032cs_desc.pArgumentDescs = &iarg_desc;6033cs_desc.NodeMask = 0;6034HRESULT res = device->CreateCommandSignature(&cs_desc, nullptr, 
IID_PPV_ARGS(r_cmd_sig->GetAddressOf()));6035ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_CANT_CREATE, "CreateCommandSignature failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");6036return OK;6037}60386039Error RenderingDeviceDriverD3D12::_initialize_frames(uint32_t p_frame_count) {6040Error err;60416042//CD3DX12_RESOURCE_DESC resource_desc = CD3DX12_RESOURCE_DESC::Buffer(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);6043uint32_t resource_descriptors_per_frame = GLOBAL_GET("rendering/rendering_device/d3d12/max_resource_descriptors_per_frame");6044uint32_t sampler_descriptors_per_frame = GLOBAL_GET("rendering/rendering_device/d3d12/max_sampler_descriptors_per_frame");6045uint32_t misc_descriptors_per_frame = GLOBAL_GET("rendering/rendering_device/d3d12/max_misc_descriptors_per_frame");60466047frames.resize(p_frame_count);6048for (uint32_t i = 0; i < frames.size(); i++) {6049err = frames[i].desc_heaps.resources.allocate(device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, resource_descriptors_per_frame, true);6050ERR_FAIL_COND_V_MSG(err != OK, ERR_CANT_CREATE, "Creating the frame's RESOURCE descriptors heap failed.");60516052err = frames[i].desc_heaps.samplers.allocate(device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, sampler_descriptors_per_frame, true);6053ERR_FAIL_COND_V_MSG(err != OK, ERR_CANT_CREATE, "Creating the frame's SAMPLER descriptors heap failed.");60546055err = frames[i].desc_heaps.aux.allocate(device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, misc_descriptors_per_frame, false);6056ERR_FAIL_COND_V_MSG(err != OK, ERR_CANT_CREATE, "Creating the frame's AUX descriptors heap failed.");60576058err = frames[i].desc_heaps.rtv.allocate(device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_RTV, misc_descriptors_per_frame, false);6059ERR_FAIL_COND_V_MSG(err != OK, ERR_CANT_CREATE, "Creating the frame's RENDER TARGET descriptors heap failed.");60606061frames[i].desc_heap_walkers.resources = 
frames[i].desc_heaps.resources.make_walker();6062frames[i].desc_heap_walkers.samplers = frames[i].desc_heaps.samplers.make_walker();6063frames[i].desc_heap_walkers.aux = frames[i].desc_heaps.aux.make_walker();6064frames[i].desc_heap_walkers.rtv = frames[i].desc_heaps.rtv.make_walker();6065}60666067return OK;6068}60696070Error RenderingDeviceDriverD3D12::_initialize_command_signatures() {6071Error err = create_command_signature(device.Get(), D3D12_INDIRECT_ARGUMENT_TYPE_DRAW, sizeof(D3D12_DRAW_ARGUMENTS), &indirect_cmd_signatures.draw);6072ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE);60736074err = create_command_signature(device.Get(), D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED, sizeof(D3D12_DRAW_INDEXED_ARGUMENTS), &indirect_cmd_signatures.draw_indexed);6075ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE);60766077err = create_command_signature(device.Get(), D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH, sizeof(D3D12_DISPATCH_ARGUMENTS), &indirect_cmd_signatures.dispatch);6078ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE);60796080return OK;6081}60826083Error RenderingDeviceDriverD3D12::initialize(uint32_t p_device_index, uint32_t p_frame_count) {6084glsl_type_singleton_init_or_ref();60856086context_device = context_driver->device_get(p_device_index);6087adapter = context_driver->create_adapter(p_device_index);6088ERR_FAIL_NULL_V(adapter, ERR_CANT_CREATE);60896090HRESULT res = adapter->GetDesc(&adapter_desc);6091ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE);60926093// Set the pipeline cache ID based on the adapter information.6094pipeline_cache_id = String::hex_encode_buffer((uint8_t *)&adapter_desc.AdapterLuid, sizeof(LUID));6095pipeline_cache_id += "-driver-" + itos(adapter_desc.Revision);60966097Error err = _initialize_device();6098ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE);60996100err = _check_capabilities();6101ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE);61026103err = _get_device_limits();6104ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE);61056106err = 
_initialize_allocator();6107ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE);61086109err = _initialize_frames(p_frame_count);6110ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE);61116112err = _initialize_command_signatures();6113ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE);61146115return OK;6116}611761186119