Path: blob/master/drivers/metal/metal_objects_shared.h
20919 views
/**************************************************************************/
/*  metal_objects_shared.h                                                */
/**************************************************************************/
/*                         This file is part of:                          */
/*                             GODOT ENGINE                               */
/*                        https://godotengine.org                         */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
/*                                                                        */
/* Permission is hereby granted, free of charge, to any person obtaining  */
/* a copy of this software and associated documentation files (the        */
/* "Software"), to deal in the Software without restriction, including    */
/* without limitation the rights to use, copy, modify, merge, publish,    */
/* distribute, sublicense, and/or sell copies of the Software, and to     */
/* permit persons to whom the Software is furnished to do so, subject to  */
/* the following conditions:                                              */
/*                                                                        */
/* The above copyright notice and this permission notice shall be         */
/* included in all copies or substantial portions of the Software.        */
/*                                                                        */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/28/**************************************************************************/2930#pragma once3132#include "metal_device_properties.h"33#include "metal_utils.h"34#include "pixel_formats.h"35#include "sha256_digest.h"3637#include <CoreFoundation/CoreFoundation.h>38#include <memory>39#include <optional>4041class RenderingDeviceDriverMetal;4243using RDC = RenderingDeviceCommons;4445enum ShaderStageUsage : uint32_t {46None = 0,47Vertex = RDD::SHADER_STAGE_VERTEX_BIT,48Fragment = RDD::SHADER_STAGE_FRAGMENT_BIT,49TesselationControl = RDD::SHADER_STAGE_TESSELATION_CONTROL_BIT,50TesselationEvaluation = RDD::SHADER_STAGE_TESSELATION_EVALUATION_BIT,51Compute = RDD::SHADER_STAGE_COMPUTE_BIT,52};5354_FORCE_INLINE_ ShaderStageUsage &operator|=(ShaderStageUsage &p_a, int p_b) {55p_a = ShaderStageUsage(uint32_t(p_a) | uint32_t(p_b));56return p_a;57}5859struct ClearAttKey {60const static uint32_t COLOR_COUNT = MAX_COLOR_ATTACHMENT_COUNT;61const static uint32_t DEPTH_INDEX = COLOR_COUNT;62const static uint32_t STENCIL_INDEX = DEPTH_INDEX + 1;63const static uint32_t ATTACHMENT_COUNT = STENCIL_INDEX + 1;6465enum Flags : uint16_t {66CLEAR_FLAGS_NONE = 0,67CLEAR_FLAGS_LAYERED = 1 << 0,68};6970Flags flags = CLEAR_FLAGS_NONE;71uint16_t sample_count = 0;72uint16_t pixel_formats[ATTACHMENT_COUNT] = { 0 };7374_FORCE_INLINE_ void set_color_format(uint32_t p_idx, MTL::PixelFormat p_fmt) { pixel_formats[p_idx] = p_fmt; }75_FORCE_INLINE_ void set_depth_format(MTL::PixelFormat p_fmt) { pixel_formats[DEPTH_INDEX] = p_fmt; }76_FORCE_INLINE_ void set_stencil_format(MTL::PixelFormat p_fmt) { pixel_formats[STENCIL_INDEX] = p_fmt; }77_FORCE_INLINE_ MTL::PixelFormat depth_format() const { return (MTL::PixelFormat)pixel_formats[DEPTH_INDEX]; }78_FORCE_INLINE_ MTL::PixelFormat stencil_format() const { return (MTL::PixelFormat)pixel_formats[STENCIL_INDEX]; }79_FORCE_INLINE_ void enable_layered_rendering() { flags::set(flags, CLEAR_FLAGS_LAYERED); }8081_FORCE_INLINE_ bool is_enabled(uint32_t p_idx) const 
{ return pixel_formats[p_idx] != 0; }82_FORCE_INLINE_ bool is_depth_enabled() const { return pixel_formats[DEPTH_INDEX] != 0; }83_FORCE_INLINE_ bool is_stencil_enabled() const { return pixel_formats[STENCIL_INDEX] != 0; }84_FORCE_INLINE_ bool is_layered_rendering_enabled() const { return flags::any(flags, CLEAR_FLAGS_LAYERED); }8586_FORCE_INLINE_ bool operator==(const ClearAttKey &p_rhs) const {87return memcmp(this, &p_rhs, sizeof(ClearAttKey)) == 0;88}8990uint32_t hash() const {91uint32_t h = hash_murmur3_one_32(flags);92h = hash_murmur3_one_32(sample_count, h);93h = hash_murmur3_buffer(pixel_formats, ATTACHMENT_COUNT * sizeof(pixel_formats[0]), h);94return hash_fmix32(h);95}96};9798#pragma mark - Ring Buffer99100/// A ring buffer backed by MTLBuffer instances for transient GPU allocations.101/// Allocations are 16-byte aligned with a minimum size of 16 bytes.102/// When the current buffer is exhausted, a new buffer is allocated.103class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDRingBuffer {104public:105static constexpr uint32_t DEFAULT_BUFFER_SIZE = 512 * 1024;106static constexpr uint32_t MIN_BLOCK_SIZE = 16;107static constexpr uint32_t ALIGNMENT = 16;108109struct Allocation {110void *ptr = nullptr;111MTL::Buffer *buffer = nullptr;112uint64_t gpu_address = 0;113uint32_t offset = 0;114115_FORCE_INLINE_ bool is_valid() const { return ptr != nullptr; }116};117118private:119MTL::Device *device = nullptr;120LocalVector<MTL::Buffer *> buffers;121LocalVector<uint32_t> heads;122uint32_t current_segment = 0;123uint32_t buffer_size = DEFAULT_BUFFER_SIZE;124bool changed = false;125126_FORCE_INLINE_ uint32_t alloc_segment() {127MTL::Buffer *buffer = device->newBuffer(buffer_size, MTL::ResourceStorageModeShared | MTL::ResourceHazardTrackingModeUntracked);128buffers.push_back(buffer);129heads.push_back(0);130changed = true;131132return buffers.size() - 1;133}134135public:136MDRingBuffer() = default;137138MDRingBuffer(MTL::Device *p_device, uint32_t 
p_buffer_size = DEFAULT_BUFFER_SIZE) :139device(p_device), buffer_size(p_buffer_size) {}140141~MDRingBuffer() {142for (MTL::Buffer *buffer : buffers) {143buffer->release();144}145}146147/// Allocates a block of memory from the ring buffer.148/// Returns an Allocation with the pointer, buffer, and offset.149_FORCE_INLINE_ Allocation allocate(uint32_t p_size) {150p_size = MAX(p_size, MIN_BLOCK_SIZE);151p_size = (p_size + ALIGNMENT - 1) & ~(ALIGNMENT - 1);152153if (buffers.is_empty()) {154alloc_segment();155}156157uint32_t aligned_head = (heads[current_segment] + ALIGNMENT - 1) & ~(ALIGNMENT - 1);158159if (aligned_head + p_size > buffer_size) {160// Current segment exhausted, try to find one with space or allocate new.161bool found = false;162for (uint32_t i = 0; i < buffers.size(); i++) {163uint32_t ah = (heads[i] + ALIGNMENT - 1) & ~(ALIGNMENT - 1);164if (ah + p_size <= buffer_size) {165current_segment = i;166aligned_head = ah;167found = true;168break;169}170}171172if (!found) {173current_segment = alloc_segment();174aligned_head = 0;175}176}177178MTL::Buffer *buffer = buffers[current_segment];179Allocation alloc;180alloc.buffer = buffer;181alloc.offset = aligned_head;182alloc.ptr = static_cast<uint8_t *>(buffer->contents()) + aligned_head;183if (__builtin_available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) {184alloc.gpu_address = buffer->gpuAddress() + aligned_head;185}186heads[current_segment] = aligned_head + p_size;187188return alloc;189}190191/// Resets all segments for reuse. 
Call at frame boundaries when GPU work is complete.192_FORCE_INLINE_ void reset() {193for (uint32_t &head : heads) {194head = 0;195}196current_segment = 0;197}198199/// Returns true if buffers were added or removed since last clear_changed().200_FORCE_INLINE_ bool is_changed() const { return changed; }201202/// Clears the changed flag.203_FORCE_INLINE_ void clear_changed() { changed = false; }204205/// Returns a Span of all backing buffers.206_FORCE_INLINE_ Span<MTL::Buffer *const> get_buffers() const {207return Span<MTL::Buffer *const>(buffers.ptr(), buffers.size());208}209210/// Returns the number of buffer segments currently allocated.211_FORCE_INLINE_ uint32_t get_segment_count() const {212return buffers.size();213}214};215216#pragma mark - Resource Factory217218class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDResourceFactory {219private:220MTL::Device *device;221PixelFormats &pixel_formats;222uint32_t max_buffer_count;223224NS::SharedPtr<MTL::Function> new_func(NS::String *p_source, NS::String *p_name, NS::Error **p_error);225NS::SharedPtr<MTL::Function> new_clear_vert_func(ClearAttKey &p_key);226NS::SharedPtr<MTL::Function> new_clear_frag_func(ClearAttKey &p_key);227const char *get_format_type_string(MTL::PixelFormat p_fmt) const;228229_FORCE_INLINE_ uint32_t get_vertex_buffer_index(uint32_t p_binding) {230return (max_buffer_count - 1) - p_binding;231}232233public:234NS::SharedPtr<MTL::RenderPipelineState> new_clear_pipeline_state(ClearAttKey &p_key, NS::Error **p_error);235NS::SharedPtr<MTL::RenderPipelineState> new_empty_draw_pipeline_state(ClearAttKey &p_key, NS::Error **p_error);236NS::SharedPtr<MTL::DepthStencilState> new_depth_stencil_state(bool p_use_depth, bool p_use_stencil);237238MDResourceFactory(MTL::Device *p_device, PixelFormats &p_pixel_formats, uint32_t p_max_buffer_count) :239device(p_device), pixel_formats(p_pixel_formats), max_buffer_count(p_max_buffer_count) {}240~MDResourceFactory() = default;241};242243class 
API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDResourceCache {244private:245typedef HashMap<ClearAttKey, NS::SharedPtr<MTL::RenderPipelineState>> HashMap;246std::unique_ptr<MDResourceFactory> resource_factory;247HashMap clear_states;248HashMap empty_draw_states;249250struct {251NS::SharedPtr<MTL::DepthStencilState> all;252NS::SharedPtr<MTL::DepthStencilState> depth_only;253NS::SharedPtr<MTL::DepthStencilState> stencil_only;254NS::SharedPtr<MTL::DepthStencilState> none;255} clear_depth_stencil_state;256257public:258MTL::RenderPipelineState *get_clear_render_pipeline_state(ClearAttKey &p_key, NS::Error **p_error);259MTL::RenderPipelineState *get_empty_draw_pipeline_state(ClearAttKey &p_key, NS::Error **p_error);260MTL::DepthStencilState *get_depth_stencil_state(bool p_use_depth, bool p_use_stencil);261262explicit MDResourceCache(MTL::Device *p_device, PixelFormats &p_pixel_formats, uint32_t p_max_buffer_count) :263resource_factory(new MDResourceFactory(p_device, p_pixel_formats, p_max_buffer_count)) {}264~MDResourceCache() = default;265};266267/**268* Returns an index that can be used to map a shader stage to an index in a fixed-size array that is used for269* a single pipeline type.270*/271_FORCE_INLINE_ static uint32_t to_index(RDD::ShaderStage p_s) {272switch (p_s) {273case RenderingDeviceCommons::SHADER_STAGE_VERTEX:274case RenderingDeviceCommons::SHADER_STAGE_TESSELATION_CONTROL:275case RenderingDeviceCommons::SHADER_STAGE_TESSELATION_EVALUATION:276case RenderingDeviceCommons::SHADER_STAGE_COMPUTE:277default:278return 0;279case RenderingDeviceCommons::SHADER_STAGE_FRAGMENT:280return 1;281}282}283284class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDFrameBuffer {285Vector<MTL::Texture *> textures;286287public:288Size2i size;289MDFrameBuffer(Vector<MTL::Texture *> p_textures, Size2i p_size) :290textures(p_textures), size(p_size) {}291MDFrameBuffer() {}292293/// Returns the texture at the given index.294_ALWAYS_INLINE_ 
MTL::Texture *get_texture(uint32_t p_idx) const {295return textures[p_idx];296}297298/// Returns true if the texture at the given index is not nil.299_ALWAYS_INLINE_ bool has_texture(uint32_t p_idx) const {300return textures[p_idx] != nullptr;301}302303/// Set the texture at the given index.304_ALWAYS_INLINE_ void set_texture(uint32_t p_idx, MTL::Texture *p_texture) {305textures.write[p_idx] = p_texture;306}307308/// Unset or nil the texture at the given index.309_ALWAYS_INLINE_ void unset_texture(uint32_t p_idx) {310textures.write[p_idx] = nullptr;311}312313/// Resizes buffers to the specified size.314_ALWAYS_INLINE_ void set_texture_count(uint32_t p_size) {315textures.resize(p_size);316}317318virtual ~MDFrameBuffer() = default;319};320321template <>322struct HashMapComparatorDefault<RDD::ShaderID> {323static bool compare(const RDD::ShaderID &p_lhs, const RDD::ShaderID &p_rhs) {324return p_lhs.id == p_rhs.id;325}326};327328template <>329struct HashMapComparatorDefault<RDD::BufferID> {330static bool compare(const RDD::BufferID &p_lhs, const RDD::BufferID &p_rhs) {331return p_lhs.id == p_rhs.id;332}333};334335template <>336struct HashMapComparatorDefault<RDD::TextureID> {337static bool compare(const RDD::TextureID &p_lhs, const RDD::TextureID &p_rhs) {338return p_lhs.id == p_rhs.id;339}340};341342template <>343struct HashMapHasherDefaultImpl<RDD::BufferID> {344static _FORCE_INLINE_ uint32_t hash(const RDD::BufferID &p_value) {345return HashMapHasherDefaultImpl<uint64_t>::hash(p_value.id);346}347};348349template <>350struct HashMapHasherDefaultImpl<RDD::TextureID> {351static _FORCE_INLINE_ uint32_t hash(const RDD::TextureID &p_value) {352return HashMapHasherDefaultImpl<uint64_t>::hash(p_value.id);353}354};355356namespace rid {357358template <typename T>359_FORCE_INLINE_ T *get(RDD::ID p_id) {360return reinterpret_cast<T *>(p_id.id);361}362363template <typename T>364_FORCE_INLINE_ T *get(uint64_t p_id) {365return reinterpret_cast<T *>(p_id);366}367368} // namespace 
rid369370#pragma mark - Render Pass Types371372class MDRenderPass;373374enum class MDAttachmentType : uint8_t {375None = 0,376Color = 1 << 0,377Depth = 1 << 1,378Stencil = 1 << 2,379};380381_FORCE_INLINE_ MDAttachmentType &operator|=(MDAttachmentType &p_a, MDAttachmentType p_b) {382flags::set(p_a, p_b);383return p_a;384}385386_FORCE_INLINE_ bool operator&(MDAttachmentType p_a, MDAttachmentType p_b) {387return uint8_t(p_a) & uint8_t(p_b);388}389390struct MDSubpass {391uint32_t subpass_index = 0;392uint32_t view_count = 0;393LocalVector<RDD::AttachmentReference> input_references;394LocalVector<RDD::AttachmentReference> color_references;395RDD::AttachmentReference depth_stencil_reference;396LocalVector<RDD::AttachmentReference> resolve_references;397398MTLFmtCaps getRequiredFmtCapsForAttachmentAt(uint32_t p_index) const;399};400401struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDAttachment {402private:403uint32_t index = 0;404uint32_t firstUseSubpassIndex = 0;405uint32_t lastUseSubpassIndex = 0;406407public:408MTL::PixelFormat format = MTL::PixelFormatInvalid;409MDAttachmentType type = MDAttachmentType::None;410MTL::LoadAction loadAction = MTL::LoadActionDontCare;411MTL::StoreAction storeAction = MTL::StoreActionDontCare;412MTL::LoadAction stencilLoadAction = MTL::LoadActionDontCare;413MTL::StoreAction stencilStoreAction = MTL::StoreActionDontCare;414uint32_t samples = 1;415416/*!417* @brief Returns true if this attachment is first used in the given subpass.418* @param p_subpass419* @return420*/421_FORCE_INLINE_ bool isFirstUseOf(MDSubpass const &p_subpass) const {422return p_subpass.subpass_index == firstUseSubpassIndex;423}424425/*!426* @brief Returns true if this attachment is last used in the given subpass.427* @param p_subpass428* @return429*/430_FORCE_INLINE_ bool isLastUseOf(MDSubpass const &p_subpass) const {431return p_subpass.subpass_index == lastUseSubpassIndex;432}433434void linkToSubpass(MDRenderPass const 
&p_pass);435436MTL::StoreAction getMTLStoreAction(MDSubpass const &p_subpass,437bool p_is_rendering_entire_area,438bool p_has_resolve,439bool p_can_resolve,440bool p_is_stencil) const;441bool configureDescriptor(MTL::RenderPassAttachmentDescriptor *p_desc,442PixelFormats &p_pf,443MDSubpass const &p_subpass,444MTL::Texture *p_attachment,445bool p_is_rendering_entire_area,446bool p_has_resolve,447bool p_can_resolve,448bool p_is_stencil) const {449p_desc->setTexture(p_attachment);450451MTL::LoadAction load;452if (!p_is_rendering_entire_area || !isFirstUseOf(p_subpass)) {453load = MTL::LoadActionLoad;454} else {455load = p_is_stencil ? (MTL::LoadAction)stencilLoadAction : (MTL::LoadAction)loadAction;456}457458p_desc->setLoadAction(load);459460MTL::PixelFormat mtlFmt = p_attachment->pixelFormat();461bool isDepthFormat = p_pf.isDepthFormat(mtlFmt);462bool isStencilFormat = p_pf.isStencilFormat(mtlFmt);463if (isStencilFormat && !p_is_stencil && !isDepthFormat) {464p_desc->setStoreAction(MTL::StoreActionDontCare);465} else {466p_desc->setStoreAction(getMTLStoreAction(p_subpass, p_is_rendering_entire_area, p_has_resolve, p_can_resolve, p_is_stencil));467}468469return load == MTL::LoadActionClear;470}471472/** Returns whether this attachment should be cleared in the subpass. */473bool shouldClear(MDSubpass const &p_subpass, bool p_is_stencil) const;474};475476class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDRenderPass {477public:478Vector<MDAttachment> attachments;479Vector<MDSubpass> subpasses;480481uint32_t get_sample_count() const {482return attachments.is_empty() ? 
1 : attachments[0].samples;483}484485MDRenderPass(Vector<MDAttachment> &p_attachments, Vector<MDSubpass> &p_subpasses);486};487488#pragma mark - Command Buffer Helpers489490_FORCE_INLINE_ static MTL::Size MTLSizeFromVector3i(Vector3i p_size) {491return MTL::Size{ (NS::UInteger)p_size.x, (NS::UInteger)p_size.y, (NS::UInteger)p_size.z };492}493494_FORCE_INLINE_ static MTL::Origin MTLOriginFromVector3i(Vector3i p_origin) {495return MTL::Origin{ (NS::UInteger)p_origin.x, (NS::UInteger)p_origin.y, (NS::UInteger)p_origin.z };496}497498// Clamps the size so that the sum of the origin and size do not exceed the maximum size.499_FORCE_INLINE_ static MTL::Size clampMTLSize(MTL::Size p_size, MTL::Origin p_origin, MTL::Size p_max_size) {500MTL::Size clamped;501clamped.width = MIN(p_size.width, p_max_size.width - p_origin.x);502clamped.height = MIN(p_size.height, p_max_size.height - p_origin.y);503clamped.depth = MIN(p_size.depth, p_max_size.depth - p_origin.z);504return clamped;505}506507API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0))508_FORCE_INLINE_ static bool isArrayTexture(MTL::TextureType p_type) {509return (p_type == MTL::TextureType3D ||510p_type == MTL::TextureType2DArray ||511p_type == MTL::TextureType2DMultisampleArray ||512p_type == MTL::TextureType1DArray);513}514515_FORCE_INLINE_ static bool operator==(MTL::Size p_a, MTL::Size p_b) {516return p_a.width == p_b.width && p_a.height == p_b.height && p_a.depth == p_b.depth;517}518519#pragma mark - Pipeline Stage Conversion520521GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability")522523_FORCE_INLINE_ static MTL::Stages convert_src_pipeline_stages_to_metal(BitField<RDD::PipelineStageBits> p_stages) {524p_stages.clear_flag(RDD::PIPELINE_STAGE_TOP_OF_PIPE_BIT);525526// BOTTOM_OF_PIPE or ALL_COMMANDS means "all prior work must complete".527if (p_stages & (RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT)) {528return MTL::StageAll;529}530531MTL::Stages mtlStages = 0;532533// Vertex 
stage mappings.534if (p_stages & (RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT | RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT | RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT | RDD::PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | RDD::PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | RDD::PIPELINE_STAGE_GEOMETRY_SHADER_BIT)) {535mtlStages |= MTL::StageVertex;536}537538// Fragment stage mappings.539// Includes resolve and clear_storage, which on Metal use the render pipeline.540if (p_stages & (RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT | RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | RDD::PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT | RDD::PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT | RDD::PIPELINE_STAGE_RESOLVE_BIT | RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT)) {541mtlStages |= MTL::StageFragment;542}543544// Compute stage.545if (p_stages & RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT) {546mtlStages |= MTL::StageDispatch;547}548549// Blit stage (transfer operations).550if (p_stages & RDD::PIPELINE_STAGE_COPY_BIT) {551mtlStages |= MTL::StageBlit;552}553554// ALL_GRAPHICS_BIT special case.555if (p_stages & RDD::PIPELINE_STAGE_ALL_GRAPHICS_BIT) {556mtlStages |= (MTL::StageVertex | MTL::StageFragment);557}558559return mtlStages;560}561562_FORCE_INLINE_ static MTL::Stages convert_dst_pipeline_stages_to_metal(BitField<RDD::PipelineStageBits> p_stages) {563p_stages.clear_flag(RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);564565// TOP_OF_PIPE or ALL_COMMANDS means "wait before any work starts".566if (p_stages & (RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT | RDD::PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {567return MTL::StageAll;568}569570MTL::Stages mtlStages = 0;571572// Vertex stage mappings.573if (p_stages & (RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT | RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT | RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT | RDD::PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | 
RDD::PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | RDD::PIPELINE_STAGE_GEOMETRY_SHADER_BIT)) {574mtlStages |= MTL::StageVertex;575}576577// Fragment stage mappings.578// Includes resolve and clear_storage, which on Metal use the render pipeline.579if (p_stages & (RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT | RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | RDD::PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT | RDD::PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT | RDD::PIPELINE_STAGE_RESOLVE_BIT | RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT)) {580mtlStages |= MTL::StageFragment;581}582583// Compute stage.584if (p_stages & RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT) {585mtlStages |= MTL::StageDispatch;586}587588// Blit stage (transfer operations).589if (p_stages & RDD::PIPELINE_STAGE_COPY_BIT) {590mtlStages |= MTL::StageBlit;591}592593// ALL_GRAPHICS_BIT special case.594if (p_stages & RDD::PIPELINE_STAGE_ALL_GRAPHICS_BIT) {595mtlStages |= (MTL::StageVertex | MTL::StageFragment);596}597598return mtlStages;599}600601GODOT_CLANG_WARNING_POP602603#pragma mark - Command Buffer Base604605enum class MDCommandBufferStateType {606None,607Render,608Compute,609Blit, // Only used by Metal 3610};611612/// Base struct for render state shared between MTL3 and MTL4 implementations.613struct RenderStateBase {614LocalVector<MTL::Viewport> viewports;615LocalVector<MTL::ScissorRect> scissors;616std::optional<Color> blend_constants;617618// clang-format off619enum DirtyFlag : uint16_t {620DIRTY_NONE = 0,621DIRTY_PIPELINE = 1 << 0,622DIRTY_UNIFORMS = 1 << 1,623DIRTY_PUSH = 1 << 2,624DIRTY_DEPTH = 1 << 3,625DIRTY_VERTEX = 1 << 4,626DIRTY_VIEWPORT = 1 << 5,627DIRTY_SCISSOR = 1 << 6,628DIRTY_BLEND = 1 << 7,629DIRTY_RASTER = 1 << 8,630DIRTY_ALL = (1 << 9) - 1,631};632// clang-format on633BitField<DirtyFlag> dirty = DIRTY_NONE;634};635636/// Abstract base class for Metal command buffers, shared between MTL3 
and MTL4 implementations.637class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDCommandBufferBase {638LocalVector<CFTypeRef> _retained_resources;639640protected:641// From RenderingDevice642static constexpr uint32_t MAX_PUSH_CONSTANT_SIZE = 128;643644MDCommandBufferStateType type = MDCommandBufferStateType::None;645646uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];647uint32_t push_constant_data_len = 0;648uint32_t push_constant_binding = UINT32_MAX;649650::RenderingDeviceDriverMetal *device_driver = nullptr;651652void release_resources();653654/// Called when push constants are modified to mark the appropriate dirty flags.655virtual void mark_push_constants_dirty() = 0;656657/// Returns a reference to the render state base for viewport/scissor/blend operations.658virtual RenderStateBase &get_render_state_base() = 0;659660/// Returns the view count for the current subpass.661virtual uint32_t get_current_view_count() const = 0;662663/// Accessors for render pass state.664virtual MDRenderPass *get_render_pass() const = 0;665virtual MDFrameBuffer *get_frame_buffer() const = 0;666virtual const MDSubpass &get_current_subpass() const = 0;667virtual LocalVector<RDD::RenderPassClearValue> &get_clear_values() = 0;668virtual const Rect2i &get_render_area() const = 0;669virtual void end_render_encoding() = 0;670671void _populate_vertices(simd::float4 *p_vertices, Size2i p_fb_size, VectorView<Rect2i> p_rects);672uint32_t _populate_vertices(simd::float4 *p_vertices, uint32_t p_index, Rect2i const &p_rect, Size2i p_fb_size);673void _end_render_pass();674void _render_clear_render_area();675676public:677virtual ~MDCommandBufferBase() { release_resources(); }678679virtual void begin() = 0;680virtual void commit() = 0;681virtual void end() = 0;682683virtual void bind_pipeline(RDD::PipelineID p_pipeline) = 0;684void encode_push_constant_data(RDD::ShaderID p_shader, VectorView<uint32_t> p_data);685686void retain_resource(CFTypeRef p_resource);687688#pragma mark 
- Render Commands689690virtual void render_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) = 0;691virtual void render_clear_attachments(VectorView<RDD::AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects) = 0;692void render_set_viewport(VectorView<Rect2i> p_viewports);693void render_set_scissor(VectorView<Rect2i> p_scissors);694void render_set_blend_constants(const Color &p_constants);695virtual void render_begin_pass(RDD::RenderPassID p_render_pass,696RDD::FramebufferID p_frameBuffer,697RDD::CommandBufferType p_cmd_buffer_type,698const Rect2i &p_rect,699VectorView<RDD::RenderPassClearValue> p_clear_values) = 0;700virtual void render_next_subpass() = 0;701virtual void render_draw(uint32_t p_vertex_count,702uint32_t p_instance_count,703uint32_t p_base_vertex,704uint32_t p_first_instance) = 0;705virtual void render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) = 0;706virtual void render_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint64_t p_offset) = 0;707708virtual void render_draw_indexed(uint32_t p_index_count,709uint32_t p_instance_count,710uint32_t p_first_index,711int32_t p_vertex_offset,712uint32_t p_first_instance) = 0;713714virtual void render_draw_indexed_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) = 0;715virtual void render_draw_indexed_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) = 0;716virtual void render_draw_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) = 0;717virtual void render_draw_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID 
p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) = 0;718719virtual void render_end_pass() = 0;720721#pragma mark - Compute Commands722723virtual void compute_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) = 0;724virtual void compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) = 0;725virtual void compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset) = 0;726727#pragma mark - Transfer728729virtual void resolve_texture(RDD::TextureID p_src_texture, RDD::TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, RDD::TextureID p_dst_texture, RDD::TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) = 0;730virtual void clear_color_texture(RDD::TextureID p_texture, RDD::TextureLayout p_texture_layout, const Color &p_color, const RDD::TextureSubresourceRange &p_subresources) = 0;731virtual void clear_depth_stencil_texture(RDD::TextureID p_texture, RDD::TextureLayout p_texture_layout, float p_depth, uint8_t p_stencil, const RDD::TextureSubresourceRange &p_subresources) = 0;732virtual void clear_buffer(RDD::BufferID p_buffer, uint64_t p_offset, uint64_t p_size) = 0;733virtual void copy_buffer(RDD::BufferID p_src_buffer, RDD::BufferID p_dst_buffer, VectorView<RDD::BufferCopyRegion> p_regions) = 0;734virtual void copy_texture(RDD::TextureID p_src_texture, RDD::TextureID p_dst_texture, VectorView<RDD::TextureCopyRegion> p_regions) = 0;735virtual void copy_buffer_to_texture(RDD::BufferID p_src_buffer, RDD::TextureID p_dst_texture, VectorView<RDD::BufferTextureCopyRegion> p_regions) = 0;736virtual void copy_texture_to_buffer(RDD::TextureID p_src_texture, RDD::BufferID p_dst_buffer, VectorView<RDD::BufferTextureCopyRegion> p_regions) = 0;737738#pragma mark - Synchronization739740virtual void 
pipeline_barrier(BitField<RDD::PipelineStageBits> p_src_stages,741BitField<RDD::PipelineStageBits> p_dst_stages,742VectorView<RDD::MemoryAccessBarrier> p_memory_barriers,743VectorView<RDD::BufferBarrier> p_buffer_barriers,744VectorView<RDD::TextureBarrier> p_texture_barriers,745VectorView<RDD::AccelerationStructureBarrier> p_acceleration_structure_barriers) = 0;746747#pragma mark - Debugging748749virtual void begin_label(const char *p_label_name, const Color &p_color) = 0;750virtual void end_label() = 0;751};752753#pragma mark - Uniform Types754755struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) UniformInfo {756uint32_t binding;757BitField<RDD::ShaderStage> active_stages;758MTL::DataType dataType = MTL::DataTypeNone;759MTL::BindingAccess access = MTL::BindingAccessReadOnly;760MTL::ResourceUsage usage = 0;761MTL::TextureType textureType = MTL::TextureType2D;762uint32_t imageFormat = 0;763uint32_t arrayLength = 0;764bool isMultisampled = 0;765766struct Indexes {767uint32_t buffer = UINT32_MAX;768uint32_t texture = UINT32_MAX;769uint32_t sampler = UINT32_MAX;770};771Indexes slot;772Indexes arg_buffer;773774enum class IndexType {775SLOT,776ARG,777};778779_FORCE_INLINE_ Indexes &get_indexes(IndexType p_type) {780switch (p_type) {781case IndexType::SLOT:782return slot;783case IndexType::ARG:784return arg_buffer;785}786}787};788789struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) UniformSet {790LocalVector<UniformInfo> uniforms;791LocalVector<uint32_t> dynamic_uniforms;792uint32_t buffer_size = 0;793};794795class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) DynamicOffsetLayout {796struct Data {797uint8_t offset : 4;798uint8_t count : 4;799};800801union {802Data data[MAX_DYNAMIC_BUFFERS];803uint64_t _val = 0;804};805806public:807_FORCE_INLINE_ bool is_empty() const { return _val == 0; }808809_FORCE_INLINE_ uint32_t get_count(uint32_t p_set_index) const {810return 
data[p_set_index].count;811}812813_FORCE_INLINE_ uint32_t get_offset(uint32_t p_set_index) const {814return data[p_set_index].offset;815}816817_FORCE_INLINE_ void set_offset_count(uint32_t p_set_index, uint8_t p_offset, uint8_t p_count) {818data[p_set_index].offset = p_offset;819data[p_set_index].count = p_count;820}821822_FORCE_INLINE_ uint32_t get_offset_index_shift(uint32_t p_set_index, uint32_t p_dynamic_index = 0) const {823return (data[p_set_index].offset + p_dynamic_index) * 4u;824}825};826827#pragma mark - Shader Types828829class MDLibrary; // Forward declaration for C++ code830struct ShaderCacheEntry; // Forward declaration for C++ code831832enum class ShaderLoadStrategy {833IMMEDIATE,834LAZY,835836/// The default strategy is to load the shader immediately.837DEFAULT = IMMEDIATE,838};839840/// A Metal shader library.841class MDLibrary : public std::enable_shared_from_this<MDLibrary> {842protected:843ShaderCacheEntry *_entry = nullptr;844#ifdef DEV_ENABLED845NS::SharedPtr<NS::String> _original_source = nullptr;846#endif847848MDLibrary(ShaderCacheEntry *p_entry849#ifdef DEV_ENABLED850,851NS::String *p_source852#endif853);854855public:856virtual ~MDLibrary();857858virtual MTL::Library *get_library() = 0;859virtual NS::Error *get_error() = 0;860virtual void set_label(NS::String *p_label);861#ifdef DEV_ENABLED862NS::String *get_original_source() const { return _original_source.get(); }863#endif864865static std::shared_ptr<MDLibrary> create(ShaderCacheEntry *p_entry,866MTL::Device *p_device,867NS::String *p_source,868MTL::CompileOptions *p_options,869ShaderLoadStrategy p_strategy);870871static std::shared_ptr<MDLibrary> create(ShaderCacheEntry *p_entry,872MTL::Device *p_device,873#ifdef DEV_ENABLED874NS::String *p_source,875#endif876dispatch_data_t p_data);877};878879/// A cache entry for a Metal shader library.880struct ShaderCacheEntry {881RenderingDeviceDriverMetal &owner;882/// A hash of the Metal shader source code.883SHA256Digest key;884CharString 
name;885RD::ShaderStage stage = RD::SHADER_STAGE_VERTEX;886/// Weak reference to the library; allows cache lookup without preventing cleanup.887std::weak_ptr<MDLibrary> library;888889/// Notify the cache that this entry is no longer needed.890void notify_free() const;891892ShaderCacheEntry(RenderingDeviceDriverMetal &p_owner, SHA256Digest p_key) :893owner(p_owner), key(p_key) {894}895~ShaderCacheEntry() = default;896};897898class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDShader {899public:900CharString name;901Vector<UniformSet> sets;902struct {903BitField<RDD::ShaderStage> stages = {};904uint32_t binding = UINT32_MAX;905uint32_t size = 0;906} push_constants;907DynamicOffsetLayout dynamic_offset_layout;908bool uses_argument_buffers = true;909910MDShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers) :911name(p_name), sets(p_sets), uses_argument_buffers(p_uses_argument_buffers) {}912virtual ~MDShader() = default;913};914915class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDComputeShader final : public MDShader {916public:917MTL::Size local = {};918919std::shared_ptr<MDLibrary> kernel;920921MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers, std::shared_ptr<MDLibrary> p_kernel);922};923924class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDRenderShader final : public MDShader {925public:926bool needs_view_mask_buffer = false;927928std::shared_ptr<MDLibrary> vert;929std::shared_ptr<MDLibrary> frag;930931MDRenderShader(CharString p_name,932Vector<UniformSet> p_sets,933bool p_needs_view_mask_buffer,934bool p_uses_argument_buffers,935std::shared_ptr<MDLibrary> p_vert, std::shared_ptr<MDLibrary> p_frag);936};937938#pragma mark - Uniform Set939940enum StageResourceUsage : uint32_t {941ResourceUnused = 0,942VertexRead = (MTL::ResourceUsageRead << RDD::SHADER_STAGE_VERTEX * 2),943VertexWrite = (MTL::ResourceUsageWrite << 
RDD::SHADER_STAGE_VERTEX * 2),944FragmentRead = (MTL::ResourceUsageRead << RDD::SHADER_STAGE_FRAGMENT * 2),945FragmentWrite = (MTL::ResourceUsageWrite << RDD::SHADER_STAGE_FRAGMENT * 2),946TesselationControlRead = (MTL::ResourceUsageRead << RDD::SHADER_STAGE_TESSELATION_CONTROL * 2),947TesselationControlWrite = (MTL::ResourceUsageWrite << RDD::SHADER_STAGE_TESSELATION_CONTROL * 2),948TesselationEvaluationRead = (MTL::ResourceUsageRead << RDD::SHADER_STAGE_TESSELATION_EVALUATION * 2),949TesselationEvaluationWrite = (MTL::ResourceUsageWrite << RDD::SHADER_STAGE_TESSELATION_EVALUATION * 2),950ComputeRead = (MTL::ResourceUsageRead << RDD::SHADER_STAGE_COMPUTE * 2),951ComputeWrite = (MTL::ResourceUsageWrite << RDD::SHADER_STAGE_COMPUTE * 2),952};953954typedef LocalVector<MTL::Resource *> ResourceVector;955typedef HashMap<StageResourceUsage, ResourceVector> ResourceUsageMap;956957_FORCE_INLINE_ StageResourceUsage &operator|=(StageResourceUsage &p_a, uint32_t p_b) {958p_a = StageResourceUsage(uint32_t(p_a) | p_b);959return p_a;960}961962_FORCE_INLINE_ StageResourceUsage stage_resource_usage(RDC::ShaderStage p_stage, MTL::ResourceUsage p_usage) {963return StageResourceUsage(p_usage << (p_stage * 2));964}965966_FORCE_INLINE_ MTL::ResourceUsage resource_usage_for_stage(StageResourceUsage p_usage, RDC::ShaderStage p_stage) {967return MTL::ResourceUsage((p_usage >> (p_stage * 2)) & 0b11);968}969970class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDUniformSet {971public:972NS::SharedPtr<MTL::Buffer> arg_buffer;973Vector<uint8_t> arg_buffer_data; // Stored for dynamic uniform sets.974ResourceUsageMap usage_to_resources; // Used by Metal 3 for resource tracking.975Vector<RDD::BoundUniform> uniforms;976};977978#pragma mark - Pipeline Types979980enum class MDPipelineType {981None,982Render,983Compute,984};985986class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDPipeline {987public:988MDPipelineType type;989990explicit 
MDPipeline(MDPipelineType p_type) :991type(p_type) {}992virtual ~MDPipeline() = default;993};994995class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDRenderPipeline final : public MDPipeline {996public:997NS::SharedPtr<MTL::RenderPipelineState> state;998NS::SharedPtr<MTL::DepthStencilState> depth_stencil;999uint32_t push_constant_size = 0;1000uint32_t push_constant_stages_mask = 0;1001SampleCount sample_count = SampleCount1;10021003struct {1004MTL::CullMode cull_mode = MTL::CullModeNone;1005MTL::TriangleFillMode fill_mode = MTL::TriangleFillModeFill;1006MTL::DepthClipMode clip_mode = MTL::DepthClipModeClip;1007MTL::Winding winding = MTL::WindingClockwise;1008MTL::PrimitiveType render_primitive = MTL::PrimitiveTypePoint;10091010struct {1011bool enabled = false;1012} depth_test;10131014struct {1015bool enabled = false;1016float depth_bias = 0.0;1017float slope_scale = 0.0;1018float clamp = 0.0;10191020template <typename T>1021_FORCE_INLINE_ void apply(T *p_enc) const {1022if (!enabled) {1023return;1024}1025p_enc->setDepthBias(depth_bias, slope_scale, clamp);1026}1027} depth_bias;10281029struct {1030bool enabled = false;1031uint32_t front_reference = 0;1032uint32_t back_reference = 0;10331034template <typename T>1035_FORCE_INLINE_ void apply(T *p_enc) const {1036if (!enabled) {1037return;1038}1039p_enc->setStencilReferenceValues(front_reference, back_reference);1040}1041} stencil;10421043struct {1044bool enabled = false;1045float r = 0.0;1046float g = 0.0;1047float b = 0.0;1048float a = 0.0;10491050template <typename T>1051_FORCE_INLINE_ void apply(T *p_enc) const {1052p_enc->setBlendColor(r, g, b, a);1053}1054} blend;10551056template <typename T>1057_FORCE_INLINE_ void apply(T *p_enc) const {1058p_enc->setCullMode(cull_mode);1059p_enc->setTriangleFillMode(fill_mode);1060p_enc->setDepthClipMode(clip_mode);1061p_enc->setFrontFacingWinding(winding);1062depth_bias.apply(p_enc);1063stencil.apply(p_enc);1064blend.apply(p_enc);1065}10661067} 
raster_state;10681069MDRenderShader *shader = nullptr;10701071MDRenderPipeline() :1072MDPipeline(MDPipelineType::Render) {}1073~MDRenderPipeline() final = default;1074};10751076class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(2.0)) MDComputePipeline final : public MDPipeline {1077public:1078NS::SharedPtr<MTL::ComputePipelineState> state;1079struct {1080MTL::Size local = {};1081} compute_state;10821083MDComputeShader *shader = nullptr;10841085explicit MDComputePipeline(NS::SharedPtr<MTL::ComputePipelineState> p_state) :1086MDPipeline(MDPipelineType::Compute), state(std::move(p_state)) {}1087~MDComputePipeline() final = default;1088};108910901091