Path: blob/master/drivers/metal/rendering_device_driver_metal.cpp
20919 views
/**************************************************************************/1/* rendering_device_driver_metal.cpp */2/**************************************************************************/3/* This file is part of: */4/* GODOT ENGINE */5/* https://godotengine.org */6/**************************************************************************/7/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */8/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */9/* */10/* Permission is hereby granted, free of charge, to any person obtaining */11/* a copy of this software and associated documentation files (the */12/* "Software"), to deal in the Software without restriction, including */13/* without limitation the rights to use, copy, modify, merge, publish, */14/* distribute, sublicense, and/or sell copies of the Software, and to */15/* permit persons to whom the Software is furnished to do so, subject to */16/* the following conditions: */17/* */18/* The above copyright notice and this permission notice shall be */19/* included in all copies or substantial portions of the Software. */20/* */21/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */22/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */23/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */24/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */25/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */26/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */27/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */28/**************************************************************************/2930/**************************************************************************/31/* */32/* Portions of this code were derived from MoltenVK. */33/* */34/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. 
*/35/* (http://www.brenwill.com) */36/* */37/* Licensed under the Apache License, Version 2.0 (the "License"); */38/* you may not use this file except in compliance with the License. */39/* You may obtain a copy of the License at */40/* */41/* http://www.apache.org/licenses/LICENSE-2.0 */42/* */43/* Unless required by applicable law or agreed to in writing, software */44/* distributed under the License is distributed on an "AS IS" BASIS, */45/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */46/* implied. See the License for the specific language governing */47/* permissions and limitations under the License. */48/**************************************************************************/4950#include "rendering_device_driver_metal.h"5152#include "pixel_formats.h"53#include "rendering_context_driver_metal.h"54#include "rendering_shader_container_metal.h"5556#include "core/config/project_settings.h"57#include "core/io/marshalls.h"58#include "core/string/ustring.h"59#include "core/templates/hash_map.h"60#include "drivers/apple/foundation_helpers.h"6162#include <os/log.h>63#include <os/signpost.h>64#include <Metal/Metal.hpp>65#include <algorithm>6667#ifndef MTLGPUAddress68typedef uint64_t MTLGPUAddress;69#endif7071#pragma mark - Logging7273extern os_log_t LOG_DRIVER;74// Used for dynamic tracing.75extern os_log_t LOG_INTERVALS;7677/*****************/78/**** GENERIC ****/79/*****************/8081// RDD::CompareOperator == VkCompareOp.82static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_NEVER, MTL::CompareFunctionNever));83static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_LESS, MTL::CompareFunctionLess));84static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_EQUAL, MTL::CompareFunctionEqual));85static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_LESS_OR_EQUAL, MTL::CompareFunctionLessEqual));86static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_GREATER, MTL::CompareFunctionGreater));87static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_NOT_EQUAL, 
MTL::CompareFunctionNotEqual));88static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_GREATER_OR_EQUAL, MTL::CompareFunctionGreaterEqual));89static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_ALWAYS, MTL::CompareFunctionAlways));9091/*****************/92/**** BUFFERS ****/93/*****************/9495RDD::BufferID RenderingDeviceDriverMetal::buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type, uint64_t p_frames_drawn) {96const uint64_t original_size = p_size;97if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {98p_size = round_up_to_alignment(p_size, 16u) * _frame_count;99}100101MTL::ResourceOptions options = 0;102switch (p_allocation_type) {103case MEMORY_ALLOCATION_TYPE_CPU:104options = base_hazard_tracking | MTL::ResourceStorageModeShared;105break;106case MEMORY_ALLOCATION_TYPE_GPU:107if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {108options = MTL::ResourceHazardTrackingModeUntracked | MTL::ResourceStorageModeShared | MTL::ResourceCPUCacheModeWriteCombined;109} else {110options = base_hazard_tracking | MTL::ResourceStorageModePrivate;111}112break;113}114115MTL::Buffer *obj = device->newBuffer(p_size, options);116ERR_FAIL_NULL_V_MSG(obj, BufferID(), "Can't create buffer of size: " + itos(p_size));117118BufferInfo *buf_info;119if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {120MetalBufferDynamicInfo *dyn_buffer = memnew(MetalBufferDynamicInfo);121buf_info = dyn_buffer;122#ifdef DEBUG_ENABLED123dyn_buffer->last_frame_mapped = p_frames_drawn - 1ul;124#endif125dyn_buffer->set_frame_index(0u);126dyn_buffer->size_bytes = round_up_to_alignment(original_size, 16u);127} else {128buf_info = memnew(BufferInfo);129}130buf_info->metal_buffer = NS::TransferPtr(obj);131132_track_resource(buf_info->metal_buffer.get());133134return BufferID(buf_info);135}136137bool RenderingDeviceDriverMetal::buffer_set_texel_format(BufferID p_buffer, DataFormat p_format) {138// Nothing to do.139return 
true;140}141142void RenderingDeviceDriverMetal::buffer_free(BufferID p_buffer) {143BufferInfo *buf_info = (BufferInfo *)p_buffer.id;144145_untrack_resource(buf_info->metal_buffer.get());146147if (buf_info->is_dynamic()) {148memdelete((MetalBufferDynamicInfo *)buf_info);149} else {150memdelete(buf_info);151}152}153154uint64_t RenderingDeviceDriverMetal::buffer_get_allocation_size(BufferID p_buffer) {155const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;156return buf_info->metal_buffer.get()->allocatedSize();157}158159uint8_t *RenderingDeviceDriverMetal::buffer_map(BufferID p_buffer) {160const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;161ERR_FAIL_COND_V_MSG(buf_info->metal_buffer.get()->storageMode() != MTL::StorageModeShared, nullptr, "Unable to map private buffers");162return (uint8_t *)buf_info->metal_buffer.get()->contents();163}164165void RenderingDeviceDriverMetal::buffer_unmap(BufferID p_buffer) {166// Nothing to do.167}168169uint8_t *RenderingDeviceDriverMetal::buffer_persistent_map_advance(BufferID p_buffer, uint64_t p_frames_drawn) {170MetalBufferDynamicInfo *buf_info = (MetalBufferDynamicInfo *)p_buffer.id;171ERR_FAIL_COND_V_MSG(!buf_info->is_dynamic(), nullptr, "Buffer must have BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT. Use buffer_map() instead.");172#ifdef DEBUG_ENABLED173ERR_FAIL_COND_V_MSG(buf_info->last_frame_mapped == p_frames_drawn, nullptr, "Buffers with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT must only be mapped once per frame. Otherwise there could be race conditions with the GPU. 
Amalgamate all data uploading into one map(), use an extra buffer or remove the bit.");174buf_info->last_frame_mapped = p_frames_drawn;175#endif176return (uint8_t *)buf_info->metal_buffer.get()->contents() + buf_info->next_frame_index(_frame_count) * buf_info->size_bytes;177}178179uint64_t RenderingDeviceDriverMetal::buffer_get_dynamic_offsets(Span<BufferID> p_buffers) {180uint64_t mask = 0u;181uint64_t shift = 0u;182183for (const BufferID &buf : p_buffers) {184const BufferInfo *buf_info = (const BufferInfo *)buf.id;185if (!buf_info->is_dynamic()) {186continue;187}188mask |= buf_info->frame_index() << shift;189// We can encode the frame index in 2 bits since frame_count won't be > 4.190shift += 2UL;191}192193return mask;194}195196uint64_t RenderingDeviceDriverMetal::buffer_get_device_address(BufferID p_buffer) {197if (__builtin_available(iOS 16.0, macOS 13.0, *)) {198const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;199return buf_info->metal_buffer.get()->gpuAddress();200} else {201#if DEV_ENABLED202WARN_PRINT_ONCE("buffer_get_device_address is not supported on this OS version.");203#endif204return 0;205}206}207208#pragma mark - Texture209210#pragma mark - Format Conversions211212static const MTL::TextureType TEXTURE_TYPE[RD::TEXTURE_TYPE_MAX] = {213MTL::TextureType1D,214MTL::TextureType2D,215MTL::TextureType3D,216MTL::TextureTypeCube,217MTL::TextureType1DArray,218MTL::TextureType2DArray,219MTL::TextureTypeCubeArray,220};221222bool RenderingDeviceDriverMetal::is_valid_linear(TextureFormat const &p_format) const {223MTLFormatType ft = pixel_formats->getFormatType(p_format.format);224225return p_format.texture_type == TEXTURE_TYPE_2D // Linear textures must be 2D textures.226&& ft != MTLFormatType::DepthStencil && ft != MTLFormatType::Compressed // Linear textures must not be depth/stencil or compressed formats.)227&& p_format.mipmaps == 1 // Linear textures must have 1 mipmap level.228&& p_format.array_layers == 1 // Linear textures must have 1 array 
layer.229&& p_format.samples == TEXTURE_SAMPLES_1; // Linear textures must have 1 sample.230}231232RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p_format, const TextureView &p_view) {233NS::SharedPtr<MTL::TextureDescriptor> desc = NS::TransferPtr(MTL::TextureDescriptor::alloc()->init());234desc->setTextureType(TEXTURE_TYPE[p_format.texture_type]);235236PixelFormats &formats = *pixel_formats;237desc->setPixelFormat((MTL::PixelFormat)formats.getMTLPixelFormat(p_format.format));238MTLFmtCaps format_caps = formats.getCapabilities(desc->pixelFormat());239240desc->setWidth(p_format.width);241desc->setHeight(p_format.height);242desc->setDepth(p_format.depth);243desc->setMipmapLevelCount(p_format.mipmaps);244245if (p_format.texture_type == TEXTURE_TYPE_1D_ARRAY ||246p_format.texture_type == TEXTURE_TYPE_2D_ARRAY) {247desc->setArrayLength(p_format.array_layers);248} else if (p_format.texture_type == TEXTURE_TYPE_CUBE_ARRAY) {249desc->setArrayLength(p_format.array_layers / 6);250}251252// TODO(sgc): Evaluate lossy texture support (perhaps as a project option?)253// https://developer.apple.com/videos/play/tech-talks/10876?time=459254// desc->setCompressionType(MTL::TextureCompressionTypeLossy);255256if (p_format.samples > TEXTURE_SAMPLES_1) {257SampleCount supported = (*device_properties).find_nearest_supported_sample_count(p_format.samples);258259if (supported > SampleCount1) {260bool ok = p_format.texture_type == TEXTURE_TYPE_2D || p_format.texture_type == TEXTURE_TYPE_2D_ARRAY;261if (ok) {262switch (p_format.texture_type) {263case TEXTURE_TYPE_2D:264desc->setTextureType(MTL::TextureType2DMultisample);265break;266case TEXTURE_TYPE_2D_ARRAY:267desc->setTextureType(MTL::TextureType2DMultisampleArray);268break;269default:270break;271}272desc->setSampleCount((NS::UInteger)supported);273if (p_format.mipmaps > 1) {274// For a buffer-backed or multi-sample texture, the value must be 1.275WARN_PRINT("mipmaps == 1 for multi-sample 
textures");276desc->setMipmapLevelCount(1);277}278} else {279WARN_PRINT("Unsupported multi-sample texture type; disabling multi-sample");280}281}282}283284static const MTL::TextureSwizzle COMPONENT_SWIZZLE[TEXTURE_SWIZZLE_MAX] = {285static_cast<MTL::TextureSwizzle>(255), // IDENTITY286MTL::TextureSwizzleZero,287MTL::TextureSwizzleOne,288MTL::TextureSwizzleRed,289MTL::TextureSwizzleGreen,290MTL::TextureSwizzleBlue,291MTL::TextureSwizzleAlpha,292};293294MTL::TextureSwizzleChannels swizzle = MTL::TextureSwizzleChannels::Make(295p_view.swizzle_r != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_r] : MTL::TextureSwizzleRed,296p_view.swizzle_g != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_g] : MTL::TextureSwizzleGreen,297p_view.swizzle_b != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_b] : MTL::TextureSwizzleBlue,298p_view.swizzle_a != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_a] : MTL::TextureSwizzleAlpha);299300// Represents a swizzle operation that is a no-op.301static MTL::TextureSwizzleChannels IDENTITY_SWIZZLE = MTL::TextureSwizzleChannels::Default();302303bool no_swizzle = memcmp(&IDENTITY_SWIZZLE, &swizzle, sizeof(MTL::TextureSwizzleChannels)) == 0;304if (!no_swizzle) {305desc->setSwizzle(swizzle);306}307308// Usage.309310MTL::ResourceOptions options = 0;311bool is_linear = false;312#if defined(VISIONOS_ENABLED)313const bool supports_memoryless = true;314#else315GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wdeprecated-declarations")316const bool supports_memoryless = (*device_properties).features.highestFamily >= MTL::GPUFamilyApple2 && (*device_properties).features.highestFamily < MTL::GPUFamilyMac1;317GODOT_CLANG_WARNING_POP318#endif319if (supports_memoryless && p_format.usage_bits & TEXTURE_USAGE_TRANSIENT_BIT) {320options = base_hazard_tracking | MTL::ResourceStorageModeMemoryless;321desc->setStorageMode(MTL::StorageModeMemoryless);322} else {323options = base_hazard_tracking | 
MTL::ResourceCPUCacheModeDefaultCache;324if (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) {325options |= MTL::ResourceStorageModeShared;326// The user has indicated they want to read from the texture on the CPU,327// so we'll see if we can use a linear format.328// A linear format is a texture that is backed by a buffer,329// which allows for CPU access to the texture data via a pointer.330is_linear = is_valid_linear(p_format);331} else {332options |= MTL::ResourceStorageModePrivate;333}334}335desc->setResourceOptions(options);336337MTL::TextureUsage usage = desc->usage();338if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT) {339usage |= MTL::TextureUsageShaderRead;340}341342if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT) {343usage |= MTL::TextureUsageShaderWrite;344}345346bool can_be_attachment = flags::any(format_caps, (kMTLFmtCapsColorAtt | kMTLFmtCapsDSAtt));347348if (flags::any(p_format.usage_bits, TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) &&349can_be_attachment) {350usage |= MTL::TextureUsageRenderTarget;351}352353if (p_format.usage_bits & TEXTURE_USAGE_INPUT_ATTACHMENT_BIT) {354usage |= MTL::TextureUsageShaderRead;355}356357if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_ATOMIC_BIT) {358ERR_FAIL_COND_V_MSG((format_caps & kMTLFmtCapsAtomic) == 0, RDD::TextureID(), "Atomic operations on this texture format are not supported.");359ERR_FAIL_COND_V_MSG(!device_properties->features.supports_native_image_atomics, RDD::TextureID(), "Atomic operations on textures are not supported on this OS version. 
Check SUPPORTS_IMAGE_ATOMIC_32_BIT.");360// If supports_native_image_atomics is true, this condition should always succeed, as it is set the same.361if (__builtin_available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) {362usage |= MTL::TextureUsageShaderAtomic;363}364}365366if (p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) {367ERR_FAIL_V_MSG(RDD::TextureID(), "unsupported: TEXTURE_USAGE_VRS_ATTACHMENT_BIT");368}369370if (flags::any(p_format.usage_bits, TEXTURE_USAGE_CAN_UPDATE_BIT | TEXTURE_USAGE_CAN_COPY_TO_BIT) &&371can_be_attachment && no_swizzle) {372// Per MoltenVK, can be cleared as a render attachment.373usage |= MTL::TextureUsageRenderTarget;374}375if (p_format.usage_bits & TEXTURE_USAGE_CAN_COPY_FROM_BIT) {376// Covered by blits.377}378379// Create texture views with a different component layout.380if (!p_format.shareable_formats.is_empty()) {381usage |= MTL::TextureUsagePixelFormatView;382}383384desc->setUsage(usage);385386// Allocate memory.387388MTL::Texture *obj = nullptr;389if (is_linear) {390// Linear textures are restricted to 2D textures, a single mipmap level and a single array layer.391MTL::PixelFormat pixel_format = desc->pixelFormat();392size_t row_alignment = get_texel_buffer_alignment_for_format(p_format.format);393size_t bytes_per_row = formats.getBytesPerRow(pixel_format, p_format.width);394bytes_per_row = round_up_to_alignment(bytes_per_row, row_alignment);395size_t bytes_per_layer = formats.getBytesPerLayer(pixel_format, bytes_per_row, p_format.height);396size_t byte_count = bytes_per_layer * p_format.depth * p_format.array_layers;397398MTL::Buffer *buf = device->newBuffer(byte_count, options);399obj = buf->newTexture(desc.get(), 0, bytes_per_row);400buf->release();401402_track_resource(buf);403} else {404obj = device->newTexture(desc.get());405}406ERR_FAIL_NULL_V_MSG(obj, TextureID(), "Unable to create texture.");407408_track_resource(obj);409410return TextureID(reinterpret_cast<uint64_t>(obj));411}412413RDD::TextureID 
RenderingDeviceDriverMetal::texture_create_from_extension(uint64_t p_native_texture, TextureType p_type, DataFormat p_format, uint32_t p_array_layers, bool p_depth_stencil, uint32_t p_mipmaps) {414MTL::Texture *res = reinterpret_cast<MTL::Texture *>(p_native_texture);415416// If the requested format is different, we need to create a view.417MTL::PixelFormat format = (MTL::PixelFormat)pixel_formats->getMTLPixelFormat(p_format);418if (res->pixelFormat() != format) {419MTL::TextureSwizzleChannels swizzle = MTL::TextureSwizzleChannels::Default();420res = res->newTextureView(format, res->textureType(), NS::Range::Make(0, res->mipmapLevelCount()), NS::Range::Make(0, p_array_layers), swizzle);421ERR_FAIL_NULL_V_MSG(res, TextureID(), "Unable to create texture view.");422}423424_track_resource(res);425426return TextureID(reinterpret_cast<uint64_t>(res));427}428429RDD::TextureID RenderingDeviceDriverMetal::texture_create_shared(TextureID p_original_texture, const TextureView &p_view) {430MTL::Texture *src_texture = reinterpret_cast<MTL::Texture *>(p_original_texture.id);431432NS::UInteger slices = src_texture->arrayLength();433if (src_texture->textureType() == MTL::TextureTypeCube) {434// Metal expects Cube textures to have a slice count of 6.435slices = 6;436} else if (src_texture->textureType() == MTL::TextureTypeCubeArray) {437// Metal expects Cube Array textures to have 6 slices per layer.438slices *= 6;439}440441#if DEV_ENABLED442if (src_texture->sampleCount() > 1) {443// TODO(sgc): is it ok to create a shared texture from a multi-sample texture?444WARN_PRINT("Is it safe to create a shared texture from multi-sample texture?");445}446#endif447448MTL::PixelFormat format = (MTL::PixelFormat)pixel_formats->getMTLPixelFormat(p_view.format);449450static const MTL::TextureSwizzle component_swizzle[TEXTURE_SWIZZLE_MAX] = {451static_cast<MTL::TextureSwizzle>(255), // 
IDENTITY452MTL::TextureSwizzleZero,453MTL::TextureSwizzleOne,454MTL::TextureSwizzleRed,455MTL::TextureSwizzleGreen,456MTL::TextureSwizzleBlue,457MTL::TextureSwizzleAlpha,458};459460#define SWIZZLE(C, CHAN) (p_view.swizzle_##C != TEXTURE_SWIZZLE_IDENTITY ? component_swizzle[p_view.swizzle_##C] : MTL::TextureSwizzle##CHAN)461MTL::TextureSwizzleChannels swizzle = MTL::TextureSwizzleChannels::Make(SWIZZLE(r, Red), SWIZZLE(g, Green), SWIZZLE(b, Blue), SWIZZLE(a, Alpha));462#undef SWIZZLE463MTL::Texture *obj = src_texture->newTextureView(format, src_texture->textureType(), NS::Range::Make(0, src_texture->mipmapLevelCount()), NS::Range::Make(0, slices), swizzle);464ERR_FAIL_NULL_V_MSG(obj, TextureID(), "Unable to create shared texture");465_track_resource(obj);466return TextureID(reinterpret_cast<uint64_t>(obj));467}468469RDD::TextureID RenderingDeviceDriverMetal::texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps) {470MTL::Texture *src_texture = reinterpret_cast<MTL::Texture *>(p_original_texture.id);471472static const MTL::TextureType VIEW_TYPES[] = {473MTL::TextureType1D, // MTLTextureType1D474MTL::TextureType1D, // MTLTextureType1DArray475MTL::TextureType2D, // MTLTextureType2D476MTL::TextureType2D, // MTLTextureType2DArray477MTL::TextureType2D, // MTLTextureType2DMultisample478MTL::TextureType2D, // MTLTextureTypeCube479MTL::TextureType2D, // MTLTextureTypeCubeArray480MTL::TextureType2D, // MTLTextureType3D481MTL::TextureType2D, // MTLTextureType2DMultisampleArray482};483484MTL::TextureType textureType = VIEW_TYPES[src_texture->textureType()];485switch (p_slice_type) {486case TEXTURE_SLICE_2D: {487textureType = MTL::TextureType2D;488} break;489case TEXTURE_SLICE_3D: {490textureType = MTL::TextureType3D;491} break;492case TEXTURE_SLICE_CUBEMAP: {493textureType = MTL::TextureTypeCube;494} break;495case TEXTURE_SLICE_2D_ARRAY: 
{496textureType = MTL::TextureType2DArray;497} break;498case TEXTURE_SLICE_MAX: {499ERR_FAIL_V_MSG(TextureID(), "Invalid texture slice type");500} break;501}502503MTL::PixelFormat format = (MTL::PixelFormat)pixel_formats->getMTLPixelFormat(p_view.format);504505static const MTL::TextureSwizzle component_swizzle[TEXTURE_SWIZZLE_MAX] = {506static_cast<MTL::TextureSwizzle>(255), // IDENTITY507MTL::TextureSwizzleZero,508MTL::TextureSwizzleOne,509MTL::TextureSwizzleRed,510MTL::TextureSwizzleGreen,511MTL::TextureSwizzleBlue,512MTL::TextureSwizzleAlpha,513};514515#define SWIZZLE(C, CHAN) (p_view.swizzle_##C != TEXTURE_SWIZZLE_IDENTITY ? component_swizzle[p_view.swizzle_##C] : MTL::TextureSwizzle##CHAN)516MTL::TextureSwizzleChannels swizzle = MTL::TextureSwizzleChannels::Make(SWIZZLE(r, Red), SWIZZLE(g, Green), SWIZZLE(b, Blue), SWIZZLE(a, Alpha));517#undef SWIZZLE518MTL::Texture *obj = src_texture->newTextureView(format, textureType, NS::Range::Make(p_mipmap, p_mipmaps), NS::Range::Make(p_layer, p_layers), swizzle);519ERR_FAIL_NULL_V_MSG(obj, TextureID(), "Unable to create shared texture");520_track_resource(obj);521return TextureID(reinterpret_cast<uint64_t>(obj));522}523524void RenderingDeviceDriverMetal::texture_free(TextureID p_texture) {525MTL::Texture *obj = reinterpret_cast<MTL::Texture *>(p_texture.id);526_untrack_resource(obj);527obj->release();528}529530uint64_t RenderingDeviceDriverMetal::texture_get_allocation_size(TextureID p_texture) {531MTL::Texture *obj = reinterpret_cast<MTL::Texture *>(p_texture.id);532return obj->allocatedSize();533}534535void RenderingDeviceDriverMetal::texture_get_copyable_layout(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) {536MTL::Texture *obj = reinterpret_cast<MTL::Texture *>(p_texture.id);537538PixelFormats &pf = *pixel_formats;539DataFormat format = pf.getDataFormat(obj->pixelFormat());540541uint32_t w = MAX(1u, obj->width() >> p_subresource.mipmap);542uint32_t h = MAX(1u, 
obj->height() >> p_subresource.mipmap);543uint32_t d = MAX(1u, obj->depth() >> p_subresource.mipmap);544545uint32_t bw = 0, bh = 0;546get_compressed_image_format_block_dimensions(format, bw, bh);547548uint32_t sbw = 0, sbh = 0;549*r_layout = {};550r_layout->size = get_image_format_required_size(format, w, h, d, 1, &sbw, &sbh);551r_layout->row_pitch = r_layout->size / ((sbh / bh) * d);552}553554Vector<uint8_t> RenderingDeviceDriverMetal::texture_get_data(TextureID p_texture, uint32_t p_layer) {555MTL::Texture *obj = reinterpret_cast<MTL::Texture *>(p_texture.id);556ERR_FAIL_COND_V_MSG(obj->storageMode() != MTL::StorageModeShared, Vector<uint8_t>(), "Texture must be created with TEXTURE_USAGE_CPU_READ_BIT set.");557558MTL::Buffer *buf = obj->buffer();559if (buf) {560ERR_FAIL_COND_V_MSG(p_layer > 0, Vector<uint8_t>(), "A linear texture has a single layer.");561ERR_FAIL_COND_V_MSG(obj->mipmapLevelCount() > 1, Vector<uint8_t>(), "A linear texture has a single mipmap level.");562Vector<uint8_t> image_data;563image_data.resize_uninitialized(buf->length());564memcpy(image_data.ptrw(), buf->contents(), buf->length());565return image_data;566}567568DataFormat tex_format = pixel_formats->getDataFormat(obj->pixelFormat());569uint32_t tex_w = obj->width();570uint32_t tex_h = obj->height();571uint32_t tex_d = obj->depth();572uint32_t tex_mipmaps = obj->mipmapLevelCount();573574// Must iteratively copy the texture data to a buffer.575576uint32_t tight_mip_size = get_image_format_required_size(tex_format, tex_w, tex_h, tex_d, tex_mipmaps);577578Vector<uint8_t> image_data;579image_data.resize(tight_mip_size);580581uint32_t pixel_size = get_image_format_pixel_size(tex_format);582uint32_t pixel_rshift = get_compressed_image_format_pixel_rshift(tex_format);583uint32_t blockw = 0, blockh = 0;584get_compressed_image_format_block_dimensions(tex_format, blockw, blockh);585586uint8_t *dest_ptr = image_data.ptrw();587588for (uint32_t mm_i = 0; mm_i < tex_mipmaps; mm_i++) {589uint32_t bw = 
STEPIFY(tex_w, blockw);590uint32_t bh = STEPIFY(tex_h, blockh);591592uint32_t bytes_per_row = (bw * pixel_size) >> pixel_rshift;593uint32_t bytes_per_img = bytes_per_row * bh;594uint32_t mip_size = bytes_per_img * tex_d;595596obj->getBytes(dest_ptr, bytes_per_row, bytes_per_img, MTL::Region(0, 0, 0, bw, bh, tex_d), mm_i, p_layer);597598dest_ptr += mip_size;599600// Next mipmap level.601tex_w = MAX(blockw, tex_w >> 1);602tex_h = MAX(blockh, tex_h >> 1);603tex_d = MAX(1u, tex_d >> 1);604}605606// Ensure that the destination pointer is at the end of the image data.607DEV_ASSERT(dest_ptr - image_data.ptr() == image_data.size());608609return image_data;610}611612BitField<RDD::TextureUsageBits> RenderingDeviceDriverMetal::texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) {613PixelFormats &pf = *pixel_formats;614if (pf.getMTLPixelFormat(p_format) == MTL::PixelFormatInvalid) {615return 0;616}617618MTLFmtCaps caps = pf.getCapabilities(p_format);619620// Everything supported by default makes an all-or-nothing check easier for the caller.621BitField<RDD::TextureUsageBits> supported = INT64_MAX;622supported.clear_flag(TEXTURE_USAGE_VRS_ATTACHMENT_BIT); // No VRS support for Metal.623624if (!flags::any(caps, kMTLFmtCapsColorAtt)) {625supported.clear_flag(TEXTURE_USAGE_COLOR_ATTACHMENT_BIT);626}627if (!flags::any(caps, kMTLFmtCapsDSAtt)) {628supported.clear_flag(TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);629}630if (!flags::any(caps, kMTLFmtCapsRead)) {631supported.clear_flag(TEXTURE_USAGE_SAMPLING_BIT);632}633if (!flags::any(caps, kMTLFmtCapsAtomic)) {634supported.clear_flag(TEXTURE_USAGE_STORAGE_ATOMIC_BIT);635}636637return supported;638}639640bool RenderingDeviceDriverMetal::texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) {641r_raw_reinterpretation = false;642return true;643}644645#pragma mark - Sampler646647static const MTL::CompareFunction COMPARE_OPERATORS[RD::COMPARE_OP_MAX] = 
{648MTL::CompareFunctionNever,649MTL::CompareFunctionLess,650MTL::CompareFunctionEqual,651MTL::CompareFunctionLessEqual,652MTL::CompareFunctionGreater,653MTL::CompareFunctionNotEqual,654MTL::CompareFunctionGreaterEqual,655MTL::CompareFunctionAlways,656};657658static const MTL::StencilOperation STENCIL_OPERATIONS[RD::STENCIL_OP_MAX] = {659MTL::StencilOperationKeep,660MTL::StencilOperationZero,661MTL::StencilOperationReplace,662MTL::StencilOperationIncrementClamp,663MTL::StencilOperationDecrementClamp,664MTL::StencilOperationInvert,665MTL::StencilOperationIncrementWrap,666MTL::StencilOperationDecrementWrap,667};668669static const MTL::BlendFactor BLEND_FACTORS[RD::BLEND_FACTOR_MAX] = {670MTL::BlendFactorZero,671MTL::BlendFactorOne,672MTL::BlendFactorSourceColor,673MTL::BlendFactorOneMinusSourceColor,674MTL::BlendFactorDestinationColor,675MTL::BlendFactorOneMinusDestinationColor,676MTL::BlendFactorSourceAlpha,677MTL::BlendFactorOneMinusSourceAlpha,678MTL::BlendFactorDestinationAlpha,679MTL::BlendFactorOneMinusDestinationAlpha,680MTL::BlendFactorBlendColor,681MTL::BlendFactorOneMinusBlendColor,682MTL::BlendFactorBlendAlpha,683MTL::BlendFactorOneMinusBlendAlpha,684MTL::BlendFactorSourceAlphaSaturated,685MTL::BlendFactorSource1Color,686MTL::BlendFactorOneMinusSource1Color,687MTL::BlendFactorSource1Alpha,688MTL::BlendFactorOneMinusSource1Alpha,689};690static const MTL::BlendOperation BLEND_OPERATIONS[RD::BLEND_OP_MAX] = {691MTL::BlendOperationAdd,692MTL::BlendOperationSubtract,693MTL::BlendOperationReverseSubtract,694MTL::BlendOperationMin,695MTL::BlendOperationMax,696};697698static const MTL::SamplerAddressMode ADDRESS_MODES[RD::SAMPLER_REPEAT_MODE_MAX] = {699MTL::SamplerAddressModeRepeat,700MTL::SamplerAddressModeMirrorRepeat,701MTL::SamplerAddressModeClampToEdge,702MTL::SamplerAddressModeClampToBorderColor,703MTL::SamplerAddressModeMirrorClampToEdge,704};705706static const MTL::SamplerBorderColor SAMPLER_BORDER_COLORS[RD::SAMPLER_BORDER_COLOR_MAX] = 
{707MTL::SamplerBorderColorTransparentBlack,708MTL::SamplerBorderColorTransparentBlack,709MTL::SamplerBorderColorOpaqueBlack,710MTL::SamplerBorderColorOpaqueBlack,711MTL::SamplerBorderColorOpaqueWhite,712MTL::SamplerBorderColorOpaqueWhite,713};714715RDD::SamplerID RenderingDeviceDriverMetal::sampler_create(const SamplerState &p_state) {716NS::SharedPtr<MTL::SamplerDescriptor> desc = NS::TransferPtr(MTL::SamplerDescriptor::alloc()->init());717desc->setSupportArgumentBuffers(true);718719desc->setMagFilter(p_state.mag_filter == SAMPLER_FILTER_LINEAR ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest);720desc->setMinFilter(p_state.min_filter == SAMPLER_FILTER_LINEAR ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest);721desc->setMipFilter(p_state.mip_filter == SAMPLER_FILTER_LINEAR ? MTL::SamplerMipFilterLinear : MTL::SamplerMipFilterNearest);722723desc->setSAddressMode(ADDRESS_MODES[p_state.repeat_u]);724desc->setTAddressMode(ADDRESS_MODES[p_state.repeat_v]);725desc->setRAddressMode(ADDRESS_MODES[p_state.repeat_w]);726727if (p_state.use_anisotropy) {728desc->setMaxAnisotropy(p_state.anisotropy_max);729}730731desc->setCompareFunction(COMPARE_OPERATORS[p_state.compare_op]);732733desc->setLodMinClamp(p_state.min_lod);734desc->setLodMaxClamp(p_state.max_lod);735736desc->setBorderColor(SAMPLER_BORDER_COLORS[p_state.border_color]);737738desc->setNormalizedCoordinates(!p_state.unnormalized_uvw);739740#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 260000 || __IPHONE_OS_VERSION_MAX_ALLOWED >= 260000 || __TV_OS_VERSION_MAX_ALLOWED >= 260000 || __VISION_OS_VERSION_MAX_ALLOWED >= 260000741if (p_state.lod_bias != 0.0) {742if (__builtin_available(macOS 26.0, iOS 26.0, tvOS 26.0, visionOS 26.0, *)) {743desc->setLodBias(p_state.lod_bias);744}745}746#endif747748MTL::SamplerState *obj = device->newSamplerState(desc.get());749ERR_FAIL_NULL_V_MSG(obj, SamplerID(), "newSamplerState failed");750return SamplerID(reinterpret_cast<uint64_t>(obj));751}752753void 
RenderingDeviceDriverMetal::sampler_free(SamplerID p_sampler) {754MTL::SamplerState *obj = reinterpret_cast<MTL::SamplerState *>(p_sampler.id);755obj->release();756}757758bool RenderingDeviceDriverMetal::sampler_is_format_supported_for_filter(DataFormat p_format, SamplerFilter p_filter) {759switch (p_filter) {760case SAMPLER_FILTER_NEAREST:761return true;762case SAMPLER_FILTER_LINEAR: {763MTLFmtCaps caps = pixel_formats->getCapabilities(p_format);764return flags::any(caps, kMTLFmtCapsFilter);765}766}767}768769#pragma mark - Vertex Array770771RDD::VertexFormatID RenderingDeviceDriverMetal::vertex_format_create(Span<VertexAttribute> p_vertex_attribs, const VertexAttributeBindingsMap &p_vertex_bindings) {772MTL::VertexDescriptor *desc = MTL::VertexDescriptor::vertexDescriptor();773774for (const VertexAttributeBindingsMap::KV &kv : p_vertex_bindings) {775uint32_t idx = get_metal_buffer_index_for_vertex_attribute_binding(kv.key);776MTL::VertexBufferLayoutDescriptor *ld = desc->layouts()->object(idx);777if (kv.value.stride != 0) {778ld->setStepFunction(kv.value.frequency == VERTEX_FREQUENCY_VERTEX ? 
MTL::VertexStepFunctionPerVertex : MTL::VertexStepFunctionPerInstance);779ld->setStepRate(1);780ld->setStride(kv.value.stride);781} else {782ld->setStepFunction(MTL::VertexStepFunctionConstant);783ld->setStepRate(0);784ld->setStride(0);785}786DEV_ASSERT(ld->stride() == desc->layouts()->object(idx)->stride());787}788789for (const VertexAttribute &vf : p_vertex_attribs) {790MTL::VertexAttributeDescriptor *attr = desc->attributes()->object(vf.location);791attr->setFormat((MTL::VertexFormat)pixel_formats->getMTLVertexFormat(vf.format));792attr->setOffset(vf.offset);793uint32_t idx = get_metal_buffer_index_for_vertex_attribute_binding(vf.binding);794attr->setBufferIndex(idx);795if (vf.stride == 0) {796// Constant attribute, so we must determine the stride to satisfy Metal API.797uint32_t stride = desc->layouts()->object(idx)->stride();798desc->layouts()->object(idx)->setStride(std::max(stride, vf.offset + pixel_formats->getBytesPerBlock(vf.format)));799}800}801802desc->retain();803return VertexFormatID(reinterpret_cast<uint64_t>(desc));804}805806void RenderingDeviceDriverMetal::vertex_format_free(VertexFormatID p_vertex_format) {807MTL::VertexDescriptor *obj = reinterpret_cast<MTL::VertexDescriptor *>(p_vertex_format.id);808obj->release();809}810811#pragma mark - Barriers812813void RenderingDeviceDriverMetal::command_pipeline_barrier(814CommandBufferID p_cmd_buffer,815BitField<PipelineStageBits> p_src_stages,816BitField<PipelineStageBits> p_dst_stages,817VectorView<MemoryAccessBarrier> p_memory_barriers,818VectorView<BufferBarrier> p_buffer_barriers,819VectorView<TextureBarrier> p_texture_barriers,820VectorView<AccelerationStructureBarrier> p_acceleration_structure_barriers) {821MDCommandBufferBase *obj = (MDCommandBufferBase *)(p_cmd_buffer.id);822obj->pipeline_barrier(p_src_stages, p_dst_stages, p_memory_barriers, p_buffer_barriers, p_texture_barriers, p_acceleration_structure_barriers);823}824825#pragma mark - Queues826827RDD::CommandQueueFamilyID 
RenderingDeviceDriverMetal::command_queue_family_get(BitField<CommandQueueFamilyBits> p_cmd_queue_family_bits, RenderingContextDriver::SurfaceID p_surface) {828if (p_cmd_queue_family_bits.has_flag(COMMAND_QUEUE_FAMILY_GRAPHICS_BIT) || (p_surface != 0)) {829return CommandQueueFamilyID(COMMAND_QUEUE_FAMILY_GRAPHICS_BIT);830} else if (p_cmd_queue_family_bits.has_flag(COMMAND_QUEUE_FAMILY_COMPUTE_BIT)) {831return CommandQueueFamilyID(COMMAND_QUEUE_FAMILY_COMPUTE_BIT);832} else if (p_cmd_queue_family_bits.has_flag(COMMAND_QUEUE_FAMILY_TRANSFER_BIT)) {833return CommandQueueFamilyID(COMMAND_QUEUE_FAMILY_TRANSFER_BIT);834} else {835return CommandQueueFamilyID();836}837}838839#pragma mark - Command Buffers840841bool RenderingDeviceDriverMetal::command_buffer_begin(CommandBufferID p_cmd_buffer) {842MDCommandBufferBase *obj = (MDCommandBufferBase *)(p_cmd_buffer.id);843obj->begin();844return true;845}846847bool RenderingDeviceDriverMetal::command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) {848ERR_FAIL_V_MSG(false, "not implemented");849}850851void RenderingDeviceDriverMetal::command_buffer_end(CommandBufferID p_cmd_buffer) {852MDCommandBufferBase *obj = (MDCommandBufferBase *)(p_cmd_buffer.id);853obj->end();854}855856void RenderingDeviceDriverMetal::command_buffer_execute_secondary(CommandBufferID p_cmd_buffer, VectorView<CommandBufferID> p_secondary_cmd_buffers) {857ERR_FAIL_MSG("not implemented");858}859860#pragma mark - Swap Chain861862void RenderingDeviceDriverMetal::_swap_chain_release(SwapChain *p_swap_chain) {863_swap_chain_release_buffers(p_swap_chain);864}865866void RenderingDeviceDriverMetal::_swap_chain_release_buffers(SwapChain *p_swap_chain) {867}868869RDD::SwapChainID RenderingDeviceDriverMetal::swap_chain_create(RenderingContextDriver::SurfaceID p_surface) {870RenderingContextDriverMetal::Surface const *surface = (RenderingContextDriverMetal::Surface *)(p_surface);871if 
(use_barriers) {872GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability")873add_residency_set_to_main_queue(surface->get_residency_set());874GODOT_CLANG_WARNING_POP875}876877// Create the render pass that will be used to draw to the swap chain's framebuffers.878RDD::Attachment attachment;879attachment.format = pixel_formats->getDataFormat(surface->get_pixel_format());880attachment.samples = RDD::TEXTURE_SAMPLES_1;881attachment.load_op = RDD::ATTACHMENT_LOAD_OP_CLEAR;882attachment.store_op = RDD::ATTACHMENT_STORE_OP_STORE;883884RDD::Subpass subpass;885RDD::AttachmentReference color_ref;886color_ref.attachment = 0;887color_ref.aspect.set_flag(RDD::TEXTURE_ASPECT_COLOR_BIT);888subpass.color_references.push_back(color_ref);889890RenderPassID render_pass = render_pass_create(attachment, subpass, {}, 1, RDD::AttachmentReference());891ERR_FAIL_COND_V(!render_pass, SwapChainID());892893// Create the empty swap chain until it is resized.894SwapChain *swap_chain = memnew(SwapChain);895swap_chain->surface = p_surface;896swap_chain->data_format = attachment.format;897swap_chain->render_pass = render_pass;898return SwapChainID(swap_chain);899}900901Error RenderingDeviceDriverMetal::swap_chain_resize(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, uint32_t p_desired_framebuffer_count) {902DEV_ASSERT(p_cmd_queue.id != 0);903DEV_ASSERT(p_swap_chain.id != 0);904905SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id);906RenderingContextDriverMetal::Surface *surface = (RenderingContextDriverMetal::Surface *)(swap_chain->surface);907surface->resize(p_desired_framebuffer_count);908909// Once everything's been created correctly, indicate the surface no longer needs to be resized.910context_driver->surface_set_needs_resize(swap_chain->surface, false);911912return OK;913}914915RDD::FramebufferID RenderingDeviceDriverMetal::swap_chain_acquire_framebuffer(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, bool &r_resize_required) {916DEV_ASSERT(p_cmd_queue.id != 
0);917DEV_ASSERT(p_swap_chain.id != 0);918919SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id);920if (context_driver->surface_get_needs_resize(swap_chain->surface)) {921r_resize_required = true;922return FramebufferID();923}924925RenderingContextDriverMetal::Surface *metal_surface = (RenderingContextDriverMetal::Surface *)(swap_chain->surface);926return metal_surface->acquire_next_frame_buffer();927}928929RDD::RenderPassID RenderingDeviceDriverMetal::swap_chain_get_render_pass(SwapChainID p_swap_chain) {930const SwapChain *swap_chain = (const SwapChain *)(p_swap_chain.id);931return swap_chain->render_pass;932}933934RDD::DataFormat RenderingDeviceDriverMetal::swap_chain_get_format(SwapChainID p_swap_chain) {935const SwapChain *swap_chain = (const SwapChain *)(p_swap_chain.id);936return swap_chain->data_format;937}938939void RenderingDeviceDriverMetal::swap_chain_set_max_fps(SwapChainID p_swap_chain, int p_max_fps) {940SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id);941RenderingContextDriverMetal::Surface *metal_surface = (RenderingContextDriverMetal::Surface *)(swap_chain->surface);942metal_surface->set_max_fps(p_max_fps);943}944945void RenderingDeviceDriverMetal::swap_chain_free(SwapChainID p_swap_chain) {946SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id);947if (use_barriers) {948GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability")949RenderingContextDriverMetal::Surface *surface = (RenderingContextDriverMetal::Surface *)(swap_chain->surface);950remove_residency_set_to_main_queue(surface->get_residency_set());951GODOT_CLANG_WARNING_POP952}953_swap_chain_release(swap_chain);954render_pass_free(swap_chain->render_pass);955memdelete(swap_chain);956}957958#pragma mark - Frame buffer959960RDD::FramebufferID RenderingDeviceDriverMetal::framebuffer_create(RenderPassID p_render_pass, VectorView<TextureID> p_attachments, uint32_t p_width, uint32_t p_height) {961MDRenderPass *pass = (MDRenderPass *)(p_render_pass.id);962963Vector<MTL::Texture *> 
textures;964textures.resize(p_attachments.size());965966for (uint32_t i = 0; i < p_attachments.size(); i += 1) {967MDAttachment const &a = pass->attachments[i];968MTL::Texture *tex = reinterpret_cast<MTL::Texture *>(p_attachments[i].id);969if (tex == nullptr) {970#if DEV_ENABLED971WARN_PRINT("Invalid texture for attachment " + itos(i));972#endif973}974if (a.samples > 1) {975if (tex->sampleCount() != a.samples) {976#if DEV_ENABLED977WARN_PRINT("Mismatched sample count for attachment " + itos(i) + "; expected " + itos(a.samples) + ", got " + itos(tex->sampleCount()));978#endif979}980}981textures.write[i] = tex;982}983984MDFrameBuffer *fb = memnew(MDFrameBuffer(textures, Size2i(p_width, p_height)));985return FramebufferID(fb);986}987988void RenderingDeviceDriverMetal::framebuffer_free(FramebufferID p_framebuffer) {989MDFrameBuffer *obj = (MDFrameBuffer *)(p_framebuffer.id);990memdelete(obj);991}992993#pragma mark - Shader994995void RenderingDeviceDriverMetal::shader_cache_free_entry(const SHA256Digest &key) {996if (ShaderCacheEntry **pentry = _shader_cache.getptr(key); pentry != nullptr) {997ShaderCacheEntry *entry = *pentry;998_shader_cache.erase(key);999entry->library.reset();1000memdelete(entry);1001}1002}10031004template <typename T, typename U>1005struct is_layout_compatible1006: std::bool_constant<1007sizeof(T) == sizeof(U) &&1008alignof(T) == alignof(U) &&1009std::is_trivially_copyable_v<T> &&1010std::is_trivially_copyable_v<U>> {};1011static_assert(is_layout_compatible<UniformInfo::Indexes, RenderingShaderContainerMetal::UniformData::Indexes>::value, "UniformInfo::Indexes layout does not match RenderingShaderContainerMetal::UniformData::Indexes layout");10121013API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0))1014static void update_uniform_info(const RenderingShaderContainerMetal::UniformData &p_data, UniformInfo &r_ui) {1015r_ui.active_stages = p_data.active_stages;1016r_ui.dataType = static_cast<MTL::DataType>(p_data.data_type);1017memcpy(&r_ui.slot, 
&p_data.slot, sizeof(UniformInfo::Indexes));1018memcpy(&r_ui.arg_buffer, &p_data.arg_buffer, sizeof(UniformInfo::Indexes));1019r_ui.access = static_cast<MTL::BindingAccess>(p_data.access);1020r_ui.usage = static_cast<MTL::ResourceUsage>(p_data.usage);1021r_ui.textureType = static_cast<MTL::TextureType>(p_data.texture_type);1022r_ui.imageFormat = p_data.image_format;1023r_ui.arrayLength = p_data.array_length;1024r_ui.isMultisampled = p_data.is_multisampled;1025}10261027RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref<RenderingShaderContainer> &p_shader_container, const Vector<ImmutableSampler> &p_immutable_samplers) {1028Ref<RenderingShaderContainerMetal> shader_container = p_shader_container;1029using RSCM = RenderingShaderContainerMetal;10301031CharString shader_name = shader_container->shader_name;1032RSCM::HeaderData &mtl_reflection_data = shader_container->mtl_reflection_data;1033Vector<RenderingShaderContainer::Shader> &shaders = shader_container->shaders;1034Vector<RSCM::StageData> &mtl_shaders = shader_container->mtl_shaders;10351036// We need to regenerate the shader if the cache is moved to an incompatible device or argument buffer support differs.1037ERR_FAIL_COND_V_MSG(!device_properties->features.argument_buffers_supported() && mtl_reflection_data.uses_argument_buffers(),1038RDD::ShaderID(),1039"Shader was compiled with argument buffers enabled, but this device does not support them");10401041ERR_FAIL_COND_V_MSG(device_properties->features.msl_max_version < mtl_reflection_data.msl_version,1042RDD::ShaderID(),1043"Shader was compiled for a newer version of Metal");10441045MTL::GPUFamily compiled_gpu_family = static_cast<MTL::GPUFamily>(mtl_reflection_data.profile.gpu);1046ERR_FAIL_COND_V_MSG(device_properties->features.highestFamily < compiled_gpu_family,1047RDD::ShaderID(),1048"Shader was generated for a newer Apple GPU");10491050NS::SharedPtr<MTL::CompileOptions> options = 
NS::TransferPtr(MTL::CompileOptions::alloc()->init());1051uint32_t major = mtl_reflection_data.msl_version / 10000;1052uint32_t minor = (mtl_reflection_data.msl_version / 100) % 100;1053options->setLanguageVersion(MTL::LanguageVersion((major << 0x10) + minor));1054if (__builtin_available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, *)) {1055options->setEnableLogging(mtl_reflection_data.needs_debug_logging());1056}10571058HashMap<RD::ShaderStage, std::shared_ptr<MDLibrary>> libraries;10591060PipelineType pipeline_type = PIPELINE_TYPE_RASTERIZATION;1061Vector<uint8_t> decompressed_code;1062for (uint32_t shader_index = 0; shader_index < shaders.size(); shader_index++) {1063const RenderingShaderContainer::Shader &shader = shaders[shader_index];1064const RSCM::StageData &shader_data = mtl_shaders[shader_index];10651066if (shader.shader_stage == RD::ShaderStage::SHADER_STAGE_COMPUTE) {1067pipeline_type = PIPELINE_TYPE_COMPUTE;1068}10691070if (ShaderCacheEntry **p = _shader_cache.getptr(shader_data.hash); p != nullptr) {1071if (std::shared_ptr<MDLibrary> lib = (*p)->library.lock()) {1072libraries[shader.shader_stage] = lib;1073continue;1074}1075// Library was released; remove stale cache entry and recreate.1076_shader_cache.erase(shader_data.hash);1077}10781079if (shader.code_decompressed_size > 0) {1080decompressed_code.resize(shader.code_decompressed_size);1081bool decompressed = shader_container->decompress_code(shader.code_compressed_bytes.ptr(), shader.code_compressed_bytes.size(), shader.code_compression_flags, decompressed_code.ptrw(), decompressed_code.size());1082ERR_FAIL_COND_V_MSG(!decompressed, RDD::ShaderID(), vformat("Failed to decompress code on shader stage %s.", String(RDD::SHADER_STAGE_NAMES[shader.shader_stage])));1083} else {1084decompressed_code = shader.code_compressed_bytes;1085}10861087ShaderCacheEntry *cd = memnew(ShaderCacheEntry(*this, shader_data.hash));1088cd->name = shader_name;1089cd->stage = 
shader.shader_stage;10901091NS::SharedPtr<NS::String> source = NS::TransferPtr(NS::String::alloc()->init((void *)decompressed_code.ptr(), shader_data.source_size, NS::UTF8StringEncoding));10921093std::shared_ptr<MDLibrary> library;1094if (shader_data.library_size > 0) {1095ERR_FAIL_COND_V_MSG(mtl_reflection_data.os_min_version > device_properties->os_version,1096RDD::ShaderID(),1097"Metal shader binary was generated for a newer target OS");1098dispatch_data_t binary = dispatch_data_create(decompressed_code.ptr() + shader_data.source_size, shader_data.library_size, dispatch_get_main_queue(), DISPATCH_DATA_DESTRUCTOR_DEFAULT);1099library = MDLibrary::create(cd, device,1100#if DEV_ENABLED1101source.get(),1102#endif1103binary);1104} else {1105options->setPreserveInvariance(shader_data.is_position_invariant);1106#if __MAC_OS_X_VERSION_MIN_REQUIRED >= 150000 || __IPHONE_OS_VERSION_MIN_REQUIRED >= 180000 || __TV_OS_VERSION_MIN_REQUIRED >= 180000 || defined(VISIONOS_ENABLED)1107options->setMathMode(MTL::MathModeFast);1108#else1109options->setFastMathEnabled(true);1110#endif1111library = MDLibrary::create(cd, device, source.get(), options.get(), _shader_load_strategy);1112}11131114_shader_cache[shader_data.hash] = cd;1115libraries[shader.shader_stage] = library;1116}11171118ShaderReflection refl = shader_container->get_shader_reflection();1119RSCM::MetalShaderReflection mtl_refl = shader_container->get_metal_shader_reflection();11201121Vector<UniformSet> uniform_sets;1122uint32_t uniform_sets_count = mtl_refl.uniform_sets.size();1123uniform_sets.resize(uniform_sets_count);11241125DynamicOffsetLayout dynamic_offset_layout;1126uint8_t dynamic_offset = 0;11271128// Create sets.1129for (uint32_t i = 0; i < uniform_sets_count; i++) {1130UniformSet &set = uniform_sets.write[i];1131const Vector<ShaderUniform> &refl_set = refl.uniform_sets.ptr()[i];1132const Vector<RSCM::UniformData> &mtl_set = mtl_refl.uniform_sets.ptr()[i];1133uint32_t set_size = 
mtl_set.size();1134set.uniforms.resize(set_size);11351136uint8_t dynamic_count = 0;11371138LocalVector<UniformInfo>::Iterator iter = set.uniforms.begin();1139for (uint32_t j = 0; j < set_size; j++) {1140const ShaderUniform &uniform = refl_set.ptr()[j];1141const RSCM::UniformData &bind = mtl_set.ptr()[j];11421143switch (uniform.type) {1144case UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC:1145case UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: {1146set.dynamic_uniforms.push_back(j);1147dynamic_count++;1148} break;1149default: {1150} break;1151}11521153UniformInfo &ui = *iter;1154++iter;1155update_uniform_info(bind, ui);1156ui.binding = uniform.binding;11571158if (ui.arg_buffer.texture == UINT32_MAX && ui.arg_buffer.buffer == UINT32_MAX && ui.arg_buffer.sampler == UINT32_MAX) {1159// No bindings.1160continue;1161}1162#define VAL(x) (x == UINT32_MAX ? 0 : x)1163uint32_t max = std::max({ VAL(ui.arg_buffer.texture), VAL(ui.arg_buffer.buffer), VAL(ui.arg_buffer.sampler) });1164max += ui.arrayLength > 0 ? ui.arrayLength - 1 : 0;1165set.buffer_size = std::max(set.buffer_size, (max + 1) * (uint32_t)sizeof(uint64_t));1166#undef VAL1167}11681169if (dynamic_count > 0) {1170dynamic_offset_layout.set_offset_count(i, dynamic_offset, dynamic_count);1171dynamic_offset += dynamic_count;1172}1173}11741175MDShader *shader = nullptr;1176if (pipeline_type == PIPELINE_TYPE_COMPUTE) {1177MDComputeShader *cs = new MDComputeShader(1178shader_name,1179uniform_sets,1180mtl_reflection_data.uses_argument_buffers(),1181libraries[RD::ShaderStage::SHADER_STAGE_COMPUTE]);11821183cs->local = MTL::Size(refl.compute_local_size[0], refl.compute_local_size[1], refl.compute_local_size[2]);1184shader = cs;1185} else {1186MDRenderShader *rs = new MDRenderShader(1187shader_name,1188uniform_sets,1189mtl_reflection_data.needs_view_mask_buffer(),1190mtl_reflection_data.uses_argument_buffers(),1191libraries[RD::ShaderStage::SHADER_STAGE_VERTEX],1192libraries[RD::ShaderStage::SHADER_STAGE_FRAGMENT]);1193shader = 
rs;1194}11951196shader->push_constants.stages = refl.push_constant_stages;1197shader->push_constants.size = refl.push_constant_size;1198shader->push_constants.binding = mtl_reflection_data.push_constant_binding;1199shader->dynamic_offset_layout = dynamic_offset_layout;12001201return RDD::ShaderID(shader);1202}12031204void RenderingDeviceDriverMetal::shader_free(ShaderID p_shader) {1205MDShader *obj = (MDShader *)p_shader.id;1206delete obj;1207}12081209void RenderingDeviceDriverMetal::shader_destroy_modules(ShaderID p_shader) {1210// TODO.1211}12121213/*********************/1214/**** UNIFORM SET ****/1215/*********************/12161217RDD::UniformSetID RenderingDeviceDriverMetal::uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index, int p_linear_pool_index) {1218//p_linear_pool_index = -1; // TODO:? Linear pools not implemented or not supported by API backend.12191220MDShader *shader = (MDShader *)(p_shader.id);1221ERR_FAIL_INDEX_V_MSG(p_set_index, shader->sets.size(), UniformSetID(), "Set index out of range");1222const UniformSet &shader_set = shader->sets.get(p_set_index);1223MDUniformSet *set = memnew(MDUniformSet);1224// Determine if there are any dynamic uniforms in this set.1225bool is_dynamic = !shader_set.dynamic_uniforms.is_empty();12261227Vector<uint8_t> arg_buffer_data;12281229if (device_properties->features.argument_buffers_supported()) {1230arg_buffer_data.resize(shader_set.buffer_size);12311232// If argument buffers are enabled, we have already verified availability, so we can skip the runtime check.1233GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability-new")1234uint64_t *ptr = (uint64_t *)arg_buffer_data.ptrw();12351236HashMap<MTL::Resource *, StageResourceUsage, HashMapHasherDefault> bound_resources;1237auto add_usage = [&bound_resources](MTL::Resource *res, BitField<RDD::ShaderStage> stage, MTL::ResourceUsage usage) {1238StageResourceUsage *sru = bound_resources.getptr(res);1239if (sru == 
nullptr) {1240sru = &bound_resources.insert(res, ResourceUnused)->value;1241}1242if (stage.has_flag(RDD::SHADER_STAGE_VERTEX_BIT)) {1243*sru |= stage_resource_usage(RDD::SHADER_STAGE_VERTEX, usage);1244}1245if (stage.has_flag(RDD::SHADER_STAGE_FRAGMENT_BIT)) {1246*sru |= stage_resource_usage(RDD::SHADER_STAGE_FRAGMENT, usage);1247}1248if (stage.has_flag(RDD::SHADER_STAGE_COMPUTE_BIT)) {1249*sru |= stage_resource_usage(RDD::SHADER_STAGE_COMPUTE, usage);1250}1251};1252#define ADD_USAGE(res, stage, usage) \1253if (!use_barriers) { \1254add_usage(res, stage, usage); \1255}12561257// Ensure the argument buffer exists for this set as some shader pipelines may1258// have been generated with argument buffers enabled.1259for (uint32_t i = 0; i < p_uniforms.size(); i += 1) {1260const BoundUniform &uniform = p_uniforms[i];1261const UniformInfo &ui = shader_set.uniforms[i];1262const UniformInfo::Indexes &idx = ui.arg_buffer;12631264switch (uniform.type) {1265case UNIFORM_TYPE_SAMPLER: {1266size_t count = uniform.ids.size();1267for (size_t j = 0; j < count; j += 1) {1268MTL::SamplerState *sampler = reinterpret_cast<MTL::SamplerState *>(uniform.ids[j].id);1269*(MTL::ResourceID *)(ptr + idx.sampler + j) = sampler->gpuResourceID();1270}1271} break;1272case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {1273uint32_t count = uniform.ids.size() / 2;1274for (uint32_t j = 0; j < count; j += 1) {1275MTL::SamplerState *sampler = reinterpret_cast<MTL::SamplerState *>(uniform.ids[j * 2 + 0].id);1276MTL::Texture *texture = reinterpret_cast<MTL::Texture *>(uniform.ids[j * 2 + 1].id);1277*(MTL::ResourceID *)(ptr + idx.texture + j) = texture->gpuResourceID();1278*(MTL::ResourceID *)(ptr + idx.sampler + j) = sampler->gpuResourceID();12791280ADD_USAGE(texture, ui.active_stages, ui.usage);1281}1282} break;1283case UNIFORM_TYPE_TEXTURE: {1284size_t count = uniform.ids.size();1285for (size_t j = 0; j < count; j += 1) {1286MTL::Texture *texture = reinterpret_cast<MTL::Texture 
*>(uniform.ids[j].id);1287*(MTL::ResourceID *)(ptr + idx.texture + j) = texture->gpuResourceID();12881289ADD_USAGE(texture, ui.active_stages, ui.usage);1290}1291} break;1292case UNIFORM_TYPE_IMAGE: {1293size_t count = uniform.ids.size();1294for (size_t j = 0; j < count; j += 1) {1295MTL::Texture *texture = reinterpret_cast<MTL::Texture *>(uniform.ids[j].id);1296*(MTL::ResourceID *)(ptr + idx.texture + j) = texture->gpuResourceID();1297ADD_USAGE(texture, ui.active_stages, ui.usage);12981299if (idx.buffer != UINT32_MAX) {1300// Emulated atomic image access.1301MTL::Texture *parent = texture->parentTexture();1302MTL::Buffer *buffer = (parent ? parent : texture)->buffer();1303*(MTLGPUAddress *)(ptr + idx.buffer + j) = buffer->gpuAddress();13041305ADD_USAGE(buffer, ui.active_stages, ui.usage);1306}1307}1308} break;1309case UNIFORM_TYPE_TEXTURE_BUFFER: {1310ERR_PRINT("not implemented: UNIFORM_TYPE_TEXTURE_BUFFER");1311} break;1312case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: {1313ERR_PRINT("not implemented: UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER");1314} break;1315case UNIFORM_TYPE_IMAGE_BUFFER: {1316CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER");1317} break;1318case UNIFORM_TYPE_STORAGE_BUFFER:1319case UNIFORM_TYPE_UNIFORM_BUFFER: {1320const BufferInfo *buffer = (const BufferInfo *)uniform.ids[0].id;1321*(MTLGPUAddress *)(ptr + idx.buffer) = buffer->metal_buffer.get()->gpuAddress();13221323ADD_USAGE(buffer->metal_buffer.get(), ui.active_stages, ui.usage);1324} break;1325case UNIFORM_TYPE_INPUT_ATTACHMENT: {1326size_t count = uniform.ids.size();1327for (size_t j = 0; j < count; j += 1) {1328MTL::Texture *texture = reinterpret_cast<MTL::Texture *>(uniform.ids[j].id);1329*(MTL::ResourceID *)(ptr + idx.texture + j) = texture->gpuResourceID();13301331ADD_USAGE(texture, ui.active_stages, ui.usage);1332}1333} break;1334case UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC:1335case UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {1336// Encode the base GPU address (frame 0); it will be 
updated at bind time.1337const MetalBufferDynamicInfo *buffer = (const MetalBufferDynamicInfo *)uniform.ids[0].id;1338*(MTLGPUAddress *)(ptr + idx.buffer) = buffer->metal_buffer.get()->gpuAddress();13391340ADD_USAGE(buffer->metal_buffer.get(), ui.active_stages, ui.usage);1341} break;1342default: {1343DEV_ASSERT(false);1344}1345}1346}13471348#undef ADD_USAGE13491350if (!use_barriers) {1351for (KeyValue<MTL::Resource *, StageResourceUsage> const &keyval : bound_resources) {1352ResourceVector *resources = set->usage_to_resources.getptr(keyval.value);1353if (resources == nullptr) {1354resources = &set->usage_to_resources.insert(keyval.value, ResourceVector())->value;1355}1356int64_t pos = resources->span().bisect(keyval.key, true);1357if (pos == resources->size() || (*resources)[pos] != keyval.key) {1358resources->insert(pos, keyval.key);1359}1360}1361}13621363if (!is_dynamic) {1364set->arg_buffer = NS::TransferPtr(device->newBuffer(shader_set.buffer_size, base_hazard_tracking | MTL::ResourceStorageModePrivate));1365#if DEV_ENABLED1366char label[64];1367snprintf(label, sizeof(label), "Uniform Set %u", p_set_index);1368set->arg_buffer->setLabel(NS::String::string(label, NS::UTF8StringEncoding));1369#endif1370_track_resource(set->arg_buffer.get());1371_copy_queue_copy_to_buffer(arg_buffer_data, set->arg_buffer.get());1372} else {1373// Store the arg buffer data for dynamic uniform sets.1374// It will be copied and updated at bind time.1375set->arg_buffer_data = arg_buffer_data;1376}13771378GODOT_CLANG_WARNING_POP1379}1380Vector<BoundUniform> bound_uniforms;1381bound_uniforms.resize(p_uniforms.size());1382for (uint32_t i = 0; i < p_uniforms.size(); i += 1) {1383bound_uniforms.write[i] = p_uniforms[i];1384}1385set->uniforms = bound_uniforms;13861387return UniformSetID(set);1388}13891390void RenderingDeviceDriverMetal::uniform_set_free(UniformSetID p_uniform_set) {1391MDUniformSet *obj = (MDUniformSet *)p_uniform_set.id;1392if (obj->arg_buffer) 
{1393_untrack_resource(obj->arg_buffer.get());1394}1395memdelete(obj);1396}13971398uint32_t RenderingDeviceDriverMetal::uniform_sets_get_dynamic_offsets(VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) const {1399const MDShader *shader = (const MDShader *)p_shader.id;1400const DynamicOffsetLayout layout = shader->dynamic_offset_layout;14011402if (layout.is_empty()) {1403return 0u;1404}14051406uint32_t mask = 0u;14071408for (uint32_t i = 0; i < p_set_count; i++) {1409const uint32_t index = p_first_set_index + i;1410uint32_t shift = layout.get_offset_index_shift(index);1411const uint32_t count = layout.get_count(index);1412DEV_ASSERT(shader->sets[index].dynamic_uniforms.size() == count);1413if (count == 0) {1414continue;1415}14161417const MDUniformSet *usi = (const MDUniformSet *)p_uniform_sets[i].id;1418for (uint32_t uniform_index : shader->sets[index].dynamic_uniforms) {1419const RDD::BoundUniform &uniform = usi->uniforms[uniform_index];1420DEV_ASSERT(uniform.is_dynamic());1421const MetalBufferDynamicInfo *buf_info = (const MetalBufferDynamicInfo *)uniform.ids[0].id;1422mask |= buf_info->frame_index() << shift;1423shift += 4u;1424}1425}14261427return mask;1428}14291430void RenderingDeviceDriverMetal::command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {1431}14321433#pragma mark - Transfer14341435void RenderingDeviceDriverMetal::command_clear_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, uint64_t p_offset, uint64_t p_size) {1436MDCommandBufferBase *cmd = (MDCommandBufferBase *)(p_cmd_buffer.id);1437cmd->clear_buffer(p_buffer, p_offset, p_size);1438}14391440void RenderingDeviceDriverMetal::command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_dst_buffer, VectorView<BufferCopyRegion> p_regions) {1441MDCommandBufferBase *cmd = (MDCommandBufferBase 
*)(p_cmd_buffer.id);1442cmd->copy_buffer(p_src_buffer, p_dst_buffer, p_regions);1443}14441445void RenderingDeviceDriverMetal::command_copy_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<TextureCopyRegion> p_regions) {1446MDCommandBufferBase *cmd = (MDCommandBufferBase *)(p_cmd_buffer.id);1447cmd->copy_texture(p_src_texture, p_dst_texture, p_regions);1448}14491450void RenderingDeviceDriverMetal::command_resolve_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) {1451MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1452cb->resolve_texture(p_src_texture, p_src_texture_layout, p_src_layer, p_src_mipmap, p_dst_texture, p_dst_texture_layout, p_dst_layer, p_dst_mipmap);1453}14541455void RenderingDeviceDriverMetal::command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) {1456MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1457cb->clear_color_texture(p_texture, p_texture_layout, p_color, p_subresources);1458}14591460void RenderingDeviceDriverMetal::command_clear_depth_stencil_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, float p_depth, uint8_t p_stencil, const TextureSubresourceRange &p_subresources) {1461MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1462cb->clear_depth_stencil_texture(p_texture, p_texture_layout, p_depth, p_stencil, p_subresources);1463}14641465void RenderingDeviceDriverMetal::command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, TextureID p_dst_texture, TextureLayout 
p_dst_texture_layout, VectorView<BufferTextureCopyRegion> p_regions) {1466MDCommandBufferBase *cmd = (MDCommandBufferBase *)(p_cmd_buffer.id);1467cmd->copy_buffer_to_texture(p_src_buffer, p_dst_texture, p_regions);1468}14691470void RenderingDeviceDriverMetal::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView<BufferTextureCopyRegion> p_regions) {1471MDCommandBufferBase *cmd = (MDCommandBufferBase *)(p_cmd_buffer.id);1472cmd->copy_texture_to_buffer(p_src_texture, p_dst_buffer, p_regions);1473}14741475#pragma mark - Pipeline14761477void RenderingDeviceDriverMetal::pipeline_free(PipelineID p_pipeline_id) {1478MDPipeline *obj = (MDPipeline *)(p_pipeline_id.id);1479delete obj;1480}14811482// ----- BINDING -----14831484void RenderingDeviceDriverMetal::command_bind_push_constants(CommandBufferID p_cmd_buffer, ShaderID p_shader, uint32_t p_dst_first_index, VectorView<uint32_t> p_data) {1485MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1486cb->encode_push_constant_data(p_shader, p_data);1487}14881489// ----- CACHE -----14901491String RenderingDeviceDriverMetal::_pipeline_get_cache_path() const {1492String path = OS::get_singleton()->get_user_data_dir() + "/metal/pipelines";1493path += "." 
+ context_device.name.validate_filename().replace_char(' ', '_').to_lower();1494if (Engine::get_singleton()->is_editor_hint()) {1495path += ".editor";1496}1497path += ".cache";14981499return path;1500}15011502bool RenderingDeviceDriverMetal::pipeline_cache_create(const Vector<uint8_t> &p_data) {1503return false;1504// TODO: Convert to metal-cpp when pipeline caching is re-enabled1505// CharString path = _pipeline_get_cache_path().utf8();1506// NS::SharedPtr<MTL::BinaryArchiveDescriptor> desc = NS::TransferPtr(MTL::BinaryArchiveDescriptor::alloc()->init());1507// NS::Error *error = nullptr;1508// archive = NS::TransferPtr(device->newBinaryArchive(desc.get(), &error));1509// return true;1510}15111512void RenderingDeviceDriverMetal::pipeline_cache_free() {1513archive = nullptr;1514}15151516size_t RenderingDeviceDriverMetal::pipeline_cache_query_size() {1517return archive_count * 1024;1518}15191520Vector<uint8_t> RenderingDeviceDriverMetal::pipeline_cache_serialize() {1521if (!archive) {1522return Vector<uint8_t>();1523}15241525// TODO: Convert to metal-cpp when pipeline caching is re-enabled1526// CharString path = _pipeline_get_cache_path().utf8();1527// NS::URL *target = NS::URL::fileURLWithPath(NS::String::string(path.get_data(), NS::UTF8StringEncoding));1528// NS::Error *error = nullptr;1529// if (archive->serializeToURL(target, &error)) {1530// return Vector<uint8_t>();1531// } else {1532// print_line(error->localizedDescription()->utf8String());1533// return Vector<uint8_t>();1534// }1535return Vector<uint8_t>();1536}15371538#pragma mark - Rendering15391540// ----- SUBPASS -----15411542RDD::RenderPassID RenderingDeviceDriverMetal::render_pass_create(VectorView<Attachment> p_attachments, VectorView<Subpass> p_subpasses, VectorView<SubpassDependency> p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) {1543PixelFormats &pf = *pixel_formats;15441545size_t subpass_count = p_subpasses.size();15461547Vector<MDSubpass> 
subpasses;1548subpasses.resize(subpass_count);1549for (uint32_t i = 0; i < subpass_count; i++) {1550MDSubpass &subpass = subpasses.write[i];1551subpass.subpass_index = i;1552subpass.view_count = p_view_count;1553subpass.input_references = p_subpasses[i].input_references;1554subpass.color_references = p_subpasses[i].color_references;1555subpass.depth_stencil_reference = p_subpasses[i].depth_stencil_reference;1556subpass.resolve_references = p_subpasses[i].resolve_references;1557}15581559static const MTL::LoadAction LOAD_ACTIONS[] = {1560[ATTACHMENT_LOAD_OP_LOAD] = MTL::LoadActionLoad,1561[ATTACHMENT_LOAD_OP_CLEAR] = MTL::LoadActionClear,1562[ATTACHMENT_LOAD_OP_DONT_CARE] = MTL::LoadActionDontCare,1563};15641565static const MTL::StoreAction STORE_ACTIONS[] = {1566[ATTACHMENT_STORE_OP_STORE] = MTL::StoreActionStore,1567[ATTACHMENT_STORE_OP_DONT_CARE] = MTL::StoreActionDontCare,1568};15691570Vector<MDAttachment> attachments;1571attachments.resize(p_attachments.size());15721573for (uint32_t i = 0; i < p_attachments.size(); i++) {1574Attachment const &a = p_attachments[i];1575MDAttachment &mda = attachments.write[i];1576MTL::PixelFormat format = pf.getMTLPixelFormat(a.format);1577mda.format = format;1578if (a.samples > TEXTURE_SAMPLES_1) {1579mda.samples = (*device_properties).find_nearest_supported_sample_count(a.samples);1580}1581mda.loadAction = LOAD_ACTIONS[a.load_op];1582mda.storeAction = STORE_ACTIONS[a.store_op];1583bool is_depth = pf.isDepthFormat(format);1584if (is_depth) {1585mda.type |= MDAttachmentType::Depth;1586}1587bool is_stencil = pf.isStencilFormat(format);1588if (is_stencil) {1589mda.type |= MDAttachmentType::Stencil;1590mda.stencilLoadAction = LOAD_ACTIONS[a.stencil_load_op];1591mda.stencilStoreAction = STORE_ACTIONS[a.stencil_store_op];1592}1593if (!is_depth && !is_stencil) {1594mda.type |= MDAttachmentType::Color;1595}1596}1597MDRenderPass *obj = memnew(MDRenderPass(attachments, subpasses));1598return RenderPassID(obj);1599}16001601void 
RenderingDeviceDriverMetal::render_pass_free(RenderPassID p_render_pass) {1602MDRenderPass *obj = (MDRenderPass *)(p_render_pass.id);1603memdelete(obj);1604}16051606// ----- COMMANDS -----16071608void RenderingDeviceDriverMetal::command_begin_render_pass(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, FramebufferID p_framebuffer, CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView<RenderPassClearValue> p_clear_values) {1609MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1610cb->render_begin_pass(p_render_pass, p_framebuffer, p_cmd_buffer_type, p_rect, p_clear_values);1611}16121613void RenderingDeviceDriverMetal::command_end_render_pass(CommandBufferID p_cmd_buffer) {1614MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1615cb->render_end_pass();1616}16171618void RenderingDeviceDriverMetal::command_next_render_subpass(CommandBufferID p_cmd_buffer, CommandBufferType p_cmd_buffer_type) {1619MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1620cb->render_next_subpass();1621}16221623void RenderingDeviceDriverMetal::command_render_set_viewport(CommandBufferID p_cmd_buffer, VectorView<Rect2i> p_viewports) {1624MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1625cb->render_set_viewport(p_viewports);1626}16271628void RenderingDeviceDriverMetal::command_render_set_scissor(CommandBufferID p_cmd_buffer, VectorView<Rect2i> p_scissors) {1629MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1630cb->render_set_scissor(p_scissors);1631}16321633void RenderingDeviceDriverMetal::command_render_clear_attachments(CommandBufferID p_cmd_buffer, VectorView<AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects) {1634MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1635cb->render_clear_attachments(p_attachment_clears, p_rects);1636}16371638void RenderingDeviceDriverMetal::command_bind_render_pipeline(CommandBufferID p_cmd_buffer, PipelineID 
p_pipeline) {1639MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1640cb->bind_pipeline(p_pipeline);1641}16421643void RenderingDeviceDriverMetal::command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) {1644MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1645cb->render_bind_uniform_sets(p_uniform_sets, p_shader, p_first_set_index, p_set_count, p_dynamic_offsets);1646}16471648void RenderingDeviceDriverMetal::command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) {1649MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1650cb->render_draw(p_vertex_count, p_instance_count, p_base_vertex, p_first_instance);1651}16521653void RenderingDeviceDriverMetal::command_render_draw_indexed(CommandBufferID p_cmd_buffer, uint32_t p_index_count, uint32_t p_instance_count, uint32_t p_first_index, int32_t p_vertex_offset, uint32_t p_first_instance) {1654MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1655cb->render_draw_indexed(p_index_count, p_instance_count, p_first_index, p_vertex_offset, p_first_instance);1656}16571658void RenderingDeviceDriverMetal::command_render_draw_indexed_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {1659MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1660cb->render_draw_indexed_indirect(p_indirect_buffer, p_offset, p_draw_count, p_stride);1661}16621663void RenderingDeviceDriverMetal::command_render_draw_indexed_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) {1664MDCommandBufferBase *cb = 
(MDCommandBufferBase *)(p_cmd_buffer.id);1665cb->render_draw_indexed_indirect_count(p_indirect_buffer, p_offset, p_count_buffer, p_count_buffer_offset, p_max_draw_count, p_stride);1666}16671668void RenderingDeviceDriverMetal::command_render_draw_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {1669MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1670cb->render_draw_indirect(p_indirect_buffer, p_offset, p_draw_count, p_stride);1671}16721673void RenderingDeviceDriverMetal::command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) {1674MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1675cb->render_draw_indirect_count(p_indirect_buffer, p_offset, p_count_buffer, p_count_buffer_offset, p_max_draw_count, p_stride);1676}16771678void RenderingDeviceDriverMetal::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) {1679MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1680cb->render_bind_vertex_buffers(p_binding_count, p_buffers, p_offsets, p_dynamic_offsets);1681}16821683void RenderingDeviceDriverMetal::command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) {1684MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1685cb->render_bind_index_buffer(p_buffer, p_format, p_offset);1686}16871688void RenderingDeviceDriverMetal::command_render_set_blend_constants(CommandBufferID p_cmd_buffer, const Color &p_constants) {1689MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);1690cb->render_set_blend_constants(p_constants);1691}16921693void 
RenderingDeviceDriverMetal::command_render_set_line_width(CommandBufferID p_cmd_buffer, float p_width) {
	// Nothing is recorded into the command buffer; any width other than ~1.0 is reported as an error.
	if (!Math::is_equal_approx(p_width, 1.0f)) {
		ERR_FAIL_MSG("Setting line widths other than 1.0 is not supported by the Metal rendering driver.");
	}
}

// ----- PIPELINE -----

// Looks up p_name in p_library and, when the function declares function constants,
// re-creates it specialized with the matching values from p_specialization_constants.
// Returns the function on success or ERR_CANT_CREATE when the library or function is missing
// or specialization fails.
// NOTE(review): the "No function named main0" message is hard-coded although the name comes
// from p_name - confirm all callers pass "main0".
RenderingDeviceDriverMetal::Result<NS::SharedPtr<MTL::Function>> RenderingDeviceDriverMetal::_create_function(MDLibrary *p_library, NS::String *p_name, VectorView<PipelineSpecializationConstant> &p_specialization_constants) {
	MTL::Library *library = p_library->get_library();
	if (!library) {
		ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Failed to compile Metal library");
	}

	MTL::Function *function = library->newFunction(p_name);
	ERR_FAIL_NULL_V_MSG(function, ERR_CANT_CREATE, "No function named main0");

	// No function constants declared: the unspecialized function is returned as is.
	NS::Dictionary *constants_dict = function->functionConstantsDictionary();
	if (constants_dict->count() == 0) {
		return NS::TransferPtr(function);
	}

	// Collect the function's declared constants from the dictionary.
	LocalVector<MTL::FunctionConstant *> constants;
	NS::Enumerator<NS::String> *keys = constants_dict->keyEnumerator<NS::String>();
	while (NS::String *key = keys->nextObject()) {
		constants.push_back(constants_dict->object<MTL::FunctionConstant>(key));
	}

	// Check if already sorted by index.
	bool is_sorted = true;
	for (NS::UInteger i = 1; i < constants.size(); i++) {
		MTL::FunctionConstant *prev = constants[i - 1];
		MTL::FunctionConstant *curr = constants[i];
		if (prev->index() > curr->index()) {
			is_sorted = false;
			break;
		}
	}

	if (!is_sorted) {
		struct Comparator {
			bool operator()(const MTL::FunctionConstant *p, const MTL::FunctionConstant *q) const {
				return p->index() < q->index();
			}
		};

		constants.sort_custom<Comparator>();
	}

	// Build a sorted list of specialization constants by constant_id.
	// The caller's view is left untouched; only this stack-allocated index permutation is sorted.
	uint32_t *indexes = (uint32_t *)alloca(p_specialization_constants.size() * sizeof(uint32_t));
	for (uint32_t i = 0; i < p_specialization_constants.size(); i++) {
		indexes[i] = i;
	}
	std::sort(indexes, &indexes[p_specialization_constants.size()], [&](int a, int b) {
		return p_specialization_constants[a].constant_id < p_specialization_constants[b].constant_id;
	});

	NS::SharedPtr<MTL::FunctionConstantValues> constantValues = NS::TransferPtr(MTL::FunctionConstantValues::alloc()->init());

	// Merge the sorted constants from the function with the sorted user constants.
	// i walks the function's constants, j walks the user constants; a value is set only
	// when both sides agree on the ID.
	NS::UInteger i = 0;
	uint32_t j = 0;
	while (i < constants.size() && j < p_specialization_constants.size()) {
		MTL::FunctionConstant *curr = (MTL::FunctionConstant *)constants[i];
		PipelineSpecializationConstant const &sc = p_specialization_constants[indexes[j]];
		if (curr->index() == sc.constant_id) {
			switch (curr->type()) {
				case MTL::DataTypeBool:
				case MTL::DataTypeFloat:
				case MTL::DataTypeInt:
				case MTL::DataTypeUInt: {
					// The address of int_value is passed for every supported type.
					// NOTE(review): presumably int_value aliases the bool/float members - confirm.
					constantValues->setConstantValue(&sc.int_value, curr->type(), sc.constant_id);
				} break;
				default:
					// Reports the error but still returns the unspecialized function.
					ERR_FAIL_V_MSG(NS::TransferPtr(function), "Invalid specialization constant type");
			}
			i++;
			j++;
		} else if (curr->index() < sc.constant_id) {
			i++;
		} else {
			j++;
		}
	}

	// Handle R32UI_ALIGNMENT_CONSTANT_ID if present.
	// NOTE(review): only the constant at the position where the merge stopped is inspected;
	// the alignment constant is missed if other unmatched constants precede it - confirm intended.
	if (i < constants.size()) {
		MTL::FunctionConstant *curr = constants[i];
		if (curr->index() == R32UI_ALIGNMENT_CONSTANT_ID) {
			uint32_t alignment = 16; // TODO(sgc): is this always correct?
			constantValues->setConstantValue(&alignment, curr->type(), curr->index());
			i++;
		}
	}

	// Drop the unspecialized function before creating the specialized replacement.
	NS::Error *err = nullptr;
	function->release();
	function = library->newFunction(p_name, constantValues.get(), &err);
	ERR_FAIL_NULL_V_MSG(function, ERR_CANT_CREATE, String("specialized function failed: ") + (err ? 
err->localizedDescription()->utf8String() : "unknown error"));17931794return NS::TransferPtr(function);1795}17961797// RDD::PolygonCullMode == MTL::CullMode.1798static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_DISABLED, MTL::CullModeNone));1799static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_FRONT, MTL::CullModeFront));1800static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_BACK, MTL::CullModeBack));18011802// RDD::StencilOperation == MTL::StencilOperation.1803static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_KEEP, MTL::StencilOperationKeep));1804static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_ZERO, MTL::StencilOperationZero));1805static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_REPLACE, MTL::StencilOperationReplace));1806static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INCREMENT_AND_CLAMP, MTL::StencilOperationIncrementClamp));1807static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_DECREMENT_AND_CLAMP, MTL::StencilOperationDecrementClamp));1808static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INVERT, MTL::StencilOperationInvert));1809static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INCREMENT_AND_WRAP, MTL::StencilOperationIncrementWrap));1810static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_DECREMENT_AND_WRAP, MTL::StencilOperationDecrementWrap));18111812// RDD::BlendOperation == MTL::BlendOperation.1813static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_ADD, MTL::BlendOperationAdd));1814static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_SUBTRACT, MTL::BlendOperationSubtract));1815static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_REVERSE_SUBTRACT, MTL::BlendOperationReverseSubtract));1816static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_MINIMUM, MTL::BlendOperationMin));1817static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_MAXIMUM, MTL::BlendOperationMax));18181819RDD::PipelineID RenderingDeviceDriverMetal::render_pipeline_create(1820ShaderID p_shader,1821VertexFormatID p_vertex_format,1822RenderPrimitive p_render_primitive,1823PipelineRasterizationState 
p_rasterization_state,1824PipelineMultisampleState p_multisample_state,1825PipelineDepthStencilState p_depth_stencil_state,1826PipelineColorBlendState p_blend_state,1827VectorView<int32_t> p_color_attachments,1828BitField<PipelineDynamicStateFlags> p_dynamic_state,1829RenderPassID p_render_pass,1830uint32_t p_render_subpass,1831VectorView<PipelineSpecializationConstant> p_specialization_constants) {1832MDRenderShader *shader = (MDRenderShader *)(p_shader.id);1833MTL::VertexDescriptor *vert_desc = reinterpret_cast<MTL::VertexDescriptor *>(p_vertex_format.id);1834MDRenderPass *pass = (MDRenderPass *)(p_render_pass.id);18351836os_signpost_id_t reflect_id = os_signpost_id_make_with_pointer(LOG_INTERVALS, shader);1837os_signpost_interval_begin(LOG_INTERVALS, reflect_id, "render_pipeline_create", "shader_name=%{public}s", shader->name.get_data());1838DEFER([=]() {1839os_signpost_interval_end(LOG_INTERVALS, reflect_id, "render_pipeline_create");1840});18411842os_signpost_event_emit(LOG_DRIVER, OS_SIGNPOST_ID_EXCLUSIVE, "create_pipeline");18431844NS::SharedPtr<MTL::RenderPipelineDescriptor> desc = NS::TransferPtr(MTL::RenderPipelineDescriptor::alloc()->init());18451846{1847MDSubpass const &subpass = pass->subpasses[p_render_subpass];1848for (uint32_t i = 0; i < subpass.color_references.size(); i++) {1849uint32_t attachment = subpass.color_references[i].attachment;1850if (attachment != AttachmentReference::UNUSED) {1851MDAttachment const &a = pass->attachments[attachment];1852desc->colorAttachments()->object(i)->setPixelFormat(a.format);1853}1854}18551856if (subpass.depth_stencil_reference.attachment != AttachmentReference::UNUSED) {1857uint32_t attachment = subpass.depth_stencil_reference.attachment;1858MDAttachment const &a = pass->attachments[attachment];18591860if (a.type & MDAttachmentType::Depth) {1861desc->setDepthAttachmentPixelFormat(a.format);1862}18631864if (a.type & MDAttachmentType::Stencil) 
{1865desc->setStencilAttachmentPixelFormat(a.format);1866}1867}1868}18691870desc->setVertexDescriptor(vert_desc);1871desc->setLabel(conv::to_nsstring(shader->name));18721873if (shader->uses_argument_buffers) {1874// Set mutability of argument buffers.1875for (uint32_t i = 0; i < shader->sets.size(); i++) {1876const UniformSet &set = shader->sets[i];1877const MTL::Mutability mutability = set.dynamic_uniforms.is_empty() ? MTL::MutabilityImmutable : MTL::MutabilityMutable;1878desc->vertexBuffers()->object(i)->setMutability(mutability);1879desc->fragmentBuffers()->object(i)->setMutability(mutability);1880}1881}18821883// Input assembly & tessellation.18841885MDRenderPipeline *pipeline = new MDRenderPipeline();18861887switch (p_render_primitive) {1888case RENDER_PRIMITIVE_POINTS:1889desc->setInputPrimitiveTopology(MTL::PrimitiveTopologyClassPoint);1890break;1891case RENDER_PRIMITIVE_LINES:1892case RENDER_PRIMITIVE_LINES_WITH_ADJACENCY:1893case RENDER_PRIMITIVE_LINESTRIPS_WITH_ADJACENCY:1894case RENDER_PRIMITIVE_LINESTRIPS:1895desc->setInputPrimitiveTopology(MTL::PrimitiveTopologyClassLine);1896break;1897case RENDER_PRIMITIVE_TRIANGLES:1898case RENDER_PRIMITIVE_TRIANGLE_STRIPS:1899case RENDER_PRIMITIVE_TRIANGLES_WITH_ADJACENCY:1900case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_AJACENCY:1901case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_RESTART_INDEX:1902desc->setInputPrimitiveTopology(MTL::PrimitiveTopologyClassTriangle);1903break;1904case RENDER_PRIMITIVE_TESSELATION_PATCH:1905desc->setMaxTessellationFactor(p_rasterization_state.patch_control_points);1906desc->setTessellationPartitionMode(MTL::TessellationPartitionModeInteger);1907ERR_FAIL_V_MSG(PipelineID(), "tessellation not implemented");1908break;1909case RENDER_PRIMITIVE_MAX:1910default:1911desc->setInputPrimitiveTopology(MTL::PrimitiveTopologyClassUnspecified);1912break;1913}19141915switch (p_render_primitive) {1916case RENDER_PRIMITIVE_POINTS:1917pipeline->raster_state.render_primitive = 
MTL::PrimitiveTypePoint;1918break;1919case RENDER_PRIMITIVE_LINES:1920case RENDER_PRIMITIVE_LINES_WITH_ADJACENCY:1921pipeline->raster_state.render_primitive = MTL::PrimitiveTypeLine;1922break;1923case RENDER_PRIMITIVE_LINESTRIPS:1924case RENDER_PRIMITIVE_LINESTRIPS_WITH_ADJACENCY:1925pipeline->raster_state.render_primitive = MTL::PrimitiveTypeLineStrip;1926break;1927case RENDER_PRIMITIVE_TRIANGLES:1928case RENDER_PRIMITIVE_TRIANGLES_WITH_ADJACENCY:1929pipeline->raster_state.render_primitive = MTL::PrimitiveTypeTriangle;1930break;1931case RENDER_PRIMITIVE_TRIANGLE_STRIPS:1932case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_AJACENCY:1933case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_RESTART_INDEX:1934pipeline->raster_state.render_primitive = MTL::PrimitiveTypeTriangleStrip;1935break;1936default:1937break;1938}19391940// Rasterization.1941desc->setRasterizationEnabled(!p_rasterization_state.discard_primitives);1942pipeline->raster_state.clip_mode = p_rasterization_state.enable_depth_clamp ? MTL::DepthClipModeClamp : MTL::DepthClipModeClip;1943pipeline->raster_state.fill_mode = p_rasterization_state.wireframe ? MTL::TriangleFillModeLines : MTL::TriangleFillModeFill;19441945static const MTL::CullMode CULL_MODE[3] = {1946MTL::CullModeNone,1947MTL::CullModeFront,1948MTL::CullModeBack,1949};1950pipeline->raster_state.cull_mode = CULL_MODE[p_rasterization_state.cull_mode];1951pipeline->raster_state.winding = (p_rasterization_state.front_face == POLYGON_FRONT_FACE_CLOCKWISE) ? 
MTL::WindingClockwise : MTL::WindingCounterClockwise;1952pipeline->raster_state.depth_bias.enabled = p_rasterization_state.depth_bias_enabled;1953pipeline->raster_state.depth_bias.depth_bias = p_rasterization_state.depth_bias_constant_factor;1954pipeline->raster_state.depth_bias.slope_scale = p_rasterization_state.depth_bias_slope_factor;1955pipeline->raster_state.depth_bias.clamp = p_rasterization_state.depth_bias_clamp;1956// In Metal there is no line width.1957if (!Math::is_equal_approx(p_rasterization_state.line_width, 1.0f)) {1958WARN_PRINT("unsupported: line width");1959}19601961// Multisample.1962if (p_multisample_state.enable_sample_shading) {1963WARN_PRINT("unsupported: multi-sample shading");1964}19651966if (p_multisample_state.sample_count > TEXTURE_SAMPLES_1) {1967pipeline->sample_count = (*device_properties).find_nearest_supported_sample_count(p_multisample_state.sample_count);1968}1969desc->setRasterSampleCount(static_cast<NS::UInteger>(pipeline->sample_count));1970desc->setAlphaToCoverageEnabled(p_multisample_state.enable_alpha_to_coverage);1971desc->setAlphaToOneEnabled(p_multisample_state.enable_alpha_to_one);19721973// Depth buffer.1974bool depth_enabled = p_depth_stencil_state.enable_depth_test && desc->depthAttachmentPixelFormat() != MTL::PixelFormatInvalid;1975bool stencil_enabled = p_depth_stencil_state.enable_stencil && desc->stencilAttachmentPixelFormat() != MTL::PixelFormatInvalid;19761977if (depth_enabled || stencil_enabled) {1978NS::SharedPtr<MTL::DepthStencilDescriptor> ds_desc = NS::TransferPtr(MTL::DepthStencilDescriptor::alloc()->init());19791980pipeline->raster_state.depth_test.enabled = depth_enabled;1981ds_desc->setDepthWriteEnabled(p_depth_stencil_state.enable_depth_write);1982ds_desc->setDepthCompareFunction(COMPARE_OPERATORS[p_depth_stencil_state.depth_compare_operator]);1983if (p_depth_stencil_state.enable_depth_range) {1984WARN_PRINT("unsupported: depth range");1985}19861987if (stencil_enabled) 
{1988pipeline->raster_state.stencil.enabled = true;1989pipeline->raster_state.stencil.front_reference = p_depth_stencil_state.front_op.reference;1990pipeline->raster_state.stencil.back_reference = p_depth_stencil_state.back_op.reference;19911992{1993// Front.1994NS::SharedPtr<MTL::StencilDescriptor> sd = NS::TransferPtr(MTL::StencilDescriptor::alloc()->init());1995sd->setStencilFailureOperation(STENCIL_OPERATIONS[p_depth_stencil_state.front_op.fail]);1996sd->setDepthStencilPassOperation(STENCIL_OPERATIONS[p_depth_stencil_state.front_op.pass]);1997sd->setDepthFailureOperation(STENCIL_OPERATIONS[p_depth_stencil_state.front_op.depth_fail]);1998sd->setStencilCompareFunction(COMPARE_OPERATORS[p_depth_stencil_state.front_op.compare]);1999sd->setReadMask(p_depth_stencil_state.front_op.compare_mask);2000sd->setWriteMask(p_depth_stencil_state.front_op.write_mask);2001ds_desc->setFrontFaceStencil(sd.get());2002}2003{2004// Back.2005NS::SharedPtr<MTL::StencilDescriptor> sd = NS::TransferPtr(MTL::StencilDescriptor::alloc()->init());2006sd->setStencilFailureOperation(STENCIL_OPERATIONS[p_depth_stencil_state.back_op.fail]);2007sd->setDepthStencilPassOperation(STENCIL_OPERATIONS[p_depth_stencil_state.back_op.pass]);2008sd->setDepthFailureOperation(STENCIL_OPERATIONS[p_depth_stencil_state.back_op.depth_fail]);2009sd->setStencilCompareFunction(COMPARE_OPERATORS[p_depth_stencil_state.back_op.compare]);2010sd->setReadMask(p_depth_stencil_state.back_op.compare_mask);2011sd->setWriteMask(p_depth_stencil_state.back_op.write_mask);2012ds_desc->setBackFaceStencil(sd.get());2013}2014}20152016pipeline->depth_stencil = NS::TransferPtr(device->newDepthStencilState(ds_desc.get()));2017ERR_FAIL_COND_V_MSG(!pipeline->depth_stencil, PipelineID(), "Failed to create depth stencil state");2018} else {2019// TODO(sgc): FB13671991 raised as Apple docs state calling setDepthStencilState:nil is valid, but currently generates an exception2020pipeline->depth_stencil = 
NS::RetainPtr(get_resource_cache().get_depth_stencil_state(false, false));2021}20222023// Blend state.2024{2025for (uint32_t i = 0; i < p_color_attachments.size(); i++) {2026if (p_color_attachments[i] == ATTACHMENT_UNUSED) {2027continue;2028}20292030const PipelineColorBlendState::Attachment &bs = p_blend_state.attachments[i];20312032MTL::RenderPipelineColorAttachmentDescriptor *ca_desc = desc->colorAttachments()->object(p_color_attachments[i]);2033ca_desc->setBlendingEnabled(bs.enable_blend);20342035ca_desc->setSourceRGBBlendFactor(BLEND_FACTORS[bs.src_color_blend_factor]);2036ca_desc->setDestinationRGBBlendFactor(BLEND_FACTORS[bs.dst_color_blend_factor]);2037ca_desc->setRgbBlendOperation(BLEND_OPERATIONS[bs.color_blend_op]);20382039ca_desc->setSourceAlphaBlendFactor(BLEND_FACTORS[bs.src_alpha_blend_factor]);2040ca_desc->setDestinationAlphaBlendFactor(BLEND_FACTORS[bs.dst_alpha_blend_factor]);2041ca_desc->setAlphaBlendOperation(BLEND_OPERATIONS[bs.alpha_blend_op]);20422043MTL::ColorWriteMask writeMask = MTL::ColorWriteMaskNone;2044if (bs.write_r) {2045writeMask |= MTL::ColorWriteMaskRed;2046}2047if (bs.write_g) {2048writeMask |= MTL::ColorWriteMaskGreen;2049}2050if (bs.write_b) {2051writeMask |= MTL::ColorWriteMaskBlue;2052}2053if (bs.write_a) {2054writeMask |= MTL::ColorWriteMaskAlpha;2055}2056ca_desc->setWriteMask(writeMask);2057}20582059pipeline->raster_state.blend.r = p_blend_state.blend_constant.r;2060pipeline->raster_state.blend.g = p_blend_state.blend_constant.g;2061pipeline->raster_state.blend.b = p_blend_state.blend_constant.b;2062pipeline->raster_state.blend.a = p_blend_state.blend_constant.a;2063}20642065// Dynamic state.20662067if (p_dynamic_state.has_flag(DYNAMIC_STATE_DEPTH_BIAS)) {2068pipeline->raster_state.depth_bias.enabled = true;2069}20702071if (p_dynamic_state.has_flag(DYNAMIC_STATE_BLEND_CONSTANTS)) {2072pipeline->raster_state.blend.enabled = true;2073}20742075if (p_dynamic_state.has_flag(DYNAMIC_STATE_DEPTH_BOUNDS)) {2076// TODO(sgc): 
??2077}20782079if (p_dynamic_state.has_flag(DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {2080// TODO(sgc): ??2081}20822083if (p_dynamic_state.has_flag(DYNAMIC_STATE_STENCIL_WRITE_MASK)) {2084// TODO(sgc): ??2085}20862087if (p_dynamic_state.has_flag(DYNAMIC_STATE_STENCIL_REFERENCE)) {2088pipeline->raster_state.stencil.enabled = true;2089}20902091if (shader->vert) {2092Result<NS::SharedPtr<MTL::Function>> function_or_err = _create_function(shader->vert.get(), MTLSTR("main0"), p_specialization_constants);2093ERR_FAIL_COND_V(std::holds_alternative<Error>(function_or_err), PipelineID());2094desc->setVertexFunction(std::get<NS::SharedPtr<MTL::Function>>(function_or_err).get());2095}20962097if (shader->frag) {2098Result<NS::SharedPtr<MTL::Function>> function_or_err = _create_function(shader->frag.get(), MTLSTR("main0"), p_specialization_constants);2099ERR_FAIL_COND_V(std::holds_alternative<Error>(function_or_err), PipelineID());2100desc->setFragmentFunction(std::get<NS::SharedPtr<MTL::Function>>(function_or_err).get());2101}21022103MTL::PipelineOption options = MTL::PipelineOptionNone;2104MTL::BinaryArchive *arc = archive.get();2105if (arc) {2106NS::SharedPtr<NS::Array> archives = NS::TransferPtr(NS::Array::array(reinterpret_cast<NS::Object *const *>(&arc), 1)->retain());2107desc->setBinaryArchives(archives.get());2108if (archive_fail_on_miss) {2109options |= MTL::PipelineOptionFailOnBinaryArchiveMiss;2110}2111}21122113NS::Error *error = nullptr;2114pipeline->state = NS::TransferPtr(device->newRenderPipelineState(desc.get(), options, nullptr, &error));2115pipeline->shader = shader;21162117ERR_FAIL_COND_V_MSG(error != nullptr, PipelineID(), String("error creating pipeline: ") + error->localizedDescription()->utf8String());2118ERR_FAIL_COND_V_MSG(!pipeline->state, PipelineID(), "Failed to create render pipeline state");21192120if (arc) {2121if (arc->addRenderPipelineFunctions(desc.get(), &error)) {2122archive_count += 1;2123} else 
{2124print_error(error->localizedDescription()->utf8String());2125}2126}21272128return PipelineID(pipeline);2129}21302131#pragma mark - Compute21322133// ----- COMMANDS -----21342135void RenderingDeviceDriverMetal::command_bind_compute_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) {2136MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);2137cb->bind_pipeline(p_pipeline);2138}21392140void RenderingDeviceDriverMetal::command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) {2141MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);2142cb->compute_bind_uniform_sets(p_uniform_sets, p_shader, p_first_set_index, p_set_count, p_dynamic_offsets);2143}21442145void RenderingDeviceDriverMetal::command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {2146MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);2147cb->compute_dispatch(p_x_groups, p_y_groups, p_z_groups);2148}21492150void RenderingDeviceDriverMetal::command_compute_dispatch_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset) {2151MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);2152cb->compute_dispatch_indirect(p_indirect_buffer, p_offset);2153}21542155// ----- PIPELINE -----21562157RDD::PipelineID RenderingDeviceDriverMetal::compute_pipeline_create(ShaderID p_shader, VectorView<PipelineSpecializationConstant> p_specialization_constants) {2158MDComputeShader *shader = (MDComputeShader *)(p_shader.id);21592160os_signpost_id_t reflect_id = os_signpost_id_make_with_pointer(LOG_INTERVALS, shader);2161os_signpost_interval_begin(LOG_INTERVALS, reflect_id, "compute_pipeline_create", "shader_name=%{public}s", shader->name.get_data());2162DEFER([=]() {2163os_signpost_interval_end(LOG_INTERVALS, reflect_id, 
"compute_pipeline_create");2164});21652166os_signpost_event_emit(LOG_DRIVER, OS_SIGNPOST_ID_EXCLUSIVE, "create_pipeline");21672168Result<NS::SharedPtr<MTL::Function>> function_or_err = _create_function(shader->kernel.get(), MTLSTR("main0"), p_specialization_constants);2169ERR_FAIL_COND_V(std::holds_alternative<Error>(function_or_err), PipelineID());2170NS::SharedPtr<MTL::Function> function = std::get<NS::SharedPtr<MTL::Function>>(function_or_err);21712172NS::SharedPtr<MTL::ComputePipelineDescriptor> desc = NS::TransferPtr(MTL::ComputePipelineDescriptor::alloc()->init());2173desc->setComputeFunction(function.get());2174desc->setLabel(conv::to_nsstring(shader->name));21752176if (shader->uses_argument_buffers) {2177// Set mutability of argument buffers.2178for (uint32_t i = 0; i < shader->sets.size(); i++) {2179const UniformSet &set = shader->sets[i];2180const MTL::Mutability mutability = set.dynamic_uniforms.is_empty() ? MTL::MutabilityImmutable : MTL::MutabilityMutable;2181desc->buffers()->object(i)->setMutability(mutability);2182}2183}21842185MTL::PipelineOption options = MTL::PipelineOptionNone;2186MTL::BinaryArchive *arc = archive.get();2187if (arc) {2188NS::SharedPtr<NS::Array> archives = NS::TransferPtr(NS::Array::array(reinterpret_cast<NS::Object *const *>(&arc), 1)->retain());2189desc->setBinaryArchives(archives.get());2190if (archive_fail_on_miss) {2191options |= MTL::PipelineOptionFailOnBinaryArchiveMiss;2192}2193}21942195NS::Error *error = nullptr;2196NS::SharedPtr<MTL::ComputePipelineState> state = NS::TransferPtr(device->newComputePipelineState(desc.get(), options, nullptr, &error));2197ERR_FAIL_COND_V_MSG(error != nullptr, PipelineID(), String("error creating pipeline: ") + error->localizedDescription()->utf8String());2198ERR_FAIL_COND_V_MSG(!state, PipelineID(), "Failed to create compute pipeline state");21992200MDComputePipeline *pipeline = new MDComputePipeline(state);2201pipeline->compute_state.local = shader->local;2202pipeline->shader = 
shader;22032204if (arc) {2205if (arc->addComputePipelineFunctions(desc.get(), &error)) {2206archive_count += 1;2207} else {2208print_error(error->localizedDescription()->utf8String());2209}2210}22112212return PipelineID(pipeline);2213}22142215#pragma mark - Raytracing22162217// ----- ACCELERATION STRUCTURE -----22182219RDD::AccelerationStructureID RenderingDeviceDriverMetal::blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, uint32_t p_position_attribute_location, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset_bytes, uint32_t p_index_coun, BitField<AccelerationStructureGeometryBits> p_geometry_bits) {2220ERR_FAIL_V_MSG(AccelerationStructureID(), "Ray tracing is not currently supported by the Metal driver.");2221}22222223uint32_t RenderingDeviceDriverMetal::tlas_instances_buffer_get_size_bytes(uint32_t p_instance_count) {2224ERR_FAIL_V_MSG(0, "Ray tracing is not currently supported by the Metal driver.");2225}22262227void RenderingDeviceDriverMetal::tlas_instances_buffer_fill(BufferID p_instances_buffer, VectorView<AccelerationStructureID> p_blases, VectorView<Transform3D> p_transforms) {2228ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver.");2229}22302231RDD::AccelerationStructureID RenderingDeviceDriverMetal::tlas_create(BufferID p_instance_buffer) {2232ERR_FAIL_V_MSG(AccelerationStructureID(), "Ray tracing is not currently supported by the Metal driver.");2233}22342235void RenderingDeviceDriverMetal::acceleration_structure_free(RDD::AccelerationStructureID p_acceleration_structure) {2236ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver.");2237}22382239uint32_t RenderingDeviceDriverMetal::acceleration_structure_get_scratch_size_bytes(AccelerationStructureID p_acceleration_structure) {2240ERR_FAIL_V_MSG(0, "Ray tracing is not currently supported by the Metal driver.");2241}22422243// ----- PIPELINE 
-----22442245RDD::RaytracingPipelineID RenderingDeviceDriverMetal::raytracing_pipeline_create(ShaderID p_shader, VectorView<PipelineSpecializationConstant> p_specialization_constants) {2246ERR_FAIL_V_MSG(RaytracingPipelineID(), "Ray tracing is not currently supported by the Metal driver.");2247}22482249void RenderingDeviceDriverMetal::raytracing_pipeline_free(RDD::RaytracingPipelineID p_pipeline) {2250ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver.");2251}22522253// ----- COMMANDS -----22542255void RenderingDeviceDriverMetal::command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure, BufferID p_scratch_buffer) {2256ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver.");2257}22582259void RenderingDeviceDriverMetal::command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) {2260ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver.");2261}22622263void RenderingDeviceDriverMetal::command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {2264ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver.");2265}22662267void RenderingDeviceDriverMetal::command_trace_rays(CommandBufferID p_cmd_buffer, uint32_t p_width, uint32_t p_height) {2268ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver.");2269}22702271#pragma mark - Queries22722273// ----- TIMESTAMP -----22742275RDD::QueryPoolID RenderingDeviceDriverMetal::timestamp_query_pool_create(uint32_t p_query_count) {2276return QueryPoolID(1);2277}22782279void RenderingDeviceDriverMetal::timestamp_query_pool_free(QueryPoolID p_pool_id) {2280}22812282void RenderingDeviceDriverMetal::timestamp_query_pool_get_results(QueryPoolID p_pool_id, uint32_t p_query_count, uint64_t *r_results) {2283// Metal doesn't support timestamp queries, so we just clear the 
buffer.2284bzero(r_results, p_query_count * sizeof(uint64_t));2285}22862287uint64_t RenderingDeviceDriverMetal::timestamp_query_result_to_time(uint64_t p_result) {2288return p_result;2289}22902291void RenderingDeviceDriverMetal::command_timestamp_query_pool_reset(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_query_count) {2292}22932294void RenderingDeviceDriverMetal::command_timestamp_write(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_index) {2295}22962297#pragma mark - Labels22982299void RenderingDeviceDriverMetal::command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) {2300MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);2301cb->begin_label(p_label_name, p_color);2302}23032304void RenderingDeviceDriverMetal::command_end_label(CommandBufferID p_cmd_buffer) {2305MDCommandBufferBase *cb = (MDCommandBufferBase *)(p_cmd_buffer.id);2306cb->end_label();2307}23082309#pragma mark - Debug23102311void RenderingDeviceDriverMetal::command_insert_breadcrumb(CommandBufferID p_cmd_buffer, uint32_t p_data) {2312// TODO: Implement.2313}23142315#pragma mark - Submission23162317void RenderingDeviceDriverMetal::begin_segment(uint32_t p_frame_index, uint32_t p_frames_drawn) {2318_frame_index = p_frame_index;2319_frames_drawn = p_frames_drawn;2320}23212322void RenderingDeviceDriverMetal::end_segment() {2323MutexLock lock(copy_queue_mutex);2324_copy_queue_flush();2325}23262327#pragma mark - Misc23282329void RenderingDeviceDriverMetal::set_object_name(ObjectType p_type, ID p_driver_id, const String &p_name) {2330NS::String *label = conv::to_nsstring(p_name);23312332switch (p_type) {2333case OBJECT_TYPE_TEXTURE: {2334MTL::Texture *tex = reinterpret_cast<MTL::Texture *>(p_driver_id.id);2335tex->setLabel(label);2336} break;2337case OBJECT_TYPE_SAMPLER: {2338// Can't set label after creation.2339} break;2340case OBJECT_TYPE_BUFFER: {2341const BufferInfo *buf_info = (const BufferInfo 
*)p_driver_id.id;2342buf_info->metal_buffer.get()->setLabel(label);2343} break;2344case OBJECT_TYPE_SHADER: {2345MDShader *shader = (MDShader *)(p_driver_id.id);2346if (MDRenderShader *rs = dynamic_cast<MDRenderShader *>(shader); rs != nullptr) {2347rs->vert->set_label(label);2348rs->frag->set_label(label);2349} else if (MDComputeShader *cs = dynamic_cast<MDComputeShader *>(shader); cs != nullptr) {2350cs->kernel->set_label(label);2351} else {2352DEV_ASSERT(false);2353}2354} break;2355case OBJECT_TYPE_UNIFORM_SET: {2356MDUniformSet *set = (MDUniformSet *)(p_driver_id.id);2357set->arg_buffer->setLabel(label);2358} break;2359case OBJECT_TYPE_PIPELINE: {2360// Can't set label after creation.2361} break;2362default: {2363DEV_ASSERT(false);2364}2365}2366}23672368uint64_t RenderingDeviceDriverMetal::get_resource_native_handle(DriverResource p_type, ID p_driver_id) {2369switch (p_type) {2370case DRIVER_RESOURCE_LOGICAL_DEVICE: {2371return (uint64_t)(uintptr_t)device;2372}2373case DRIVER_RESOURCE_PHYSICAL_DEVICE: {2374return 0;2375}2376case DRIVER_RESOURCE_TOPMOST_OBJECT: {2377return 0;2378}2379case DRIVER_RESOURCE_COMMAND_QUEUE: {2380return (uint64_t)(uintptr_t)get_command_queue();2381}2382case DRIVER_RESOURCE_QUEUE_FAMILY: {2383return 0;2384}2385case DRIVER_RESOURCE_TEXTURE: {2386return p_driver_id.id;2387}2388case DRIVER_RESOURCE_TEXTURE_VIEW: {2389return p_driver_id.id;2390}2391case DRIVER_RESOURCE_TEXTURE_DATA_FORMAT: {2392return 0;2393}2394case DRIVER_RESOURCE_SAMPLER: {2395return p_driver_id.id;2396}2397case DRIVER_RESOURCE_UNIFORM_SET: {2398return 0;2399}2400case DRIVER_RESOURCE_BUFFER: {2401return p_driver_id.id;2402}2403case DRIVER_RESOURCE_COMPUTE_PIPELINE: {2404MDComputePipeline *pipeline = (MDComputePipeline *)(p_driver_id.id);2405return (uint64_t)(uintptr_t)pipeline->state.get();2406}2407case DRIVER_RESOURCE_RENDER_PIPELINE: {2408MDRenderPipeline *pipeline = (MDRenderPipeline *)(p_driver_id.id);2409return 
(uint64_t)(uintptr_t)pipeline->state.get();2410}2411default: {2412return 0;2413}2414}2415}24162417void RenderingDeviceDriverMetal::_copy_queue_copy_to_buffer(Span<uint8_t> p_src_data, MTL::Buffer *p_dst_buffer, uint64_t p_dst_offset) {2418MutexLock lock(copy_queue_mutex);2419if (_copy_queue_buffer_available() < p_src_data.size()) {2420_copy_queue_flush();2421}24222423MTL::BlitCommandEncoder *blit_encoder = _copy_queue_blit_encoder();24242425memcpy(_copy_queue_buffer_ptr(), p_src_data.ptr(), p_src_data.size());24262427copy_queue_rs.get()->addAllocation(p_dst_buffer);2428blit_encoder->copyFromBuffer(copy_queue_buffer.get(), copy_queue_buffer_offset, p_dst_buffer, p_dst_offset, p_src_data.size());24292430_copy_queue_buffer_consume(p_src_data.size());2431}24322433void RenderingDeviceDriverMetal::_copy_queue_flush() {2434if (!copy_queue_blit_encoder) {2435return;2436}24372438copy_queue_rs.get()->addAllocation(copy_queue_buffer.get());2439copy_queue_rs.get()->commit();24402441copy_queue_blit_encoder.get()->endEncoding();2442copy_queue_blit_encoder.reset();2443copy_queue_command_buffer.get()->commit();2444copy_queue_command_buffer.get()->waitUntilCompleted();2445copy_queue_command_buffer.reset();2446copy_queue_buffer_offset = 0;2447copy_queue_rs.get()->removeAllAllocations();2448}24492450Error RenderingDeviceDriverMetal::_copy_queue_initialize() {2451DEV_ASSERT(!copy_queue);24522453copy_queue = NS::TransferPtr(device->newCommandQueue());2454copy_queue.get()->setLabel(MTLSTR("Copy Command Queue"));2455ERR_FAIL_COND_V(!copy_queue, ERR_CANT_CREATE);24562457// Reserve 64 KiB for copy commands. 
If the buffer fills, it will be flushed automatically.2458copy_queue_buffer = NS::TransferPtr(device->newBuffer(64 * 1024, MTL::ResourceStorageModeShared | MTL::ResourceHazardTrackingModeUntracked));2459copy_queue_buffer.get()->setLabel(MTLSTR("Copy Command Scratch Buffer"));24602461if (__builtin_available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 1.0, *)) {2462MTL::ResidencySetDescriptor *rs_desc = MTL::ResidencySetDescriptor::alloc()->init();2463rs_desc->setInitialCapacity(2);2464rs_desc->setLabel(MTLSTR("Copy Queue Residency Set"));2465NS::Error *error = nullptr;2466copy_queue_rs = NS::TransferPtr(device->newResidencySet(rs_desc, &error));2467rs_desc->release();2468copy_queue.get()->addResidencySet(copy_queue_rs.get());2469}24702471return OK;2472}24732474uint64_t RenderingDeviceDriverMetal::get_total_memory_used() {2475return device->currentAllocatedSize();2476}24772478uint64_t RenderingDeviceDriverMetal::get_lazily_memory_used() {2479return 0; // TODO: Track this (grep for memoryless in Godot's Metal backend).2480}24812482uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {2483MetalDeviceProperties const &props = (*device_properties);2484MetalLimits const &limits = props.limits;2485uint64_t safe_unbounded = ((uint64_t)1 << 30);2486#if defined(DEV_ENABLED)2487#define UNKNOWN(NAME) \2488case NAME: \2489WARN_PRINT_ONCE("Returning maximum value for unknown limit " #NAME "."); \2490return safe_unbounded;2491#else2492#define UNKNOWN(NAME) \2493case NAME: \2494return safe_unbounded2495#endif24962497// clang-format off2498switch (p_limit) {2499case LIMIT_MAX_BOUND_UNIFORM_SETS:2500return limits.maxBoundDescriptorSets;2501case LIMIT_MAX_FRAMEBUFFER_COLOR_ATTACHMENTS:2502return limits.maxColorAttachments;2503case LIMIT_MAX_TEXTURES_PER_UNIFORM_SET:2504return limits.maxTexturesPerArgumentBuffer;2505case LIMIT_MAX_SAMPLERS_PER_UNIFORM_SET:2506return limits.maxSamplersPerArgumentBuffer;2507case LIMIT_MAX_STORAGE_BUFFERS_PER_UNIFORM_SET:2508return 
limits.maxBuffersPerArgumentBuffer;2509case LIMIT_MAX_STORAGE_IMAGES_PER_UNIFORM_SET:2510return limits.maxTexturesPerArgumentBuffer;2511case LIMIT_MAX_UNIFORM_BUFFERS_PER_UNIFORM_SET:2512return limits.maxBuffersPerArgumentBuffer;2513case LIMIT_MAX_DRAW_INDEXED_INDEX:2514return limits.maxDrawIndexedIndexValue;2515case LIMIT_MAX_FRAMEBUFFER_HEIGHT:2516return limits.maxFramebufferHeight;2517case LIMIT_MAX_FRAMEBUFFER_WIDTH:2518return limits.maxFramebufferWidth;2519case LIMIT_MAX_TEXTURE_ARRAY_LAYERS:2520return limits.maxImageArrayLayers;2521case LIMIT_MAX_TEXTURE_SIZE_1D:2522return limits.maxImageDimension1D;2523case LIMIT_MAX_TEXTURE_SIZE_2D:2524return limits.maxImageDimension2D;2525case LIMIT_MAX_TEXTURE_SIZE_3D:2526return limits.maxImageDimension3D;2527case LIMIT_MAX_TEXTURE_SIZE_CUBE:2528return limits.maxImageDimensionCube;2529case LIMIT_MAX_TEXTURES_PER_SHADER_STAGE:2530return limits.maxTexturesPerArgumentBuffer;2531case LIMIT_MAX_SAMPLERS_PER_SHADER_STAGE:2532return limits.maxSamplersPerArgumentBuffer;2533case LIMIT_MAX_STORAGE_BUFFERS_PER_SHADER_STAGE:2534return limits.maxBuffersPerArgumentBuffer;2535case LIMIT_MAX_STORAGE_IMAGES_PER_SHADER_STAGE:2536return limits.maxTexturesPerArgumentBuffer;2537case LIMIT_MAX_UNIFORM_BUFFERS_PER_SHADER_STAGE:2538return limits.maxBuffersPerArgumentBuffer;2539case LIMIT_MAX_PUSH_CONSTANT_SIZE:2540return limits.maxBufferLength;2541case LIMIT_MAX_UNIFORM_BUFFER_SIZE:2542return limits.maxBufferLength;2543case LIMIT_MAX_VERTEX_INPUT_ATTRIBUTE_OFFSET:2544return limits.maxVertexDescriptorLayoutStride;2545case LIMIT_MAX_VERTEX_INPUT_ATTRIBUTES:2546return limits.maxVertexInputAttributes;2547case LIMIT_MAX_VERTEX_INPUT_BINDINGS:2548return limits.maxVertexInputBindings;2549case LIMIT_MAX_VERTEX_INPUT_BINDING_STRIDE:2550return limits.maxVertexInputBindingStride;2551case LIMIT_MIN_UNIFORM_BUFFER_OFFSET_ALIGNMENT:2552return limits.minUniformBufferOffsetAlignment;2553case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_X:2554return 
limits.maxComputeWorkGroupCount.width;2555case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Y:2556return limits.maxComputeWorkGroupCount.height;2557case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Z:2558return limits.maxComputeWorkGroupCount.depth;2559case LIMIT_MAX_COMPUTE_WORKGROUP_INVOCATIONS:2560return std::max({ limits.maxThreadsPerThreadGroup.width, limits.maxThreadsPerThreadGroup.height, limits.maxThreadsPerThreadGroup.depth });2561case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_X:2562return limits.maxThreadsPerThreadGroup.width;2563case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Y:2564return limits.maxThreadsPerThreadGroup.height;2565case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z:2566return limits.maxThreadsPerThreadGroup.depth;2567case LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE:2568return limits.maxThreadGroupMemoryAllocation;2569case LIMIT_MAX_VIEWPORT_DIMENSIONS_X:2570return limits.maxViewportDimensionX;2571case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y:2572return limits.maxViewportDimensionY;2573case LIMIT_SUBGROUP_SIZE:2574// MoltenVK sets the subgroupSize to the same as the maxSubgroupSize.2575return limits.maxSubgroupSize;2576case LIMIT_SUBGROUP_MIN_SIZE:2577return limits.minSubgroupSize;2578case LIMIT_SUBGROUP_MAX_SIZE:2579return limits.maxSubgroupSize;2580case LIMIT_SUBGROUP_IN_SHADERS:2581return (uint64_t)limits.subgroupSupportedShaderStages;2582case LIMIT_SUBGROUP_OPERATIONS:2583return (uint64_t)limits.subgroupSupportedOperations;2584case LIMIT_METALFX_TEMPORAL_SCALER_MIN_SCALE:2585return (uint64_t)((1.0 / limits.temporalScalerInputContentMaxScale) * 1000'000);2586case LIMIT_METALFX_TEMPORAL_SCALER_MAX_SCALE:2587return (uint64_t)((1.0 / limits.temporalScalerInputContentMinScale) * 1000'000);2588case LIMIT_MAX_SHADER_VARYINGS:2589return limits.maxShaderVaryings;2590default: {2591#ifdef DEV_ENABLED2592WARN_PRINT("Returning maximum value for unknown limit " + itos(p_limit) + ".");2593#endif2594return safe_unbounded;2595}2596}2597// clang-format on2598return 0;2599}26002601uint64_t 
RenderingDeviceDriverMetal::api_trait_get(ApiTrait p_trait) {2602switch (p_trait) {2603case API_TRAIT_HONORS_PIPELINE_BARRIERS:2604return use_barriers;2605case API_TRAIT_CLEARS_WITH_COPY_ENGINE:2606return false;2607default:2608return RenderingDeviceDriver::api_trait_get(p_trait);2609}2610}26112612bool RenderingDeviceDriverMetal::has_feature(Features p_feature) {2613switch (p_feature) {2614case SUPPORTS_HALF_FLOAT:2615return true;2616case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS:2617return true;2618case SUPPORTS_BUFFER_DEVICE_ADDRESS:2619return device_properties->features.supports_gpu_address;2620case SUPPORTS_METALFX_SPATIAL:2621return device_properties->features.metal_fx_spatial;2622case SUPPORTS_METALFX_TEMPORAL:2623return device_properties->features.metal_fx_temporal;2624case SUPPORTS_IMAGE_ATOMIC_32_BIT:2625return device_properties->features.supports_native_image_atomics;2626case SUPPORTS_VULKAN_MEMORY_MODEL:2627return true;2628case SUPPORTS_POINT_SIZE:2629return true;2630default:2631return false;2632}2633}26342635const RDD::MultiviewCapabilities &RenderingDeviceDriverMetal::get_multiview_capabilities() {2636return multiview_capabilities;2637}26382639const RDD::FragmentShadingRateCapabilities &RenderingDeviceDriverMetal::get_fragment_shading_rate_capabilities() {2640return fsr_capabilities;2641}26422643const RDD::FragmentDensityMapCapabilities &RenderingDeviceDriverMetal::get_fragment_density_map_capabilities() {2644return fdm_capabilities;2645}26462647String RenderingDeviceDriverMetal::get_api_version() const {2648return vformat("%d.%d", capabilities.version_major, capabilities.version_minor);2649}26502651String RenderingDeviceDriverMetal::get_pipeline_cache_uuid() const {2652return pipeline_cache_id;2653}26542655const RDD::Capabilities &RenderingDeviceDriverMetal::get_capabilities() const {2656return capabilities;2657}26582659bool RenderingDeviceDriverMetal::is_composite_alpha_supported(CommandQueueID p_queue) const {2660// The CAMetalLayer.opaque 
property is configured according to this global setting.2661return OS::get_singleton()->is_layered_allowed();2662}26632664size_t RenderingDeviceDriverMetal::get_texel_buffer_alignment_for_format(RDD::DataFormat p_format) const {2665return device->minimumLinearTextureAlignmentForPixelFormat(pixel_formats->getMTLPixelFormat(p_format));2666}26672668size_t RenderingDeviceDriverMetal::get_texel_buffer_alignment_for_format(MTL::PixelFormat p_format) const {2669return device->minimumLinearTextureAlignmentForPixelFormat(p_format);2670}26712672/******************/26732674RenderingDeviceDriverMetal::RenderingDeviceDriverMetal(RenderingContextDriverMetal *p_context_driver) :2675context_driver(p_context_driver) {2676DEV_ASSERT(p_context_driver != nullptr);2677if (String res = OS::get_singleton()->get_environment("GODOT_MTL_ARCHIVE_FAIL_ON_MISS"); res == "1") {2678archive_fail_on_miss = true;2679}26802681#if TARGET_OS_OSX2682if (String res = OS::get_singleton()->get_environment("GODOT_MTL_SHADER_LOAD_STRATEGY"); res == U"lazy") {2683_shader_load_strategy = ShaderLoadStrategy::LAZY;2684}2685#else2686// Always use the lazy strategy on other OSs like iOS, tvOS, or visionOS.2687_shader_load_strategy = ShaderLoadStrategy::LAZY;2688#endif2689}26902691RenderingDeviceDriverMetal::~RenderingDeviceDriverMetal() {2692for (KeyValue<SHA256Digest, ShaderCacheEntry *> &kv : _shader_cache) {2693memdelete(kv.value);2694}26952696if (shader_container_format != nullptr) {2697memdelete(shader_container_format);2698}26992700if (pixel_formats != nullptr) {2701memdelete(pixel_formats);2702}27032704if (device_properties != nullptr) {2705memdelete(device_properties);2706}2707}27082709#pragma mark - Initialization27102711Error RenderingDeviceDriverMetal::_create_device() {2712device = context_driver->get_metal_device();27132714device_scope = NS::TransferPtr(MTL::CaptureManager::sharedCaptureManager()->newCaptureScope(device));2715device_scope->setLabel(MTLSTR("Godot 
Frame"));2716device_scope->beginScope(); // Allow Xcode to capture the first frame, if desired.27172718return OK;2719}27202721void RenderingDeviceDriverMetal::_track_resource(MTL::Resource *p_resource) {2722if (use_barriers) {2723_residency_add.push_back(p_resource);2724}2725}27262727void RenderingDeviceDriverMetal::_untrack_resource(MTL::Resource *p_resource) {2728if (use_barriers) {2729_residency_del.push_back(p_resource);2730}2731}27322733void RenderingDeviceDriverMetal::_check_capabilities() {2734capabilities.device_family = DEVICE_METAL;2735parse_msl_version(device_properties->features.msl_target_version, capabilities.version_major, capabilities.version_minor);2736}27372738API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0))2739static MetalDeviceProfile device_profile_from_properties(MetalDeviceProperties *p_device_properties) {2740using DP = MetalDeviceProfile;2741NS::OperatingSystemVersion os_version = NS::ProcessInfo::processInfo()->operatingSystemVersion();2742MetalDeviceProfile res;2743res.min_os_version = MinOsVersion(os_version.majorVersion, os_version.minorVersion, os_version.patchVersion);2744#if TARGET_OS_OSX2745res.platform = DP::Platform::macOS;2746#elif TARGET_OS_IPHONE2747res.platform = DP::Platform::iOS;2748#elif TARGET_OS_VISION2749res.platform = DP::Platform::visionOS;2750#else2751#error "Unsupported Apple platform"2752#endif2753res.features = {2754.msl_version = p_device_properties->features.msl_target_version,2755.use_argument_buffers = p_device_properties->features.argument_buffers_enabled(),2756.simdPermute = p_device_properties->features.simdPermute,2757};27582759// highestFamily will only be set to an Apple GPU family2760switch (p_device_properties->features.highestFamily) {2761case MTL::GPUFamilyApple1:2762res.gpu = DP::GPU::Apple1;2763break;2764case MTL::GPUFamilyApple2:2765res.gpu = DP::GPU::Apple2;2766break;2767case MTL::GPUFamilyApple3:2768res.gpu = DP::GPU::Apple3;2769break;2770case MTL::GPUFamilyApple4:2771res.gpu = 
DP::GPU::Apple4;2772break;2773case MTL::GPUFamilyApple5:2774res.gpu = DP::GPU::Apple5;2775break;2776case MTL::GPUFamilyApple6:2777res.gpu = DP::GPU::Apple6;2778break;2779case MTL::GPUFamilyApple7:2780res.gpu = DP::GPU::Apple7;2781break;2782case MTL::GPUFamilyApple8:2783res.gpu = DP::GPU::Apple8;2784break;2785case MTL::GPUFamilyApple9:2786res.gpu = DP::GPU::Apple9;2787break;2788default: {2789// Programming error if the default case is hit.2790CRASH_NOW_MSG("Unsupported GPU family");2791} break;2792}27932794return res;2795}27962797Error RenderingDeviceDriverMetal::_initialize(uint32_t p_device_index, uint32_t p_frame_count) {2798context_device = context_driver->device_get(p_device_index);2799Error err = _create_device();2800ERR_FAIL_COND_V(err, ERR_CANT_CREATE);28012802device_properties = memnew(MetalDeviceProperties(device));2803device_profile = device_profile_from_properties(device_properties);2804resource_cache = std::make_unique<MDResourceCache>(device, *pixel_formats, device_properties->limits.maxPerStageBufferCount);2805shader_container_format = memnew(RenderingShaderContainerFormatMetal(&device_profile));28062807_check_capabilities();28082809err = _copy_queue_initialize();2810ERR_FAIL_COND_V(err, ERR_CANT_CREATE);28112812_frame_count = p_frame_count;28132814// Set the pipeline cache ID based on the Metal version.2815pipeline_cache_id = "metal-driver-" + get_api_version();28162817pixel_formats = memnew(PixelFormats(device, device_properties->features));2818if (device_properties->features.layeredRendering) {2819multiview_capabilities.is_supported = true;2820multiview_capabilities.max_view_count = device_properties->limits.maxViewports;2821// NOTE: I'm not sure what the limit is as I don't see it referenced anywhere2822multiview_capabilities.max_instance_count = UINT32_MAX;28232824print_verbose("- Metal multiview supported:");2825print_verbose(" max view count: " + itos(multiview_capabilities.max_view_count));2826print_verbose(" max instances: " + 
itos(multiview_capabilities.max_instance_count));2827} else {2828print_verbose("- Metal multiview not supported");2829}28302831// The Metal renderer requires Apple4 family. This is 2017 era A11 chips and newer.2832if (device_properties->features.highestFamily < MTL::GPUFamilyApple4) {2833String error_string = vformat("Your Apple GPU does not support the following features, which are required to use Metal-based renderers in Godot:\n\n");2834if (!device_properties->features.imageCubeArray) {2835error_string += "- No support for image cube arrays.\n";2836}28372838#if defined(APPLE_EMBEDDED_ENABLED)2839// Apple Embedded platforms exports currently don't exit themselves when this method returns `ERR_CANT_CREATE`.2840OS::get_singleton()->alert(error_string + "\nClick OK to exit (black screen will be visible).");2841#else2842OS::get_singleton()->alert(error_string + "\nClick OK to exit.");2843#endif28442845return ERR_CANT_CREATE;2846}28472848return OK;2849}28502851const RenderingShaderContainerFormat &RenderingDeviceDriverMetal::get_shader_container_format() const {2852return *shader_container_format;2853}285428552856