Path: blob/master/drivers/metal/metal_objects_shared.cpp
20919 views
/**************************************************************************/1/* metal_objects_shared.cpp */2/**************************************************************************/3/* This file is part of: */4/* GODOT ENGINE */5/* https://godotengine.org */6/**************************************************************************/7/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */8/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */9/* */10/* Permission is hereby granted, free of charge, to any person obtaining */11/* a copy of this software and associated documentation files (the */12/* "Software"), to deal in the Software without restriction, including */13/* without limitation the rights to use, copy, modify, merge, publish, */14/* distribute, sublicense, and/or sell copies of the Software, and to */15/* permit persons to whom the Software is furnished to do so, subject to */16/* the following conditions: */17/* */18/* The above copyright notice and this permission notice shall be */19/* included in all copies or substantial portions of the Software. */20/* */21/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */22/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */23/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */24/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */25/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */26/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */27/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */28/**************************************************************************/2930#include "metal_objects_shared.h"3132#include "rendering_device_driver_metal.h"3334#include <os/signpost.h>35#include <simd/simd.h>36#include <string>3738#pragma mark - Resource Factory3940NS::SharedPtr<MTL::Function> MDResourceFactory::new_func(NS::String *p_source, NS::String *p_name, NS::Error **p_error) {41NS::SharedPtr<NS::AutoreleasePool> pool = NS::TransferPtr(NS::AutoreleasePool::alloc()->init());42NS::SharedPtr<MTL::CompileOptions> options = NS::TransferPtr(MTL::CompileOptions::alloc()->init());43NS::Error *err = nullptr;44NS::SharedPtr<MTL::Library> mtlLib = NS::TransferPtr(device->newLibrary(p_source, options.get(), &err));45if (err) {46if (p_error != nullptr) {47*p_error = err;48}49}50return NS::TransferPtr(mtlLib->newFunction(p_name));51}5253NS::SharedPtr<MTL::Function> MDResourceFactory::new_clear_vert_func(ClearAttKey &p_key) {54NS::SharedPtr<NS::AutoreleasePool> pool = NS::TransferPtr(NS::AutoreleasePool::alloc()->init());55char msl[1024];56snprintf(msl, sizeof(msl), R"(57#include <metal_stdlib>58using namespace metal;5960typedef struct {61float4 a_position [[attribute(0)]];62} AttributesPos;6364typedef struct {65float4 colors[9];66} ClearColorsIn;6768typedef struct {69float4 v_position [[position]];70uint layer%s;71} VaryingsPos;7273vertex VaryingsPos vertClear(AttributesPos attributes [[stage_in]], constant ClearColorsIn& ccIn [[buffer(0)]]) {74VaryingsPos varyings;75varyings.v_position = float4(attributes.a_position.x, -attributes.a_position.y, ccIn.colors[%d].r, 1.0);76varyings.layer = uint(attributes.a_position.w);77return varyings;78}79)",80p_key.is_layered_rendering_enabled() ? " [[render_target_array_index]]" : "", ClearAttKey::DEPTH_INDEX);8182return new_func(NS::String::string(msl, NS::UTF8StringEncoding), MTLSTR("vertClear"), nullptr);83}8485NS::SharedPtr<MTL::Function> MDResourceFactory::new_clear_frag_func(ClearAttKey &p_key) {86NS::SharedPtr<NS::AutoreleasePool> pool = NS::TransferPtr(NS::AutoreleasePool::alloc()->init());87std::string msl;88msl.reserve(2048);8990msl += R"(91#include <metal_stdlib>92using namespace metal;9394typedef struct {95float4 v_position [[position]];96} VaryingsPos;9798typedef struct {99float4 colors[9];100} ClearColorsIn;101102typedef struct {103)";104105char line[128];106for (uint32_t caIdx = 0; caIdx < ClearAttKey::COLOR_COUNT; caIdx++) {107if (p_key.is_enabled(caIdx)) {108const char *typeStr = get_format_type_string((MTL::PixelFormat)p_key.pixel_formats[caIdx]);109snprintf(line, sizeof(line), " %s4 color%u [[color(%u)]];\n", typeStr, caIdx, caIdx);110msl += line;111}112}113msl += R"(} ClearColorsOut;114115fragment ClearColorsOut fragClear(VaryingsPos varyings [[stage_in]], constant ClearColorsIn& ccIn [[buffer(0)]]) {116117ClearColorsOut ccOut;118)";119for (uint32_t caIdx = 0; caIdx < ClearAttKey::COLOR_COUNT; caIdx++) {120if (p_key.is_enabled(caIdx)) {121const char *typeStr = get_format_type_string((MTL::PixelFormat)p_key.pixel_formats[caIdx]);122snprintf(line, sizeof(line), " ccOut.color%u = %s4(ccIn.colors[%u]);\n", caIdx, typeStr, caIdx);123msl += line;124}125}126msl += R"( return ccOut;127})";128129return new_func(NS::String::string(msl.c_str(), NS::UTF8StringEncoding), MTLSTR("fragClear"), nullptr);130}131132const char *MDResourceFactory::get_format_type_string(MTL::PixelFormat p_fmt) const {133switch (pixel_formats.getFormatType(p_fmt)) {134case MTLFormatType::ColorInt8:135case MTLFormatType::ColorInt16:136return "short";137case MTLFormatType::ColorUInt8:138case MTLFormatType::ColorUInt16:139return "ushort";140case MTLFormatType::ColorInt32:141return "int";142case MTLFormatType::ColorUInt32:143return "uint";144case MTLFormatType::ColorHalf:145return "half";146case MTLFormatType::ColorFloat:147case MTLFormatType::DepthStencil:148case MTLFormatType::Compressed:149return "float";150case MTLFormatType::None:151default:152return "unexpected_MTLPixelFormatInvalid";153}154}155156NS::SharedPtr<MTL::DepthStencilState> MDResourceFactory::new_depth_stencil_state(bool p_use_depth, bool p_use_stencil) {157NS::SharedPtr<MTL::DepthStencilDescriptor> dsDesc = NS::TransferPtr(MTL::DepthStencilDescriptor::alloc()->init());158dsDesc->setDepthCompareFunction(MTL::CompareFunctionAlways);159dsDesc->setDepthWriteEnabled(p_use_depth);160161if (p_use_stencil) {162NS::SharedPtr<MTL::StencilDescriptor> sDesc = NS::TransferPtr(MTL::StencilDescriptor::alloc()->init());163sDesc->setStencilCompareFunction(MTL::CompareFunctionAlways);164sDesc->setStencilFailureOperation(MTL::StencilOperationReplace);165sDesc->setDepthFailureOperation(MTL::StencilOperationReplace);166sDesc->setDepthStencilPassOperation(MTL::StencilOperationReplace);167168dsDesc->setFrontFaceStencil(sDesc.get());169dsDesc->setBackFaceStencil(sDesc.get());170} else {171dsDesc->setFrontFaceStencil(nullptr);172dsDesc->setBackFaceStencil(nullptr);173}174175return NS::TransferPtr(device->newDepthStencilState(dsDesc.get()));176}177178NS::SharedPtr<MTL::RenderPipelineState> MDResourceFactory::new_clear_pipeline_state(ClearAttKey &p_key, NS::Error **p_error) {179NS::SharedPtr<MTL::Function> vtxFunc = new_clear_vert_func(p_key);180NS::SharedPtr<MTL::Function> fragFunc = new_clear_frag_func(p_key);181NS::SharedPtr<MTL::RenderPipelineDescriptor> plDesc = NS::TransferPtr(MTL::RenderPipelineDescriptor::alloc()->init());182plDesc->setLabel(MTLSTR("ClearRenderAttachments"));183plDesc->setVertexFunction(vtxFunc.get());184plDesc->setFragmentFunction(fragFunc.get());185plDesc->setRasterSampleCount(p_key.sample_count);186plDesc->setInputPrimitiveTopology(MTL::PrimitiveTopologyClassTriangle);187188for (uint32_t caIdx = 0; caIdx < ClearAttKey::COLOR_COUNT; caIdx++) {189MTL::RenderPipelineColorAttachmentDescriptor *colorDesc = plDesc->colorAttachments()->object(caIdx);190colorDesc->setPixelFormat((MTL::PixelFormat)p_key.pixel_formats[caIdx]);191colorDesc->setWriteMask(p_key.is_enabled(caIdx) ? MTL::ColorWriteMaskAll : MTL::ColorWriteMaskNone);192}193194MTL::PixelFormat mtlDepthFormat = (MTL::PixelFormat)p_key.depth_format();195if (pixel_formats.isDepthFormat(mtlDepthFormat)) {196plDesc->setDepthAttachmentPixelFormat(mtlDepthFormat);197}198199MTL::PixelFormat mtlStencilFormat = (MTL::PixelFormat)p_key.stencil_format();200if (pixel_formats.isStencilFormat(mtlStencilFormat)) {201plDesc->setStencilAttachmentPixelFormat(mtlStencilFormat);202}203204MTL::VertexDescriptor *vtxDesc = plDesc->vertexDescriptor();205206// Vertex attribute descriptors.207NS::UInteger vtxBuffIdx = get_vertex_buffer_index(VERT_CONTENT_BUFFER_INDEX);208NS::UInteger vtxStride = 0;209210// Vertex location.211MTL::VertexAttributeDescriptor *vaDesc = vtxDesc->attributes()->object(0);212vaDesc->setFormat(MTL::VertexFormatFloat4);213vaDesc->setBufferIndex(vtxBuffIdx);214vaDesc->setOffset(vtxStride);215vtxStride += sizeof(simd::float4);216217// Vertex attribute buffer.218MTL::VertexBufferLayoutDescriptor *vbDesc = vtxDesc->layouts()->object(vtxBuffIdx);219vbDesc->setStepFunction(MTL::VertexStepFunctionPerVertex);220vbDesc->setStepRate(1);221vbDesc->setStride(vtxStride);222223NS::Error *err = nullptr;224NS::SharedPtr<MTL::RenderPipelineState> state = NS::TransferPtr(device->newRenderPipelineState(plDesc.get(), &err));225if (p_error != nullptr) {226*p_error = err;227}228return state;229}230231NS::SharedPtr<MTL::RenderPipelineState> MDResourceFactory::new_empty_draw_pipeline_state(ClearAttKey &p_key, NS::Error **p_error) {232DEV_ASSERT(!p_key.is_layered_rendering_enabled());233DEV_ASSERT(p_key.is_enabled(0));234DEV_ASSERT(!p_key.is_depth_enabled());235DEV_ASSERT(!p_key.is_stencil_enabled());236237NS::SharedPtr<NS::AutoreleasePool> pool = NS::TransferPtr(NS::AutoreleasePool::alloc()->init());238static const char *msl = R"(#include <metal_stdlib>239using namespace metal;240241struct FullscreenNoopOut {242float4 position [[position]];243};244245vertex FullscreenNoopOut fullscreenNoopVert(uint vid [[vertex_id]]) {246float2 positions[3] = { float2(-1.0, -1.0), float2(3.0, -1.0), float2(-1.0, 3.0) };247float2 pos = positions[vid];248249FullscreenNoopOut out;250out.position = float4(pos, 0.0, 1.0);251return out;252}253254fragment void fullscreenNoopFrag(float4 gl_FragCoord [[position]]) {255}256)";257258NS::Error *err = nullptr;259NS::SharedPtr<MTL::CompileOptions> options = NS::TransferPtr(MTL::CompileOptions::alloc()->init());260NS::SharedPtr<MTL::Library> mtlLib = NS::TransferPtr(device->newLibrary(NS::String::string(msl, NS::UTF8StringEncoding), options.get(), &err));261if (err && p_error != nullptr) {262*p_error = err;263}264265if (mtlLib.get() == nullptr) {266return {};267}268269NS::SharedPtr<MTL::Function> vtxFunc = NS::TransferPtr(mtlLib->newFunction(MTLSTR("fullscreenNoopVert")));270NS::SharedPtr<MTL::Function> fragFunc = NS::TransferPtr(mtlLib->newFunction(MTLSTR("fullscreenNoopFrag")));271272NS::SharedPtr<MTL::RenderPipelineDescriptor> plDesc = NS::TransferPtr(MTL::RenderPipelineDescriptor::alloc()->init());273plDesc->setLabel(MTLSTR("EmptyDrawFullscreenTriangle"));274plDesc->setVertexFunction(vtxFunc.get());275plDesc->setFragmentFunction(fragFunc.get());276plDesc->setRasterSampleCount(p_key.sample_count ? p_key.sample_count : 1);277plDesc->setInputPrimitiveTopology(MTL::PrimitiveTopologyClassTriangle);278279MTL::RenderPipelineColorAttachmentDescriptor *colorDesc = plDesc->colorAttachments()->object(0);280colorDesc->setPixelFormat((MTL::PixelFormat)p_key.pixel_formats[0]);281colorDesc->setWriteMask(MTL::ColorWriteMaskNone);282283err = nullptr;284NS::SharedPtr<MTL::RenderPipelineState> state = NS::TransferPtr(device->newRenderPipelineState(plDesc.get(), &err));285if (p_error != nullptr && err != nullptr) {286*p_error = err;287}288return state;289}290291#pragma mark - Resource Cache292293MTL::RenderPipelineState *MDResourceCache::get_clear_render_pipeline_state(ClearAttKey &p_key, NS::Error **p_error) {294HashMap::ConstIterator it = clear_states.find(p_key);295if (it != clear_states.end()) {296return it->value.get();297}298299NS::SharedPtr<MTL::RenderPipelineState> state = resource_factory->new_clear_pipeline_state(p_key, p_error);300MTL::RenderPipelineState *result = state.get();301clear_states[p_key] = std::move(state);302return result;303}304305MTL::RenderPipelineState *MDResourceCache::get_empty_draw_pipeline_state(ClearAttKey &p_key, NS::Error **p_error) {306HashMap::ConstIterator it = empty_draw_states.find(p_key);307if (it != empty_draw_states.end()) {308return it->value.get();309}310311NS::SharedPtr<MTL::RenderPipelineState> state = resource_factory->new_empty_draw_pipeline_state(p_key, p_error);312MTL::RenderPipelineState *result = state.get();313empty_draw_states[p_key] = std::move(state);314return result;315}316317MTL::DepthStencilState *MDResourceCache::get_depth_stencil_state(bool p_use_depth, bool p_use_stencil) {318if (p_use_depth && p_use_stencil) {319if (!clear_depth_stencil_state.all) {320clear_depth_stencil_state.all = resource_factory->new_depth_stencil_state(true, true);321}322return clear_depth_stencil_state.all.get();323} else if (p_use_depth) {324if (!clear_depth_stencil_state.depth_only) {325clear_depth_stencil_state.depth_only = resource_factory->new_depth_stencil_state(true, false);326}327return clear_depth_stencil_state.depth_only.get();328} else if (p_use_stencil) {329if (!clear_depth_stencil_state.stencil_only) {330clear_depth_stencil_state.stencil_only = resource_factory->new_depth_stencil_state(false, true);331}332return clear_depth_stencil_state.stencil_only.get();333} else {334if (!clear_depth_stencil_state.none) {335clear_depth_stencil_state.none = resource_factory->new_depth_stencil_state(false, false);336}337return clear_depth_stencil_state.none.get();338}339}340341#pragma mark - Render Pass Types342343MTLFmtCaps MDSubpass::getRequiredFmtCapsForAttachmentAt(uint32_t p_index) const {344MTLFmtCaps caps = kMTLFmtCapsNone;345346for (RDD::AttachmentReference const &ar : input_references) {347if (ar.attachment == p_index) {348flags::set(caps, kMTLFmtCapsRead);349break;350}351}352353for (RDD::AttachmentReference const &ar : color_references) {354if (ar.attachment == p_index) {355flags::set(caps, kMTLFmtCapsColorAtt);356break;357}358}359360for (RDD::AttachmentReference const &ar : resolve_references) {361if (ar.attachment == p_index) {362flags::set(caps, kMTLFmtCapsResolve);363break;364}365}366367if (depth_stencil_reference.attachment == p_index) {368flags::set(caps, kMTLFmtCapsDSAtt);369}370371return caps;372}373374void MDAttachment::linkToSubpass(const MDRenderPass &p_pass) {375firstUseSubpassIndex = UINT32_MAX;376lastUseSubpassIndex = 0;377378for (MDSubpass const &subpass : p_pass.subpasses) {379MTLFmtCaps reqCaps = subpass.getRequiredFmtCapsForAttachmentAt(index);380if (reqCaps) {381firstUseSubpassIndex = MIN(subpass.subpass_index, firstUseSubpassIndex);382lastUseSubpassIndex = MAX(subpass.subpass_index, lastUseSubpassIndex);383}384}385}386387MTL::StoreAction MDAttachment::getMTLStoreAction(MDSubpass const &p_subpass,388bool p_is_rendering_entire_area,389bool p_has_resolve,390bool p_can_resolve,391bool p_is_stencil) const {392if (!p_is_rendering_entire_area || !isLastUseOf(p_subpass)) {393return p_has_resolve && p_can_resolve ? MTL::StoreActionStoreAndMultisampleResolve : MTL::StoreActionStore;394}395396switch (p_is_stencil ? stencilStoreAction : storeAction) {397case MTL::StoreActionStore:398return p_has_resolve && p_can_resolve ? MTL::StoreActionStoreAndMultisampleResolve : MTL::StoreActionStore;399case MTL::StoreActionDontCare:400return p_has_resolve ? (p_can_resolve ? MTL::StoreActionMultisampleResolve : MTL::StoreActionStore) : MTL::StoreActionDontCare;401402default:403return MTL::StoreActionStore;404}405}406407bool MDAttachment::shouldClear(const MDSubpass &p_subpass, bool p_is_stencil) const {408// If the subpass is not the first subpass to use this attachment, don't clear this attachment.409if (p_subpass.subpass_index != firstUseSubpassIndex) {410return false;411}412return (p_is_stencil ? stencilLoadAction : loadAction) == MTL::LoadActionClear;413}414415MDRenderPass::MDRenderPass(Vector<MDAttachment> &p_attachments, Vector<MDSubpass> &p_subpasses) :416attachments(p_attachments), subpasses(p_subpasses) {417for (MDAttachment &att : attachments) {418att.linkToSubpass(*this);419}420}421422#pragma mark - Command Buffer Base423424void MDCommandBufferBase::retain_resource(CFTypeRef p_resource) {425CFRetain(p_resource);426_retained_resources.push_back(p_resource);427}428429void MDCommandBufferBase::release_resources() {430for (CFTypeRef r : _retained_resources) {431CFRelease(r);432}433_retained_resources.clear();434}435436void MDCommandBufferBase::render_set_viewport(VectorView<Rect2i> p_viewports) {437RenderStateBase &state = get_render_state_base();438state.viewports.resize(p_viewports.size());439for (uint32_t i = 0; i < p_viewports.size(); i += 1) {440Rect2i const &vp = p_viewports[i];441state.viewports[i] = {442.originX = static_cast<double>(vp.position.x),443.originY = static_cast<double>(vp.position.y),444.width = static_cast<double>(vp.size.width),445.height = static_cast<double>(vp.size.height),446.znear = 0.0,447.zfar = 1.0,448};449}450state.dirty.set_flag(RenderStateBase::DIRTY_VIEWPORT);451}452453void MDCommandBufferBase::render_set_scissor(VectorView<Rect2i> p_scissors) {454RenderStateBase &state = get_render_state_base();455state.scissors.resize(p_scissors.size());456for (uint32_t i = 0; i < p_scissors.size(); i += 1) {457Rect2i const &vp = p_scissors[i];458state.scissors[i] = {459.x = static_cast<NS::UInteger>(vp.position.x),460.y = static_cast<NS::UInteger>(vp.position.y),461.width = static_cast<NS::UInteger>(vp.size.width),462.height = static_cast<NS::UInteger>(vp.size.height),463};464}465state.dirty.set_flag(RenderStateBase::DIRTY_SCISSOR);466}467468void MDCommandBufferBase::render_set_blend_constants(const Color &p_constants) {469DEV_ASSERT(type == MDCommandBufferStateType::Render);470RenderStateBase &state = get_render_state_base();471if (state.blend_constants != p_constants) {472state.blend_constants = p_constants;473state.dirty.set_flag(RenderStateBase::DIRTY_BLEND);474}475}476477void MDCommandBufferBase::_populate_vertices(simd::float4 *p_vertices, Size2i p_fb_size, VectorView<Rect2i> p_rects) {478uint32_t idx = 0;479for (uint32_t i = 0; i < p_rects.size(); i++) {480Rect2i const &rect = p_rects[i];481idx = _populate_vertices(p_vertices, idx, rect, p_fb_size);482}483}484485uint32_t MDCommandBufferBase::_populate_vertices(simd::float4 *p_vertices, uint32_t p_index, Rect2i const &p_rect, Size2i p_fb_size) {486// Determine the positions of the four edges of the487// clear rectangle as a fraction of the attachment size.488float leftPos = (float)(p_rect.position.x) / (float)p_fb_size.width;489float rightPos = (float)(p_rect.size.width) / (float)p_fb_size.width + leftPos;490float bottomPos = (float)(p_rect.position.y) / (float)p_fb_size.height;491float topPos = (float)(p_rect.size.height) / (float)p_fb_size.height + bottomPos;492493// Transform to clip-space coordinates, which are bounded by (-1.0 < p < 1.0) in clip-space.494leftPos = (leftPos * 2.0f) - 1.0f;495rightPos = (rightPos * 2.0f) - 1.0f;496bottomPos = (bottomPos * 2.0f) - 1.0f;497topPos = (topPos * 2.0f) - 1.0f;498499simd::float4 vtx;500501uint32_t idx = p_index;502uint32_t endLayer = get_current_view_count();503504for (uint32_t layer = 0; layer < endLayer; layer++) {505vtx.z = 0.0;506vtx.w = (float)layer;507508// Top left vertex - First triangle.509vtx.y = topPos;510vtx.x = leftPos;511p_vertices[idx++] = vtx;512513// Bottom left vertex.514vtx.y = bottomPos;515vtx.x = leftPos;516p_vertices[idx++] = vtx;517518// Bottom right vertex.519vtx.y = bottomPos;520vtx.x = rightPos;521p_vertices[idx++] = vtx;522523// Bottom right vertex - Second triangle.524p_vertices[idx++] = vtx;525526// Top right vertex.527vtx.y = topPos;528vtx.x = rightPos;529p_vertices[idx++] = vtx;530531// Top left vertex.532vtx.y = topPos;533vtx.x = leftPos;534p_vertices[idx++] = vtx;535}536537return idx;538}539540void MDCommandBufferBase::_end_render_pass() {541MDFrameBuffer const &fb_info = *get_frame_buffer();542MDSubpass const &subpass = get_current_subpass();543544PixelFormats &pf = device_driver->get_pixel_formats();545546for (uint32_t i = 0; i < subpass.resolve_references.size(); i++) {547uint32_t color_index = subpass.color_references[i].attachment;548uint32_t resolve_index = subpass.resolve_references[i].attachment;549DEV_ASSERT((color_index == RDD::AttachmentReference::UNUSED) == (resolve_index == RDD::AttachmentReference::UNUSED));550if (color_index == RDD::AttachmentReference::UNUSED || !fb_info.has_texture(color_index)) {551continue;552}553554MTL::Texture *resolve_tex = fb_info.get_texture(resolve_index);555556CRASH_COND_MSG(!flags::all(pf.getCapabilities(resolve_tex->pixelFormat()), kMTLFmtCapsResolve), "not implemented: unresolvable texture types");557// see: https://github.com/KhronosGroup/MoltenVK/blob/d20d13fe2735adb845636a81522df1b9d89c0fba/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm#L407558}559560end_render_encoding();561}562563void MDCommandBufferBase::_render_clear_render_area() {564MDRenderPass const &pass = *get_render_pass();565MDSubpass const &subpass = get_current_subpass();566LocalVector<RDD::RenderPassClearValue> &clear_values = get_clear_values();567568uint32_t ds_index = subpass.depth_stencil_reference.attachment;569bool clear_depth = (ds_index != RDD::AttachmentReference::UNUSED && pass.attachments[ds_index].shouldClear(subpass, false));570bool clear_stencil = (ds_index != RDD::AttachmentReference::UNUSED && pass.attachments[ds_index].shouldClear(subpass, true));571572uint32_t color_count = subpass.color_references.size();573uint32_t clears_size = color_count + (clear_depth || clear_stencil ? 1 : 0);574if (clears_size == 0) {575return;576}577578RDD::AttachmentClear *clears = ALLOCA_ARRAY(RDD::AttachmentClear, clears_size);579uint32_t clears_count = 0;580581for (uint32_t i = 0; i < color_count; i++) {582uint32_t idx = subpass.color_references[i].attachment;583if (idx != RDD::AttachmentReference::UNUSED && pass.attachments[idx].shouldClear(subpass, false)) {584clears[clears_count++] = { .aspect = RDD::TEXTURE_ASPECT_COLOR_BIT, .color_attachment = idx, .value = clear_values[idx] };585}586}587588if (clear_depth || clear_stencil) {589MDAttachment const &attachment = pass.attachments[ds_index];590BitField<RDD::TextureAspectBits> bits = {};591if (clear_depth && attachment.type & MDAttachmentType::Depth) {592bits.set_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT);593}594if (clear_stencil && attachment.type & MDAttachmentType::Stencil) {595bits.set_flag(RDD::TEXTURE_ASPECT_STENCIL_BIT);596}597598clears[clears_count++] = { .aspect = bits, .color_attachment = ds_index, .value = clear_values[ds_index] };599}600601if (clears_count == 0) {602return;603}604605render_clear_attachments(VectorView(clears, clears_count), { get_render_area() });606}607608void MDCommandBufferBase::encode_push_constant_data(RDD::ShaderID p_shader, VectorView<uint32_t> p_data) {609switch (type) {610case MDCommandBufferStateType::Render:611case MDCommandBufferStateType::Compute: {612MDShader *shader = (MDShader *)(p_shader.id);613if (shader->push_constants.binding == UINT32_MAX) {614return;615}616push_constant_binding = shader->push_constants.binding;617void const *ptr = p_data.ptr();618push_constant_data_len = p_data.size() * sizeof(uint32_t);619DEV_ASSERT(push_constant_data_len <= sizeof(push_constant_data));620memcpy(push_constant_data, ptr, push_constant_data_len);621if (push_constant_data_len > 0) {622mark_push_constants_dirty();623}624} break;625case MDCommandBufferStateType::Blit:626case MDCommandBufferStateType::None:627return;628}629}630631#pragma mark - Metal Library632633static const char *SHADER_STAGE_NAMES[] = {634[RD::SHADER_STAGE_VERTEX] = "vert",635[RD::SHADER_STAGE_FRAGMENT] = "frag",636[RD::SHADER_STAGE_TESSELATION_CONTROL] = "tess_ctrl",637[RD::SHADER_STAGE_TESSELATION_EVALUATION] = "tess_eval",638[RD::SHADER_STAGE_COMPUTE] = "comp",639};640641void ShaderCacheEntry::notify_free() const {642owner.shader_cache_free_entry(key);643}644645#pragma mark - MDLibrary646647MDLibrary::MDLibrary(ShaderCacheEntry *p_entry648#ifdef DEV_ENABLED649,650NS::String *p_source651#endif652) :653_entry(p_entry) {654#ifdef DEV_ENABLED655_original_source = NS::RetainPtr(p_source);656#endif657}658659MDLibrary::~MDLibrary() {660_entry->notify_free();661}662663void MDLibrary::set_label(NS::String *p_label) {664}665666#pragma mark - MDLazyLibrary667668/// Loads the MTLLibrary when the library is first accessed.669class MDLazyLibrary final : public MDLibrary {670NS::SharedPtr<MTL::Library> _library;671NS::Error *_error = nullptr;672std::shared_mutex _mu;673bool _loaded = false;674MTL::Device *_device = nullptr;675NS::SharedPtr<NS::String> _source;676NS::SharedPtr<MTL::CompileOptions> _options;677678void _load();679680public:681MDLazyLibrary(ShaderCacheEntry *p_entry,682MTL::Device *p_device,683NS::String *p_source,684MTL::CompileOptions *p_options);685686MTL::Library *get_library() override;687NS::Error *get_error() override;688};689690MDLazyLibrary::MDLazyLibrary(ShaderCacheEntry *p_entry,691MTL::Device *p_device,692NS::String *p_source,693MTL::CompileOptions *p_options) :694MDLibrary(p_entry695#ifdef DEV_ENABLED696,697p_source698#endif699),700_device(p_device),701_source(NS::RetainPtr(p_source)),702_options(NS::RetainPtr(p_options)) {703}704705void MDLazyLibrary::_load() {706{707std::shared_lock<std::shared_mutex> lock(_mu);708if (_loaded) {709return;710}711}712713std::unique_lock<std::shared_mutex> lock(_mu);714if (_loaded) {715return;716}717718os_signpost_id_t compile_id = (os_signpost_id_t)(uintptr_t)this;719os_signpost_interval_begin(LOG_INTERVALS, compile_id, "shader_compile",720"shader_name=%{public}s stage=%{public}s hash=%X",721_entry->name.get_data(), SHADER_STAGE_NAMES[_entry->stage], _entry->key.short_sha());722NS::Error *error = nullptr;723_library = NS::TransferPtr(_device->newLibrary(_source.get(), _options.get(), &error));724os_signpost_interval_end(LOG_INTERVALS, compile_id, "shader_compile");725_error = error;726_device = nullptr;727_source.reset();728_options.reset();729_loaded = true;730}731732MTL::Library *MDLazyLibrary::get_library() {733_load();734return _library.get();735}736737NS::Error *MDLazyLibrary::get_error() {738_load();739return _error;740}741742#pragma mark - MDImmediateLibrary743744/// Loads the MTLLibrary immediately on initialization, using Metal's async compilation API.745class MDImmediateLibrary final : public MDLibrary {746NS::SharedPtr<MTL::Library> _library;747NS::Error *_error = nullptr;748std::mutex _cv_mutex;749std::condition_variable _cv;750std::atomic<bool> _complete{ false };751bool _ready = false;752753public:754MDImmediateLibrary(ShaderCacheEntry *p_entry,755MTL::Device *p_device,756NS::String *p_source,757MTL::CompileOptions *p_options);758759MTL::Library *get_library() override;760NS::Error *get_error() override;761};762763MDImmediateLibrary::MDImmediateLibrary(ShaderCacheEntry *p_entry,764MTL::Device *p_device,765NS::String *p_source,766MTL::CompileOptions *p_options) :767MDLibrary(p_entry768#ifdef DEV_ENABLED769,770p_source771#endif772) {773os_signpost_id_t compile_id = (os_signpost_id_t)(uintptr_t)this;774os_signpost_interval_begin(LOG_INTERVALS, compile_id, "shader_compile",775"shader_name=%{public}s stage=%{public}s hash=%X",776p_entry->name.get_data(), SHADER_STAGE_NAMES[p_entry->stage], p_entry->key.short_sha());777778// Use Metal's async compilation API with std::function callback.779p_device->newLibrary(p_source, p_options, [this, compile_id, p_entry](MTL::Library *library, NS::Error *error) {780os_signpost_interval_end(LOG_INTERVALS, compile_id, "shader_compile");781if (library) {782_library = NS::RetainPtr(library);783}784_error = error;785if (error) {786ERR_PRINT(vformat(U"Error compiling shader %s: %s", p_entry->name.get_data(), error->localizedDescription()->utf8String()));787}788789{790std::lock_guard<std::mutex> lock(_cv_mutex);791_ready = true;792}793_cv.notify_all();794_complete = true;795});796}797798MTL::Library *MDImmediateLibrary::get_library() {799if (!_complete) {800std::unique_lock<std::mutex> lock(_cv_mutex);801_cv.wait(lock, [this] { return _ready; });802}803return _library.get();804}805806NS::Error *MDImmediateLibrary::get_error() {807if (!_complete) {808std::unique_lock<std::mutex> lock(_cv_mutex);809_cv.wait(lock, [this] { return _ready; });810}811return _error;812}813814#pragma mark - MDBinaryLibrary815816/// Loads the MTLLibrary from pre-compiled binary data.817class MDBinaryLibrary final : public MDLibrary {818NS::SharedPtr<MTL::Library> _library;819NS::Error *_error = nullptr;820821public:822MDBinaryLibrary(ShaderCacheEntry *p_entry,823MTL::Device *p_device,824#ifdef DEV_ENABLED825NS::String *p_source,826#endif827dispatch_data_t p_data);828829MTL::Library *get_library() override;830NS::Error *get_error() override;831};832833MDBinaryLibrary::MDBinaryLibrary(ShaderCacheEntry *p_entry,834MTL::Device *p_device,835#ifdef DEV_ENABLED836NS::String *p_source,837#endif838dispatch_data_t p_data) :839MDLibrary(p_entry840#ifdef DEV_ENABLED841,842p_source843#endif844) {845NS::Error *error = nullptr;846_library = NS::TransferPtr(p_device->newLibrary(p_data, &error));847if (error != nullptr) {848_error = error;849ERR_PRINT(vformat("Unable to load shader library: %s", error->localizedDescription()->utf8String()));850}851}852853MTL::Library *MDBinaryLibrary::get_library() {854return _library.get();855}856857NS::Error *MDBinaryLibrary::get_error() {858return _error;859}860861#pragma mark - MDLibrary Factory Methods862863std::shared_ptr<MDLibrary> MDLibrary::create(ShaderCacheEntry *p_entry,864MTL::Device *p_device,865NS::String *p_source,866MTL::CompileOptions *p_options,867ShaderLoadStrategy p_strategy) {868std::shared_ptr<MDLibrary> lib;869switch (p_strategy) {870case ShaderLoadStrategy::IMMEDIATE:871[[fallthrough]];872default:873lib = std::make_shared<MDImmediateLibrary>(p_entry, p_device, p_source, p_options);874break;875case ShaderLoadStrategy::LAZY:876lib = std::make_shared<MDLazyLibrary>(p_entry, p_device, p_source, p_options);877break;878}879p_entry->library = lib;880return lib;881}882883std::shared_ptr<MDLibrary> MDLibrary::create(ShaderCacheEntry *p_entry,884MTL::Device *p_device,885#ifdef DEV_ENABLED886NS::String *p_source,887#endif888dispatch_data_t p_data) {889std::shared_ptr<MDLibrary> lib = std::make_shared<MDBinaryLibrary>(p_entry, p_device,890#ifdef DEV_ENABLED891p_source,892#endif893p_data);894p_entry->library = lib;895return lib;896}897898899