Path: blob/master/drivers/metal/metal_device_properties.cpp
20919 views
/**************************************************************************/1/* metal_device_properties.cpp */2/**************************************************************************/3/* This file is part of: */4/* GODOT ENGINE */5/* https://godotengine.org */6/**************************************************************************/7/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */8/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */9/* */10/* Permission is hereby granted, free of charge, to any person obtaining */11/* a copy of this software and associated documentation files (the */12/* "Software"), to deal in the Software without restriction, including */13/* without limitation the rights to use, copy, modify, merge, publish, */14/* distribute, sublicense, and/or sell copies of the Software, and to */15/* permit persons to whom the Software is furnished to do so, subject to */16/* the following conditions: */17/* */18/* The above copyright notice and this permission notice shall be */19/* included in all copies or substantial portions of the Software. */20/* */21/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */22/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */23/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */24/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */25/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */26/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */27/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */28/**************************************************************************/2930/**************************************************************************/31/* */32/* Portions of this code were derived from MoltenVK. */33/* */34/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */35/* (http://www.brenwill.com) */36/* */37/* Licensed under the Apache License, Version 2.0 (the "License"); */38/* you may not use this file except in compliance with the License. */39/* You may obtain a copy of the License at */40/* */41/* http://www.apache.org/licenses/LICENSE-2.0 */42/* */43/* Unless required by applicable law or agreed to in writing, software */44/* distributed under the License is distributed on an "AS IS" BASIS, */45/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */46/* implied. See the License for the specific language governing */47/* permissions and limitations under the License. */48/**************************************************************************/4950#include "metal_device_properties.h"5152#include "metal_utils.h"5354#include "servers/rendering/renderer_rd/effects/metal_fx.h"5556#include <MetalFX/MetalFX.hpp>57#include <spirv_cross.hpp>58#include <spirv_msl.hpp>5960#include <unistd.h>6162// Common scaling multipliers.63#define KIBI (1024)64#define MEBI (KIBI * KIBI)6566#if (TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED < 140000) || (TARGET_OS_IPHONE && __IPHONE_OS_VERSION_MAX_ALLOWED < 170000)67constexpr MTL::GPUFamily GPUFamilyApple9 = static_cast<MTL::GPUFamily>(1009);68#else69constexpr MTL::GPUFamily GPUFamilyApple9 = MTL::GPUFamilyApple9;70#endif7172API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0), visionos(1.0))73MTL::GPUFamily &operator--(MTL::GPUFamily &p_family) {74p_family = static_cast<MTL::GPUFamily>(static_cast<int>(p_family) - 1);75if (p_family < MTL::GPUFamilyApple1) {76p_family = GPUFamilyApple9;77}7879return p_family;80}8182void MetalDeviceProperties::init_features(MTL::Device *p_device) {83features = {};8485MTL::CompileOptions *opts = MTL::CompileOptions::alloc()->init();86MTL::LanguageVersion lang_version = opts->languageVersion();87features.msl_max_version = make_msl_version((static_cast<uint32_t>(lang_version) >> 0x10) & 0xff, (static_cast<uint32_t>(lang_version) >> 0x00) & 0xff);88features.msl_target_version = features.msl_max_version;89opts->release();90if (String version = OS::get_singleton()->get_environment("GODOT_MTL_TARGET_VERSION"); !version.is_empty()) {91if (version != "max") {92Vector<String> parts = version.split(".", true, 2);93if (parts.size() == 2) {94uint32_t major = parts[0].to_int();95uint32_t minor = parts[1].to_int();96uint32_t msl_version = make_msl_version(major, minor);97if (msl_version < MSL_VERSION_23 || msl_version > MSL_VERSION_40) {98WARN_PRINT(vformat("GODOT_MTL_TARGET_VERSION: invalid MSL version '%d.%d'", major, minor));99} else {100print_line(vformat("Override: Targeting Metal version %d.%d", major, minor));101features.msl_target_version = msl_version;102}103} else {104WARN_PRINT("GODOT_MTL_TARGET_VERSION: invalid version string format. Expected major.minor or 'max'.");105}106}107}108109features.highestFamily = MTL::GPUFamilyApple1;110for (MTL::GPUFamily family = GPUFamilyApple9; family >= MTL::GPUFamilyApple1; --family) {111if (p_device->supportsFamily(family)) {112features.highestFamily = family;113break;114}115}116117if (__builtin_available(macOS 11, iOS 16.4, tvOS 16.4, *)) {118features.supportsBCTextureCompression = p_device->supportsBCTextureCompression();119} else {120features.supportsBCTextureCompression = false;121}122123#if TARGET_OS_OSX124features.supportsDepth24Stencil8 = p_device->isDepth24Stencil8PixelFormatSupported();125#endif126127if (__builtin_available(macOS 11.0, iOS 14.0, tvOS 14.0, *)) {128features.supports32BitFloatFiltering = p_device->supports32BitFloatFiltering();129features.supports32BitMSAA = p_device->supports32BitMSAA();130}131132if (__builtin_available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) {133features.supports_gpu_address = true;134}135136features.hostMemoryPageSize = sysconf(_SC_PAGESIZE);137138for (SampleCount sc = SampleCount1; sc <= SampleCount64; sc <<= 1) {139if (p_device->supportsTextureSampleCount(sc)) {140features.supportedSampleCounts |= sc;141}142}143144features.layeredRendering = p_device->supportsFamily(MTL::GPUFamilyApple5);145features.multisampleLayeredRendering = p_device->supportsFamily(MTL::GPUFamilyApple7);146features.tessellationShader = p_device->supportsFamily(MTL::GPUFamilyApple3);147features.imageCubeArray = p_device->supportsFamily(MTL::GPUFamilyApple3);148features.quadPermute = p_device->supportsFamily(MTL::GPUFamilyApple4);149features.simdPermute = p_device->supportsFamily(MTL::GPUFamilyApple6);150features.simdReduction = p_device->supportsFamily(MTL::GPUFamilyApple7);151features.argument_buffers_tier = p_device->argumentBuffersSupport();152features.supports_image_atomic_32_bit = p_device->supportsFamily(MTL::GPUFamilyApple6);153features.supports_image_atomic_64_bit = p_device->supportsFamily(GPUFamilyApple9) || (p_device->supportsFamily(MTL::GPUFamilyApple8) && p_device->supportsFamily(MTL::GPUFamilyMac2));154155if (features.msl_target_version >= MSL_VERSION_31) {156// Native atomics are only supported on 3.1 and above.157if (__builtin_available(macOS 14.0, iOS 17.0, tvOS 17.0, visionOS 1.0, *)) {158features.supports_native_image_atomics = true;159}160}161162if (OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_IMAGE_ATOMICS") == "1") {163features.supports_native_image_atomics = false;164}165166if (__builtin_available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, *)) {167features.supports_residency_sets = true;168} else {169features.supports_residency_sets = false;170}171172if (__builtin_available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) {173features.needs_arg_encoders = !(p_device->supportsFamily(MTL::GPUFamilyMetal3) && features.argument_buffers_tier == MTL::ArgumentBuffersTier2);174}175176if (String v = OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_ARGUMENT_BUFFERS"); v == "1") {177features.use_argument_buffers = false;178}179180if (__builtin_available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) {181features.metal_fx_spatial = MTLFX::SpatialScalerDescriptor::supportsDevice(p_device);182#ifdef METAL_MFXTEMPORAL_ENABLED183features.metal_fx_temporal = MTLFX::TemporalScalerDescriptor::supportsDevice(p_device);184#else185features.metal_fx_temporal = false;186#endif187}188}189190void MetalDeviceProperties::init_limits(MTL::Device *p_device) {191using std::max;192using std::min;193194// FST: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf195196// FST: Maximum number of layers per 1D texture array, 2D texture array, or 3D texture.197limits.maxImageArrayLayers = 2048;198if (p_device->supportsFamily(MTL::GPUFamilyApple3)) {199// FST: Maximum 2D texture width and height.200limits.maxFramebufferWidth = 16384;201limits.maxFramebufferHeight = 16384;202limits.maxViewportDimensionX = 16384;203limits.maxViewportDimensionY = 16384;204// FST: Maximum 1D texture width.205limits.maxImageDimension1D = 16384;206// FST: Maximum 2D texture width and height.207limits.maxImageDimension2D = 16384;208// FST: Maximum cube map texture width and height.209limits.maxImageDimensionCube = 16384;210} else {211// FST: Maximum 2D texture width and height.212limits.maxFramebufferWidth = 8192;213limits.maxFramebufferHeight = 8192;214limits.maxViewportDimensionX = 8192;215limits.maxViewportDimensionY = 8192;216// FST: Maximum 1D texture width.217limits.maxImageDimension1D = 8192;218// FST: Maximum 2D texture width and height.219limits.maxImageDimension2D = 8192;220// FST: Maximum cube map texture width and height.221limits.maxImageDimensionCube = 8192;222}223// FST: Maximum 3D texture width, height, and depth.224limits.maxImageDimension3D = 2048;225226limits.maxThreadsPerThreadGroup = p_device->maxThreadsPerThreadgroup();227// No effective limits.228limits.maxComputeWorkGroupCount = { std::numeric_limits<uint32_t>::max(), std::numeric_limits<uint32_t>::max(), std::numeric_limits<uint32_t>::max() };229// https://github.com/KhronosGroup/MoltenVK/blob/568cc3acc0e2299931fdaecaaa1fc3ec5b4af281/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h#L85230limits.maxBoundDescriptorSets = SPIRV_CROSS_NAMESPACE::kMaxArgumentBuffers;231// FST: Maximum number of color render targets per render pass descriptor.232limits.maxColorAttachments = 8;233234// Maximum number of textures the device can access, per stage, from an argument buffer.235if (p_device->supportsFamily(MTL::GPUFamilyApple6)) {236limits.maxTexturesPerArgumentBuffer = 1'000'000;237} else if (p_device->supportsFamily(MTL::GPUFamilyApple4)) {238limits.maxTexturesPerArgumentBuffer = 96;239} else {240limits.maxTexturesPerArgumentBuffer = 31;241}242243// Maximum number of samplers the device can access, per stage, from an argument buffer.244if (p_device->supportsFamily(MTL::GPUFamilyApple6)) {245limits.maxSamplersPerArgumentBuffer = 1024;246} else {247limits.maxSamplersPerArgumentBuffer = 16;248}249250// Maximum number of buffers the device can access, per stage, from an argument buffer.251if (p_device->supportsFamily(MTL::GPUFamilyApple6)) {252limits.maxBuffersPerArgumentBuffer = std::numeric_limits<uint64_t>::max();253} else if (p_device->supportsFamily(MTL::GPUFamilyApple4)) {254limits.maxBuffersPerArgumentBuffer = 96;255} else {256limits.maxBuffersPerArgumentBuffer = 31;257}258259limits.minSubgroupSize = limits.maxSubgroupSize = 1;260// These values were taken from MoltenVK.261if (features.simdPermute) {262limits.minSubgroupSize = 4;263limits.maxSubgroupSize = 32;264} else if (features.quadPermute) {265limits.minSubgroupSize = limits.maxSubgroupSize = 4;266}267268limits.subgroupSupportedShaderStages.set_flag(RDD::ShaderStage::SHADER_STAGE_COMPUTE_BIT);269if (features.tessellationShader) {270limits.subgroupSupportedShaderStages.set_flag(RDD::ShaderStage::SHADER_STAGE_TESSELATION_CONTROL_BIT);271}272limits.subgroupSupportedShaderStages.set_flag(RDD::ShaderStage::SHADER_STAGE_FRAGMENT_BIT);273274limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_BASIC_BIT);275if (features.simdPermute || features.quadPermute) {276limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_VOTE_BIT);277limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_BALLOT_BIT);278limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_SHUFFLE_BIT);279limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_SHUFFLE_RELATIVE_BIT);280}281282if (features.simdReduction) {283limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_ARITHMETIC_BIT);284}285286if (features.quadPermute) {287limits.subgroupSupportedOperations.set_flag(RD::SubgroupOperations::SUBGROUP_QUAD_BIT);288}289290limits.maxBufferLength = p_device->maxBufferLength();291292// FST: Maximum size of vertex descriptor layout stride.293limits.maxVertexDescriptorLayoutStride = std::numeric_limits<uint64_t>::max();294295// Maximum number of viewports.296if (p_device->supportsFamily(MTL::GPUFamilyApple5)) {297limits.maxViewports = 16;298} else {299limits.maxViewports = 1;300}301302limits.maxPerStageBufferCount = 31;303limits.maxPerStageSamplerCount = 16;304if (p_device->supportsFamily(MTL::GPUFamilyApple6)) {305limits.maxPerStageTextureCount = 128;306} else if (p_device->supportsFamily(MTL::GPUFamilyApple4)) {307limits.maxPerStageTextureCount = 96;308} else {309limits.maxPerStageTextureCount = 31;310}311312limits.maxVertexInputAttributes = 31;313limits.maxVertexInputBindings = 31;314limits.maxVertexInputBindingStride = (2 * KIBI);315limits.maxShaderVaryings = 31; // Accurate on Apple4 and above. See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf316317if (p_device->supportsFamily(MTL::GPUFamilyApple4)) {318limits.maxThreadGroupMemoryAllocation = 32768;319} else if (p_device->supportsFamily(MTL::GPUFamilyApple3)) {320limits.maxThreadGroupMemoryAllocation = 16384;321} else {322limits.maxThreadGroupMemoryAllocation = 16352;323}324325#if TARGET_OS_IOS && !TARGET_OS_MACCATALYST326limits.minUniformBufferOffsetAlignment = 64;327#endif328329#if TARGET_OS_OSX330// This is Apple Silicon specific.331limits.minUniformBufferOffsetAlignment = 16;332#endif333334limits.maxDrawIndexedIndexValue = std::numeric_limits<uint32_t>::max() - 1;335336#ifdef METAL_MFXTEMPORAL_ENABLED337if (__builtin_available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) {338limits.temporalScalerInputContentMinScale = MTLFX::TemporalScalerDescriptor::supportedInputContentMinScale(p_device);339limits.temporalScalerInputContentMaxScale = MTLFX::TemporalScalerDescriptor::supportedInputContentMaxScale(p_device);340} else {341// Defaults taken from macOS 14+342limits.temporalScalerInputContentMinScale = 1.0;343limits.temporalScalerInputContentMaxScale = 3.0;344}345#else346// Defaults taken from macOS 14+347limits.temporalScalerInputContentMinScale = 1.0;348limits.temporalScalerInputContentMaxScale = 3.0;349#endif350}351352void MetalDeviceProperties::init_os_props() {353NS::OperatingSystemVersion ver = NS::ProcessInfo::processInfo()->operatingSystemVersion();354os_version = (uint32_t)ver.majorVersion * 10000 + (uint32_t)ver.minorVersion * 100 + (uint32_t)ver.patchVersion;355}356357MetalDeviceProperties::MetalDeviceProperties(MTL::Device *p_device) {358init_features(p_device);359init_limits(p_device);360init_os_props();361}362363MetalDeviceProperties::~MetalDeviceProperties() {364}365366SampleCount MetalDeviceProperties::find_nearest_supported_sample_count(RenderingDevice::TextureSamples p_samples) const {367SampleCount supported = features.supportedSampleCounts;368if (supported & sample_count[p_samples]) {369return sample_count[p_samples];370}371372SampleCount requested_sample_count = sample_count[p_samples];373// Find the nearest supported sample count.374while (requested_sample_count > SampleCount1) {375if (supported & requested_sample_count) {376return requested_sample_count;377}378requested_sample_count = (SampleCount)(requested_sample_count >> 1);379}380381return SampleCount1;382}383384// region static members385386const SampleCount MetalDeviceProperties::sample_count[RenderingDevice::TextureSamples::TEXTURE_SAMPLES_MAX] = {387SampleCount1,388SampleCount2,389SampleCount4,390SampleCount8,391SampleCount16,392SampleCount32,393SampleCount64,394};395396// endregion397398399