Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/drivers/metal/rendering_device_driver_metal.cpp
20919 views
1
/**************************************************************************/
2
/* rendering_device_driver_metal.cpp */
3
/**************************************************************************/
4
/* This file is part of: */
5
/* GODOT ENGINE */
6
/* https://godotengine.org */
7
/**************************************************************************/
8
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
9
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
10
/* */
11
/* Permission is hereby granted, free of charge, to any person obtaining */
12
/* a copy of this software and associated documentation files (the */
13
/* "Software"), to deal in the Software without restriction, including */
14
/* without limitation the rights to use, copy, modify, merge, publish, */
15
/* distribute, sublicense, and/or sell copies of the Software, and to */
16
/* permit persons to whom the Software is furnished to do so, subject to */
17
/* the following conditions: */
18
/* */
19
/* The above copyright notice and this permission notice shall be */
20
/* included in all copies or substantial portions of the Software. */
21
/* */
22
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
23
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
24
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
25
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
26
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
27
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
28
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
29
/**************************************************************************/
30
31
/**************************************************************************/
32
/* */
33
/* Portions of this code were derived from MoltenVK. */
34
/* */
35
/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */
36
/* (http://www.brenwill.com) */
37
/* */
38
/* Licensed under the Apache License, Version 2.0 (the "License"); */
39
/* you may not use this file except in compliance with the License. */
40
/* You may obtain a copy of the License at */
41
/* */
42
/* http://www.apache.org/licenses/LICENSE-2.0 */
43
/* */
44
/* Unless required by applicable law or agreed to in writing, software */
45
/* distributed under the License is distributed on an "AS IS" BASIS, */
46
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
47
/* implied. See the License for the specific language governing */
48
/* permissions and limitations under the License. */
49
/**************************************************************************/
50
51
#include "rendering_device_driver_metal.h"
52
53
#include "pixel_formats.h"
54
#include "rendering_context_driver_metal.h"
55
#include "rendering_shader_container_metal.h"
56
57
#include "core/config/project_settings.h"
58
#include "core/io/marshalls.h"
59
#include "core/string/ustring.h"
60
#include "core/templates/hash_map.h"
61
#include "drivers/apple/foundation_helpers.h"
62
63
#include <os/log.h>
64
#include <os/signpost.h>
65
#include <Metal/Metal.hpp>
66
#include <algorithm>
67
68
// Fallback definition for older Metal SDKs that do not declare MTLGPUAddress.
// NOTE(review): #ifndef only detects a preprocessor macro — this assumes the
// SDK declares MTLGPUAddress as a #define; confirm against the targeted SDKs.
#ifndef MTLGPUAddress
typedef uint64_t MTLGPUAddress;
#endif
71
72
#pragma mark - Logging

// Log handles defined elsewhere in the Metal driver.
// General-purpose driver logging.
extern os_log_t LOG_DRIVER;
// Used for dynamic tracing (os_signpost intervals).
extern os_log_t LOG_INTERVALS;
77
78
/*****************/
/**** GENERIC ****/
/*****************/

// RDD::CompareOperator == MTL::CompareFunction.
// These asserts guarantee the RDD enum values can be cast directly to the
// Metal enum without a translation table.
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_NEVER, MTL::CompareFunctionNever));
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_LESS, MTL::CompareFunctionLess));
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_EQUAL, MTL::CompareFunctionEqual));
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_LESS_OR_EQUAL, MTL::CompareFunctionLessEqual));
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_GREATER, MTL::CompareFunctionGreater));
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_NOT_EQUAL, MTL::CompareFunctionNotEqual));
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_GREATER_OR_EQUAL, MTL::CompareFunctionGreaterEqual));
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_ALWAYS, MTL::CompareFunctionAlways));
91
92
/*****************/
93
/**** BUFFERS ****/
94
/*****************/
95
96
// Creates a Metal buffer and wraps it in a BufferInfo handle.
//
// For BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT buffers, the allocation is enlarged
// to hold one 16-byte-aligned copy of the data per in-flight frame
// (_frame_count), so the CPU can write one slice while the GPU reads another
// without hazard tracking.
RDD::BufferID RenderingDeviceDriverMetal::buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type, uint64_t p_frames_drawn) {
	const uint64_t original_size = p_size;
	if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
		// One aligned slice per frame in flight.
		p_size = round_up_to_alignment(p_size, 16u) * _frame_count;
	}

	MTL::ResourceOptions options = 0;
	switch (p_allocation_type) {
		case MEMORY_ALLOCATION_TYPE_CPU:
			options = base_hazard_tracking | MTL::ResourceStorageModeShared;
			break;
		case MEMORY_ALLOCATION_TYPE_GPU:
			if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
				// CPU-written, GPU-read: untracked + write-combined shared memory;
				// synchronization is handled by the per-frame slices above.
				options = MTL::ResourceHazardTrackingModeUntracked | MTL::ResourceStorageModeShared | MTL::ResourceCPUCacheModeWriteCombined;
			} else {
				options = base_hazard_tracking | MTL::ResourceStorageModePrivate;
			}
			break;
	}

	MTL::Buffer *obj = device->newBuffer(p_size, options);
	ERR_FAIL_NULL_V_MSG(obj, BufferID(), "Can't create buffer of size: " + itos(p_size));

	BufferInfo *buf_info;
	if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
		MetalBufferDynamicInfo *dyn_buffer = memnew(MetalBufferDynamicInfo);
		buf_info = dyn_buffer;
#ifdef DEBUG_ENABLED
		// Seed with "previous frame" so the first map this frame is legal.
		dyn_buffer->last_frame_mapped = p_frames_drawn - 1ul;
#endif
		dyn_buffer->set_frame_index(0u);
		// Per-frame slice size (aligned size of the caller-requested data).
		dyn_buffer->size_bytes = round_up_to_alignment(original_size, 16u);
	} else {
		buf_info = memnew(BufferInfo);
	}
	buf_info->metal_buffer = NS::TransferPtr(obj);

	_track_resource(buf_info->metal_buffer.get());

	return BufferID(buf_info);
}
137
138
// Metal needs no up-front texel format association for buffers, so this is a
// no-op that always reports success.
bool RenderingDeviceDriverMetal::buffer_set_texel_format(BufferID p_buffer, DataFormat p_format) {
	return true;
}
142
143
// Releases a buffer created by buffer_create(), removing it from residency
// tracking before destroying its info struct.
void RenderingDeviceDriverMetal::buffer_free(BufferID p_buffer) {
	BufferInfo *info = (BufferInfo *)p_buffer.id;

	_untrack_resource(info->metal_buffer.get());

	// Dynamic buffers were allocated as the derived type; delete them as that
	// type so the correct destructor runs.
	if (!info->is_dynamic()) {
		memdelete(info);
	} else {
		memdelete((MetalBufferDynamicInfo *)info);
	}
}
154
155
// Returns the size Metal actually reserved for the buffer, which may exceed
// the size originally requested.
uint64_t RenderingDeviceDriverMetal::buffer_get_allocation_size(BufferID p_buffer) {
	const BufferInfo *info = (const BufferInfo *)p_buffer.id;
	return info->metal_buffer.get()->allocatedSize();
}
159
160
// Maps the buffer for CPU access. Only shared-storage buffers expose their
// contents to the CPU; private buffers cannot be mapped.
uint8_t *RenderingDeviceDriverMetal::buffer_map(BufferID p_buffer) {
	const BufferInfo *info = (const BufferInfo *)p_buffer.id;
	MTL::Buffer *mtl_buf = info->metal_buffer.get();
	ERR_FAIL_COND_V_MSG(mtl_buf->storageMode() != MTL::StorageModeShared, nullptr, "Unable to map private buffers");
	return (uint8_t *)mtl_buf->contents();
}
165
166
// Shared Metal buffers remain permanently mapped, so no unmap step is needed.
void RenderingDeviceDriverMetal::buffer_unmap(BufferID p_buffer) {
}
169
170
// Advances a dynamic persistent buffer to its next per-frame slice and returns
// a CPU pointer to that slice. Must be called at most once per frame per
// buffer (enforced in debug builds), since the other slices may still be in
// use by the GPU.
uint8_t *RenderingDeviceDriverMetal::buffer_persistent_map_advance(BufferID p_buffer, uint64_t p_frames_drawn) {
	MetalBufferDynamicInfo *buf_info = (MetalBufferDynamicInfo *)p_buffer.id;
	ERR_FAIL_COND_V_MSG(!buf_info->is_dynamic(), nullptr, "Buffer must have BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT. Use buffer_map() instead.");
#ifdef DEBUG_ENABLED
	// Detect double-mapping within the same frame, which would race the GPU.
	ERR_FAIL_COND_V_MSG(buf_info->last_frame_mapped == p_frames_drawn, nullptr, "Buffers with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT must only be mapped once per frame. Otherwise there could be race conditions with the GPU. Amalgamate all data uploading into one map(), use an extra buffer or remove the bit.");
	buf_info->last_frame_mapped = p_frames_drawn;
#endif
	// Pointer to the newly-current slice: base + frame_index * slice size.
	return (uint8_t *)buf_info->metal_buffer.get()->contents() + buf_info->next_frame_index(_frame_count) * buf_info->size_bytes;
}
179
180
// Packs the current frame index of every dynamic buffer in p_buffers into a
// single bit mask, 2 bits per dynamic buffer (frame_count won't be > 4).
// Non-dynamic buffers contribute nothing and consume no bits.
uint64_t RenderingDeviceDriverMetal::buffer_get_dynamic_offsets(Span<BufferID> p_buffers) {
	uint64_t result = 0u;
	uint64_t bit_pos = 0u;

	for (const BufferID &buffer_id : p_buffers) {
		const BufferInfo *info = (const BufferInfo *)buffer_id.id;
		if (info->is_dynamic()) {
			result |= info->frame_index() << bit_pos;
			bit_pos += 2UL;
		}
	}

	return result;
}
196
197
// Returns the GPU virtual address of the buffer, or 0 when the running OS is
// too old to expose it (gpuAddress requires macOS 13.0 / iOS 16.0).
uint64_t RenderingDeviceDriverMetal::buffer_get_device_address(BufferID p_buffer) {
	if (__builtin_available(iOS 16.0, macOS 13.0, *)) {
		const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;
		return buf_info->metal_buffer.get()->gpuAddress();
	} else {
#if DEV_ENABLED
		WARN_PRINT_ONCE("buffer_get_device_address is not supported on this OS version.");
#endif
		return 0;
	}
}
208
209
#pragma mark - Texture
210
211
#pragma mark - Format Conversions
212
213
// Maps RD::TextureType (used as the array index) to the corresponding Metal
// texture type. Multisample variants are substituted later in texture_create()
// when sample counts above 1 are requested.
static const MTL::TextureType TEXTURE_TYPE[RD::TEXTURE_TYPE_MAX] = {
	MTL::TextureType1D,
	MTL::TextureType2D,
	MTL::TextureType3D,
	MTL::TextureTypeCube,
	MTL::TextureType1DArray,
	MTL::TextureType2DArray,
	MTL::TextureTypeCubeArray,
};
222
223
// Returns true when the requested format can be created as a linear
// (buffer-backed) texture, which permits direct CPU access to the texel data.
bool RenderingDeviceDriverMetal::is_valid_linear(TextureFormat const &p_format) const {
	// Linear textures must be 2D textures.
	if (p_format.texture_type != TEXTURE_TYPE_2D) {
		return false;
	}

	// Linear textures must not be depth/stencil or compressed formats.
	MTLFormatType ft = pixel_formats->getFormatType(p_format.format);
	if (ft == MTLFormatType::DepthStencil || ft == MTLFormatType::Compressed) {
		return false;
	}

	// Linear textures must have a single mipmap level, a single array layer,
	// and a single sample.
	return p_format.mipmaps == 1 && p_format.array_layers == 1 && p_format.samples == TEXTURE_SAMPLES_1;
}
232
233
// Creates a Metal texture matching the RD texture format and view description.
//
// Builds an MTLTextureDescriptor step by step (type, format, dimensions,
// multisampling, swizzle, storage options, usage flags) and then allocates
// either a regular texture or, when the caller wants CPU reads and the format
// allows it, a linear buffer-backed texture.
RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p_format, const TextureView &p_view) {
	NS::SharedPtr<MTL::TextureDescriptor> desc = NS::TransferPtr(MTL::TextureDescriptor::alloc()->init());
	desc->setTextureType(TEXTURE_TYPE[p_format.texture_type]);

	PixelFormats &formats = *pixel_formats;
	desc->setPixelFormat((MTL::PixelFormat)formats.getMTLPixelFormat(p_format.format));
	MTLFmtCaps format_caps = formats.getCapabilities(desc->pixelFormat());

	desc->setWidth(p_format.width);
	desc->setHeight(p_format.height);
	desc->setDepth(p_format.depth);
	desc->setMipmapLevelCount(p_format.mipmaps);

	if (p_format.texture_type == TEXTURE_TYPE_1D_ARRAY ||
			p_format.texture_type == TEXTURE_TYPE_2D_ARRAY) {
		desc->setArrayLength(p_format.array_layers);
	} else if (p_format.texture_type == TEXTURE_TYPE_CUBE_ARRAY) {
		// Metal counts cube arrays in whole cubes, not individual faces.
		desc->setArrayLength(p_format.array_layers / 6);
	}

	// TODO(sgc): Evaluate lossy texture support (perhaps as a project option?)
	// https://developer.apple.com/videos/play/tech-talks/10876?time=459
	// desc->setCompressionType(MTL::TextureCompressionTypeLossy);

	if (p_format.samples > TEXTURE_SAMPLES_1) {
		// Clamp to the nearest sample count the hardware supports.
		SampleCount supported = (*device_properties).find_nearest_supported_sample_count(p_format.samples);

		if (supported > SampleCount1) {
			// Metal only supports multisampling for 2D and 2D-array textures.
			bool ok = p_format.texture_type == TEXTURE_TYPE_2D || p_format.texture_type == TEXTURE_TYPE_2D_ARRAY;
			if (ok) {
				switch (p_format.texture_type) {
					case TEXTURE_TYPE_2D:
						desc->setTextureType(MTL::TextureType2DMultisample);
						break;
					case TEXTURE_TYPE_2D_ARRAY:
						desc->setTextureType(MTL::TextureType2DMultisampleArray);
						break;
					default:
						break;
				}
				desc->setSampleCount((NS::UInteger)supported);
				if (p_format.mipmaps > 1) {
					// For a buffer-backed or multi-sample texture, the value must be 1.
					WARN_PRINT("mipmaps == 1 for multi-sample textures");
					desc->setMipmapLevelCount(1);
				}
			} else {
				WARN_PRINT("Unsupported multi-sample texture type; disabling multi-sample");
			}
		}
	}

	// Maps RD::TextureSwizzle to MTL::TextureSwizzle; IDENTITY is resolved to
	// the channel's natural swizzle below.
	static const MTL::TextureSwizzle COMPONENT_SWIZZLE[TEXTURE_SWIZZLE_MAX] = {
		static_cast<MTL::TextureSwizzle>(255), // IDENTITY
		MTL::TextureSwizzleZero,
		MTL::TextureSwizzleOne,
		MTL::TextureSwizzleRed,
		MTL::TextureSwizzleGreen,
		MTL::TextureSwizzleBlue,
		MTL::TextureSwizzleAlpha,
	};

	MTL::TextureSwizzleChannels swizzle = MTL::TextureSwizzleChannels::Make(
			p_view.swizzle_r != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_r] : MTL::TextureSwizzleRed,
			p_view.swizzle_g != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_g] : MTL::TextureSwizzleGreen,
			p_view.swizzle_b != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_b] : MTL::TextureSwizzleBlue,
			p_view.swizzle_a != TEXTURE_SWIZZLE_IDENTITY ? COMPONENT_SWIZZLE[p_view.swizzle_a] : MTL::TextureSwizzleAlpha);

	// Represents a swizzle operation that is a no-op.
	static MTL::TextureSwizzleChannels IDENTITY_SWIZZLE = MTL::TextureSwizzleChannels::Default();

	bool no_swizzle = memcmp(&IDENTITY_SWIZZLE, &swizzle, sizeof(MTL::TextureSwizzleChannels)) == 0;
	if (!no_swizzle) {
		desc->setSwizzle(swizzle);
	}

	// Usage.

	MTL::ResourceOptions options = 0;
	bool is_linear = false;
#if defined(VISIONOS_ENABLED)
	const bool supports_memoryless = true;
#else
	GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wdeprecated-declarations")
	// Memoryless storage is only available on Apple-silicon GPU families.
	const bool supports_memoryless = (*device_properties).features.highestFamily >= MTL::GPUFamilyApple2 && (*device_properties).features.highestFamily < MTL::GPUFamilyMac1;
	GODOT_CLANG_WARNING_POP
#endif
	if (supports_memoryless && p_format.usage_bits & TEXTURE_USAGE_TRANSIENT_BIT) {
		// Transient attachments can live entirely in tile memory.
		options = base_hazard_tracking | MTL::ResourceStorageModeMemoryless;
		desc->setStorageMode(MTL::StorageModeMemoryless);
	} else {
		options = base_hazard_tracking | MTL::ResourceCPUCacheModeDefaultCache;
		if (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) {
			options |= MTL::ResourceStorageModeShared;
			// The user has indicated they want to read from the texture on the CPU,
			// so we'll see if we can use a linear format.
			// A linear format is a texture that is backed by a buffer,
			// which allows for CPU access to the texture data via a pointer.
			is_linear = is_valid_linear(p_format);
		} else {
			options |= MTL::ResourceStorageModePrivate;
		}
	}
	desc->setResourceOptions(options);

	MTL::TextureUsage usage = desc->usage();
	if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT) {
		usage |= MTL::TextureUsageShaderRead;
	}

	if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT) {
		usage |= MTL::TextureUsageShaderWrite;
	}

	bool can_be_attachment = flags::any(format_caps, (kMTLFmtCapsColorAtt | kMTLFmtCapsDSAtt));

	if (flags::any(p_format.usage_bits, TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) &&
			can_be_attachment) {
		usage |= MTL::TextureUsageRenderTarget;
	}

	if (p_format.usage_bits & TEXTURE_USAGE_INPUT_ATTACHMENT_BIT) {
		usage |= MTL::TextureUsageShaderRead;
	}

	if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_ATOMIC_BIT) {
		ERR_FAIL_COND_V_MSG((format_caps & kMTLFmtCapsAtomic) == 0, RDD::TextureID(), "Atomic operations on this texture format are not supported.");
		ERR_FAIL_COND_V_MSG(!device_properties->features.supports_native_image_atomics, RDD::TextureID(), "Atomic operations on textures are not supported on this OS version. Check SUPPORTS_IMAGE_ATOMIC_32_BIT.");
		// If supports_native_image_atomics is true, this condition should always succeed, as it is set the same.
		if (__builtin_available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) {
			usage |= MTL::TextureUsageShaderAtomic;
		}
	}

	if (p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) {
		ERR_FAIL_V_MSG(RDD::TextureID(), "unsupported: TEXTURE_USAGE_VRS_ATTACHMENT_BIT");
	}

	if (flags::any(p_format.usage_bits, TEXTURE_USAGE_CAN_UPDATE_BIT | TEXTURE_USAGE_CAN_COPY_TO_BIT) &&
			can_be_attachment && no_swizzle) {
		// Per MoltenVK, can be cleared as a render attachment.
		usage |= MTL::TextureUsageRenderTarget;
	}
	if (p_format.usage_bits & TEXTURE_USAGE_CAN_COPY_FROM_BIT) {
		// Covered by blits.
	}

	// Create texture views with a different component layout.
	if (!p_format.shareable_formats.is_empty()) {
		usage |= MTL::TextureUsagePixelFormatView;
	}

	desc->setUsage(usage);

	// Allocate memory.

	MTL::Texture *obj = nullptr;
	if (is_linear) {
		// Linear textures are restricted to 2D textures, a single mipmap level and a single array layer.
		MTL::PixelFormat pixel_format = desc->pixelFormat();
		size_t row_alignment = get_texel_buffer_alignment_for_format(p_format.format);
		size_t bytes_per_row = formats.getBytesPerRow(pixel_format, p_format.width);
		bytes_per_row = round_up_to_alignment(bytes_per_row, row_alignment);
		size_t bytes_per_layer = formats.getBytesPerLayer(pixel_format, bytes_per_row, p_format.height);
		size_t byte_count = bytes_per_layer * p_format.depth * p_format.array_layers;

		MTL::Buffer *buf = device->newBuffer(byte_count, options);
		// The texture retains the buffer, so our reference can be dropped;
		// the raw pointer stays valid for tracking while the texture lives.
		obj = buf->newTexture(desc.get(), 0, bytes_per_row);
		buf->release();

		_track_resource(buf);
	} else {
		obj = device->newTexture(desc.get());
	}
	ERR_FAIL_NULL_V_MSG(obj, TextureID(), "Unable to create texture.");

	_track_resource(obj);

	return TextureID(reinterpret_cast<uint64_t>(obj));
}
413
414
// Wraps an externally-created Metal texture in an RDD handle. When the
// requested data format differs from the native texture's pixel format, a
// texture view with the requested format is created instead.
RDD::TextureID RenderingDeviceDriverMetal::texture_create_from_extension(uint64_t p_native_texture, TextureType p_type, DataFormat p_format, uint32_t p_array_layers, bool p_depth_stencil, uint32_t p_mipmaps) {
	MTL::Texture *res = reinterpret_cast<MTL::Texture *>(p_native_texture);

	// If the requested format is different, we need to create a view.
	MTL::PixelFormat format = (MTL::PixelFormat)pixel_formats->getMTLPixelFormat(p_format);
	if (res->pixelFormat() != format) {
		// View covers all mip levels and the requested layer range, identity swizzle.
		MTL::TextureSwizzleChannels swizzle = MTL::TextureSwizzleChannels::Default();
		res = res->newTextureView(format, res->textureType(), NS::Range::Make(0, res->mipmapLevelCount()), NS::Range::Make(0, p_array_layers), swizzle);
		ERR_FAIL_NULL_V_MSG(res, TextureID(), "Unable to create texture view.");
	}

	_track_resource(res);

	return TextureID(reinterpret_cast<uint64_t>(res));
}
429
430
// Creates a view of an existing texture that aliases its storage, applying the
// format and component swizzle from p_view. Covers all mips and all slices of
// the original texture.
RDD::TextureID RenderingDeviceDriverMetal::texture_create_shared(TextureID p_original_texture, const TextureView &p_view) {
	MTL::Texture *src_texture = reinterpret_cast<MTL::Texture *>(p_original_texture.id);

	NS::UInteger slices = src_texture->arrayLength();
	if (src_texture->textureType() == MTL::TextureTypeCube) {
		// Metal expects Cube textures to have a slice count of 6.
		slices = 6;
	} else if (src_texture->textureType() == MTL::TextureTypeCubeArray) {
		// Metal expects Cube Array textures to have 6 slices per layer.
		slices *= 6;
	}

#if DEV_ENABLED
	if (src_texture->sampleCount() > 1) {
		// TODO(sgc): is it ok to create a shared texture from a multi-sample texture?
		WARN_PRINT("Is it safe to create a shared texture from multi-sample texture?");
	}
#endif

	MTL::PixelFormat format = (MTL::PixelFormat)pixel_formats->getMTLPixelFormat(p_view.format);

	// Maps RD::TextureSwizzle to MTL::TextureSwizzle; IDENTITY is resolved to
	// the channel's natural swizzle by the SWIZZLE macro below.
	static const MTL::TextureSwizzle component_swizzle[TEXTURE_SWIZZLE_MAX] = {
		static_cast<MTL::TextureSwizzle>(255), // IDENTITY
		MTL::TextureSwizzleZero,
		MTL::TextureSwizzleOne,
		MTL::TextureSwizzleRed,
		MTL::TextureSwizzleGreen,
		MTL::TextureSwizzleBlue,
		MTL::TextureSwizzleAlpha,
	};

#define SWIZZLE(C, CHAN) (p_view.swizzle_##C != TEXTURE_SWIZZLE_IDENTITY ? component_swizzle[p_view.swizzle_##C] : MTL::TextureSwizzle##CHAN)
	MTL::TextureSwizzleChannels swizzle = MTL::TextureSwizzleChannels::Make(SWIZZLE(r, Red), SWIZZLE(g, Green), SWIZZLE(b, Blue), SWIZZLE(a, Alpha));
#undef SWIZZLE
	MTL::Texture *obj = src_texture->newTextureView(format, src_texture->textureType(), NS::Range::Make(0, src_texture->mipmapLevelCount()), NS::Range::Make(0, slices), swizzle);
	ERR_FAIL_NULL_V_MSG(obj, TextureID(), "Unable to create shared texture");
	_track_resource(obj);
	return TextureID(reinterpret_cast<uint64_t>(obj));
}
469
470
// Creates a view of a sub-range (mips p_mipmap..p_mipmap+p_mipmaps, layers
// p_layer..p_layer+p_layers) of an existing texture, with the format, texture
// type and swizzle requested by the caller.
RDD::TextureID RenderingDeviceDriverMetal::texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps) {
	MTL::Texture *src_texture = reinterpret_cast<MTL::Texture *>(p_original_texture.id);

	// Default view type per source texture type (indexed by MTL::TextureType);
	// overridden below when a specific slice type is requested.
	static const MTL::TextureType VIEW_TYPES[] = {
		MTL::TextureType1D, // MTLTextureType1D
		MTL::TextureType1D, // MTLTextureType1DArray
		MTL::TextureType2D, // MTLTextureType2D
		MTL::TextureType2D, // MTLTextureType2DArray
		MTL::TextureType2D, // MTLTextureType2DMultisample
		MTL::TextureType2D, // MTLTextureTypeCube
		MTL::TextureType2D, // MTLTextureTypeCubeArray
		MTL::TextureType2D, // MTLTextureType3D
		MTL::TextureType2D, // MTLTextureType2DMultisampleArray
	};

	MTL::TextureType textureType = VIEW_TYPES[src_texture->textureType()];
	switch (p_slice_type) {
		case TEXTURE_SLICE_2D: {
			textureType = MTL::TextureType2D;
		} break;
		case TEXTURE_SLICE_3D: {
			textureType = MTL::TextureType3D;
		} break;
		case TEXTURE_SLICE_CUBEMAP: {
			textureType = MTL::TextureTypeCube;
		} break;
		case TEXTURE_SLICE_2D_ARRAY: {
			textureType = MTL::TextureType2DArray;
		} break;
		case TEXTURE_SLICE_MAX: {
			ERR_FAIL_V_MSG(TextureID(), "Invalid texture slice type");
		} break;
	}

	MTL::PixelFormat format = (MTL::PixelFormat)pixel_formats->getMTLPixelFormat(p_view.format);

	// Maps RD::TextureSwizzle to MTL::TextureSwizzle; IDENTITY is resolved to
	// the channel's natural swizzle by the SWIZZLE macro below.
	static const MTL::TextureSwizzle component_swizzle[TEXTURE_SWIZZLE_MAX] = {
		static_cast<MTL::TextureSwizzle>(255), // IDENTITY
		MTL::TextureSwizzleZero,
		MTL::TextureSwizzleOne,
		MTL::TextureSwizzleRed,
		MTL::TextureSwizzleGreen,
		MTL::TextureSwizzleBlue,
		MTL::TextureSwizzleAlpha,
	};

#define SWIZZLE(C, CHAN) (p_view.swizzle_##C != TEXTURE_SWIZZLE_IDENTITY ? component_swizzle[p_view.swizzle_##C] : MTL::TextureSwizzle##CHAN)
	MTL::TextureSwizzleChannels swizzle = MTL::TextureSwizzleChannels::Make(SWIZZLE(r, Red), SWIZZLE(g, Green), SWIZZLE(b, Blue), SWIZZLE(a, Alpha));
#undef SWIZZLE
	MTL::Texture *obj = src_texture->newTextureView(format, textureType, NS::Range::Make(p_mipmap, p_mipmaps), NS::Range::Make(p_layer, p_layers), swizzle);
	ERR_FAIL_NULL_V_MSG(obj, TextureID(), "Unable to create shared texture");
	_track_resource(obj);
	return TextureID(reinterpret_cast<uint64_t>(obj));
}
524
525
// Destroys a texture: removes it from residency tracking, then drops the
// reference taken at creation.
void RenderingDeviceDriverMetal::texture_free(TextureID p_texture) {
	MTL::Texture *texture = reinterpret_cast<MTL::Texture *>(p_texture.id);
	_untrack_resource(texture);
	texture->release();
}
530
531
// Returns the size Metal actually allocated for the texture.
uint64_t RenderingDeviceDriverMetal::texture_get_allocation_size(TextureID p_texture) {
	return reinterpret_cast<MTL::Texture *>(p_texture.id)->allocatedSize();
}
535
536
// Computes the tightly-packed copy layout (total size and row pitch) for one
// mip level of a texture, taking compressed block dimensions into account.
void RenderingDeviceDriverMetal::texture_get_copyable_layout(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) {
	MTL::Texture *obj = reinterpret_cast<MTL::Texture *>(p_texture.id);

	PixelFormats &pf = *pixel_formats;
	DataFormat format = pf.getDataFormat(obj->pixelFormat());

	// Mip-level dimensions, clamped to a minimum of 1.
	uint32_t w = MAX(1u, obj->width() >> p_subresource.mipmap);
	uint32_t h = MAX(1u, obj->height() >> p_subresource.mipmap);
	uint32_t d = MAX(1u, obj->depth() >> p_subresource.mipmap);

	// Block dimensions (1x1 for uncompressed formats).
	uint32_t bw = 0, bh = 0;
	get_compressed_image_format_block_dimensions(format, bw, bh);

	// sbw/sbh receive the block-aligned width/height of the level.
	uint32_t sbw = 0, sbh = 0;
	*r_layout = {};
	r_layout->size = get_image_format_required_size(format, w, h, d, 1, &sbw, &sbh);
	// Row pitch = total size divided by the number of block rows across all depth slices.
	r_layout->row_pitch = r_layout->size / ((sbh / bh) * d);
}
554
555
// Reads back the contents of one array layer of a shared-storage texture into
// a tightly-packed byte vector containing all mip levels.
//
// Linear (buffer-backed) textures are copied in one memcpy; otherwise each mip
// level is fetched via MTLTexture getBytes.
Vector<uint8_t> RenderingDeviceDriverMetal::texture_get_data(TextureID p_texture, uint32_t p_layer) {
	MTL::Texture *obj = reinterpret_cast<MTL::Texture *>(p_texture.id);
	ERR_FAIL_COND_V_MSG(obj->storageMode() != MTL::StorageModeShared, Vector<uint8_t>(), "Texture must be created with TEXTURE_USAGE_CPU_READ_BIT set.");

	// Fast path: a linear texture exposes its backing buffer directly.
	MTL::Buffer *buf = obj->buffer();
	if (buf) {
		ERR_FAIL_COND_V_MSG(p_layer > 0, Vector<uint8_t>(), "A linear texture has a single layer.");
		ERR_FAIL_COND_V_MSG(obj->mipmapLevelCount() > 1, Vector<uint8_t>(), "A linear texture has a single mipmap level.");
		Vector<uint8_t> image_data;
		image_data.resize_uninitialized(buf->length());
		memcpy(image_data.ptrw(), buf->contents(), buf->length());
		return image_data;
	}

	DataFormat tex_format = pixel_formats->getDataFormat(obj->pixelFormat());
	uint32_t tex_w = obj->width();
	uint32_t tex_h = obj->height();
	uint32_t tex_d = obj->depth();
	uint32_t tex_mipmaps = obj->mipmapLevelCount();

	// Must iteratively copy the texture data to a buffer.

	uint32_t tight_mip_size = get_image_format_required_size(tex_format, tex_w, tex_h, tex_d, tex_mipmaps);

	Vector<uint8_t> image_data;
	image_data.resize(tight_mip_size);

	uint32_t pixel_size = get_image_format_pixel_size(tex_format);
	uint32_t pixel_rshift = get_compressed_image_format_pixel_rshift(tex_format);
	// Compression block dimensions (1x1 for uncompressed formats).
	uint32_t blockw = 0, blockh = 0;
	get_compressed_image_format_block_dimensions(tex_format, blockw, blockh);

	uint8_t *dest_ptr = image_data.ptrw();

	for (uint32_t mm_i = 0; mm_i < tex_mipmaps; mm_i++) {
		// Round the level dimensions up to block boundaries.
		uint32_t bw = STEPIFY(tex_w, blockw);
		uint32_t bh = STEPIFY(tex_h, blockh);

		uint32_t bytes_per_row = (bw * pixel_size) >> pixel_rshift;
		uint32_t bytes_per_img = bytes_per_row * bh;
		uint32_t mip_size = bytes_per_img * tex_d;

		obj->getBytes(dest_ptr, bytes_per_row, bytes_per_img, MTL::Region(0, 0, 0, bw, bh, tex_d), mm_i, p_layer);

		dest_ptr += mip_size;

		// Next mipmap level.
		tex_w = MAX(blockw, tex_w >> 1);
		tex_h = MAX(blockh, tex_h >> 1);
		tex_d = MAX(1u, tex_d >> 1);
	}

	// Ensure that the destination pointer is at the end of the image data.
	DEV_ASSERT(dest_ptr - image_data.ptr() == image_data.size());

	return image_data;
}
612
613
// Reports which texture usages the given data format supports on this device.
// Starts from "everything supported" and strips the usages whose Metal format
// capability is missing, which makes an all-or-nothing check easier for the
// caller. Formats with no Metal equivalent support nothing.
BitField<RDD::TextureUsageBits> RenderingDeviceDriverMetal::texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) {
	PixelFormats &pf = *pixel_formats;
	if (pf.getMTLPixelFormat(p_format) == MTL::PixelFormatInvalid) {
		return 0;
	}

	MTLFmtCaps caps = pf.getCapabilities(p_format);

	BitField<RDD::TextureUsageBits> supported = INT64_MAX;
	supported.clear_flag(TEXTURE_USAGE_VRS_ATTACHMENT_BIT); // No VRS support for Metal.

	// Strip each usage whose backing capability the format lacks.
	auto strip_if_missing = [&](MTLFmtCaps p_cap, TextureUsageBits p_usage) {
		if (!flags::any(caps, p_cap)) {
			supported.clear_flag(p_usage);
		}
	};
	strip_if_missing(kMTLFmtCapsColorAtt, TEXTURE_USAGE_COLOR_ATTACHMENT_BIT);
	strip_if_missing(kMTLFmtCapsDSAtt, TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
	strip_if_missing(kMTLFmtCapsRead, TEXTURE_USAGE_SAMPLING_BIT);
	strip_if_missing(kMTLFmtCapsAtomic, TEXTURE_USAGE_STORAGE_ATOMIC_BIT);

	return supported;
}
640
641
// Metal can always alias a texture under a different pixel format, and no raw
// reinterpretation copy is ever required.
bool RenderingDeviceDriverMetal::texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) {
	r_raw_reinterpretation = false;
	return true;
}
645
646
#pragma mark - Sampler

// Indexed by RD::CompareOperator; the ordering is validated against
// MTL::CompareFunction by the static_asserts at the top of this file.
static const MTL::CompareFunction COMPARE_OPERATORS[RD::COMPARE_OP_MAX] = {
	MTL::CompareFunctionNever,
	MTL::CompareFunctionLess,
	MTL::CompareFunctionEqual,
	MTL::CompareFunctionLessEqual,
	MTL::CompareFunctionGreater,
	MTL::CompareFunctionNotEqual,
	MTL::CompareFunctionGreaterEqual,
	MTL::CompareFunctionAlways,
};

// Indexed by RD::StencilOperation.
static const MTL::StencilOperation STENCIL_OPERATIONS[RD::STENCIL_OP_MAX] = {
	MTL::StencilOperationKeep,
	MTL::StencilOperationZero,
	MTL::StencilOperationReplace,
	MTL::StencilOperationIncrementClamp,
	MTL::StencilOperationDecrementClamp,
	MTL::StencilOperationInvert,
	MTL::StencilOperationIncrementWrap,
	MTL::StencilOperationDecrementWrap,
};

// Indexed by RD::BlendFactor.
static const MTL::BlendFactor BLEND_FACTORS[RD::BLEND_FACTOR_MAX] = {
	MTL::BlendFactorZero,
	MTL::BlendFactorOne,
	MTL::BlendFactorSourceColor,
	MTL::BlendFactorOneMinusSourceColor,
	MTL::BlendFactorDestinationColor,
	MTL::BlendFactorOneMinusDestinationColor,
	MTL::BlendFactorSourceAlpha,
	MTL::BlendFactorOneMinusSourceAlpha,
	MTL::BlendFactorDestinationAlpha,
	MTL::BlendFactorOneMinusDestinationAlpha,
	MTL::BlendFactorBlendColor,
	MTL::BlendFactorOneMinusBlendColor,
	MTL::BlendFactorBlendAlpha,
	MTL::BlendFactorOneMinusBlendAlpha,
	MTL::BlendFactorSourceAlphaSaturated,
	MTL::BlendFactorSource1Color,
	MTL::BlendFactorOneMinusSource1Color,
	MTL::BlendFactorSource1Alpha,
	MTL::BlendFactorOneMinusSource1Alpha,
};
// Indexed by RD::BlendOperation.
static const MTL::BlendOperation BLEND_OPERATIONS[RD::BLEND_OP_MAX] = {
	MTL::BlendOperationAdd,
	MTL::BlendOperationSubtract,
	MTL::BlendOperationReverseSubtract,
	MTL::BlendOperationMin,
	MTL::BlendOperationMax,
};

// Indexed by RD::SamplerRepeatMode.
static const MTL::SamplerAddressMode ADDRESS_MODES[RD::SAMPLER_REPEAT_MODE_MAX] = {
	MTL::SamplerAddressModeRepeat,
	MTL::SamplerAddressModeMirrorRepeat,
	MTL::SamplerAddressModeClampToEdge,
	MTL::SamplerAddressModeClampToBorderColor,
	MTL::SamplerAddressModeMirrorClampToEdge,
};

// Indexed by RD::SamplerBorderColor. Entries are duplicated in pairs —
// presumably the RD enum distinguishes float/int variants that Metal does not;
// verify against RD::SamplerBorderColor ordering.
static const MTL::SamplerBorderColor SAMPLER_BORDER_COLORS[RD::SAMPLER_BORDER_COLOR_MAX] = {
	MTL::SamplerBorderColorTransparentBlack,
	MTL::SamplerBorderColorTransparentBlack,
	MTL::SamplerBorderColorOpaqueBlack,
	MTL::SamplerBorderColorOpaqueBlack,
	MTL::SamplerBorderColorOpaqueWhite,
	MTL::SamplerBorderColorOpaqueWhite,
};
715
716
// Creates an immutable Metal sampler state from an RD sampler description.
// Fails with a null SamplerID if Metal rejects the descriptor.
RDD::SamplerID RenderingDeviceDriverMetal::sampler_create(const SamplerState &p_state) {
	NS::SharedPtr<MTL::SamplerDescriptor> desc = NS::TransferPtr(MTL::SamplerDescriptor::alloc()->init());
	// Required so the sampler can be referenced from argument buffers.
	desc->setSupportArgumentBuffers(true);

	desc->setMagFilter(p_state.mag_filter == SAMPLER_FILTER_LINEAR ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest);
	desc->setMinFilter(p_state.min_filter == SAMPLER_FILTER_LINEAR ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest);
	desc->setMipFilter(p_state.mip_filter == SAMPLER_FILTER_LINEAR ? MTL::SamplerMipFilterLinear : MTL::SamplerMipFilterNearest);

	desc->setSAddressMode(ADDRESS_MODES[p_state.repeat_u]);
	desc->setTAddressMode(ADDRESS_MODES[p_state.repeat_v]);
	desc->setRAddressMode(ADDRESS_MODES[p_state.repeat_w]);

	if (p_state.use_anisotropy) {
		desc->setMaxAnisotropy(p_state.anisotropy_max);
	}

	desc->setCompareFunction(COMPARE_OPERATORS[p_state.compare_op]);

	desc->setLodMinClamp(p_state.min_lod);
	desc->setLodMaxClamp(p_state.max_lod);

	desc->setBorderColor(SAMPLER_BORDER_COLORS[p_state.border_color]);

	desc->setNormalizedCoordinates(!p_state.unnormalized_uvw);

	// setLodBias is only declared in the macOS 26 / iOS 26 era SDKs; guard at
	// compile time (SDK availability) and at runtime (OS availability).
#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 260000 || __IPHONE_OS_VERSION_MAX_ALLOWED >= 260000 || __TV_OS_VERSION_MAX_ALLOWED >= 260000 || __VISION_OS_VERSION_MAX_ALLOWED >= 260000
	if (p_state.lod_bias != 0.0) {
		if (__builtin_available(macOS 26.0, iOS 26.0, tvOS 26.0, visionOS 26.0, *)) {
			desc->setLodBias(p_state.lod_bias);
		}
	}
#endif

	MTL::SamplerState *obj = device->newSamplerState(desc.get());
	ERR_FAIL_NULL_V_MSG(obj, SamplerID(), "newSamplerState failed");
	return SamplerID(reinterpret_cast<uint64_t>(obj));
}
753
754
void RenderingDeviceDriverMetal::sampler_free(SamplerID p_sampler) {
	// Balance the ownership taken in sampler_create().
	reinterpret_cast<MTL::SamplerState *>(p_sampler.id)->release();
}
758
759
// Returns whether p_format can be sampled with the given filter mode.
// Nearest filtering is always supported; linear filtering depends on the
// format's capability flags reported by the pixel-format table.
bool RenderingDeviceDriverMetal::sampler_is_format_supported_for_filter(DataFormat p_format, SamplerFilter p_filter) {
	switch (p_filter) {
		case SAMPLER_FILTER_NEAREST:
			return true;
		case SAMPLER_FILTER_LINEAR: {
			MTLFmtCaps caps = pixel_formats->getCapabilities(p_format);
			return flags::any(caps, kMTLFmtCapsFilter);
		}
	}
	// Previously the function could fall off the end (undefined behavior for a
	// non-void function) if an out-of-range enum value was ever passed.
	return false;
}
769
770
#pragma mark - Vertex Array
771
772
// Builds an MTL::VertexDescriptor from the RDD vertex attributes and per-binding
// layout information. The returned ID owns a retained descriptor, released in
// vertex_format_free().
RDD::VertexFormatID RenderingDeviceDriverMetal::vertex_format_create(Span<VertexAttribute> p_vertex_attribs, const VertexAttributeBindingsMap &p_vertex_bindings) {
	MTL::VertexDescriptor *desc = MTL::VertexDescriptor::vertexDescriptor();

	// First pass: configure one buffer layout per binding.
	for (const VertexAttributeBindingsMap::KV &kv : p_vertex_bindings) {
		uint32_t idx = get_metal_buffer_index_for_vertex_attribute_binding(kv.key);
		MTL::VertexBufferLayoutDescriptor *ld = desc->layouts()->object(idx);
		if (kv.value.stride != 0) {
			ld->setStepFunction(kv.value.frequency == VERTEX_FREQUENCY_VERTEX ? MTL::VertexStepFunctionPerVertex : MTL::VertexStepFunctionPerInstance);
			ld->setStepRate(1);
			ld->setStride(kv.value.stride);
		} else {
			// Zero stride: the data is constant for every vertex/instance.
			ld->setStepFunction(MTL::VertexStepFunctionConstant);
			ld->setStepRate(0);
			ld->setStride(0);
		}
		// Sanity check that object(idx) returns the same live layout object,
		// i.e. the mutation above is visible through the descriptor.
		DEV_ASSERT(ld->stride() == desc->layouts()->object(idx)->stride());
	}

	// Second pass: configure each attribute and, for constant attributes,
	// grow the layout stride enough to cover the attribute's data.
	for (const VertexAttribute &vf : p_vertex_attribs) {
		MTL::VertexAttributeDescriptor *attr = desc->attributes()->object(vf.location);
		attr->setFormat((MTL::VertexFormat)pixel_formats->getMTLVertexFormat(vf.format));
		attr->setOffset(vf.offset);
		uint32_t idx = get_metal_buffer_index_for_vertex_attribute_binding(vf.binding);
		attr->setBufferIndex(idx);
		if (vf.stride == 0) {
			// Constant attribute, so we must determine the stride to satisfy Metal API.
			uint32_t stride = desc->layouts()->object(idx)->stride();
			desc->layouts()->object(idx)->setStride(std::max(stride, vf.offset + pixel_formats->getBytesPerBlock(vf.format)));
		}
	}

	// Retain so the ID keeps the descriptor alive until vertex_format_free().
	desc->retain();
	return VertexFormatID(reinterpret_cast<uint64_t>(desc));
}
806
807
void RenderingDeviceDriverMetal::vertex_format_free(VertexFormatID p_vertex_format) {
	// Balance the retain() performed in vertex_format_create().
	reinterpret_cast<MTL::VertexDescriptor *>(p_vertex_format.id)->release();
}
811
812
#pragma mark - Barriers
813
814
void RenderingDeviceDriverMetal::command_pipeline_barrier(
815
CommandBufferID p_cmd_buffer,
816
BitField<PipelineStageBits> p_src_stages,
817
BitField<PipelineStageBits> p_dst_stages,
818
VectorView<MemoryAccessBarrier> p_memory_barriers,
819
VectorView<BufferBarrier> p_buffer_barriers,
820
VectorView<TextureBarrier> p_texture_barriers,
821
VectorView<AccelerationStructureBarrier> p_acceleration_structure_barriers) {
822
MDCommandBufferBase *obj = (MDCommandBufferBase *)(p_cmd_buffer.id);
823
obj->pipeline_barrier(p_src_stages, p_dst_stages, p_memory_barriers, p_buffer_barriers, p_texture_barriers, p_acceleration_structure_barriers);
824
}
825
826
#pragma mark - Queues
827
828
RDD::CommandQueueFamilyID RenderingDeviceDriverMetal::command_queue_family_get(BitField<CommandQueueFamilyBits> p_cmd_queue_family_bits, RenderingContextDriver::SurfaceID p_surface) {
	// Guard-clause form: presenting to a surface always requires the
	// graphics-capable family; otherwise pick the most capable requested one.
	if (p_surface != 0 || p_cmd_queue_family_bits.has_flag(COMMAND_QUEUE_FAMILY_GRAPHICS_BIT)) {
		return CommandQueueFamilyID(COMMAND_QUEUE_FAMILY_GRAPHICS_BIT);
	}
	if (p_cmd_queue_family_bits.has_flag(COMMAND_QUEUE_FAMILY_COMPUTE_BIT)) {
		return CommandQueueFamilyID(COMMAND_QUEUE_FAMILY_COMPUTE_BIT);
	}
	if (p_cmd_queue_family_bits.has_flag(COMMAND_QUEUE_FAMILY_TRANSFER_BIT)) {
		return CommandQueueFamilyID(COMMAND_QUEUE_FAMILY_TRANSFER_BIT);
	}
	// No supported family requested.
	return CommandQueueFamilyID();
}
839
840
#pragma mark - Command Buffers
841
842
bool RenderingDeviceDriverMetal::command_buffer_begin(CommandBufferID p_cmd_buffer) {
	// Begin recording; this backend's begin() cannot fail, so always report success.
	reinterpret_cast<MDCommandBufferBase *>(p_cmd_buffer.id)->begin();
	return true;
}
847
848
// Secondary command buffers are not supported by this driver.
bool RenderingDeviceDriverMetal::command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) {
	ERR_FAIL_V_MSG(false, "not implemented");
}
851
852
void RenderingDeviceDriverMetal::command_buffer_end(CommandBufferID p_cmd_buffer) {
	// Finish recording on the wrapped command buffer.
	reinterpret_cast<MDCommandBufferBase *>(p_cmd_buffer.id)->end();
}
856
857
// Secondary command buffers are not supported by this driver.
void RenderingDeviceDriverMetal::command_buffer_execute_secondary(CommandBufferID p_cmd_buffer, VectorView<CommandBufferID> p_secondary_cmd_buffers) {
	ERR_FAIL_MSG("not implemented");
}
860
861
#pragma mark - Swap Chain
862
863
// Releases the swap chain's buffers; the SwapChain object itself is freed by
// the caller (swap_chain_free()).
void RenderingDeviceDriverMetal::_swap_chain_release(SwapChain *p_swap_chain) {
	_swap_chain_release_buffers(p_swap_chain);
}
866
867
void RenderingDeviceDriverMetal::_swap_chain_release_buffers(SwapChain *p_swap_chain) {
	// Intentionally empty: drawables appear to be owned and recycled by the
	// surface rather than the swap chain — TODO confirm. Kept as a hook for
	// symmetry with other rendering drivers.
}
869
870
// Creates an (initially empty) swap chain for p_surface along with the render
// pass used to draw into its framebuffers. The chain holds no buffers until
// swap_chain_resize() is called.
RDD::SwapChainID RenderingDeviceDriverMetal::swap_chain_create(RenderingContextDriver::SurfaceID p_surface) {
	RenderingContextDriverMetal::Surface const *surface = (RenderingContextDriverMetal::Surface *)(p_surface);
	if (use_barriers) {
		// Residency sets are only used on the explicit-barrier path; the
		// availability warning is suppressed because support was checked when
		// use_barriers was enabled.
		GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability")
		add_residency_set_to_main_queue(surface->get_residency_set());
		GODOT_CLANG_WARNING_POP
	}

	// Create the render pass that will be used to draw to the swap chain's framebuffers.
	RDD::Attachment attachment;
	attachment.format = pixel_formats->getDataFormat(surface->get_pixel_format());
	attachment.samples = RDD::TEXTURE_SAMPLES_1;
	attachment.load_op = RDD::ATTACHMENT_LOAD_OP_CLEAR;
	attachment.store_op = RDD::ATTACHMENT_STORE_OP_STORE;

	// Single subpass writing to color attachment 0.
	RDD::Subpass subpass;
	RDD::AttachmentReference color_ref;
	color_ref.attachment = 0;
	color_ref.aspect.set_flag(RDD::TEXTURE_ASPECT_COLOR_BIT);
	subpass.color_references.push_back(color_ref);

	RenderPassID render_pass = render_pass_create(attachment, subpass, {}, 1, RDD::AttachmentReference());
	ERR_FAIL_COND_V(!render_pass, SwapChainID());

	// Create the empty swap chain until it is resized.
	SwapChain *swap_chain = memnew(SwapChain);
	swap_chain->surface = p_surface;
	swap_chain->data_format = attachment.format;
	swap_chain->render_pass = render_pass;
	return SwapChainID(swap_chain);
}
901
902
// Resizes the swap chain to the desired number of framebuffers and clears the
// surface's pending-resize flag. The surface owns the drawables and performs
// the actual reallocation.
Error RenderingDeviceDriverMetal::swap_chain_resize(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, uint32_t p_desired_framebuffer_count) {
	DEV_ASSERT(p_cmd_queue.id != 0);
	DEV_ASSERT(p_swap_chain.id != 0);

	SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id);
	RenderingContextDriverMetal::Surface *surface = (RenderingContextDriverMetal::Surface *)(swap_chain->surface);
	surface->resize(p_desired_framebuffer_count);

	// Once everything's been created correctly, indicate the surface no longer needs to be resized.
	context_driver->surface_set_needs_resize(swap_chain->surface, false);

	return OK;
}
915
916
// Acquires the next framebuffer from the swap chain's surface. If the surface
// has a pending resize, no framebuffer is acquired and r_resize_required is
// set so the caller resizes first.
RDD::FramebufferID RenderingDeviceDriverMetal::swap_chain_acquire_framebuffer(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, bool &r_resize_required) {
	DEV_ASSERT(p_cmd_queue.id != 0);
	DEV_ASSERT(p_swap_chain.id != 0);

	SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id);
	if (context_driver->surface_get_needs_resize(swap_chain->surface)) {
		r_resize_required = true;
		return FramebufferID();
	}

	RenderingContextDriverMetal::Surface *metal_surface = (RenderingContextDriverMetal::Surface *)(swap_chain->surface);
	return metal_surface->acquire_next_frame_buffer();
}
929
930
RDD::RenderPassID RenderingDeviceDriverMetal::swap_chain_get_render_pass(SwapChainID p_swap_chain) {
	// Return the render pass created once in swap_chain_create().
	return ((const SwapChain *)(p_swap_chain.id))->render_pass;
}
934
935
RDD::DataFormat RenderingDeviceDriverMetal::swap_chain_get_format(SwapChainID p_swap_chain) {
	// Return the data format captured from the surface at creation time.
	return ((const SwapChain *)(p_swap_chain.id))->data_format;
}
939
940
void RenderingDeviceDriverMetal::swap_chain_set_max_fps(SwapChainID p_swap_chain, int p_max_fps) {
	// Forward the frame-rate cap to the surface implementation.
	const SwapChain *sc = (const SwapChain *)(p_swap_chain.id);
	((RenderingContextDriverMetal::Surface *)(sc->surface))->set_max_fps(p_max_fps);
}
945
946
// Destroys the swap chain: detaches its residency set (barrier path only),
// releases its buffers, frees the render pass created in swap_chain_create(),
// and finally deletes the SwapChain object.
void RenderingDeviceDriverMetal::swap_chain_free(SwapChainID p_swap_chain) {
	SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id);
	if (use_barriers) {
		// Availability checked when use_barriers was enabled; see swap_chain_create().
		GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability")
		RenderingContextDriverMetal::Surface *surface = (RenderingContextDriverMetal::Surface *)(swap_chain->surface);
		remove_residency_set_to_main_queue(surface->get_residency_set());
		GODOT_CLANG_WARNING_POP
	}
	_swap_chain_release(swap_chain);
	render_pass_free(swap_chain->render_pass);
	memdelete(swap_chain);
}
958
959
#pragma mark - Frame buffer
960
961
// Wraps the attachment textures of a render pass into an MDFrameBuffer.
// In DEV builds this validates that each attachment is non-null and that
// multisampled attachments match the pass's expected sample count.
RDD::FramebufferID RenderingDeviceDriverMetal::framebuffer_create(RenderPassID p_render_pass, VectorView<TextureID> p_attachments, uint32_t p_width, uint32_t p_height) {
	MDRenderPass *pass = (MDRenderPass *)(p_render_pass.id);

	Vector<MTL::Texture *> textures;
	textures.resize(p_attachments.size());

	for (uint32_t i = 0; i < p_attachments.size(); i += 1) {
		MDAttachment const &a = pass->attachments[i];
		MTL::Texture *tex = reinterpret_cast<MTL::Texture *>(p_attachments[i].id);
		if (tex == nullptr) {
#if DEV_ENABLED
			WARN_PRINT("Invalid texture for attachment " + itos(i));
#endif
		} else if (a.samples > 1 && tex->sampleCount() != a.samples) {
			// Previously this branch dereferenced `tex` even when it was null
			// (only warned above); the null check now guards the access.
#if DEV_ENABLED
			WARN_PRINT("Mismatched sample count for attachment " + itos(i) + "; expected " + itos(a.samples) + ", got " + itos(tex->sampleCount()));
#endif
		}
		textures.write[i] = tex;
	}

	MDFrameBuffer *fb = memnew(MDFrameBuffer(textures, Size2i(p_width, p_height)));
	return FramebufferID(fb);
}
988
989
void RenderingDeviceDriverMetal::framebuffer_free(FramebufferID p_framebuffer) {
	// Framebuffers are allocated with memnew in framebuffer_create(); match with memdelete.
	memdelete((MDFrameBuffer *)(p_framebuffer.id));
}
993
994
#pragma mark - Shader
995
996
void RenderingDeviceDriverMetal::shader_cache_free_entry(const SHA256Digest &key) {
997
if (ShaderCacheEntry **pentry = _shader_cache.getptr(key); pentry != nullptr) {
998
ShaderCacheEntry *entry = *pentry;
999
_shader_cache.erase(key);
1000
entry->library.reset();
1001
memdelete(entry);
1002
}
1003
}
1004
1005
// Compile-time approximation of layout compatibility: two types are accepted
// when they have equal size and alignment and are both trivially copyable.
// NOTE: this does NOT verify member-by-member layout equality — it only makes
// a memcpy between the two types well-defined at the byte level; the field
// ordering must still be kept in sync manually.
template <typename T, typename U>
struct is_layout_compatible
		: std::bool_constant<
				  sizeof(T) == sizeof(U) &&
				  alignof(T) == alignof(U) &&
				  std::is_trivially_copyable_v<T> &&
				  std::is_trivially_copyable_v<U>> {};
// Guards the memcpy calls in update_uniform_info().
static_assert(is_layout_compatible<UniformInfo::Indexes, RenderingShaderContainerMetal::UniformData::Indexes>::value, "UniformInfo::Indexes layout does not match RenderingShaderContainerMetal::UniformData::Indexes layout");
1013
1014
// Copies one serialized uniform's reflection data into the runtime
// UniformInfo, casting stored integer fields back to their Metal enum types.
API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0))
static void update_uniform_info(const RenderingShaderContainerMetal::UniformData &p_data, UniformInfo &r_ui) {
	r_ui.active_stages = p_data.active_stages;
	r_ui.dataType = static_cast<MTL::DataType>(p_data.data_type);
	// Raw copies are safe: the Indexes structs are checked for byte-level
	// compatibility by the static_assert above this function.
	memcpy(&r_ui.slot, &p_data.slot, sizeof(UniformInfo::Indexes));
	memcpy(&r_ui.arg_buffer, &p_data.arg_buffer, sizeof(UniformInfo::Indexes));
	r_ui.access = static_cast<MTL::BindingAccess>(p_data.access);
	r_ui.usage = static_cast<MTL::ResourceUsage>(p_data.usage);
	r_ui.textureType = static_cast<MTL::TextureType>(p_data.texture_type);
	r_ui.imageFormat = p_data.image_format;
	r_ui.arrayLength = p_data.array_length;
	r_ui.isMultisampled = p_data.is_multisampled;
}
1027
1028
// Creates an MDShader (compute or render) from a serialized Metal shader
// container: validates device compatibility, compiles or loads each stage's
// MSL source / metallib (with caching by stage hash), and rebuilds the
// reflection-derived uniform set layout and dynamic-offset layout.
RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref<RenderingShaderContainer> &p_shader_container, const Vector<ImmutableSampler> &p_immutable_samplers) {
	Ref<RenderingShaderContainerMetal> shader_container = p_shader_container;
	using RSCM = RenderingShaderContainerMetal;

	CharString shader_name = shader_container->shader_name;
	RSCM::HeaderData &mtl_reflection_data = shader_container->mtl_reflection_data;
	Vector<RenderingShaderContainer::Shader> &shaders = shader_container->shaders;
	Vector<RSCM::StageData> &mtl_shaders = shader_container->mtl_shaders;

	// We need to regenerate the shader if the cache is moved to an incompatible device or argument buffer support differs.
	ERR_FAIL_COND_V_MSG(!device_properties->features.argument_buffers_supported() && mtl_reflection_data.uses_argument_buffers(),
			RDD::ShaderID(),
			"Shader was compiled with argument buffers enabled, but this device does not support them");

	ERR_FAIL_COND_V_MSG(device_properties->features.msl_max_version < mtl_reflection_data.msl_version,
			RDD::ShaderID(),
			"Shader was compiled for a newer version of Metal");

	MTL::GPUFamily compiled_gpu_family = static_cast<MTL::GPUFamily>(mtl_reflection_data.profile.gpu);
	ERR_FAIL_COND_V_MSG(device_properties->features.highestFamily < compiled_gpu_family,
			RDD::ShaderID(),
			"Shader was generated for a newer Apple GPU");

	NS::SharedPtr<MTL::CompileOptions> options = NS::TransferPtr(MTL::CompileOptions::alloc()->init());
	// msl_version is encoded as major * 10000 + minor * 100.
	uint32_t major = mtl_reflection_data.msl_version / 10000;
	uint32_t minor = (mtl_reflection_data.msl_version / 100) % 100;
	// MTL::LanguageVersion packs the major version into the upper 16 bits.
	options->setLanguageVersion(MTL::LanguageVersion((major << 0x10) + minor));
	if (__builtin_available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, *)) {
		options->setEnableLogging(mtl_reflection_data.needs_debug_logging());
	}

	HashMap<RD::ShaderStage, std::shared_ptr<MDLibrary>> libraries;

	PipelineType pipeline_type = PIPELINE_TYPE_RASTERIZATION;
	Vector<uint8_t> decompressed_code;
	for (uint32_t shader_index = 0; shader_index < shaders.size(); shader_index++) {
		const RenderingShaderContainer::Shader &shader = shaders[shader_index];
		const RSCM::StageData &shader_data = mtl_shaders[shader_index];

		// Presence of a compute stage switches the whole shader to compute.
		if (shader.shader_stage == RD::ShaderStage::SHADER_STAGE_COMPUTE) {
			pipeline_type = PIPELINE_TYPE_COMPUTE;
		}

		// Reuse an existing library for identical stage code, if still alive.
		if (ShaderCacheEntry **p = _shader_cache.getptr(shader_data.hash); p != nullptr) {
			if (std::shared_ptr<MDLibrary> lib = (*p)->library.lock()) {
				libraries[shader.shader_stage] = lib;
				continue;
			}
			// Library was released; remove stale cache entry and recreate.
			_shader_cache.erase(shader_data.hash);
		}

		if (shader.code_decompressed_size > 0) {
			decompressed_code.resize(shader.code_decompressed_size);
			bool decompressed = shader_container->decompress_code(shader.code_compressed_bytes.ptr(), shader.code_compressed_bytes.size(), shader.code_compression_flags, decompressed_code.ptrw(), decompressed_code.size());
			ERR_FAIL_COND_V_MSG(!decompressed, RDD::ShaderID(), vformat("Failed to decompress code on shader stage %s.", String(RDD::SHADER_STAGE_NAMES[shader.shader_stage])));
		} else {
			// Stored uncompressed; use as-is.
			decompressed_code = shader.code_compressed_bytes;
		}

		ShaderCacheEntry *cd = memnew(ShaderCacheEntry(*this, shader_data.hash));
		cd->name = shader_name;
		cd->stage = shader.shader_stage;

		// The first source_size bytes of the payload are the MSL source text.
		NS::SharedPtr<NS::String> source = NS::TransferPtr(NS::String::alloc()->init((void *)decompressed_code.ptr(), shader_data.source_size, NS::UTF8StringEncoding));

		std::shared_ptr<MDLibrary> library;
		if (shader_data.library_size > 0) {
			// A precompiled metallib follows the source text in the payload.
			ERR_FAIL_COND_V_MSG(mtl_reflection_data.os_min_version > device_properties->os_version,
					RDD::ShaderID(),
					"Metal shader binary was generated for a newer target OS");
			dispatch_data_t binary = dispatch_data_create(decompressed_code.ptr() + shader_data.source_size, shader_data.library_size, dispatch_get_main_queue(), DISPATCH_DATA_DESTRUCTOR_DEFAULT);
			library = MDLibrary::create(cd, device,
#if DEV_ENABLED
					source.get(),
#endif
					binary);
		} else {
			// No precompiled binary; compile the MSL source now.
			options->setPreserveInvariance(shader_data.is_position_invariant);
#if __MAC_OS_X_VERSION_MIN_REQUIRED >= 150000 || __IPHONE_OS_VERSION_MIN_REQUIRED >= 180000 || __TV_OS_VERSION_MIN_REQUIRED >= 180000 || defined(VISIONOS_ENABLED)
			options->setMathMode(MTL::MathModeFast);
#else
			options->setFastMathEnabled(true);
#endif
			library = MDLibrary::create(cd, device, source.get(), options.get(), _shader_load_strategy);
		}

		_shader_cache[shader_data.hash] = cd;
		libraries[shader.shader_stage] = library;
	}

	ShaderReflection refl = shader_container->get_shader_reflection();
	RSCM::MetalShaderReflection mtl_refl = shader_container->get_metal_shader_reflection();

	Vector<UniformSet> uniform_sets;
	uint32_t uniform_sets_count = mtl_refl.uniform_sets.size();
	uniform_sets.resize(uniform_sets_count);

	DynamicOffsetLayout dynamic_offset_layout;
	uint8_t dynamic_offset = 0;

	// Create sets.
	for (uint32_t i = 0; i < uniform_sets_count; i++) {
		UniformSet &set = uniform_sets.write[i];
		const Vector<ShaderUniform> &refl_set = refl.uniform_sets.ptr()[i];
		const Vector<RSCM::UniformData> &mtl_set = mtl_refl.uniform_sets.ptr()[i];
		uint32_t set_size = mtl_set.size();
		set.uniforms.resize(set_size);

		uint8_t dynamic_count = 0;

		LocalVector<UniformInfo>::Iterator iter = set.uniforms.begin();
		for (uint32_t j = 0; j < set_size; j++) {
			const ShaderUniform &uniform = refl_set.ptr()[j];
			const RSCM::UniformData &bind = mtl_set.ptr()[j];

			// Record which uniforms in this set use dynamic offsets.
			switch (uniform.type) {
				case UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC:
				case UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: {
					set.dynamic_uniforms.push_back(j);
					dynamic_count++;
				} break;
				default: {
				} break;
			}

			UniformInfo &ui = *iter;
			++iter;
			update_uniform_info(bind, ui);
			ui.binding = uniform.binding;

			if (ui.arg_buffer.texture == UINT32_MAX && ui.arg_buffer.buffer == UINT32_MAX && ui.arg_buffer.sampler == UINT32_MAX) {
				// No bindings.
				continue;
			}
			// Track the highest argument-buffer slot used so the set's buffer
			// can be sized: one 64-bit entry per slot, arrays occupy
			// arrayLength consecutive slots.
#define VAL(x) (x == UINT32_MAX ? 0 : x)
			uint32_t max = std::max({ VAL(ui.arg_buffer.texture), VAL(ui.arg_buffer.buffer), VAL(ui.arg_buffer.sampler) });
			max += ui.arrayLength > 0 ? ui.arrayLength - 1 : 0;
			set.buffer_size = std::max(set.buffer_size, (max + 1) * (uint32_t)sizeof(uint64_t));
#undef VAL
		}

		if (dynamic_count > 0) {
			dynamic_offset_layout.set_offset_count(i, dynamic_offset, dynamic_count);
			dynamic_offset += dynamic_count;
		}
	}

	MDShader *shader = nullptr;
	if (pipeline_type == PIPELINE_TYPE_COMPUTE) {
		MDComputeShader *cs = new MDComputeShader(
				shader_name,
				uniform_sets,
				mtl_reflection_data.uses_argument_buffers(),
				libraries[RD::ShaderStage::SHADER_STAGE_COMPUTE]);

		cs->local = MTL::Size(refl.compute_local_size[0], refl.compute_local_size[1], refl.compute_local_size[2]);
		shader = cs;
	} else {
		MDRenderShader *rs = new MDRenderShader(
				shader_name,
				uniform_sets,
				mtl_reflection_data.needs_view_mask_buffer(),
				mtl_reflection_data.uses_argument_buffers(),
				libraries[RD::ShaderStage::SHADER_STAGE_VERTEX],
				libraries[RD::ShaderStage::SHADER_STAGE_FRAGMENT]);
		shader = rs;
	}

	shader->push_constants.stages = refl.push_constant_stages;
	shader->push_constants.size = refl.push_constant_size;
	shader->push_constants.binding = mtl_reflection_data.push_constant_binding;
	shader->dynamic_offset_layout = dynamic_offset_layout;

	return RDD::ShaderID(shader);
}
1204
1205
void RenderingDeviceDriverMetal::shader_free(ShaderID p_shader) {
	// MDShader instances are created with plain `new` in
	// shader_create_from_container(); match with `delete`.
	delete (MDShader *)p_shader.id;
}
1209
1210
void RenderingDeviceDriverMetal::shader_destroy_modules(ShaderID p_shader) {
	// TODO.
	// Currently a no-op for this backend; presumably intended to release
	// intermediate per-stage modules early — confirm before implementing.
}
1213
1214
/*********************/
1215
/**** UNIFORM SET ****/
1216
/*********************/
1217
1218
// Builds an MDUniformSet from the bound uniforms. When argument buffers are
// supported, the GPU resource handles (gpuResourceID / gpuAddress) are encoded
// into a CPU-side byte buffer which is either uploaded once to a private GPU
// buffer (static sets) or kept for bind-time patching (sets containing
// dynamic buffers). Per-resource usage is also collected so residency can be
// declared at bind time when explicit barriers are not in use.
RDD::UniformSetID RenderingDeviceDriverMetal::uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index, int p_linear_pool_index) {
	//p_linear_pool_index = -1; // TODO:? Linear pools not implemented or not supported by API backend.

	MDShader *shader = (MDShader *)(p_shader.id);
	ERR_FAIL_INDEX_V_MSG(p_set_index, shader->sets.size(), UniformSetID(), "Set index out of range");
	const UniformSet &shader_set = shader->sets.get(p_set_index);
	MDUniformSet *set = memnew(MDUniformSet);
	// Determine if there are any dynamic uniforms in this set.
	bool is_dynamic = !shader_set.dynamic_uniforms.is_empty();

	Vector<uint8_t> arg_buffer_data;

	if (device_properties->features.argument_buffers_supported()) {
		arg_buffer_data.resize(shader_set.buffer_size);

		// If argument buffers are enabled, we have already verified availability, so we can skip the runtime check.
		GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability-new")
		uint64_t *ptr = (uint64_t *)arg_buffer_data.ptrw();

		// Accumulates the combined per-stage usage of each bound resource.
		HashMap<MTL::Resource *, StageResourceUsage, HashMapHasherDefault> bound_resources;
		auto add_usage = [&bound_resources](MTL::Resource *res, BitField<RDD::ShaderStage> stage, MTL::ResourceUsage usage) {
			StageResourceUsage *sru = bound_resources.getptr(res);
			if (sru == nullptr) {
				sru = &bound_resources.insert(res, ResourceUnused)->value;
			}
			if (stage.has_flag(RDD::SHADER_STAGE_VERTEX_BIT)) {
				*sru |= stage_resource_usage(RDD::SHADER_STAGE_VERTEX, usage);
			}
			if (stage.has_flag(RDD::SHADER_STAGE_FRAGMENT_BIT)) {
				*sru |= stage_resource_usage(RDD::SHADER_STAGE_FRAGMENT, usage);
			}
			if (stage.has_flag(RDD::SHADER_STAGE_COMPUTE_BIT)) {
				*sru |= stage_resource_usage(RDD::SHADER_STAGE_COMPUTE, usage);
			}
		};
// Usage tracking is only needed when explicit barriers are disabled.
#define ADD_USAGE(res, stage, usage)  \
	if (!use_barriers) {              \
		add_usage(res, stage, usage); \
	}

		// Ensure the argument buffer exists for this set as some shader pipelines may
		// have been generated with argument buffers enabled.
		for (uint32_t i = 0; i < p_uniforms.size(); i += 1) {
			const BoundUniform &uniform = p_uniforms[i];
			const UniformInfo &ui = shader_set.uniforms[i];
			const UniformInfo::Indexes &idx = ui.arg_buffer;

			switch (uniform.type) {
				case UNIFORM_TYPE_SAMPLER: {
					size_t count = uniform.ids.size();
					for (size_t j = 0; j < count; j += 1) {
						MTL::SamplerState *sampler = reinterpret_cast<MTL::SamplerState *>(uniform.ids[j].id);
						*(MTL::ResourceID *)(ptr + idx.sampler + j) = sampler->gpuResourceID();
					}
				} break;
				case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {
					// ids holds interleaved (sampler, texture) pairs.
					uint32_t count = uniform.ids.size() / 2;
					for (uint32_t j = 0; j < count; j += 1) {
						MTL::SamplerState *sampler = reinterpret_cast<MTL::SamplerState *>(uniform.ids[j * 2 + 0].id);
						MTL::Texture *texture = reinterpret_cast<MTL::Texture *>(uniform.ids[j * 2 + 1].id);
						*(MTL::ResourceID *)(ptr + idx.texture + j) = texture->gpuResourceID();
						*(MTL::ResourceID *)(ptr + idx.sampler + j) = sampler->gpuResourceID();

						ADD_USAGE(texture, ui.active_stages, ui.usage);
					}
				} break;
				case UNIFORM_TYPE_TEXTURE: {
					size_t count = uniform.ids.size();
					for (size_t j = 0; j < count; j += 1) {
						MTL::Texture *texture = reinterpret_cast<MTL::Texture *>(uniform.ids[j].id);
						*(MTL::ResourceID *)(ptr + idx.texture + j) = texture->gpuResourceID();

						ADD_USAGE(texture, ui.active_stages, ui.usage);
					}
				} break;
				case UNIFORM_TYPE_IMAGE: {
					size_t count = uniform.ids.size();
					for (size_t j = 0; j < count; j += 1) {
						MTL::Texture *texture = reinterpret_cast<MTL::Texture *>(uniform.ids[j].id);
						*(MTL::ResourceID *)(ptr + idx.texture + j) = texture->gpuResourceID();
						ADD_USAGE(texture, ui.active_stages, ui.usage);

						if (idx.buffer != UINT32_MAX) {
							// Emulated atomic image access.
							MTL::Texture *parent = texture->parentTexture();
							MTL::Buffer *buffer = (parent ? parent : texture)->buffer();
							*(MTLGPUAddress *)(ptr + idx.buffer + j) = buffer->gpuAddress();

							ADD_USAGE(buffer, ui.active_stages, ui.usage);
						}
					}
				} break;
				case UNIFORM_TYPE_TEXTURE_BUFFER: {
					ERR_PRINT("not implemented: UNIFORM_TYPE_TEXTURE_BUFFER");
				} break;
				case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: {
					ERR_PRINT("not implemented: UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER");
				} break;
				case UNIFORM_TYPE_IMAGE_BUFFER: {
					CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER");
				} break;
				case UNIFORM_TYPE_STORAGE_BUFFER:
				case UNIFORM_TYPE_UNIFORM_BUFFER: {
					const BufferInfo *buffer = (const BufferInfo *)uniform.ids[0].id;
					*(MTLGPUAddress *)(ptr + idx.buffer) = buffer->metal_buffer.get()->gpuAddress();

					ADD_USAGE(buffer->metal_buffer.get(), ui.active_stages, ui.usage);
				} break;
				case UNIFORM_TYPE_INPUT_ATTACHMENT: {
					size_t count = uniform.ids.size();
					for (size_t j = 0; j < count; j += 1) {
						MTL::Texture *texture = reinterpret_cast<MTL::Texture *>(uniform.ids[j].id);
						*(MTL::ResourceID *)(ptr + idx.texture + j) = texture->gpuResourceID();

						ADD_USAGE(texture, ui.active_stages, ui.usage);
					}
				} break;
				case UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC:
				case UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {
					// Encode the base GPU address (frame 0); it will be updated at bind time.
					const MetalBufferDynamicInfo *buffer = (const MetalBufferDynamicInfo *)uniform.ids[0].id;
					*(MTLGPUAddress *)(ptr + idx.buffer) = buffer->metal_buffer.get()->gpuAddress();

					ADD_USAGE(buffer->metal_buffer.get(), ui.active_stages, ui.usage);
				} break;
				default: {
					DEV_ASSERT(false);
				}
			}
		}

#undef ADD_USAGE

		if (!use_barriers) {
			// Group resources by their usage flags, keeping each vector sorted
			// and deduplicated (sorted insert via bisect).
			for (KeyValue<MTL::Resource *, StageResourceUsage> const &keyval : bound_resources) {
				ResourceVector *resources = set->usage_to_resources.getptr(keyval.value);
				if (resources == nullptr) {
					resources = &set->usage_to_resources.insert(keyval.value, ResourceVector())->value;
				}
				int64_t pos = resources->span().bisect(keyval.key, true);
				if (pos == resources->size() || (*resources)[pos] != keyval.key) {
					resources->insert(pos, keyval.key);
				}
			}
		}

		if (!is_dynamic) {
			// Static set: upload the encoded handles once to a private GPU buffer.
			set->arg_buffer = NS::TransferPtr(device->newBuffer(shader_set.buffer_size, base_hazard_tracking | MTL::ResourceStorageModePrivate));
#if DEV_ENABLED
			char label[64];
			snprintf(label, sizeof(label), "Uniform Set %u", p_set_index);
			set->arg_buffer->setLabel(NS::String::string(label, NS::UTF8StringEncoding));
#endif
			_track_resource(set->arg_buffer.get());
			_copy_queue_copy_to_buffer(arg_buffer_data, set->arg_buffer.get());
		} else {
			// Store the arg buffer data for dynamic uniform sets.
			// It will be copied and updated at bind time.
			set->arg_buffer_data = arg_buffer_data;
		}

		GODOT_CLANG_WARNING_POP
	}
	// Keep a copy of the bound uniforms for bind-time processing.
	Vector<BoundUniform> bound_uniforms;
	bound_uniforms.resize(p_uniforms.size());
	for (uint32_t i = 0; i < p_uniforms.size(); i += 1) {
		bound_uniforms.write[i] = p_uniforms[i];
	}
	set->uniforms = bound_uniforms;

	return UniformSetID(set);
}
1390
1391
void RenderingDeviceDriverMetal::uniform_set_free(UniformSetID p_uniform_set) {
	MDUniformSet *set = (MDUniformSet *)p_uniform_set.id;
	// Static sets own a GPU argument buffer that was registered via
	// _track_resource() in uniform_set_create(); unregister it before the
	// owning NS::SharedPtr releases it.
	if (set->arg_buffer) {
		_untrack_resource(set->arg_buffer.get());
	}
	memdelete(set);
}
1398
1399
// Packs the current frame index of every dynamic buffer in the given sets into
// a single 32-bit mask, 4 bits per dynamic uniform, at the bit positions
// assigned by the shader's DynamicOffsetLayout. Returns 0 when the shader has
// no dynamic uniforms.
uint32_t RenderingDeviceDriverMetal::uniform_sets_get_dynamic_offsets(VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) const {
	const MDShader *shader = (const MDShader *)p_shader.id;
	const DynamicOffsetLayout layout = shader->dynamic_offset_layout;

	if (layout.is_empty()) {
		return 0u;
	}

	uint32_t mask = 0u;

	for (uint32_t i = 0; i < p_set_count; i++) {
		// `index` is the absolute set index; p_uniform_sets is indexed
		// relative to p_first_set_index.
		const uint32_t index = p_first_set_index + i;
		uint32_t shift = layout.get_offset_index_shift(index);
		const uint32_t count = layout.get_count(index);
		DEV_ASSERT(shader->sets[index].dynamic_uniforms.size() == count);
		if (count == 0) {
			continue;
		}

		const MDUniformSet *usi = (const MDUniformSet *)p_uniform_sets[i].id;
		for (uint32_t uniform_index : shader->sets[index].dynamic_uniforms) {
			const RDD::BoundUniform &uniform = usi->uniforms[uniform_index];
			DEV_ASSERT(uniform.is_dynamic());
			const MetalBufferDynamicInfo *buf_info = (const MetalBufferDynamicInfo *)uniform.ids[0].id;
			// Each dynamic uniform contributes a 4-bit frame index.
			mask |= buf_info->frame_index() << shift;
			shift += 4u;
		}
	}

	return mask;
}
1430
1431
void RenderingDeviceDriverMetal::command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
	// Intentionally a no-op: any per-set preparation in this backend happens
	// when the set is created or bound (see uniform_set_create()).
}
1433
1434
#pragma mark - Transfer
1435
1436
void RenderingDeviceDriverMetal::command_clear_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, uint64_t p_offset, uint64_t p_size) {
	// Forward to the Metal command buffer wrapper.
	reinterpret_cast<MDCommandBufferBase *>(p_cmd_buffer.id)->clear_buffer(p_buffer, p_offset, p_size);
}
1440
1441
void RenderingDeviceDriverMetal::command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_dst_buffer, VectorView<BufferCopyRegion> p_regions) {
	// Forward to the Metal command buffer wrapper.
	reinterpret_cast<MDCommandBufferBase *>(p_cmd_buffer.id)->copy_buffer(p_src_buffer, p_dst_buffer, p_regions);
}
1445
1446
void RenderingDeviceDriverMetal::command_copy_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<TextureCopyRegion> p_regions) {
	// Texture layout arguments are not used by this backend.
	reinterpret_cast<MDCommandBufferBase *>(p_cmd_buffer.id)->copy_texture(p_src_texture, p_dst_texture, p_regions);
}
1450
1451
void RenderingDeviceDriverMetal::command_resolve_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) {
	// Forward the multisample resolve to the command buffer wrapper.
	reinterpret_cast<MDCommandBufferBase *>(p_cmd_buffer.id)->resolve_texture(p_src_texture, p_src_texture_layout, p_src_layer, p_src_mipmap, p_dst_texture, p_dst_texture_layout, p_dst_layer, p_dst_mipmap);
}
1455
1456
void RenderingDeviceDriverMetal::command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) {
	// Forward to the Metal command buffer wrapper.
	reinterpret_cast<MDCommandBufferBase *>(p_cmd_buffer.id)->clear_color_texture(p_texture, p_texture_layout, p_color, p_subresources);
}
1460
1461
void RenderingDeviceDriverMetal::command_clear_depth_stencil_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, float p_depth, uint8_t p_stencil, const TextureSubresourceRange &p_subresources) {
	// Forward the depth/stencil clear to the command buffer wrapper.
	((MDCommandBufferBase *)(p_cmd_buffer.id))->clear_depth_stencil_texture(p_texture, p_texture_layout, p_depth, p_stencil, p_subresources);
}
1465
1466
void RenderingDeviceDriverMetal::command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<BufferTextureCopyRegion> p_regions) {
	// The destination layout is ignored (no layout transitions in Metal).
	((MDCommandBufferBase *)(p_cmd_buffer.id))->copy_buffer_to_texture(p_src_buffer, p_dst_texture, p_regions);
}
1470
1471
void RenderingDeviceDriverMetal::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView<BufferTextureCopyRegion> p_regions) {
	// The source layout is ignored (no layout transitions in Metal).
	((MDCommandBufferBase *)(p_cmd_buffer.id))->copy_texture_to_buffer(p_src_texture, p_dst_buffer, p_regions);
}
1475
1476
#pragma mark - Pipeline
1477
1478
void RenderingDeviceDriverMetal::pipeline_free(PipelineID p_pipeline_id) {
	// Pipelines are allocated with plain `new` (see render_pipeline_create),
	// so they are released with plain `delete` rather than memdelete.
	delete (MDPipeline *)(p_pipeline_id.id);
}
1482
1483
// ----- BINDING -----
1484
1485
void RenderingDeviceDriverMetal::command_bind_push_constants(CommandBufferID p_cmd_buffer, ShaderID p_shader, uint32_t p_dst_first_index, VectorView<uint32_t> p_data) {
	// p_dst_first_index is unused: the wrapper encodes the full push constant
	// block for the shader in one call.
	((MDCommandBufferBase *)(p_cmd_buffer.id))->encode_push_constant_data(p_shader, p_data);
}
1489
1490
// ----- CACHE -----
1491
1492
String RenderingDeviceDriverMetal::_pipeline_get_cache_path() const {
	// Builds a per-device cache file name under the user data directory:
	// "<user_data>/metal/pipelines.<device_name>[.editor].cache".
	const String device_suffix = context_device.name.validate_filename().replace_char(' ', '_').to_lower();
	const String editor_suffix = Engine::get_singleton()->is_editor_hint() ? ".editor" : "";
	return OS::get_singleton()->get_user_data_dir() + "/metal/pipelines." + device_suffix + editor_suffix + ".cache";
}
1502
1503
bool RenderingDeviceDriverMetal::pipeline_cache_create(const Vector<uint8_t> &p_data) {
	// Pipeline caching is currently disabled for the Metal driver; the
	// previous MTLBinaryArchive-based implementation is kept below for
	// reference until it is ported to metal-cpp.
	return false;
	// TODO: Convert to metal-cpp when pipeline caching is re-enabled
	// CharString path = _pipeline_get_cache_path().utf8();
	// NS::SharedPtr<MTL::BinaryArchiveDescriptor> desc = NS::TransferPtr(MTL::BinaryArchiveDescriptor::alloc()->init());
	// NS::Error *error = nullptr;
	// archive = NS::TransferPtr(device->newBinaryArchive(desc.get(), &error));
	// return true;
}
1512
1513
void RenderingDeviceDriverMetal::pipeline_cache_free() {
	// Dropping the reference releases the binary archive, if one was created.
	archive = nullptr;
}
1516
1517
size_t RenderingDeviceDriverMetal::pipeline_cache_query_size() {
	// Rough estimate only: Metal does not expose the serialized size of a
	// binary archive, so assume ~1 KiB per pipeline added to the archive.
	return archive_count * 1024;
}
1520
1521
Vector<uint8_t> RenderingDeviceDriverMetal::pipeline_cache_serialize() {
	// Serialization is currently a no-op; even when an archive exists, an
	// empty vector is returned. The MTLBinaryArchive serialization code is
	// kept below for the future metal-cpp port. Note that the archive is
	// serialized to a file path, not into the returned vector.
	if (!archive) {
		return Vector<uint8_t>();
	}

	// TODO: Convert to metal-cpp when pipeline caching is re-enabled
	// CharString path = _pipeline_get_cache_path().utf8();
	// NS::URL *target = NS::URL::fileURLWithPath(NS::String::string(path.get_data(), NS::UTF8StringEncoding));
	// NS::Error *error = nullptr;
	// if (archive->serializeToURL(target, &error)) {
	// return Vector<uint8_t>();
	// } else {
	// print_line(error->localizedDescription()->utf8String());
	// return Vector<uint8_t>();
	// }
	return Vector<uint8_t>();
}
1538
1539
#pragma mark - Rendering
1540
1541
// ----- SUBPASS -----
1542
1543
RDD::RenderPassID RenderingDeviceDriverMetal::render_pass_create(VectorView<Attachment> p_attachments, VectorView<Subpass> p_subpasses, VectorView<SubpassDependency> p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) {
	// Translates an RDD render pass description into the driver's MDRenderPass
	// representation. Subpass dependencies and the fragment density map
	// attachment are not consumed by this implementation.
	PixelFormats &pf = *pixel_formats;

	size_t subpass_count = p_subpasses.size();

	// Copy each subpass description, recording its index and the view count
	// (the same view count is applied to every subpass).
	Vector<MDSubpass> subpasses;
	subpasses.resize(subpass_count);
	for (uint32_t i = 0; i < subpass_count; i++) {
		MDSubpass &subpass = subpasses.write[i];
		subpass.subpass_index = i;
		subpass.view_count = p_view_count;
		subpass.input_references = p_subpasses[i].input_references;
		subpass.color_references = p_subpasses[i].color_references;
		subpass.depth_stencil_reference = p_subpasses[i].depth_stencil_reference;
		subpass.resolve_references = p_subpasses[i].resolve_references;
	}

	// Lookup tables mapping RDD load/store ops to Metal actions, indexed by
	// the RDD enum value (C designated array initializers — a compiler
	// extension in C++ that clang accepts).
	static const MTL::LoadAction LOAD_ACTIONS[] = {
		[ATTACHMENT_LOAD_OP_LOAD] = MTL::LoadActionLoad,
		[ATTACHMENT_LOAD_OP_CLEAR] = MTL::LoadActionClear,
		[ATTACHMENT_LOAD_OP_DONT_CARE] = MTL::LoadActionDontCare,
	};

	static const MTL::StoreAction STORE_ACTIONS[] = {
		[ATTACHMENT_STORE_OP_STORE] = MTL::StoreActionStore,
		[ATTACHMENT_STORE_OP_DONT_CARE] = MTL::StoreActionDontCare,
	};

	Vector<MDAttachment> attachments;
	attachments.resize(p_attachments.size());

	for (uint32_t i = 0; i < p_attachments.size(); i++) {
		Attachment const &a = p_attachments[i];
		MDAttachment &mda = attachments.write[i];
		MTL::PixelFormat format = pf.getMTLPixelFormat(a.format);
		mda.format = format;
		// Clamp the requested sample count to one the device supports; the
		// single-sample case keeps MDAttachment's default.
		if (a.samples > TEXTURE_SAMPLES_1) {
			mda.samples = (*device_properties).find_nearest_supported_sample_count(a.samples);
		}
		mda.loadAction = LOAD_ACTIONS[a.load_op];
		mda.storeAction = STORE_ACTIONS[a.store_op];
		// Classify the attachment from its pixel format: a format can be
		// depth, stencil, or both; anything else is treated as color.
		bool is_depth = pf.isDepthFormat(format);
		if (is_depth) {
			mda.type |= MDAttachmentType::Depth;
		}
		bool is_stencil = pf.isStencilFormat(format);
		if (is_stencil) {
			mda.type |= MDAttachmentType::Stencil;
			// Stencil gets its own load/store actions, independent of depth.
			mda.stencilLoadAction = LOAD_ACTIONS[a.stencil_load_op];
			mda.stencilStoreAction = STORE_ACTIONS[a.stencil_store_op];
		}
		if (!is_depth && !is_stencil) {
			mda.type |= MDAttachmentType::Color;
		}
	}
	// Ownership transfers to the returned ID; freed in render_pass_free.
	MDRenderPass *obj = memnew(MDRenderPass(attachments, subpasses));
	return RenderPassID(obj);
}
1601
1602
void RenderingDeviceDriverMetal::render_pass_free(RenderPassID p_render_pass) {
	// Render passes are allocated with memnew in render_pass_create.
	memdelete((MDRenderPass *)(p_render_pass.id));
}
1606
1607
// ----- COMMANDS -----
1608
1609
void RenderingDeviceDriverMetal::command_begin_render_pass(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, FramebufferID p_framebuffer, CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView<RenderPassClearValue> p_clear_values) {
	// Forward the pass start to the command buffer wrapper.
	((MDCommandBufferBase *)(p_cmd_buffer.id))->render_begin_pass(p_render_pass, p_framebuffer, p_cmd_buffer_type, p_rect, p_clear_values);
}
1613
1614
void RenderingDeviceDriverMetal::command_end_render_pass(CommandBufferID p_cmd_buffer) {
	// Forward the pass end to the command buffer wrapper.
	((MDCommandBufferBase *)(p_cmd_buffer.id))->render_end_pass();
}
1618
1619
void RenderingDeviceDriverMetal::command_next_render_subpass(CommandBufferID p_cmd_buffer, CommandBufferType p_cmd_buffer_type) {
	// p_cmd_buffer_type is unused; the wrapper advances to the next subpass.
	((MDCommandBufferBase *)(p_cmd_buffer.id))->render_next_subpass();
}
1623
1624
void RenderingDeviceDriverMetal::command_render_set_viewport(CommandBufferID p_cmd_buffer, VectorView<Rect2i> p_viewports) {
	// Forward the viewport set to the command buffer wrapper.
	((MDCommandBufferBase *)(p_cmd_buffer.id))->render_set_viewport(p_viewports);
}
1628
1629
void RenderingDeviceDriverMetal::command_render_set_scissor(CommandBufferID p_cmd_buffer, VectorView<Rect2i> p_scissors) {
	// Forward the scissor set to the command buffer wrapper.
	((MDCommandBufferBase *)(p_cmd_buffer.id))->render_set_scissor(p_scissors);
}
1633
1634
void RenderingDeviceDriverMetal::command_render_clear_attachments(CommandBufferID p_cmd_buffer, VectorView<AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects) {
	// Forward the attachment clears to the command buffer wrapper.
	((MDCommandBufferBase *)(p_cmd_buffer.id))->render_clear_attachments(p_attachment_clears, p_rects);
}
1638
1639
void RenderingDeviceDriverMetal::command_bind_render_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) {
	// The wrapper's bind_pipeline handles both render and compute pipelines.
	((MDCommandBufferBase *)(p_cmd_buffer.id))->bind_pipeline(p_pipeline);
}
1643
1644
void RenderingDeviceDriverMetal::command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) {
	// Forward the uniform set bindings to the render encoder state.
	((MDCommandBufferBase *)(p_cmd_buffer.id))->render_bind_uniform_sets(p_uniform_sets, p_shader, p_first_set_index, p_set_count, p_dynamic_offsets);
}
1648
1649
void RenderingDeviceDriverMetal::command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) {
	// Forward the non-indexed draw to the command buffer wrapper.
	((MDCommandBufferBase *)(p_cmd_buffer.id))->render_draw(p_vertex_count, p_instance_count, p_base_vertex, p_first_instance);
}
1653
1654
void RenderingDeviceDriverMetal::command_render_draw_indexed(CommandBufferID p_cmd_buffer, uint32_t p_index_count, uint32_t p_instance_count, uint32_t p_first_index, int32_t p_vertex_offset, uint32_t p_first_instance) {
	// Forward the indexed draw to the command buffer wrapper.
	((MDCommandBufferBase *)(p_cmd_buffer.id))->render_draw_indexed(p_index_count, p_instance_count, p_first_index, p_vertex_offset, p_first_instance);
}
1658
1659
void RenderingDeviceDriverMetal::command_render_draw_indexed_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {
	// Forward the indirect indexed draw to the command buffer wrapper.
	((MDCommandBufferBase *)(p_cmd_buffer.id))->render_draw_indexed_indirect(p_indirect_buffer, p_offset, p_draw_count, p_stride);
}
1663
1664
void RenderingDeviceDriverMetal::command_render_draw_indexed_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) {
	// Forward the count-buffer variant of the indirect indexed draw.
	((MDCommandBufferBase *)(p_cmd_buffer.id))->render_draw_indexed_indirect_count(p_indirect_buffer, p_offset, p_count_buffer, p_count_buffer_offset, p_max_draw_count, p_stride);
}
1668
1669
void RenderingDeviceDriverMetal::command_render_draw_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {
	// Forward the indirect draw to the command buffer wrapper.
	((MDCommandBufferBase *)(p_cmd_buffer.id))->render_draw_indirect(p_indirect_buffer, p_offset, p_draw_count, p_stride);
}
1673
1674
void RenderingDeviceDriverMetal::command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) {
	// Forward the count-buffer variant of the indirect draw.
	((MDCommandBufferBase *)(p_cmd_buffer.id))->render_draw_indirect_count(p_indirect_buffer, p_offset, p_count_buffer, p_count_buffer_offset, p_max_draw_count, p_stride);
}
1678
1679
void RenderingDeviceDriverMetal::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) {
	// Forward the vertex buffer bindings to the command buffer wrapper.
	((MDCommandBufferBase *)(p_cmd_buffer.id))->render_bind_vertex_buffers(p_binding_count, p_buffers, p_offsets, p_dynamic_offsets);
}
1683
1684
void RenderingDeviceDriverMetal::command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) {
	// Forward the index buffer binding to the command buffer wrapper.
	((MDCommandBufferBase *)(p_cmd_buffer.id))->render_bind_index_buffer(p_buffer, p_format, p_offset);
}
1688
1689
void RenderingDeviceDriverMetal::command_render_set_blend_constants(CommandBufferID p_cmd_buffer, const Color &p_constants) {
	// Forward the blend constant update to the command buffer wrapper.
	((MDCommandBufferBase *)(p_cmd_buffer.id))->render_set_blend_constants(p_constants);
}
1693
1694
void RenderingDeviceDriverMetal::command_render_set_line_width(CommandBufferID p_cmd_buffer, float p_width) {
	// Metal has no equivalent of vkCmdSetLineWidth: lines are always rendered
	// one pixel wide. Only the default width of 1.0 can be honored; anything
	// else is reported as an error. Uses the idiomatic ERR_FAIL_COND_MSG
	// instead of a manual if + ERR_FAIL_MSG.
	ERR_FAIL_COND_MSG(!Math::is_equal_approx(p_width, 1.0f), "Setting line widths other than 1.0 is not supported by the Metal rendering driver.");
}
1699
1700
// ----- PIPELINE -----
1701
1702
RenderingDeviceDriverMetal::Result<NS::SharedPtr<MTL::Function>> RenderingDeviceDriverMetal::_create_function(MDLibrary *p_library, NS::String *p_name, VectorView<PipelineSpecializationConstant> &p_specialization_constants) {
	// Fetches the entry point p_name from the compiled Metal library. If the
	// function declares Metal function constants, a specialized variant is
	// created with the supplied specialization constant values; otherwise the
	// plain function is returned as-is. Returns either the function or an
	// Error in the Result variant.
	MTL::Library *library = p_library->get_library();
	if (!library) {
		ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Failed to compile Metal library");
	}

	MTL::Function *function = library->newFunction(p_name);
	ERR_FAIL_NULL_V_MSG(function, ERR_CANT_CREATE, "No function named main0");

	// Fast path: no function constants means nothing to specialize.
	NS::Dictionary *constants_dict = function->functionConstantsDictionary();
	if (constants_dict->count() == 0) {
		return NS::TransferPtr(function);
	}

	// Collect the constants declared by the function. Dictionary enumeration
	// order is unspecified, so they may need sorting by index below.
	LocalVector<MTL::FunctionConstant *> constants;
	NS::Enumerator<NS::String> *keys = constants_dict->keyEnumerator<NS::String>();
	while (NS::String *key = keys->nextObject()) {
		constants.push_back(constants_dict->object<MTL::FunctionConstant>(key));
	}

	// Check if already sorted by index.
	bool is_sorted = true;
	for (NS::UInteger i = 1; i < constants.size(); i++) {
		MTL::FunctionConstant *prev = constants[i - 1];
		MTL::FunctionConstant *curr = constants[i];
		if (prev->index() > curr->index()) {
			is_sorted = false;
			break;
		}
	}

	if (!is_sorted) {
		struct Comparator {
			bool operator()(const MTL::FunctionConstant *p, const MTL::FunctionConstant *q) const {
				return p->index() < q->index();
			}
		};

		constants.sort_custom<Comparator>();
	}

	// Build a sorted list of specialization constants by constant_id.
	// Sorting an index array avoids copying the constants themselves.
	uint32_t *indexes = (uint32_t *)alloca(p_specialization_constants.size() * sizeof(uint32_t));
	for (uint32_t i = 0; i < p_specialization_constants.size(); i++) {
		indexes[i] = i;
	}
	std::sort(indexes, &indexes[p_specialization_constants.size()], [&](int a, int b) {
		return p_specialization_constants[a].constant_id < p_specialization_constants[b].constant_id;
	});

	NS::SharedPtr<MTL::FunctionConstantValues> constantValues = NS::TransferPtr(MTL::FunctionConstantValues::alloc()->init());

	// Merge the sorted constants from the function with the sorted user constants.
	// Two-pointer merge: only constants present in BOTH lists receive a value.
	NS::UInteger i = 0;
	uint32_t j = 0;
	while (i < constants.size() && j < p_specialization_constants.size()) {
		MTL::FunctionConstant *curr = (MTL::FunctionConstant *)constants[i];
		PipelineSpecializationConstant const &sc = p_specialization_constants[indexes[j]];
		if (curr->index() == sc.constant_id) {
			switch (curr->type()) {
				case MTL::DataTypeBool:
				case MTL::DataTypeFloat:
				case MTL::DataTypeInt:
				case MTL::DataTypeUInt: {
					// NOTE(review): one pointer (&sc.int_value) is passed for all
					// four scalar types — presumably int/float/bool share storage
					// in PipelineSpecializationConstant; confirm against its
					// declaration.
					constantValues->setConstantValue(&sc.int_value, curr->type(), sc.constant_id);
				} break;
				default:
					ERR_FAIL_V_MSG(NS::TransferPtr(function), "Invalid specialization constant type");
			}
			i++;
			j++;
		} else if (curr->index() < sc.constant_id) {
			i++;
		} else {
			j++;
		}
	}

	// Handle R32UI_ALIGNMENT_CONSTANT_ID if present.
	if (i < constants.size()) {
		MTL::FunctionConstant *curr = constants[i];
		if (curr->index() == R32UI_ALIGNMENT_CONSTANT_ID) {
			uint32_t alignment = 16; // TODO(sgc): is this always correct?
			constantValues->setConstantValue(&alignment, curr->type(), curr->index());
			i++;
		}
	}

	// Release the unspecialized function and create the specialized variant.
	NS::Error *err = nullptr;
	function->release();
	function = library->newFunction(p_name, constantValues.get(), &err);
	ERR_FAIL_NULL_V_MSG(function, ERR_CANT_CREATE, String("specialized function failed: ") + (err ? err->localizedDescription()->utf8String() : "unknown error"));

	return NS::TransferPtr(function);
}
1797
1798
// Compile-time checks that the RDD enum values below are numerically identical
// to their Metal counterparts, so they can be cast directly without lookup
// tables.

// RDD::PolygonCullMode == MTL::CullMode.
static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_DISABLED, MTL::CullModeNone));
static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_FRONT, MTL::CullModeFront));
static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_BACK, MTL::CullModeBack));

// RDD::StencilOperation == MTL::StencilOperation.
static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_KEEP, MTL::StencilOperationKeep));
static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_ZERO, MTL::StencilOperationZero));
static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_REPLACE, MTL::StencilOperationReplace));
static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INCREMENT_AND_CLAMP, MTL::StencilOperationIncrementClamp));
static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_DECREMENT_AND_CLAMP, MTL::StencilOperationDecrementClamp));
static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INVERT, MTL::StencilOperationInvert));
static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INCREMENT_AND_WRAP, MTL::StencilOperationIncrementWrap));
static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_DECREMENT_AND_WRAP, MTL::StencilOperationDecrementWrap));

// RDD::BlendOperation == MTL::BlendOperation.
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_ADD, MTL::BlendOperationAdd));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_SUBTRACT, MTL::BlendOperationSubtract));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_REVERSE_SUBTRACT, MTL::BlendOperationReverseSubtract));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_MINIMUM, MTL::BlendOperationMin));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_MAXIMUM, MTL::BlendOperationMax));
1819
1820
RDD::PipelineID RenderingDeviceDriverMetal::render_pipeline_create(
		ShaderID p_shader,
		VertexFormatID p_vertex_format,
		RenderPrimitive p_render_primitive,
		PipelineRasterizationState p_rasterization_state,
		PipelineMultisampleState p_multisample_state,
		PipelineDepthStencilState p_depth_stencil_state,
		PipelineColorBlendState p_blend_state,
		VectorView<int32_t> p_color_attachments,
		BitField<PipelineDynamicStateFlags> p_dynamic_state,
		RenderPassID p_render_pass,
		uint32_t p_render_subpass,
		VectorView<PipelineSpecializationConstant> p_specialization_constants) {
	// Builds a Metal render pipeline state plus the fixed-function state that
	// Metal applies at encode time (cull mode, depth bias, stencil reference,
	// etc., stored in pipeline->raster_state). Pipeline state that Metal bakes
	// into the PSO (blend, attachment formats, sample count) is set on the
	// MTLRenderPipelineDescriptor; everything else is replayed per draw.
	MDRenderShader *shader = (MDRenderShader *)(p_shader.id);
	MTL::VertexDescriptor *vert_desc = reinterpret_cast<MTL::VertexDescriptor *>(p_vertex_format.id);
	MDRenderPass *pass = (MDRenderPass *)(p_render_pass.id);

	// Signpost interval for profiling pipeline creation in Instruments.
	os_signpost_id_t reflect_id = os_signpost_id_make_with_pointer(LOG_INTERVALS, shader);
	os_signpost_interval_begin(LOG_INTERVALS, reflect_id, "render_pipeline_create", "shader_name=%{public}s", shader->name.get_data());
	DEFER([=]() {
		os_signpost_interval_end(LOG_INTERVALS, reflect_id, "render_pipeline_create");
	});

	os_signpost_event_emit(LOG_DRIVER, OS_SIGNPOST_ID_EXCLUSIVE, "create_pipeline");

	NS::SharedPtr<MTL::RenderPipelineDescriptor> desc = NS::TransferPtr(MTL::RenderPipelineDescriptor::alloc()->init());

	// Attachment pixel formats come from the subpass this pipeline targets.
	{
		MDSubpass const &subpass = pass->subpasses[p_render_subpass];
		for (uint32_t i = 0; i < subpass.color_references.size(); i++) {
			uint32_t attachment = subpass.color_references[i].attachment;
			if (attachment != AttachmentReference::UNUSED) {
				MDAttachment const &a = pass->attachments[attachment];
				desc->colorAttachments()->object(i)->setPixelFormat(a.format);
			}
		}

		if (subpass.depth_stencil_reference.attachment != AttachmentReference::UNUSED) {
			uint32_t attachment = subpass.depth_stencil_reference.attachment;
			MDAttachment const &a = pass->attachments[attachment];

			if (a.type & MDAttachmentType::Depth) {
				desc->setDepthAttachmentPixelFormat(a.format);
			}

			if (a.type & MDAttachmentType::Stencil) {
				desc->setStencilAttachmentPixelFormat(a.format);
			}
		}
	}

	desc->setVertexDescriptor(vert_desc);
	desc->setLabel(conv::to_nsstring(shader->name));

	if (shader->uses_argument_buffers) {
		// Set mutability of argument buffers.
		// Sets with no dynamic uniforms never change after encoding, which
		// lets Metal skip defensive copies.
		for (uint32_t i = 0; i < shader->sets.size(); i++) {
			const UniformSet &set = shader->sets[i];
			const MTL::Mutability mutability = set.dynamic_uniforms.is_empty() ? MTL::MutabilityImmutable : MTL::MutabilityMutable;
			desc->vertexBuffers()->object(i)->setMutability(mutability);
			desc->fragmentBuffers()->object(i)->setMutability(mutability);
		}
	}

	// Input assembly & tessellation.

	// NOTE(review): pipeline is allocated with plain `new` and every
	// ERR_FAIL_* below returns without freeing it, leaking the object on
	// error paths — consider releasing it before those early returns.
	MDRenderPipeline *pipeline = new MDRenderPipeline();

	// First switch: the PSO's topology *class* (point/line/triangle), which
	// Metal needs at pipeline build time.
	switch (p_render_primitive) {
		case RENDER_PRIMITIVE_POINTS:
			desc->setInputPrimitiveTopology(MTL::PrimitiveTopologyClassPoint);
			break;
		case RENDER_PRIMITIVE_LINES:
		case RENDER_PRIMITIVE_LINES_WITH_ADJACENCY:
		case RENDER_PRIMITIVE_LINESTRIPS_WITH_ADJACENCY:
		case RENDER_PRIMITIVE_LINESTRIPS:
			desc->setInputPrimitiveTopology(MTL::PrimitiveTopologyClassLine);
			break;
		case RENDER_PRIMITIVE_TRIANGLES:
		case RENDER_PRIMITIVE_TRIANGLE_STRIPS:
		case RENDER_PRIMITIVE_TRIANGLES_WITH_ADJACENCY:
		case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_AJACENCY:
		case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_RESTART_INDEX:
			desc->setInputPrimitiveTopology(MTL::PrimitiveTopologyClassTriangle);
			break;
		case RENDER_PRIMITIVE_TESSELATION_PATCH:
			desc->setMaxTessellationFactor(p_rasterization_state.patch_control_points);
			desc->setTessellationPartitionMode(MTL::TessellationPartitionModeInteger);
			ERR_FAIL_V_MSG(PipelineID(), "tessellation not implemented");
			break;
		case RENDER_PRIMITIVE_MAX:
		default:
			desc->setInputPrimitiveTopology(MTL::PrimitiveTopologyClassUnspecified);
			break;
	}

	// Second switch: the exact primitive *type* used when encoding draw
	// calls (adjacency variants collapse to their base type).
	switch (p_render_primitive) {
		case RENDER_PRIMITIVE_POINTS:
			pipeline->raster_state.render_primitive = MTL::PrimitiveTypePoint;
			break;
		case RENDER_PRIMITIVE_LINES:
		case RENDER_PRIMITIVE_LINES_WITH_ADJACENCY:
			pipeline->raster_state.render_primitive = MTL::PrimitiveTypeLine;
			break;
		case RENDER_PRIMITIVE_LINESTRIPS:
		case RENDER_PRIMITIVE_LINESTRIPS_WITH_ADJACENCY:
			pipeline->raster_state.render_primitive = MTL::PrimitiveTypeLineStrip;
			break;
		case RENDER_PRIMITIVE_TRIANGLES:
		case RENDER_PRIMITIVE_TRIANGLES_WITH_ADJACENCY:
			pipeline->raster_state.render_primitive = MTL::PrimitiveTypeTriangle;
			break;
		case RENDER_PRIMITIVE_TRIANGLE_STRIPS:
		case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_AJACENCY:
		case RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_RESTART_INDEX:
			pipeline->raster_state.render_primitive = MTL::PrimitiveTypeTriangleStrip;
			break;
		default:
			break;
	}

	// Rasterization.
	desc->setRasterizationEnabled(!p_rasterization_state.discard_primitives);
	pipeline->raster_state.clip_mode = p_rasterization_state.enable_depth_clamp ? MTL::DepthClipModeClamp : MTL::DepthClipModeClip;
	pipeline->raster_state.fill_mode = p_rasterization_state.wireframe ? MTL::TriangleFillModeLines : MTL::TriangleFillModeFill;

	// Indexed by RDD::PolygonCullMode (values asserted equal to MTL::CullMode
	// above, so this table is belt-and-braces).
	static const MTL::CullMode CULL_MODE[3] = {
		MTL::CullModeNone,
		MTL::CullModeFront,
		MTL::CullModeBack,
	};
	pipeline->raster_state.cull_mode = CULL_MODE[p_rasterization_state.cull_mode];
	pipeline->raster_state.winding = (p_rasterization_state.front_face == POLYGON_FRONT_FACE_CLOCKWISE) ? MTL::WindingClockwise : MTL::WindingCounterClockwise;
	pipeline->raster_state.depth_bias.enabled = p_rasterization_state.depth_bias_enabled;
	pipeline->raster_state.depth_bias.depth_bias = p_rasterization_state.depth_bias_constant_factor;
	pipeline->raster_state.depth_bias.slope_scale = p_rasterization_state.depth_bias_slope_factor;
	pipeline->raster_state.depth_bias.clamp = p_rasterization_state.depth_bias_clamp;
	// In Metal there is no line width.
	if (!Math::is_equal_approx(p_rasterization_state.line_width, 1.0f)) {
		WARN_PRINT("unsupported: line width");
	}

	// Multisample.
	if (p_multisample_state.enable_sample_shading) {
		WARN_PRINT("unsupported: multi-sample shading");
	}

	// Clamp to a sample count the device supports; 1 keeps the default.
	if (p_multisample_state.sample_count > TEXTURE_SAMPLES_1) {
		pipeline->sample_count = (*device_properties).find_nearest_supported_sample_count(p_multisample_state.sample_count);
	}
	desc->setRasterSampleCount(static_cast<NS::UInteger>(pipeline->sample_count));
	desc->setAlphaToCoverageEnabled(p_multisample_state.enable_alpha_to_coverage);
	desc->setAlphaToOneEnabled(p_multisample_state.enable_alpha_to_one);

	// Depth buffer.
	// Depth/stencil testing is only honored when the subpass actually has a
	// matching attachment format set above.
	bool depth_enabled = p_depth_stencil_state.enable_depth_test && desc->depthAttachmentPixelFormat() != MTL::PixelFormatInvalid;
	bool stencil_enabled = p_depth_stencil_state.enable_stencil && desc->stencilAttachmentPixelFormat() != MTL::PixelFormatInvalid;

	if (depth_enabled || stencil_enabled) {
		NS::SharedPtr<MTL::DepthStencilDescriptor> ds_desc = NS::TransferPtr(MTL::DepthStencilDescriptor::alloc()->init());

		pipeline->raster_state.depth_test.enabled = depth_enabled;
		ds_desc->setDepthWriteEnabled(p_depth_stencil_state.enable_depth_write);
		ds_desc->setDepthCompareFunction(COMPARE_OPERATORS[p_depth_stencil_state.depth_compare_operator]);
		if (p_depth_stencil_state.enable_depth_range) {
			WARN_PRINT("unsupported: depth range");
		}

		if (stencil_enabled) {
			pipeline->raster_state.stencil.enabled = true;
			// Reference values are applied at encode time, not baked into
			// the depth-stencil state object.
			pipeline->raster_state.stencil.front_reference = p_depth_stencil_state.front_op.reference;
			pipeline->raster_state.stencil.back_reference = p_depth_stencil_state.back_op.reference;

			{
				// Front.
				NS::SharedPtr<MTL::StencilDescriptor> sd = NS::TransferPtr(MTL::StencilDescriptor::alloc()->init());
				sd->setStencilFailureOperation(STENCIL_OPERATIONS[p_depth_stencil_state.front_op.fail]);
				sd->setDepthStencilPassOperation(STENCIL_OPERATIONS[p_depth_stencil_state.front_op.pass]);
				sd->setDepthFailureOperation(STENCIL_OPERATIONS[p_depth_stencil_state.front_op.depth_fail]);
				sd->setStencilCompareFunction(COMPARE_OPERATORS[p_depth_stencil_state.front_op.compare]);
				sd->setReadMask(p_depth_stencil_state.front_op.compare_mask);
				sd->setWriteMask(p_depth_stencil_state.front_op.write_mask);
				ds_desc->setFrontFaceStencil(sd.get());
			}
			{
				// Back.
				NS::SharedPtr<MTL::StencilDescriptor> sd = NS::TransferPtr(MTL::StencilDescriptor::alloc()->init());
				sd->setStencilFailureOperation(STENCIL_OPERATIONS[p_depth_stencil_state.back_op.fail]);
				sd->setDepthStencilPassOperation(STENCIL_OPERATIONS[p_depth_stencil_state.back_op.pass]);
				sd->setDepthFailureOperation(STENCIL_OPERATIONS[p_depth_stencil_state.back_op.depth_fail]);
				sd->setStencilCompareFunction(COMPARE_OPERATORS[p_depth_stencil_state.back_op.compare]);
				sd->setReadMask(p_depth_stencil_state.back_op.compare_mask);
				sd->setWriteMask(p_depth_stencil_state.back_op.write_mask);
				ds_desc->setBackFaceStencil(sd.get());
			}
		}

		pipeline->depth_stencil = NS::TransferPtr(device->newDepthStencilState(ds_desc.get()));
		ERR_FAIL_COND_V_MSG(!pipeline->depth_stencil, PipelineID(), "Failed to create depth stencil state");
	} else {
		// TODO(sgc): FB13671991 raised as Apple docs state calling setDepthStencilState:nil is valid, but currently generates an exception
		pipeline->depth_stencil = NS::RetainPtr(get_resource_cache().get_depth_stencil_state(false, false));
	}

	// Blend state.
	{
		for (uint32_t i = 0; i < p_color_attachments.size(); i++) {
			if (p_color_attachments[i] == ATTACHMENT_UNUSED) {
				continue;
			}

			const PipelineColorBlendState::Attachment &bs = p_blend_state.attachments[i];

			// p_color_attachments[i] remaps the blend state slot to the
			// descriptor's color attachment index.
			MTL::RenderPipelineColorAttachmentDescriptor *ca_desc = desc->colorAttachments()->object(p_color_attachments[i]);
			ca_desc->setBlendingEnabled(bs.enable_blend);

			ca_desc->setSourceRGBBlendFactor(BLEND_FACTORS[bs.src_color_blend_factor]);
			ca_desc->setDestinationRGBBlendFactor(BLEND_FACTORS[bs.dst_color_blend_factor]);
			ca_desc->setRgbBlendOperation(BLEND_OPERATIONS[bs.color_blend_op]);

			ca_desc->setSourceAlphaBlendFactor(BLEND_FACTORS[bs.src_alpha_blend_factor]);
			ca_desc->setDestinationAlphaBlendFactor(BLEND_FACTORS[bs.dst_alpha_blend_factor]);
			ca_desc->setAlphaBlendOperation(BLEND_OPERATIONS[bs.alpha_blend_op]);

			MTL::ColorWriteMask writeMask = MTL::ColorWriteMaskNone;
			if (bs.write_r) {
				writeMask |= MTL::ColorWriteMaskRed;
			}
			if (bs.write_g) {
				writeMask |= MTL::ColorWriteMaskGreen;
			}
			if (bs.write_b) {
				writeMask |= MTL::ColorWriteMaskBlue;
			}
			if (bs.write_a) {
				writeMask |= MTL::ColorWriteMaskAlpha;
			}
			ca_desc->setWriteMask(writeMask);
		}

		// Blend constants are set on the encoder per pass, so keep them here.
		pipeline->raster_state.blend.r = p_blend_state.blend_constant.r;
		pipeline->raster_state.blend.g = p_blend_state.blend_constant.g;
		pipeline->raster_state.blend.b = p_blend_state.blend_constant.b;
		pipeline->raster_state.blend.a = p_blend_state.blend_constant.a;
	}

	// Dynamic state.

	if (p_dynamic_state.has_flag(DYNAMIC_STATE_DEPTH_BIAS)) {
		pipeline->raster_state.depth_bias.enabled = true;
	}

	if (p_dynamic_state.has_flag(DYNAMIC_STATE_BLEND_CONSTANTS)) {
		pipeline->raster_state.blend.enabled = true;
	}

	if (p_dynamic_state.has_flag(DYNAMIC_STATE_DEPTH_BOUNDS)) {
		// TODO(sgc): ??
	}

	if (p_dynamic_state.has_flag(DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
		// TODO(sgc): ??
	}

	if (p_dynamic_state.has_flag(DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
		// TODO(sgc): ??
	}

	if (p_dynamic_state.has_flag(DYNAMIC_STATE_STENCIL_REFERENCE)) {
		pipeline->raster_state.stencil.enabled = true;
	}

	// Compile (and specialize, if needed) the vertex and fragment entry
	// points; MoltenVK/SPIRV-Cross name the entry point "main0".
	if (shader->vert) {
		Result<NS::SharedPtr<MTL::Function>> function_or_err = _create_function(shader->vert.get(), MTLSTR("main0"), p_specialization_constants);
		ERR_FAIL_COND_V(std::holds_alternative<Error>(function_or_err), PipelineID());
		desc->setVertexFunction(std::get<NS::SharedPtr<MTL::Function>>(function_or_err).get());
	}

	if (shader->frag) {
		Result<NS::SharedPtr<MTL::Function>> function_or_err = _create_function(shader->frag.get(), MTLSTR("main0"), p_specialization_constants);
		ERR_FAIL_COND_V(std::holds_alternative<Error>(function_or_err), PipelineID());
		desc->setFragmentFunction(std::get<NS::SharedPtr<MTL::Function>>(function_or_err).get());
	}

	// If a binary archive exists, ask Metal to look the PSO up there first
	// (and optionally fail rather than compile on a cache miss).
	MTL::PipelineOption options = MTL::PipelineOptionNone;
	MTL::BinaryArchive *arc = archive.get();
	if (arc) {
		NS::SharedPtr<NS::Array> archives = NS::TransferPtr(NS::Array::array(reinterpret_cast<NS::Object *const *>(&arc), 1)->retain());
		desc->setBinaryArchives(archives.get());
		if (archive_fail_on_miss) {
			options |= MTL::PipelineOptionFailOnBinaryArchiveMiss;
		}
	}

	NS::Error *error = nullptr;
	pipeline->state = NS::TransferPtr(device->newRenderPipelineState(desc.get(), options, nullptr, &error));
	pipeline->shader = shader;

	ERR_FAIL_COND_V_MSG(error != nullptr, PipelineID(), String("error creating pipeline: ") + error->localizedDescription()->utf8String());
	ERR_FAIL_COND_V_MSG(!pipeline->state, PipelineID(), "Failed to create render pipeline state");

	// Record the newly built PSO into the archive for future runs; failure
	// here is non-fatal (the pipeline itself is fine).
	if (arc) {
		if (arc->addRenderPipelineFunctions(desc.get(), &error)) {
			archive_count += 1;
		} else {
			print_error(error->localizedDescription()->utf8String());
		}
	}

	// Ownership transfers to the returned ID; freed in pipeline_free.
	return PipelineID(pipeline);
}
2131
2132
#pragma mark - Compute
2133
2134
// ----- COMMANDS -----
2135
2136
void RenderingDeviceDriverMetal::command_bind_compute_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) {
	// Forward to the command buffer wrapper, which records the pipeline bind.
	MDCommandBufferBase *command_buffer = reinterpret_cast<MDCommandBufferBase *>(p_cmd_buffer.id);
	command_buffer->bind_pipeline(p_pipeline);
}
2140
2141
void RenderingDeviceDriverMetal::command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) {
	// Forward to the command buffer wrapper, which resolves and binds the sets.
	MDCommandBufferBase *command_buffer = reinterpret_cast<MDCommandBufferBase *>(p_cmd_buffer.id);
	command_buffer->compute_bind_uniform_sets(p_uniform_sets, p_shader, p_first_set_index, p_set_count, p_dynamic_offsets);
}
2145
2146
void RenderingDeviceDriverMetal::command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
	// Record a compute dispatch with the given threadgroup counts.
	MDCommandBufferBase *command_buffer = reinterpret_cast<MDCommandBufferBase *>(p_cmd_buffer.id);
	command_buffer->compute_dispatch(p_x_groups, p_y_groups, p_z_groups);
}
2150
2151
void RenderingDeviceDriverMetal::command_compute_dispatch_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset) {
	// Record an indirect compute dispatch; group counts come from the GPU buffer.
	MDCommandBufferBase *command_buffer = reinterpret_cast<MDCommandBufferBase *>(p_cmd_buffer.id);
	command_buffer->compute_dispatch_indirect(p_indirect_buffer, p_offset);
}
2155
2156
// ----- PIPELINE -----
2157
2158
// Creates a Metal compute pipeline state for the given shader, applying any
// specialization constants and (optionally) recording the result into the
// binary archive for caching.
RDD::PipelineID RenderingDeviceDriverMetal::compute_pipeline_create(ShaderID p_shader, VectorView<PipelineSpecializationConstant> p_specialization_constants) {
	MDComputeShader *shader = (MDComputeShader *)(p_shader.id);

	// Signpost interval covers the whole creation, for profiling in Instruments.
	os_signpost_id_t reflect_id = os_signpost_id_make_with_pointer(LOG_INTERVALS, shader);
	os_signpost_interval_begin(LOG_INTERVALS, reflect_id, "compute_pipeline_create", "shader_name=%{public}s", shader->name.get_data());
	DEFER([=]() {
		os_signpost_interval_end(LOG_INTERVALS, reflect_id, "compute_pipeline_create");
	});

	os_signpost_event_emit(LOG_DRIVER, OS_SIGNPOST_ID_EXCLUSIVE, "create_pipeline");

	// Specialize the kernel entry point named "main0" (see _create_function).
	Result<NS::SharedPtr<MTL::Function>> function_or_err = _create_function(shader->kernel.get(), MTLSTR("main0"), p_specialization_constants);
	ERR_FAIL_COND_V(std::holds_alternative<Error>(function_or_err), PipelineID());
	NS::SharedPtr<MTL::Function> function = std::get<NS::SharedPtr<MTL::Function>>(function_or_err);

	NS::SharedPtr<MTL::ComputePipelineDescriptor> desc = NS::TransferPtr(MTL::ComputePipelineDescriptor::alloc()->init());
	desc->setComputeFunction(function.get());
	desc->setLabel(conv::to_nsstring(shader->name));

	if (shader->uses_argument_buffers) {
		// Set mutability of argument buffers. Sets containing dynamic uniform
		// offsets are patched at bind time, so they must remain mutable.
		for (uint32_t i = 0; i < shader->sets.size(); i++) {
			const UniformSet &set = shader->sets[i];
			const MTL::Mutability mutability = set.dynamic_uniforms.is_empty() ? MTL::MutabilityImmutable : MTL::MutabilityMutable;
			desc->buffers()->object(i)->setMutability(mutability);
		}
	}

	// Attach the binary archive (if any); with archive_fail_on_miss set, a
	// cache miss fails pipeline creation instead of compiling from source.
	MTL::PipelineOption options = MTL::PipelineOptionNone;
	MTL::BinaryArchive *arc = archive.get();
	if (arc) {
		NS::SharedPtr<NS::Array> archives = NS::TransferPtr(NS::Array::array(reinterpret_cast<NS::Object *const *>(&arc), 1)->retain());
		desc->setBinaryArchives(archives.get());
		if (archive_fail_on_miss) {
			options |= MTL::PipelineOptionFailOnBinaryArchiveMiss;
		}
	}

	NS::Error *error = nullptr;
	NS::SharedPtr<MTL::ComputePipelineState> state = NS::TransferPtr(device->newComputePipelineState(desc.get(), options, nullptr, &error));
	ERR_FAIL_COND_V_MSG(error != nullptr, PipelineID(), String("error creating pipeline: ") + error->localizedDescription()->utf8String());
	ERR_FAIL_COND_V_MSG(!state, PipelineID(), "Failed to create compute pipeline state");

	MDComputePipeline *pipeline = new MDComputePipeline(state);
	pipeline->compute_state.local = shader->local;
	pipeline->shader = shader;

	// Record the pipeline's functions into the archive for future cache hits.
	if (arc) {
		if (arc->addComputePipelineFunctions(desc.get(), &error)) {
			archive_count += 1;
		} else {
			print_error(error->localizedDescription()->utf8String());
		}
	}

	return PipelineID(pipeline);
}
2215
2216
#pragma mark - Raytracing
2217
2218
// ----- ACCELERATION STRUCTURE -----
2219
2220
// Stub: bottom-level acceleration structures belong to the ray tracing API,
// which this driver does not implement yet.
// (Also fixes the `p_index_coun` parameter-name typo; definition-local rename
// with no effect on callers.)
RDD::AccelerationStructureID RenderingDeviceDriverMetal::blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, uint32_t p_position_attribute_location, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset_bytes, uint32_t p_index_count, BitField<AccelerationStructureGeometryBits> p_geometry_bits) {
	ERR_FAIL_V_MSG(AccelerationStructureID(), "Ray tracing is not currently supported by the Metal driver.");
}
2223
2224
// Stub: ray tracing is not implemented on Metal; always fails and returns 0.
uint32_t RenderingDeviceDriverMetal::tlas_instances_buffer_get_size_bytes(uint32_t p_instance_count) {
	ERR_FAIL_V_MSG(0, "Ray tracing is not currently supported by the Metal driver.");
}
2227
2228
// Stub: ray tracing is not implemented on Metal; always fails.
void RenderingDeviceDriverMetal::tlas_instances_buffer_fill(BufferID p_instances_buffer, VectorView<AccelerationStructureID> p_blases, VectorView<Transform3D> p_transforms) {
	ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver.");
}
2231
2232
// Stub: top-level acceleration structures are not implemented on Metal.
RDD::AccelerationStructureID RenderingDeviceDriverMetal::tlas_create(BufferID p_instance_buffer) {
	ERR_FAIL_V_MSG(AccelerationStructureID(), "Ray tracing is not currently supported by the Metal driver.");
}
2235
2236
// Stub: nothing to free — acceleration structures can never be created by this driver.
void RenderingDeviceDriverMetal::acceleration_structure_free(RDD::AccelerationStructureID p_acceleration_structure) {
	ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver.");
}
2239
2240
// Stub: ray tracing is not implemented on Metal; always fails and returns 0.
uint32_t RenderingDeviceDriverMetal::acceleration_structure_get_scratch_size_bytes(AccelerationStructureID p_acceleration_structure) {
	ERR_FAIL_V_MSG(0, "Ray tracing is not currently supported by the Metal driver.");
}
2243
2244
// ----- PIPELINE -----
2245
2246
// Stub: ray tracing pipelines are not implemented on Metal.
RDD::RaytracingPipelineID RenderingDeviceDriverMetal::raytracing_pipeline_create(ShaderID p_shader, VectorView<PipelineSpecializationConstant> p_specialization_constants) {
	ERR_FAIL_V_MSG(RaytracingPipelineID(), "Ray tracing is not currently supported by the Metal driver.");
}
2249
2250
// Stub: nothing to free — raytracing pipelines can never be created by this driver.
void RenderingDeviceDriverMetal::raytracing_pipeline_free(RDD::RaytracingPipelineID p_pipeline) {
	ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver.");
}
2253
2254
// ----- COMMANDS -----
2255
2256
// Stub: ray tracing commands are not implemented on Metal.
void RenderingDeviceDriverMetal::command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure, BufferID p_scratch_buffer) {
	ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver.");
}
2259
2260
// Stub: ray tracing commands are not implemented on Metal.
void RenderingDeviceDriverMetal::command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) {
	ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver.");
}
2263
2264
// Stub: ray tracing commands are not implemented on Metal.
void RenderingDeviceDriverMetal::command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
	ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver.");
}
2267
2268
// Stub: ray tracing commands are not implemented on Metal.
void RenderingDeviceDriverMetal::command_trace_rays(CommandBufferID p_cmd_buffer, uint32_t p_width, uint32_t p_height) {
	ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver.");
}
2271
2272
#pragma mark - Queries
2273
2274
// ----- TIMESTAMP -----
2275
2276
// Returns a non-null dummy handle. Timestamp queries are not implemented on
// this driver, so no real pool is ever allocated.
RDD::QueryPoolID RenderingDeviceDriverMetal::timestamp_query_pool_create(uint32_t p_query_count) {
	return QueryPoolID(1);
}
2279
2280
// No-op: timestamp_query_pool_create() returns a dummy handle, so there is nothing to free.
void RenderingDeviceDriverMetal::timestamp_query_pool_free(QueryPoolID p_pool_id) {
}
2282
2283
void RenderingDeviceDriverMetal::timestamp_query_pool_get_results(QueryPoolID p_pool_id, uint32_t p_query_count, uint64_t *r_results) {
	// Metal doesn't support timestamp queries, so we just clear the buffer.
	// NOTE(review): bzero is legacy (removed in POSIX.1-2008); memset(r_results, 0, ...)
	// would be the portable equivalent.
	bzero(r_results, p_query_count * sizeof(uint64_t));
}
2287
2288
// Identity conversion: results are already zeroed placeholders (no timestamp support).
uint64_t RenderingDeviceDriverMetal::timestamp_query_result_to_time(uint64_t p_result) {
	return p_result;
}
2291
2292
// No-op: timestamp queries are not supported on this driver.
void RenderingDeviceDriverMetal::command_timestamp_query_pool_reset(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_query_count) {
}
2294
2295
// No-op: timestamp queries are not supported on this driver.
void RenderingDeviceDriverMetal::command_timestamp_write(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_index) {
}
2297
2298
#pragma mark - Labels
2299
2300
void RenderingDeviceDriverMetal::command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) {
	// Open a debug label region on the command buffer (visible in GPU captures).
	MDCommandBufferBase *command_buffer = reinterpret_cast<MDCommandBufferBase *>(p_cmd_buffer.id);
	command_buffer->begin_label(p_label_name, p_color);
}
2304
2305
void RenderingDeviceDriverMetal::command_end_label(CommandBufferID p_cmd_buffer) {
	// Close the most recently opened debug label region.
	MDCommandBufferBase *command_buffer = reinterpret_cast<MDCommandBufferBase *>(p_cmd_buffer.id);
	command_buffer->end_label();
}
2309
2310
#pragma mark - Debug
2311
2312
// Debug breadcrumb insertion (used for GPU crash diagnostics) is not yet
// implemented for Metal; intentionally a no-op.
void RenderingDeviceDriverMetal::command_insert_breadcrumb(CommandBufferID p_cmd_buffer, uint32_t p_data) {
	// TODO: Implement.
}
2315
2316
#pragma mark - Submission
2317
2318
// Records frame bookkeeping at the start of a rendering segment.
void RenderingDeviceDriverMetal::begin_segment(uint32_t p_frame_index, uint32_t p_frames_drawn) {
	_frame_index = p_frame_index;
	_frames_drawn = p_frames_drawn;
}
2322
2323
// Flushes any staged buffer copies before the segment ends. The mutex guards
// the shared copy-queue state against concurrent staging.
void RenderingDeviceDriverMetal::end_segment() {
	MutexLock lock(copy_queue_mutex);
	_copy_queue_flush();
}
2327
2328
#pragma mark - Misc
2329
2330
// Assigns a debug label to a driver object so it is identifiable in Xcode GPU
// captures. Some object kinds (samplers, pipelines) cannot be relabelled after
// creation and are silently skipped.
void RenderingDeviceDriverMetal::set_object_name(ObjectType p_type, ID p_driver_id, const String &p_name) {
	NS::String *label = conv::to_nsstring(p_name);

	switch (p_type) {
		case OBJECT_TYPE_TEXTURE: {
			MTL::Texture *tex = reinterpret_cast<MTL::Texture *>(p_driver_id.id);
			tex->setLabel(label);
		} break;
		case OBJECT_TYPE_SAMPLER: {
			// Can't set label after creation.
		} break;
		case OBJECT_TYPE_BUFFER: {
			const BufferInfo *buf_info = (const BufferInfo *)p_driver_id.id;
			buf_info->metal_buffer.get()->setLabel(label);
		} break;
		case OBJECT_TYPE_SHADER: {
			// Downcast to the concrete shader kind to reach its Metal libraries.
			MDShader *shader = (MDShader *)(p_driver_id.id);
			if (MDRenderShader *rs = dynamic_cast<MDRenderShader *>(shader); rs != nullptr) {
				rs->vert->set_label(label);
				rs->frag->set_label(label);
			} else if (MDComputeShader *cs = dynamic_cast<MDComputeShader *>(shader); cs != nullptr) {
				cs->kernel->set_label(label);
			} else {
				DEV_ASSERT(false);
			}
		} break;
		case OBJECT_TYPE_UNIFORM_SET: {
			MDUniformSet *set = (MDUniformSet *)(p_driver_id.id);
			set->arg_buffer->setLabel(label);
		} break;
		case OBJECT_TYPE_PIPELINE: {
			// Can't set label after creation.
		} break;
		default: {
			// Unhandled object type is a programming error in debug builds.
			DEV_ASSERT(false);
		}
	}
}
2368
2369
// Exposes raw Metal object pointers (as uint64_t) for interop with external
// code. Resources with no Metal-side equivalent (physical device, queue
// family, ...) return 0.
uint64_t RenderingDeviceDriverMetal::get_resource_native_handle(DriverResource p_type, ID p_driver_id) {
	switch (p_type) {
		case DRIVER_RESOURCE_LOGICAL_DEVICE: {
			return (uint64_t)(uintptr_t)device;
		}
		case DRIVER_RESOURCE_PHYSICAL_DEVICE: {
			// Metal has no separate physical-device object.
			return 0;
		}
		case DRIVER_RESOURCE_TOPMOST_OBJECT: {
			return 0;
		}
		case DRIVER_RESOURCE_COMMAND_QUEUE: {
			return (uint64_t)(uintptr_t)get_command_queue();
		}
		case DRIVER_RESOURCE_QUEUE_FAMILY: {
			return 0;
		}
		// For these, the driver ID already holds the native pointer directly.
		case DRIVER_RESOURCE_TEXTURE: {
			return p_driver_id.id;
		}
		case DRIVER_RESOURCE_TEXTURE_VIEW: {
			return p_driver_id.id;
		}
		case DRIVER_RESOURCE_TEXTURE_DATA_FORMAT: {
			return 0;
		}
		case DRIVER_RESOURCE_SAMPLER: {
			return p_driver_id.id;
		}
		case DRIVER_RESOURCE_UNIFORM_SET: {
			return 0;
		}
		case DRIVER_RESOURCE_BUFFER: {
			return p_driver_id.id;
		}
		// Pipelines are wrapped in MD* objects; unwrap to the MTL state object.
		case DRIVER_RESOURCE_COMPUTE_PIPELINE: {
			MDComputePipeline *pipeline = (MDComputePipeline *)(p_driver_id.id);
			return (uint64_t)(uintptr_t)pipeline->state.get();
		}
		case DRIVER_RESOURCE_RENDER_PIPELINE: {
			MDRenderPipeline *pipeline = (MDRenderPipeline *)(p_driver_id.id);
			return (uint64_t)(uintptr_t)pipeline->state.get();
		}
		default: {
			return 0;
		}
	}
}
2417
2418
// Stages p_src_data into the shared scratch buffer and records a blit into
// p_dst_buffer at p_dst_offset. Flushes first if the scratch buffer is full.
// NOTE(review): assumes p_src_data fits in the scratch buffer (64 KiB, see
// _copy_queue_initialize) — a larger payload would overflow; confirm callers
// never exceed that.
void RenderingDeviceDriverMetal::_copy_queue_copy_to_buffer(Span<uint8_t> p_src_data, MTL::Buffer *p_dst_buffer, uint64_t p_dst_offset) {
	MutexLock lock(copy_queue_mutex);
	if (_copy_queue_buffer_available() < p_src_data.size()) {
		// Not enough scratch space left; submit pending copies and reset the offset.
		_copy_queue_flush();
	}

	MTL::BlitCommandEncoder *blit_encoder = _copy_queue_blit_encoder();

	memcpy(_copy_queue_buffer_ptr(), p_src_data.ptr(), p_src_data.size());

	// Keep the destination resident for the duration of the copy submission.
	copy_queue_rs.get()->addAllocation(p_dst_buffer);
	blit_encoder->copyFromBuffer(copy_queue_buffer.get(), copy_queue_buffer_offset, p_dst_buffer, p_dst_offset, p_src_data.size());

	_copy_queue_buffer_consume(p_src_data.size());
}
2433
2434
// Submits all staged copies and blocks until the GPU finishes, so the scratch
// buffer can be reused immediately. Caller must hold copy_queue_mutex.
void RenderingDeviceDriverMetal::_copy_queue_flush() {
	// Nothing has been staged since the last flush.
	if (!copy_queue_blit_encoder) {
		return;
	}

	// copy_queue_rs may be null prior to macOS 15 / iOS 18 (see
	// _copy_queue_initialize); messaging a null Objective-C object is a no-op,
	// so these calls are presumed safe there — confirm on older targets.
	copy_queue_rs.get()->addAllocation(copy_queue_buffer.get());
	copy_queue_rs.get()->commit();

	copy_queue_blit_encoder.get()->endEncoding();
	copy_queue_blit_encoder.reset();
	// Synchronous submission: wait for completion before resetting state.
	copy_queue_command_buffer.get()->commit();
	copy_queue_command_buffer.get()->waitUntilCompleted();
	copy_queue_command_buffer.reset();
	copy_queue_buffer_offset = 0;
	copy_queue_rs.get()->removeAllAllocations();
}
2450
2451
// Creates the dedicated copy queue, its 64 KiB staging buffer, and (where
// available) a residency set used to keep copy sources/destinations resident.
// Returns ERR_CANT_CREATE if any required Metal object cannot be allocated.
Error RenderingDeviceDriverMetal::_copy_queue_initialize() {
	DEV_ASSERT(!copy_queue);

	copy_queue = NS::TransferPtr(device->newCommandQueue());
	// Validate creation before dereferencing the queue (previously the label
	// was set before the null check).
	ERR_FAIL_COND_V(!copy_queue, ERR_CANT_CREATE);
	copy_queue.get()->setLabel(MTLSTR("Copy Command Queue"));

	// Reserve 64 KiB for copy commands. If the buffer fills, it will be flushed automatically.
	copy_queue_buffer = NS::TransferPtr(device->newBuffer(64 * 1024, MTL::ResourceStorageModeShared | MTL::ResourceHazardTrackingModeUntracked));
	ERR_FAIL_COND_V(!copy_queue_buffer, ERR_CANT_CREATE);
	copy_queue_buffer.get()->setLabel(MTLSTR("Copy Command Scratch Buffer"));

	// Residency sets require macOS 15 / iOS 18; on older systems copy_queue_rs
	// stays null and messaging it elsewhere is a harmless Objective-C no-op.
	if (__builtin_available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 1.0, *)) {
		// TransferPtr releases the descriptor automatically (RAII, replaces manual release()).
		NS::SharedPtr<MTL::ResidencySetDescriptor> rs_desc = NS::TransferPtr(MTL::ResidencySetDescriptor::alloc()->init());
		rs_desc->setInitialCapacity(2);
		rs_desc->setLabel(MTLSTR("Copy Queue Residency Set"));
		NS::Error *error = nullptr;
		copy_queue_rs = NS::TransferPtr(device->newResidencySet(rs_desc.get(), &error));
		copy_queue.get()->addResidencySet(copy_queue_rs.get());
	}

	return OK;
}
2474
2475
// Total GPU memory currently allocated by this device, as reported by Metal.
uint64_t RenderingDeviceDriverMetal::get_total_memory_used() {
	return device->currentAllocatedSize();
}
2478
2479
// Memory used by lazily-allocated (memoryless) resources; not tracked yet.
uint64_t RenderingDeviceDriverMetal::get_lazily_memory_used() {
	return 0; // TODO: Track this (grep for memoryless in Godot's Metal backend).
}
2482
2483
// Maps the abstract RDD limit enum onto the device's Metal limits. Unknown
// limits return a large "safe unbounded" value (2^30), with a one-time
// warning in dev builds.
uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {
	MetalDeviceProperties const &props = (*device_properties);
	MetalLimits const &limits = props.limits;
	uint64_t safe_unbounded = ((uint64_t)1 << 30);
	// NOTE(review): UNKNOWN is defined but currently unused by the switch below
	// (unknown limits fall into `default:`); kept for future use.
#if defined(DEV_ENABLED)
#define UNKNOWN(NAME) \
	case NAME: \
		WARN_PRINT_ONCE("Returning maximum value for unknown limit " #NAME "."); \
		return safe_unbounded;
#else
#define UNKNOWN(NAME) \
	case NAME: \
		return safe_unbounded
#endif

	// clang-format off
	switch (p_limit) {
		case LIMIT_MAX_BOUND_UNIFORM_SETS:
			return limits.maxBoundDescriptorSets;
		case LIMIT_MAX_FRAMEBUFFER_COLOR_ATTACHMENTS:
			return limits.maxColorAttachments;
		// Per-uniform-set and per-stage limits all map onto Metal's
		// per-argument-buffer resource limits.
		case LIMIT_MAX_TEXTURES_PER_UNIFORM_SET:
			return limits.maxTexturesPerArgumentBuffer;
		case LIMIT_MAX_SAMPLERS_PER_UNIFORM_SET:
			return limits.maxSamplersPerArgumentBuffer;
		case LIMIT_MAX_STORAGE_BUFFERS_PER_UNIFORM_SET:
			return limits.maxBuffersPerArgumentBuffer;
		case LIMIT_MAX_STORAGE_IMAGES_PER_UNIFORM_SET:
			return limits.maxTexturesPerArgumentBuffer;
		case LIMIT_MAX_UNIFORM_BUFFERS_PER_UNIFORM_SET:
			return limits.maxBuffersPerArgumentBuffer;
		case LIMIT_MAX_DRAW_INDEXED_INDEX:
			return limits.maxDrawIndexedIndexValue;
		case LIMIT_MAX_FRAMEBUFFER_HEIGHT:
			return limits.maxFramebufferHeight;
		case LIMIT_MAX_FRAMEBUFFER_WIDTH:
			return limits.maxFramebufferWidth;
		case LIMIT_MAX_TEXTURE_ARRAY_LAYERS:
			return limits.maxImageArrayLayers;
		case LIMIT_MAX_TEXTURE_SIZE_1D:
			return limits.maxImageDimension1D;
		case LIMIT_MAX_TEXTURE_SIZE_2D:
			return limits.maxImageDimension2D;
		case LIMIT_MAX_TEXTURE_SIZE_3D:
			return limits.maxImageDimension3D;
		case LIMIT_MAX_TEXTURE_SIZE_CUBE:
			return limits.maxImageDimensionCube;
		case LIMIT_MAX_TEXTURES_PER_SHADER_STAGE:
			return limits.maxTexturesPerArgumentBuffer;
		case LIMIT_MAX_SAMPLERS_PER_SHADER_STAGE:
			return limits.maxSamplersPerArgumentBuffer;
		case LIMIT_MAX_STORAGE_BUFFERS_PER_SHADER_STAGE:
			return limits.maxBuffersPerArgumentBuffer;
		case LIMIT_MAX_STORAGE_IMAGES_PER_SHADER_STAGE:
			return limits.maxTexturesPerArgumentBuffer;
		case LIMIT_MAX_UNIFORM_BUFFERS_PER_SHADER_STAGE:
			return limits.maxBuffersPerArgumentBuffer;
		// Push constants and uniform buffers are plain Metal buffers, so the
		// general buffer length limit applies.
		case LIMIT_MAX_PUSH_CONSTANT_SIZE:
			return limits.maxBufferLength;
		case LIMIT_MAX_UNIFORM_BUFFER_SIZE:
			return limits.maxBufferLength;
		case LIMIT_MAX_VERTEX_INPUT_ATTRIBUTE_OFFSET:
			return limits.maxVertexDescriptorLayoutStride;
		case LIMIT_MAX_VERTEX_INPUT_ATTRIBUTES:
			return limits.maxVertexInputAttributes;
		case LIMIT_MAX_VERTEX_INPUT_BINDINGS:
			return limits.maxVertexInputBindings;
		case LIMIT_MAX_VERTEX_INPUT_BINDING_STRIDE:
			return limits.maxVertexInputBindingStride;
		case LIMIT_MIN_UNIFORM_BUFFER_OFFSET_ALIGNMENT:
			return limits.minUniformBufferOffsetAlignment;
		case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_X:
			return limits.maxComputeWorkGroupCount.width;
		case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Y:
			return limits.maxComputeWorkGroupCount.height;
		case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Z:
			return limits.maxComputeWorkGroupCount.depth;
		case LIMIT_MAX_COMPUTE_WORKGROUP_INVOCATIONS:
			return std::max({ limits.maxThreadsPerThreadGroup.width, limits.maxThreadsPerThreadGroup.height, limits.maxThreadsPerThreadGroup.depth });
		case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_X:
			return limits.maxThreadsPerThreadGroup.width;
		case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Y:
			return limits.maxThreadsPerThreadGroup.height;
		case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z:
			return limits.maxThreadsPerThreadGroup.depth;
		case LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE:
			return limits.maxThreadGroupMemoryAllocation;
		case LIMIT_MAX_VIEWPORT_DIMENSIONS_X:
			return limits.maxViewportDimensionX;
		case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y:
			return limits.maxViewportDimensionY;
		case LIMIT_SUBGROUP_SIZE:
			// MoltenVK sets the subgroupSize to the same as the maxSubgroupSize.
			return limits.maxSubgroupSize;
		case LIMIT_SUBGROUP_MIN_SIZE:
			return limits.minSubgroupSize;
		case LIMIT_SUBGROUP_MAX_SIZE:
			return limits.maxSubgroupSize;
		case LIMIT_SUBGROUP_IN_SHADERS:
			return (uint64_t)limits.subgroupSupportedShaderStages;
		case LIMIT_SUBGROUP_OPERATIONS:
			return (uint64_t)limits.subgroupSupportedOperations;
		// Scale factors are reported as fixed-point: fraction * 1,000,000
		// (1000'000 uses a digit separator). Min/max invert because the input
		// content scale is the reciprocal of the output scale.
		case LIMIT_METALFX_TEMPORAL_SCALER_MIN_SCALE:
			return (uint64_t)((1.0 / limits.temporalScalerInputContentMaxScale) * 1000'000);
		case LIMIT_METALFX_TEMPORAL_SCALER_MAX_SCALE:
			return (uint64_t)((1.0 / limits.temporalScalerInputContentMinScale) * 1000'000);
		case LIMIT_MAX_SHADER_VARYINGS:
			return limits.maxShaderVaryings;
		default: {
#ifdef DEV_ENABLED
			WARN_PRINT("Returning maximum value for unknown limit " + itos(p_limit) + ".");
#endif
			return safe_unbounded;
		}
	}
	// clang-format on
	// Unreachable; keeps compilers that don't see the exhaustive switch happy.
	return 0;
}
2601
2602
uint64_t RenderingDeviceDriverMetal::api_trait_get(ApiTrait p_trait) {
	// Metal-specific trait overrides; everything else defers to the base driver.
	if (p_trait == API_TRAIT_HONORS_PIPELINE_BARRIERS) {
		return use_barriers;
	}
	if (p_trait == API_TRAIT_CLEARS_WITH_COPY_ENGINE) {
		return false;
	}
	return RenderingDeviceDriver::api_trait_get(p_trait);
}
2612
2613
bool RenderingDeviceDriverMetal::has_feature(Features p_feature) {
	switch (p_feature) {
		// Features that are unconditionally available on Metal.
		case SUPPORTS_HALF_FLOAT:
		case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS:
		case SUPPORTS_VULKAN_MEMORY_MODEL:
		case SUPPORTS_POINT_SIZE:
			return true;
		// Features gated on the detected device capabilities.
		case SUPPORTS_BUFFER_DEVICE_ADDRESS:
			return device_properties->features.supports_gpu_address;
		case SUPPORTS_METALFX_SPATIAL:
			return device_properties->features.metal_fx_spatial;
		case SUPPORTS_METALFX_TEMPORAL:
			return device_properties->features.metal_fx_temporal;
		case SUPPORTS_IMAGE_ATOMIC_32_BIT:
			return device_properties->features.supports_native_image_atomics;
		default:
			return false;
	}
}
2635
2636
// Multiview capabilities populated during _initialize() from layeredRendering support.
const RDD::MultiviewCapabilities &RenderingDeviceDriverMetal::get_multiview_capabilities() {
	return multiview_capabilities;
}
2639
2640
// Fragment shading rate capabilities (member state; defaults unless configured elsewhere).
const RDD::FragmentShadingRateCapabilities &RenderingDeviceDriverMetal::get_fragment_shading_rate_capabilities() {
	return fsr_capabilities;
}
2643
2644
// Fragment density map capabilities (member state; defaults unless configured elsewhere).
const RDD::FragmentDensityMapCapabilities &RenderingDeviceDriverMetal::get_fragment_density_map_capabilities() {
	return fdm_capabilities;
}
2647
2648
// Reports the MSL target version (major.minor) parsed in _check_capabilities().
String RenderingDeviceDriverMetal::get_api_version() const {
	return vformat("%d.%d", capabilities.version_major, capabilities.version_minor);
}
2651
2652
// Cache key set in _initialize() as "metal-driver-" + the MSL version string.
String RenderingDeviceDriverMetal::get_pipeline_cache_uuid() const {
	return pipeline_cache_id;
}
2655
2656
// Device family and version info filled in by _check_capabilities().
const RDD::Capabilities &RenderingDeviceDriverMetal::get_capabilities() const {
	return capabilities;
}
2659
2660
// Whether swapchain output supports a transparent (composited) alpha channel.
bool RenderingDeviceDriverMetal::is_composite_alpha_supported(CommandQueueID p_queue) const {
	// The CAMetalLayer.opaque property is configured according to this global setting.
	return OS::get_singleton()->is_layered_allowed();
}
2664
2665
size_t RenderingDeviceDriverMetal::get_texel_buffer_alignment_for_format(RDD::DataFormat p_format) const {
	// Translate the RDD format to its Metal pixel format and defer to the
	// MTL::PixelFormat overload (same underlying device query).
	return get_texel_buffer_alignment_for_format(pixel_formats->getMTLPixelFormat(p_format));
}
2668
2669
// Minimum offset/row alignment the device requires for linear textures of this pixel format.
size_t RenderingDeviceDriverMetal::get_texel_buffer_alignment_for_format(MTL::PixelFormat p_format) const {
	return device->minimumLinearTextureAlignmentForPixelFormat(p_format);
}
2672
2673
/******************/
2674
2675
// Constructor only records the context and reads tuning environment variables;
// the heavy lifting happens later in _initialize().
RenderingDeviceDriverMetal::RenderingDeviceDriverMetal(RenderingContextDriverMetal *p_context_driver) :
		context_driver(p_context_driver) {
	DEV_ASSERT(p_context_driver != nullptr);
	// GODOT_MTL_ARCHIVE_FAIL_ON_MISS=1 makes binary-archive cache misses fail
	// pipeline creation (useful for validating a shipped shader cache).
	if (String res = OS::get_singleton()->get_environment("GODOT_MTL_ARCHIVE_FAIL_ON_MISS"); res == "1") {
		archive_fail_on_miss = true;
	}

#if TARGET_OS_OSX
	// GODOT_MTL_SHADER_LOAD_STRATEGY=lazy opts into deferred shader loading on macOS.
	if (String res = OS::get_singleton()->get_environment("GODOT_MTL_SHADER_LOAD_STRATEGY"); res == U"lazy") {
		_shader_load_strategy = ShaderLoadStrategy::LAZY;
	}
#else
	// Always use the lazy strategy on other OSs like iOS, tvOS, or visionOS.
	_shader_load_strategy = ShaderLoadStrategy::LAZY;
#endif
}
2691
2692
RenderingDeviceDriverMetal::~RenderingDeviceDriverMetal() {
	// The cache owns its entries via raw pointers; release them explicitly.
	for (KeyValue<SHA256Digest, ShaderCacheEntry *> &kv : _shader_cache) {
		memdelete(kv.value);
	}

	// These members are only allocated in _initialize(), so they may still be
	// null if initialization failed partway through.
	if (shader_container_format != nullptr) {
		memdelete(shader_container_format);
	}

	if (pixel_formats != nullptr) {
		memdelete(pixel_formats);
	}

	if (device_properties != nullptr) {
		memdelete(device_properties);
	}
}
2709
2710
#pragma mark - Initialization
2711
2712
// Acquires the MTLDevice from the context driver and sets up a capture scope
// so Xcode's GPU frame capture can target individual Godot frames.
Error RenderingDeviceDriverMetal::_create_device() {
	device = context_driver->get_metal_device();

	device_scope = NS::TransferPtr(MTL::CaptureManager::sharedCaptureManager()->newCaptureScope(device));
	device_scope->setLabel(MTLSTR("Godot Frame"));
	device_scope->beginScope(); // Allow Xcode to capture the first frame, if desired.

	return OK;
}
2721
2722
void RenderingDeviceDriverMetal::_track_resource(MTL::Resource *p_resource) {
	// Residency tracking is only performed when explicit barriers are in use.
	if (!use_barriers) {
		return;
	}
	_residency_add.push_back(p_resource);
}
2727
2728
void RenderingDeviceDriverMetal::_untrack_resource(MTL::Resource *p_resource) {
	// Residency tracking is only performed when explicit barriers are in use.
	if (!use_barriers) {
		return;
	}
	_residency_del.push_back(p_resource);
}
2733
2734
// Fills `capabilities` — the reported version is the MSL target version, not
// the OS or Metal framework version.
void RenderingDeviceDriverMetal::_check_capabilities() {
	capabilities.device_family = DEVICE_METAL;
	parse_msl_version(device_properties->features.msl_target_version, capabilities.version_major, capabilities.version_minor);
}
2738
2739
// Builds a MetalDeviceProfile (platform, GPU family, OS version, feature
// toggles) from the queried device properties; used to parameterize the
// shader container format.
API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0))
static MetalDeviceProfile device_profile_from_properties(MetalDeviceProperties *p_device_properties) {
	using DP = MetalDeviceProfile;
	NS::OperatingSystemVersion os_version = NS::ProcessInfo::processInfo()->operatingSystemVersion();
	MetalDeviceProfile res;
	res.min_os_version = MinOsVersion(os_version.majorVersion, os_version.minorVersion, os_version.patchVersion);
#if TARGET_OS_OSX
	res.platform = DP::Platform::macOS;
#elif TARGET_OS_IPHONE
	res.platform = DP::Platform::iOS;
#elif TARGET_OS_VISION
	res.platform = DP::Platform::visionOS;
#else
#error "Unsupported Apple platform"
#endif
	res.features = {
		.msl_version = p_device_properties->features.msl_target_version,
		.use_argument_buffers = p_device_properties->features.argument_buffers_enabled(),
		.simdPermute = p_device_properties->features.simdPermute,
	};

	// highestFamily will only be set to an Apple GPU family
	switch (p_device_properties->features.highestFamily) {
		case MTL::GPUFamilyApple1:
			res.gpu = DP::GPU::Apple1;
			break;
		case MTL::GPUFamilyApple2:
			res.gpu = DP::GPU::Apple2;
			break;
		case MTL::GPUFamilyApple3:
			res.gpu = DP::GPU::Apple3;
			break;
		case MTL::GPUFamilyApple4:
			res.gpu = DP::GPU::Apple4;
			break;
		case MTL::GPUFamilyApple5:
			res.gpu = DP::GPU::Apple5;
			break;
		case MTL::GPUFamilyApple6:
			res.gpu = DP::GPU::Apple6;
			break;
		case MTL::GPUFamilyApple7:
			res.gpu = DP::GPU::Apple7;
			break;
		case MTL::GPUFamilyApple8:
			res.gpu = DP::GPU::Apple8;
			break;
		case MTL::GPUFamilyApple9:
			res.gpu = DP::GPU::Apple9;
			break;
		default: {
			// Programming error if the default case is hit.
			CRASH_NOW_MSG("Unsupported GPU family");
		} break;
	}

	return res;
}
2797
2798
// Completes driver initialization for the selected device: queries
// properties, builds the device profile and resource caches, and verifies the
// minimum supported GPU family (Apple4 / A11).
Error RenderingDeviceDriverMetal::_initialize(uint32_t p_device_index, uint32_t p_frame_count) {
	context_device = context_driver->device_get(p_device_index);
	Error err = _create_device();
	ERR_FAIL_COND_V(err, ERR_CANT_CREATE);

	device_properties = memnew(MetalDeviceProperties(device));
	// pixel_formats must exist before MDResourceCache, which dereferences it.
	// (Previously it was allocated after the cache was constructed, so the
	// cache received a dereferenced null pointer.)
	pixel_formats = memnew(PixelFormats(device, device_properties->features));
	device_profile = device_profile_from_properties(device_properties);
	resource_cache = std::make_unique<MDResourceCache>(device, *pixel_formats, device_properties->limits.maxPerStageBufferCount);
	shader_container_format = memnew(RenderingShaderContainerFormatMetal(&device_profile));

	_check_capabilities();

	err = _copy_queue_initialize();
	ERR_FAIL_COND_V(err, ERR_CANT_CREATE);

	_frame_count = p_frame_count;

	// Set the pipeline cache ID based on the Metal version.
	pipeline_cache_id = "metal-driver-" + get_api_version();

	if (device_properties->features.layeredRendering) {
		multiview_capabilities.is_supported = true;
		multiview_capabilities.max_view_count = device_properties->limits.maxViewports;
		// NOTE: I'm not sure what the limit is as I don't see it referenced anywhere
		multiview_capabilities.max_instance_count = UINT32_MAX;

		print_verbose("- Metal multiview supported:");
		print_verbose(" max view count: " + itos(multiview_capabilities.max_view_count));
		print_verbose(" max instances: " + itos(multiview_capabilities.max_instance_count));
	} else {
		print_verbose("- Metal multiview not supported");
	}

	// The Metal renderer requires Apple4 family. This is 2017 era A11 chips and newer.
	if (device_properties->features.highestFamily < MTL::GPUFamilyApple4) {
		String error_string = vformat("Your Apple GPU does not support the following features, which are required to use Metal-based renderers in Godot:\n\n");
		if (!device_properties->features.imageCubeArray) {
			error_string += "- No support for image cube arrays.\n";
		}

#if defined(APPLE_EMBEDDED_ENABLED)
		// Apple Embedded platforms exports currently don't exit themselves when this method returns `ERR_CANT_CREATE`.
		OS::get_singleton()->alert(error_string + "\nClick OK to exit (black screen will be visible).");
#else
		OS::get_singleton()->alert(error_string + "\nClick OK to exit.");
#endif

		return ERR_CANT_CREATE;
	}

	return OK;
}
2851
2852
// Shader container format allocated in _initialize(); valid for the driver's lifetime.
const RenderingShaderContainerFormat &RenderingDeviceDriverMetal::get_shader_container_format() const {
	return *shader_container_format;
}
2855
2856