Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/drivers/metal/metal_objects_shared.cpp
20919 views
1
/**************************************************************************/
2
/* metal_objects_shared.cpp */
3
/**************************************************************************/
4
/* This file is part of: */
5
/* GODOT ENGINE */
6
/* https://godotengine.org */
7
/**************************************************************************/
8
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
9
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
10
/* */
11
/* Permission is hereby granted, free of charge, to any person obtaining */
12
/* a copy of this software and associated documentation files (the */
13
/* "Software"), to deal in the Software without restriction, including */
14
/* without limitation the rights to use, copy, modify, merge, publish, */
15
/* distribute, sublicense, and/or sell copies of the Software, and to */
16
/* permit persons to whom the Software is furnished to do so, subject to */
17
/* the following conditions: */
18
/* */
19
/* The above copyright notice and this permission notice shall be */
20
/* included in all copies or substantial portions of the Software. */
21
/* */
22
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
23
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
24
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
25
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
26
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
27
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
28
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
29
/**************************************************************************/
30
31
#include "metal_objects_shared.h"
32
33
#include "rendering_device_driver_metal.h"
34
35
#include <os/signpost.h>
36
#include <simd/simd.h>
37
#include <string>
38
39
#pragma mark - Resource Factory
40
41
/// Compiles `p_source` into a Metal library and returns the function named `p_name` from it.
///
/// @param p_source MSL source code to compile.
/// @param p_name Name of the function to look up in the compiled library.
/// @param p_error Optional out-parameter; receives the error reported by the compiler, if any.
/// @return The requested function, or an empty pointer when the library could not be created.
NS::SharedPtr<MTL::Function> MDResourceFactory::new_func(NS::String *p_source, NS::String *p_name, NS::Error **p_error) {
	NS::SharedPtr<NS::AutoreleasePool> pool = NS::TransferPtr(NS::AutoreleasePool::alloc()->init());
	NS::SharedPtr<MTL::CompileOptions> options = NS::TransferPtr(MTL::CompileOptions::alloc()->init());

	NS::Error *err = nullptr;
	NS::SharedPtr<MTL::Library> mtlLib = NS::TransferPtr(device->newLibrary(p_source, options.get(), &err));

	// NOTE(review): `err` is produced while the local autorelease pool is active;
	// confirm it remains valid for callers that read `*p_error` after this returns.
	if (err != nullptr && p_error != nullptr) {
		*p_error = err;
	}

	// Without a library there is nothing to look the function up in.
	if (mtlLib.get() == nullptr) {
		return {};
	}

	return NS::TransferPtr(mtlLib->newFunction(p_name));
}
53
54
/// Generates and compiles the `vertClear` vertex function used when clearing attachments.
///
/// The `layer` varying is only tagged with `[[render_target_array_index]]` when the
/// key has layered rendering enabled. The clip-space depth is read from the clear
/// colors buffer at `ClearAttKey::DEPTH_INDEX`.
NS::SharedPtr<MTL::Function> MDResourceFactory::new_clear_vert_func(ClearAttKey &p_key) {
	NS::SharedPtr<NS::AutoreleasePool> pool = NS::TransferPtr(NS::AutoreleasePool::alloc()->init());

	// Attribute text spliced in after `uint layer`.
	const char *layer_attribute = "";
	if (p_key.is_layered_rendering_enabled()) {
		layer_attribute = " [[render_target_array_index]]";
	}

	char msl[1024];
	snprintf(msl, sizeof(msl), R"(
#include <metal_stdlib>
using namespace metal;

typedef struct {
float4 a_position [[attribute(0)]];
} AttributesPos;

typedef struct {
float4 colors[9];
} ClearColorsIn;

typedef struct {
float4 v_position [[position]];
uint layer%s;
} VaryingsPos;

vertex VaryingsPos vertClear(AttributesPos attributes [[stage_in]], constant ClearColorsIn& ccIn [[buffer(0)]]) {
VaryingsPos varyings;
varyings.v_position = float4(attributes.a_position.x, -attributes.a_position.y, ccIn.colors[%d].r, 1.0);
varyings.layer = uint(attributes.a_position.w);
return varyings;
}
)",
			layer_attribute, ClearAttKey::DEPTH_INDEX);

	NS::String *source = NS::String::string(msl, NS::UTF8StringEncoding);
	return new_func(source, MTLSTR("vertClear"), nullptr);
}
85
86
/// Generates and compiles the `fragClear` fragment function used when clearing attachments.
///
/// One output member and one assignment are emitted for every color attachment
/// enabled in `p_key`; the output type is derived from the attachment's pixel format.
NS::SharedPtr<MTL::Function> MDResourceFactory::new_clear_frag_func(ClearAttKey &p_key) {
	NS::SharedPtr<NS::AutoreleasePool> pool = NS::TransferPtr(NS::AutoreleasePool::alloc()->init());

	std::string msl;
	msl.reserve(2048);

	// Fixed prologue, ending with the opening of the output struct.
	msl += R"(
#include <metal_stdlib>
using namespace metal;

typedef struct {
float4 v_position [[position]];
} VaryingsPos;

typedef struct {
float4 colors[9];
} ClearColorsIn;

typedef struct {
)";

	char line[128];

	// Output struct members, one per enabled color attachment.
	for (uint32_t slot = 0; slot < ClearAttKey::COLOR_COUNT; slot++) {
		if (!p_key.is_enabled(slot)) {
			continue;
		}
		const char *type_name = get_format_type_string((MTL::PixelFormat)p_key.pixel_formats[slot]);
		snprintf(line, sizeof(line), " %s4 color%u [[color(%u)]];\n", type_name, slot, slot);
		msl.append(line);
	}

	msl += R"(} ClearColorsOut;

fragment ClearColorsOut fragClear(VaryingsPos varyings [[stage_in]], constant ClearColorsIn& ccIn [[buffer(0)]]) {

ClearColorsOut ccOut;
)";

	// Assignments copying each clear color into its output, converted to the output type.
	for (uint32_t slot = 0; slot < ClearAttKey::COLOR_COUNT; slot++) {
		if (!p_key.is_enabled(slot)) {
			continue;
		}
		const char *type_name = get_format_type_string((MTL::PixelFormat)p_key.pixel_formats[slot]);
		snprintf(line, sizeof(line), " ccOut.color%u = %s4(ccIn.colors[%u]);\n", slot, type_name, slot);
		msl.append(line);
	}

	msl += R"( return ccOut;
})";

	NS::String *source = NS::String::string(msl.c_str(), NS::UTF8StringEncoding);
	return new_func(source, MTLSTR("fragClear"), nullptr);
}
132
133
/// Returns the MSL scalar type name used for clear outputs of the given pixel format.
///
/// 8-bit and 16-bit integer formats share the 16-bit type names. Formats with no
/// known type yield a deliberately invalid identifier.
const char *MDResourceFactory::get_format_type_string(MTL::PixelFormat p_fmt) const {
	const auto format_type = pixel_formats.getFormatType(p_fmt);
	switch (format_type) {
		// Signed integers up to 16 bits.
		case MTLFormatType::ColorInt8:
		case MTLFormatType::ColorInt16: {
			return "short";
		}
		// Unsigned integers up to 16 bits.
		case MTLFormatType::ColorUInt8:
		case MTLFormatType::ColorUInt16: {
			return "ushort";
		}
		case MTLFormatType::ColorInt32: {
			return "int";
		}
		case MTLFormatType::ColorUInt32: {
			return "uint";
		}
		case MTLFormatType::ColorHalf: {
			return "half";
		}
		// Everything else with a known type is handled as float.
		case MTLFormatType::ColorFloat:
		case MTLFormatType::DepthStencil:
		case MTLFormatType::Compressed: {
			return "float";
		}
		case MTLFormatType::None:
		default: {
			return "unexpected_MTLPixelFormatInvalid";
		}
	}
}
156
157
/// Creates a depth/stencil state for clear draws.
///
/// The depth test always passes; depth writes follow `p_use_depth`. When
/// `p_use_stencil` is set, both faces always pass and replace the stencil value
/// on every outcome; otherwise both face descriptors are cleared.
NS::SharedPtr<MTL::DepthStencilState> MDResourceFactory::new_depth_stencil_state(bool p_use_depth, bool p_use_stencil) {
	NS::SharedPtr<MTL::DepthStencilDescriptor> descriptor = NS::TransferPtr(MTL::DepthStencilDescriptor::alloc()->init());
	descriptor->setDepthCompareFunction(MTL::CompareFunctionAlways);
	descriptor->setDepthWriteEnabled(p_use_depth);

	if (!p_use_stencil) {
		descriptor->setFrontFaceStencil(nullptr);
		descriptor->setBackFaceStencil(nullptr);
	} else {
		NS::SharedPtr<MTL::StencilDescriptor> stencil = NS::TransferPtr(MTL::StencilDescriptor::alloc()->init());
		stencil->setStencilCompareFunction(MTL::CompareFunctionAlways);
		stencil->setStencilFailureOperation(MTL::StencilOperationReplace);
		stencil->setDepthFailureOperation(MTL::StencilOperationReplace);
		stencil->setDepthStencilPassOperation(MTL::StencilOperationReplace);

		// The same descriptor is used for both faces.
		descriptor->setFrontFaceStencil(stencil.get());
		descriptor->setBackFaceStencil(stencil.get());
	}

	return NS::TransferPtr(device->newDepthStencilState(descriptor.get()));
}
178
179
/// Builds the render pipeline state used to clear the attachments described by `p_key`.
///
/// @param p_key Describes pixel formats, enabled attachments and sample count.
/// @param p_error Optional out-parameter; always overwritten with the pipeline creation error (or null).
NS::SharedPtr<MTL::RenderPipelineState> MDResourceFactory::new_clear_pipeline_state(ClearAttKey &p_key, NS::Error **p_error) {
	NS::SharedPtr<MTL::Function> vertex_fn = new_clear_vert_func(p_key);
	NS::SharedPtr<MTL::Function> fragment_fn = new_clear_frag_func(p_key);

	NS::SharedPtr<MTL::RenderPipelineDescriptor> descriptor = NS::TransferPtr(MTL::RenderPipelineDescriptor::alloc()->init());
	descriptor->setLabel(MTLSTR("ClearRenderAttachments"));
	descriptor->setVertexFunction(vertex_fn.get());
	descriptor->setFragmentFunction(fragment_fn.get());
	descriptor->setRasterSampleCount(p_key.sample_count);
	descriptor->setInputPrimitiveTopology(MTL::PrimitiveTopologyClassTriangle);

	// Color attachments: disabled slots keep their format but get an empty write mask.
	for (uint32_t slot = 0; slot < ClearAttKey::COLOR_COUNT; slot++) {
		MTL::RenderPipelineColorAttachmentDescriptor *attachment = descriptor->colorAttachments()->object(slot);
		attachment->setPixelFormat((MTL::PixelFormat)p_key.pixel_formats[slot]);
		attachment->setWriteMask(p_key.is_enabled(slot) ? MTL::ColorWriteMaskAll : MTL::ColorWriteMaskNone);
	}

	// Depth and stencil formats are only set when the key holds a matching format.
	const MTL::PixelFormat depth_format = (MTL::PixelFormat)p_key.depth_format();
	if (pixel_formats.isDepthFormat(depth_format)) {
		descriptor->setDepthAttachmentPixelFormat(depth_format);
	}

	const MTL::PixelFormat stencil_format = (MTL::PixelFormat)p_key.stencil_format();
	if (pixel_formats.isStencilFormat(stencil_format)) {
		descriptor->setStencilAttachmentPixelFormat(stencil_format);
	}

	MTL::VertexDescriptor *vertex_descriptor = descriptor->vertexDescriptor();

	// Vertex attribute descriptors.
	const NS::UInteger buffer_index = get_vertex_buffer_index(VERT_CONTENT_BUFFER_INDEX);
	NS::UInteger stride = 0;

	// Vertex location: a single float4 at the start of each vertex.
	MTL::VertexAttributeDescriptor *position_attribute = vertex_descriptor->attributes()->object(0);
	position_attribute->setFormat(MTL::VertexFormatFloat4);
	position_attribute->setBufferIndex(buffer_index);
	position_attribute->setOffset(stride);
	stride += sizeof(simd::float4);

	// Vertex attribute buffer.
	MTL::VertexBufferLayoutDescriptor *layout = vertex_descriptor->layouts()->object(buffer_index);
	layout->setStepFunction(MTL::VertexStepFunctionPerVertex);
	layout->setStepRate(1);
	layout->setStride(stride);

	NS::Error *creation_error = nullptr;
	NS::SharedPtr<MTL::RenderPipelineState> pipeline = NS::TransferPtr(device->newRenderPipelineState(descriptor.get(), &creation_error));
	if (p_error != nullptr) {
		*p_error = creation_error;
	}
	return pipeline;
}
231
232
/// Builds a pipeline that draws a fullscreen triangle without writing anything.
///
/// The key must describe a single enabled color attachment (index 0) with no
/// layered rendering, depth or stencil. Returns an empty pointer when the shader
/// library fails to compile.
///
/// @param p_error Optional out-parameter; only written when an error was reported.
NS::SharedPtr<MTL::RenderPipelineState> MDResourceFactory::new_empty_draw_pipeline_state(ClearAttKey &p_key, NS::Error **p_error) {
	DEV_ASSERT(!p_key.is_layered_rendering_enabled());
	DEV_ASSERT(p_key.is_enabled(0));
	DEV_ASSERT(!p_key.is_depth_enabled());
	DEV_ASSERT(!p_key.is_stencil_enabled());

	NS::SharedPtr<NS::AutoreleasePool> pool = NS::TransferPtr(NS::AutoreleasePool::alloc()->init());
	static const char *msl = R"(#include <metal_stdlib>
using namespace metal;

struct FullscreenNoopOut {
float4 position [[position]];
};

vertex FullscreenNoopOut fullscreenNoopVert(uint vid [[vertex_id]]) {
float2 positions[3] = { float2(-1.0, -1.0), float2(3.0, -1.0), float2(-1.0, 3.0) };
float2 pos = positions[vid];

FullscreenNoopOut out;
out.position = float4(pos, 0.0, 1.0);
return out;
}

fragment void fullscreenNoopFrag(float4 gl_FragCoord [[position]]) {
}
)";

	// Compile the shader library.
	NS::Error *err = nullptr;
	NS::SharedPtr<MTL::CompileOptions> compile_options = NS::TransferPtr(MTL::CompileOptions::alloc()->init());
	NS::SharedPtr<MTL::Library> library = NS::TransferPtr(device->newLibrary(NS::String::string(msl, NS::UTF8StringEncoding), compile_options.get(), &err));
	if (p_error != nullptr && err != nullptr) {
		*p_error = err;
	}

	if (library.get() == nullptr) {
		return {};
	}

	NS::SharedPtr<MTL::Function> vertex_fn = NS::TransferPtr(library->newFunction(MTLSTR("fullscreenNoopVert")));
	NS::SharedPtr<MTL::Function> fragment_fn = NS::TransferPtr(library->newFunction(MTLSTR("fullscreenNoopFrag")));

	NS::SharedPtr<MTL::RenderPipelineDescriptor> descriptor = NS::TransferPtr(MTL::RenderPipelineDescriptor::alloc()->init());
	descriptor->setLabel(MTLSTR("EmptyDrawFullscreenTriangle"));
	descriptor->setVertexFunction(vertex_fn.get());
	descriptor->setFragmentFunction(fragment_fn.get());
	// A sample count of zero in the key is treated as single-sampled.
	descriptor->setRasterSampleCount(p_key.sample_count ? p_key.sample_count : 1);
	descriptor->setInputPrimitiveTopology(MTL::PrimitiveTopologyClassTriangle);

	// Match the attachment format but never write to it.
	MTL::RenderPipelineColorAttachmentDescriptor *color0 = descriptor->colorAttachments()->object(0);
	color0->setPixelFormat((MTL::PixelFormat)p_key.pixel_formats[0]);
	color0->setWriteMask(MTL::ColorWriteMaskNone);

	err = nullptr;
	NS::SharedPtr<MTL::RenderPipelineState> pipeline = NS::TransferPtr(device->newRenderPipelineState(descriptor.get(), &err));
	if (err != nullptr && p_error != nullptr) {
		*p_error = err;
	}
	return pipeline;
}
291
292
#pragma mark - Resource Cache
293
294
/// Returns the cached clear pipeline for `p_key`, creating and caching it on first use.
///
/// `p_error` is only forwarded to the factory when a new pipeline is created.
MTL::RenderPipelineState *MDResourceCache::get_clear_render_pipeline_state(ClearAttKey &p_key, NS::Error **p_error) {
	// Cache hit: hand back the stored pipeline.
	HashMap::ConstIterator cached = clear_states.find(p_key);
	if (cached != clear_states.end()) {
		return cached->value.get();
	}

	// Cache miss: build the pipeline and store whatever the factory produced.
	NS::SharedPtr<MTL::RenderPipelineState> created = resource_factory->new_clear_pipeline_state(p_key, p_error);
	MTL::RenderPipelineState *raw_state = created.get();
	clear_states[p_key] = std::move(created);
	return raw_state;
}
305
306
/// Returns the cached empty-draw pipeline for `p_key`, creating and caching it on first use.
///
/// `p_error` is only forwarded to the factory when a new pipeline is created.
MTL::RenderPipelineState *MDResourceCache::get_empty_draw_pipeline_state(ClearAttKey &p_key, NS::Error **p_error) {
	// Cache hit: hand back the stored pipeline.
	HashMap::ConstIterator cached = empty_draw_states.find(p_key);
	if (cached != empty_draw_states.end()) {
		return cached->value.get();
	}

	// Cache miss: build the pipeline and store whatever the factory produced.
	NS::SharedPtr<MTL::RenderPipelineState> created = resource_factory->new_empty_draw_pipeline_state(p_key, p_error);
	MTL::RenderPipelineState *raw_state = created.get();
	empty_draw_states[p_key] = std::move(created);
	return raw_state;
}
317
318
/// Returns the lazily-created depth/stencil state for the given depth/stencil combination.
///
/// Each of the four combinations has its own cache slot, filled on first request.
MTL::DepthStencilState *MDResourceCache::get_depth_stencil_state(bool p_use_depth, bool p_use_stencil) {
	// Pick the cache slot matching the requested combination.
	auto &slot = p_use_depth
			? (p_use_stencil ? clear_depth_stencil_state.all : clear_depth_stencil_state.depth_only)
			: (p_use_stencil ? clear_depth_stencil_state.stencil_only : clear_depth_stencil_state.none);

	if (!slot) {
		slot = resource_factory->new_depth_stencil_state(p_use_depth, p_use_stencil);
	}
	return slot.get();
}
341
342
#pragma mark - Render Pass Types
343
344
/// Returns the format capabilities this subpass requires of the attachment at `p_index`,
/// based on which reference lists (input, color, resolve, depth/stencil) mention it.
MTLFmtCaps MDSubpass::getRequiredFmtCapsForAttachmentAt(uint32_t p_index) const {
	// True when any reference in the list points at `p_index`.
	const auto references_attachment = [p_index](const auto &p_references) {
		for (RDD::AttachmentReference const &reference : p_references) {
			if (reference.attachment == p_index) {
				return true;
			}
		}
		return false;
	};

	MTLFmtCaps required = kMTLFmtCapsNone;

	if (references_attachment(input_references)) {
		flags::set(required, kMTLFmtCapsRead);
	}
	if (references_attachment(color_references)) {
		flags::set(required, kMTLFmtCapsColorAtt);
	}
	if (references_attachment(resolve_references)) {
		flags::set(required, kMTLFmtCapsResolve);
	}
	if (depth_stencil_reference.attachment == p_index) {
		flags::set(required, kMTLFmtCapsDSAtt);
	}

	return required;
}
374
375
void MDAttachment::linkToSubpass(const MDRenderPass &p_pass) {
376
firstUseSubpassIndex = UINT32_MAX;
377
lastUseSubpassIndex = 0;
378
379
for (MDSubpass const &subpass : p_pass.subpasses) {
380
MTLFmtCaps reqCaps = subpass.getRequiredFmtCapsForAttachmentAt(index);
381
if (reqCaps) {
382
firstUseSubpassIndex = MIN(subpass.subpass_index, firstUseSubpassIndex);
383
lastUseSubpassIndex = MAX(subpass.subpass_index, lastUseSubpassIndex);
384
}
385
}
386
}
387
388
/// Chooses the Metal store action for this attachment in `p_subpass`.
///
/// The content is always stored (optionally with a resolve) unless the whole area
/// is rendered and this is the attachment's last use; only then is the configured
/// store action (stencil or color/depth, per `p_is_stencil`) taken into account.
MTL::StoreAction MDAttachment::getMTLStoreAction(MDSubpass const &p_subpass,
		bool p_is_rendering_entire_area,
		bool p_has_resolve,
		bool p_can_resolve,
		bool p_is_stencil) const {
	const bool resolve_and_store = p_has_resolve && p_can_resolve;
	const MTL::StoreAction store_result = resolve_and_store ? MTL::StoreActionStoreAndMultisampleResolve : MTL::StoreActionStore;

	// Content must be preserved for partial renders and for later subpasses.
	if (!(p_is_rendering_entire_area && isLastUseOf(p_subpass))) {
		return store_result;
	}

	const MTL::StoreAction configured = p_is_stencil ? stencilStoreAction : storeAction;
	if (configured == MTL::StoreActionStore) {
		return store_result;
	}
	if (configured == MTL::StoreActionDontCare) {
		if (!p_has_resolve) {
			return MTL::StoreActionDontCare;
		}
		// A resolve was requested: resolve if possible, otherwise keep the content.
		return p_can_resolve ? MTL::StoreActionMultisampleResolve : MTL::StoreActionStore;
	}

	// Any other configured action falls back to a plain store.
	return MTL::StoreActionStore;
}
407
408
bool MDAttachment::shouldClear(const MDSubpass &p_subpass, bool p_is_stencil) const {
409
// If the subpass is not the first subpass to use this attachment, don't clear this attachment.
410
if (p_subpass.subpass_index != firstUseSubpassIndex) {
411
return false;
412
}
413
return (p_is_stencil ? stencilLoadAction : loadAction) == MTL::LoadActionClear;
414
}
415
416
/// Stores the attachments and subpasses, then lets every attachment compute
/// its first and last use across the subpasses.
MDRenderPass::MDRenderPass(Vector<MDAttachment> &p_attachments, Vector<MDSubpass> &p_subpasses) :
		attachments(p_attachments),
		subpasses(p_subpasses) {
	for (MDAttachment &attachment : attachments) {
		attachment.linkToSubpass(*this);
	}
}
422
423
#pragma mark - Command Buffer Base
424
425
/// Retains `p_resource` and remembers it so release_resources() can release it later.
///
/// A null resource is ignored: neither CFRetain nor the matching CFRelease accept null.
void MDCommandBufferBase::retain_resource(CFTypeRef p_resource) {
	if (p_resource == nullptr) {
		return;
	}
	CFRetain(p_resource);
	_retained_resources.push_back(p_resource);
}
429
430
void MDCommandBufferBase::release_resources() {
431
for (CFTypeRef r : _retained_resources) {
432
CFRelease(r);
433
}
434
_retained_resources.clear();
435
}
436
437
/// Replaces the render state's viewports with `p_viewports` and marks them dirty.
///
/// Every viewport uses a depth range of [0.0, 1.0].
void MDCommandBufferBase::render_set_viewport(VectorView<Rect2i> p_viewports) {
	RenderStateBase &render_state = get_render_state_base();

	const uint32_t viewport_count = p_viewports.size();
	render_state.viewports.resize(viewport_count);

	for (uint32_t slot = 0; slot < viewport_count; slot++) {
		Rect2i const &rect = p_viewports[slot];
		render_state.viewports[slot] = {
			.originX = static_cast<double>(rect.position.x),
			.originY = static_cast<double>(rect.position.y),
			.width = static_cast<double>(rect.size.width),
			.height = static_cast<double>(rect.size.height),
			.znear = 0.0,
			.zfar = 1.0,
		};
	}

	render_state.dirty.set_flag(RenderStateBase::DIRTY_VIEWPORT);
}
453
454
/// Replaces the render state's scissor rectangles with `p_scissors` and marks them dirty.
///
/// NOTE(review): coordinates are cast to unsigned as-is; negative inputs are presumably
/// never passed here - confirm against callers.
void MDCommandBufferBase::render_set_scissor(VectorView<Rect2i> p_scissors) {
	RenderStateBase &render_state = get_render_state_base();

	const uint32_t scissor_count = p_scissors.size();
	render_state.scissors.resize(scissor_count);

	for (uint32_t slot = 0; slot < scissor_count; slot++) {
		Rect2i const &rect = p_scissors[slot];
		render_state.scissors[slot] = {
			.x = static_cast<NS::UInteger>(rect.position.x),
			.y = static_cast<NS::UInteger>(rect.position.y),
			.width = static_cast<NS::UInteger>(rect.size.width),
			.height = static_cast<NS::UInteger>(rect.size.height),
		};
	}

	render_state.dirty.set_flag(RenderStateBase::DIRTY_SCISSOR);
}
468
469
void MDCommandBufferBase::render_set_blend_constants(const Color &p_constants) {
470
DEV_ASSERT(type == MDCommandBufferStateType::Render);
471
RenderStateBase &state = get_render_state_base();
472
if (state.blend_constants != p_constants) {
473
state.blend_constants = p_constants;
474
state.dirty.set_flag(RenderStateBase::DIRTY_BLEND);
475
}
476
}
477
478
/// Writes the vertices for every rectangle in `p_rects` into `p_vertices`, back to back.
///
/// `p_vertices` must be large enough for all rectangles; each rectangle's vertices
/// are generated by the single-rectangle overload.
void MDCommandBufferBase::_populate_vertices(simd::float4 *p_vertices, Size2i p_fb_size, VectorView<Rect2i> p_rects) {
	// Index of the next free vertex, threaded through the per-rectangle calls.
	uint32_t next_vertex = 0;
	const uint32_t rect_count = p_rects.size();
	for (uint32_t rect_index = 0; rect_index < rect_count; rect_index++) {
		next_vertex = _populate_vertices(p_vertices, next_vertex, p_rects[rect_index], p_fb_size);
	}
}
485
486
/// Writes two triangles (six vertices) covering `p_rect` for every current view.
///
/// Positions are in clip space; `z` is zero and `w` carries the layer index.
///
/// @param p_vertices Destination vertex array.
/// @param p_index Index of the first vertex to write.
/// @param p_rect Rectangle in framebuffer pixels.
/// @param p_fb_size Framebuffer size used to normalize the rectangle.
/// @return The index following the last vertex written.
uint32_t MDCommandBufferBase::_populate_vertices(simd::float4 *p_vertices, uint32_t p_index, Rect2i const &p_rect, Size2i p_fb_size) {
	// Determine the positions of the four edges of the
	// clear rectangle as a fraction of the attachment size.
	float left = (float)(p_rect.position.x) / (float)p_fb_size.width;
	float right = (float)(p_rect.size.width) / (float)p_fb_size.width + left;
	float bottom = (float)(p_rect.position.y) / (float)p_fb_size.height;
	float top = (float)(p_rect.size.height) / (float)p_fb_size.height + bottom;

	// Transform to clip-space coordinates, which are bounded by (-1.0 < p < 1.0) in clip-space.
	const auto to_clip_space = [](float p_fraction) {
		return (p_fraction * 2.0f) - 1.0f;
	};
	left = to_clip_space(left);
	right = to_clip_space(right);
	bottom = to_clip_space(bottom);
	top = to_clip_space(top);

	uint32_t write_index = p_index;
	simd::float4 vertex;

	// Appends the current vertex at the given clip-space position.
	const auto emit = [&](float p_x, float p_y) {
		vertex.y = p_y;
		vertex.x = p_x;
		p_vertices[write_index++] = vertex;
	};

	const uint32_t layer_count = get_current_view_count();
	for (uint32_t layer = 0; layer < layer_count; layer++) {
		vertex.z = 0.0;
		vertex.w = (float)layer;

		// First triangle: top left, bottom left, bottom right.
		emit(left, top);
		emit(left, bottom);
		emit(right, bottom);

		// Second triangle: bottom right, top right, top left.
		emit(right, bottom);
		emit(right, top);
		emit(left, top);
	}

	return write_index;
}
540
541
void MDCommandBufferBase::_end_render_pass() {
542
MDFrameBuffer const &fb_info = *get_frame_buffer();
543
MDSubpass const &subpass = get_current_subpass();
544
545
PixelFormats &pf = device_driver->get_pixel_formats();
546
547
for (uint32_t i = 0; i < subpass.resolve_references.size(); i++) {
548
uint32_t color_index = subpass.color_references[i].attachment;
549
uint32_t resolve_index = subpass.resolve_references[i].attachment;
550
DEV_ASSERT((color_index == RDD::AttachmentReference::UNUSED) == (resolve_index == RDD::AttachmentReference::UNUSED));
551
if (color_index == RDD::AttachmentReference::UNUSED || !fb_info.has_texture(color_index)) {
552
continue;
553
}
554
555
MTL::Texture *resolve_tex = fb_info.get_texture(resolve_index);
556
557
CRASH_COND_MSG(!flags::all(pf.getCapabilities(resolve_tex->pixelFormat()), kMTLFmtCapsResolve), "not implemented: unresolvable texture types");
558
// see: https://github.com/KhronosGroup/MoltenVK/blob/d20d13fe2735adb845636a81522df1b9d89c0fba/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm#L407
559
}
560
561
end_render_encoding();
562
}
563
564
void MDCommandBufferBase::_render_clear_render_area() {
565
MDRenderPass const &pass = *get_render_pass();
566
MDSubpass const &subpass = get_current_subpass();
567
LocalVector<RDD::RenderPassClearValue> &clear_values = get_clear_values();
568
569
uint32_t ds_index = subpass.depth_stencil_reference.attachment;
570
bool clear_depth = (ds_index != RDD::AttachmentReference::UNUSED && pass.attachments[ds_index].shouldClear(subpass, false));
571
bool clear_stencil = (ds_index != RDD::AttachmentReference::UNUSED && pass.attachments[ds_index].shouldClear(subpass, true));
572
573
uint32_t color_count = subpass.color_references.size();
574
uint32_t clears_size = color_count + (clear_depth || clear_stencil ? 1 : 0);
575
if (clears_size == 0) {
576
return;
577
}
578
579
RDD::AttachmentClear *clears = ALLOCA_ARRAY(RDD::AttachmentClear, clears_size);
580
uint32_t clears_count = 0;
581
582
for (uint32_t i = 0; i < color_count; i++) {
583
uint32_t idx = subpass.color_references[i].attachment;
584
if (idx != RDD::AttachmentReference::UNUSED && pass.attachments[idx].shouldClear(subpass, false)) {
585
clears[clears_count++] = { .aspect = RDD::TEXTURE_ASPECT_COLOR_BIT, .color_attachment = idx, .value = clear_values[idx] };
586
}
587
}
588
589
if (clear_depth || clear_stencil) {
590
MDAttachment const &attachment = pass.attachments[ds_index];
591
BitField<RDD::TextureAspectBits> bits = {};
592
if (clear_depth && attachment.type & MDAttachmentType::Depth) {
593
bits.set_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT);
594
}
595
if (clear_stencil && attachment.type & MDAttachmentType::Stencil) {
596
bits.set_flag(RDD::TEXTURE_ASPECT_STENCIL_BIT);
597
}
598
599
clears[clears_count++] = { .aspect = bits, .color_attachment = ds_index, .value = clear_values[ds_index] };
600
}
601
602
if (clears_count == 0) {
603
return;
604
}
605
606
render_clear_attachments(VectorView(clears, clears_count), { get_render_area() });
607
}
608
609
/// Stages push constant data for the given shader while in a render or compute state.
///
/// Nothing happens in blit/none states, when the shader has no push constant
/// binding, or when the data does not fit the staging buffer. Push constants are
/// only marked dirty when the data is non-empty.
///
/// @param p_shader Shader whose push constant binding is used.
/// @param p_data Push constant words to copy.
void MDCommandBufferBase::encode_push_constant_data(RDD::ShaderID p_shader, VectorView<uint32_t> p_data) {
	switch (type) {
		case MDCommandBufferStateType::Render:
		case MDCommandBufferStateType::Compute: {
			MDShader *shader = (MDShader *)(p_shader.id);
			if (shader->push_constants.binding == UINT32_MAX) {
				return;
			}

			const size_t data_len = p_data.size() * sizeof(uint32_t);
			DEV_ASSERT(data_len <= sizeof(push_constant_data));
			// DEV_ASSERT compiles out of release builds; never write past the staging buffer.
			if (data_len > sizeof(push_constant_data)) {
				return;
			}

			push_constant_binding = shader->push_constants.binding;
			push_constant_data_len = data_len;
			// Skip the copy for empty data, whose source pointer may be null.
			if (data_len > 0) {
				memcpy(push_constant_data, p_data.ptr(), data_len);
				mark_push_constants_dirty();
			}
		} break;
		case MDCommandBufferStateType::Blit:
		case MDCommandBufferStateType::None:
			return;
	}
}
631
632
#pragma mark - Metal Library
633
634
static const char *SHADER_STAGE_NAMES[] = {
635
[RD::SHADER_STAGE_VERTEX] = "vert",
636
[RD::SHADER_STAGE_FRAGMENT] = "frag",
637
[RD::SHADER_STAGE_TESSELATION_CONTROL] = "tess_ctrl",
638
[RD::SHADER_STAGE_TESSELATION_EVALUATION] = "tess_eval",
639
[RD::SHADER_STAGE_COMPUTE] = "comp",
640
};
641
642
void ShaderCacheEntry::notify_free() const {
643
owner.shader_cache_free_entry(key);
644
}
645
646
#pragma mark - MDLibrary
647
648
/// Binds the library to its shader cache entry.
///
/// In DEV_ENABLED builds the original shader source is additionally retained.
MDLibrary::MDLibrary(ShaderCacheEntry *p_entry
#ifdef DEV_ENABLED
		,
		NS::String *p_source
#endif
		) :
		_entry(p_entry) {
#ifdef DEV_ENABLED
	// Keep the source alive for as long as the library exists.
	_original_source = NS::RetainPtr(p_source);
#endif
}
659
660
/// Notifies the shader cache entry that this library is going away.
MDLibrary::~MDLibrary() {
	_entry->notify_free();
}
663
664
/// Base implementation: does nothing with the label.
void MDLibrary::set_label(NS::String *p_label) {
}
666
667
#pragma mark - MDLazyLibrary
668
669
/// Loads the MTLLibrary when the library is first accessed.
670
class MDLazyLibrary final : public MDLibrary {
671
NS::SharedPtr<MTL::Library> _library;
672
NS::Error *_error = nullptr;
673
std::shared_mutex _mu;
674
bool _loaded = false;
675
MTL::Device *_device = nullptr;
676
NS::SharedPtr<NS::String> _source;
677
NS::SharedPtr<MTL::CompileOptions> _options;
678
679
void _load();
680
681
public:
682
MDLazyLibrary(ShaderCacheEntry *p_entry,
683
MTL::Device *p_device,
684
NS::String *p_source,
685
MTL::CompileOptions *p_options);
686
687
MTL::Library *get_library() override;
688
NS::Error *get_error() override;
689
};
690
691
MDLazyLibrary::MDLazyLibrary(ShaderCacheEntry *p_entry,
692
MTL::Device *p_device,
693
NS::String *p_source,
694
MTL::CompileOptions *p_options) :
695
MDLibrary(p_entry
696
#ifdef DEV_ENABLED
697
,
698
p_source
699
#endif
700
),
701
_device(p_device),
702
_source(NS::RetainPtr(p_source)),
703
_options(NS::RetainPtr(p_options)) {
704
}
705
706
void MDLazyLibrary::_load() {
707
{
708
std::shared_lock<std::shared_mutex> lock(_mu);
709
if (_loaded) {
710
return;
711
}
712
}
713
714
std::unique_lock<std::shared_mutex> lock(_mu);
715
if (_loaded) {
716
return;
717
}
718
719
os_signpost_id_t compile_id = (os_signpost_id_t)(uintptr_t)this;
720
os_signpost_interval_begin(LOG_INTERVALS, compile_id, "shader_compile",
721
"shader_name=%{public}s stage=%{public}s hash=%X",
722
_entry->name.get_data(), SHADER_STAGE_NAMES[_entry->stage], _entry->key.short_sha());
723
NS::Error *error = nullptr;
724
_library = NS::TransferPtr(_device->newLibrary(_source.get(), _options.get(), &error));
725
os_signpost_interval_end(LOG_INTERVALS, compile_id, "shader_compile");
726
_error = error;
727
_device = nullptr;
728
_source.reset();
729
_options.reset();
730
_loaded = true;
731
}
732
733
MTL::Library *MDLazyLibrary::get_library() {
734
_load();
735
return _library.get();
736
}
737
738
NS::Error *MDLazyLibrary::get_error() {
739
_load();
740
return _error;
741
}
742
743
#pragma mark - MDImmediateLibrary
744
745
/// Loads the MTLLibrary immediately on initialization, using Metal's async compilation API.
746
class MDImmediateLibrary final : public MDLibrary {
747
NS::SharedPtr<MTL::Library> _library;
748
NS::Error *_error = nullptr;
749
std::mutex _cv_mutex;
750
std::condition_variable _cv;
751
std::atomic<bool> _complete{ false };
752
bool _ready = false;
753
754
public:
755
MDImmediateLibrary(ShaderCacheEntry *p_entry,
756
MTL::Device *p_device,
757
NS::String *p_source,
758
MTL::CompileOptions *p_options);
759
760
MTL::Library *get_library() override;
761
NS::Error *get_error() override;
762
};
763
764
MDImmediateLibrary::MDImmediateLibrary(ShaderCacheEntry *p_entry,
765
MTL::Device *p_device,
766
NS::String *p_source,
767
MTL::CompileOptions *p_options) :
768
MDLibrary(p_entry
769
#ifdef DEV_ENABLED
770
,
771
p_source
772
#endif
773
) {
774
os_signpost_id_t compile_id = (os_signpost_id_t)(uintptr_t)this;
775
os_signpost_interval_begin(LOG_INTERVALS, compile_id, "shader_compile",
776
"shader_name=%{public}s stage=%{public}s hash=%X",
777
p_entry->name.get_data(), SHADER_STAGE_NAMES[p_entry->stage], p_entry->key.short_sha());
778
779
// Use Metal's async compilation API with std::function callback.
780
p_device->newLibrary(p_source, p_options, [this, compile_id, p_entry](MTL::Library *library, NS::Error *error) {
781
os_signpost_interval_end(LOG_INTERVALS, compile_id, "shader_compile");
782
if (library) {
783
_library = NS::RetainPtr(library);
784
}
785
_error = error;
786
if (error) {
787
ERR_PRINT(vformat(U"Error compiling shader %s: %s", p_entry->name.get_data(), error->localizedDescription()->utf8String()));
788
}
789
790
{
791
std::lock_guard<std::mutex> lock(_cv_mutex);
792
_ready = true;
793
}
794
_cv.notify_all();
795
_complete = true;
796
});
797
}
798
799
MTL::Library *MDImmediateLibrary::get_library() {
800
if (!_complete) {
801
std::unique_lock<std::mutex> lock(_cv_mutex);
802
_cv.wait(lock, [this] { return _ready; });
803
}
804
return _library.get();
805
}
806
807
NS::Error *MDImmediateLibrary::get_error() {
808
if (!_complete) {
809
std::unique_lock<std::mutex> lock(_cv_mutex);
810
_cv.wait(lock, [this] { return _ready; });
811
}
812
return _error;
813
}
814
815
#pragma mark - MDBinaryLibrary
816
817
/// Loads the MTLLibrary from pre-compiled binary data.
818
class MDBinaryLibrary final : public MDLibrary {
819
NS::SharedPtr<MTL::Library> _library;
820
NS::Error *_error = nullptr;
821
822
public:
823
MDBinaryLibrary(ShaderCacheEntry *p_entry,
824
MTL::Device *p_device,
825
#ifdef DEV_ENABLED
826
NS::String *p_source,
827
#endif
828
dispatch_data_t p_data);
829
830
MTL::Library *get_library() override;
831
NS::Error *get_error() override;
832
};
833
834
MDBinaryLibrary::MDBinaryLibrary(ShaderCacheEntry *p_entry,
835
MTL::Device *p_device,
836
#ifdef DEV_ENABLED
837
NS::String *p_source,
838
#endif
839
dispatch_data_t p_data) :
840
MDLibrary(p_entry
841
#ifdef DEV_ENABLED
842
,
843
p_source
844
#endif
845
) {
846
NS::Error *error = nullptr;
847
_library = NS::TransferPtr(p_device->newLibrary(p_data, &error));
848
if (error != nullptr) {
849
_error = error;
850
ERR_PRINT(vformat("Unable to load shader library: %s", error->localizedDescription()->utf8String()));
851
}
852
}
853
854
MTL::Library *MDBinaryLibrary::get_library() {
855
return _library.get();
856
}
857
858
NS::Error *MDBinaryLibrary::get_error() {
859
return _error;
860
}
861
862
#pragma mark - MDLibrary Factory Methods
863
864
std::shared_ptr<MDLibrary> MDLibrary::create(ShaderCacheEntry *p_entry,
865
MTL::Device *p_device,
866
NS::String *p_source,
867
MTL::CompileOptions *p_options,
868
ShaderLoadStrategy p_strategy) {
869
std::shared_ptr<MDLibrary> lib;
870
switch (p_strategy) {
871
case ShaderLoadStrategy::IMMEDIATE:
872
[[fallthrough]];
873
default:
874
lib = std::make_shared<MDImmediateLibrary>(p_entry, p_device, p_source, p_options);
875
break;
876
case ShaderLoadStrategy::LAZY:
877
lib = std::make_shared<MDLazyLibrary>(p_entry, p_device, p_source, p_options);
878
break;
879
}
880
p_entry->library = lib;
881
return lib;
882
}
883
884
std::shared_ptr<MDLibrary> MDLibrary::create(ShaderCacheEntry *p_entry,
885
MTL::Device *p_device,
886
#ifdef DEV_ENABLED
887
NS::String *p_source,
888
#endif
889
dispatch_data_t p_data) {
890
std::shared_ptr<MDLibrary> lib = std::make_shared<MDBinaryLibrary>(p_entry, p_device,
891
#ifdef DEV_ENABLED
892
p_source,
893
#endif
894
p_data);
895
p_entry->library = lib;
896
return lib;
897
}
898
899