GitHub Repository: godotengine/godot
Path: blob/master/drivers/metal/metal3_objects.cpp
1
/**************************************************************************/
2
/* metal3_objects.cpp */
3
/**************************************************************************/
4
/* This file is part of: */
5
/* GODOT ENGINE */
6
/* https://godotengine.org */
7
/**************************************************************************/
8
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
9
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
10
/* */
11
/* Permission is hereby granted, free of charge, to any person obtaining */
12
/* a copy of this software and associated documentation files (the */
13
/* "Software"), to deal in the Software without restriction, including */
14
/* without limitation the rights to use, copy, modify, merge, publish, */
15
/* distribute, sublicense, and/or sell copies of the Software, and to */
16
/* permit persons to whom the Software is furnished to do so, subject to */
17
/* the following conditions: */
18
/* */
19
/* The above copyright notice and this permission notice shall be */
20
/* included in all copies or substantial portions of the Software. */
21
/* */
22
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
23
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
24
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
25
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
26
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
27
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
28
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
29
/**************************************************************************/
30
31
/**************************************************************************/
32
/* */
33
/* Portions of this code were derived from MoltenVK. */
34
/* */
35
/* Copyright (c) 2015-2023 The Brenwill Workshop Ltd. */
36
/* (http://www.brenwill.com) */
37
/* */
38
/* Licensed under the Apache License, Version 2.0 (the "License"); */
39
/* you may not use this file except in compliance with the License. */
40
/* You may obtain a copy of the License at */
41
/* */
42
/* http://www.apache.org/licenses/LICENSE-2.0 */
43
/* */
44
/* Unless required by applicable law or agreed to in writing, software */
45
/* distributed under the License is distributed on an "AS IS" BASIS, */
46
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */
47
/* implied. See the License for the specific language governing */
48
/* permissions and limitations under the License. */
49
/**************************************************************************/
50
51
#include "metal3_objects.h"
52
53
#include "metal_utils.h"
54
#include "pixel_formats.h"
55
#include "rendering_device_driver_metal3.h"
56
#include "rendering_shader_container_metal.h"
57
58
#include <algorithm>
59
60
using namespace MTL3;
61
62
MDCommandBuffer::MDCommandBuffer(MTL::CommandQueue *p_queue, ::RenderingDeviceDriverMetal *p_device_driver) :
63
_scratch(p_queue->device()), queue(p_queue) {
64
device_driver = p_device_driver;
65
type = MDCommandBufferStateType::None;
66
use_barriers = device_driver->use_barriers;
67
if (use_barriers) {
68
// Already validated availability if use_barriers is true.
69
MTL::Device *device = p_queue->device();
70
NS::SharedPtr<MTL::ResidencySetDescriptor> rs_desc = NS::TransferPtr(MTL::ResidencySetDescriptor::alloc()->init());
71
rs_desc->setInitialCapacity(10);
72
rs_desc->setLabel(MTLSTR("Command Residency Set"));
73
NS::Error *error = nullptr;
74
_frame_state.rs = NS::TransferPtr(device->newResidencySet(rs_desc.get(), &error));
75
CRASH_COND_MSG(error != nullptr, vformat("Failed to create residency set: %s", String(error->localizedDescription()->utf8String())));
76
}
77
}
78
79
void MDCommandBuffer::begin_label(const char *p_label_name, const Color &p_color) {
80
NS::SharedPtr<NS::String> s = NS::TransferPtr(NS::String::alloc()->init(p_label_name, NS::UTF8StringEncoding));
81
command_buffer()->pushDebugGroup(s.get());
82
}
83
84
void MDCommandBuffer::end_label() {
85
command_buffer()->popDebugGroup();
86
}
87
88
void MDCommandBuffer::begin() {
89
DEV_ASSERT(commandBuffer.get() == nullptr && !state_begin);
90
state_begin = true;
91
bzero(pending_after_stages, sizeof(pending_after_stages));
92
bzero(pending_before_queue_stages, sizeof(pending_before_queue_stages));
93
binding_cache.clear();
94
_scratch.reset();
95
release_resources();
96
}
97
98
MDCommandBuffer::Alloc MDCommandBuffer::allocate_arg_buffer(uint32_t p_size) {
99
return _scratch.allocate(p_size);
100
}
101
102
void MDCommandBuffer::end() {
103
switch (type) {
104
case MDCommandBufferStateType::None:
105
return;
106
case MDCommandBufferStateType::Render:
107
return render_end_pass();
108
case MDCommandBufferStateType::Compute:
109
return _end_compute_dispatch();
110
case MDCommandBufferStateType::Blit:
111
return _end_blit();
112
}
113
}
114
115
void MDCommandBuffer::commit() {
116
end();
117
if (use_barriers) {
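// If the scratch argument-buffer allocator grew this frame, register its new buffers with the
// residency set and commit the set before the command buffer itself is committed.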
118
if (_scratch.is_changed()) {
119
Span<MTL::Buffer *const> bufs = _scratch.get_buffers();
120
_frame_state.rs->addAllocations(reinterpret_cast<const MTL::Allocation *const *>(bufs.ptr()), bufs.size());
121
_scratch.clear_changed();
122
_frame_state.rs->commit();
123
}
124
}
125
commandBuffer->commit();
126
commandBuffer.reset();
127
state_begin = false;
128
}
129
130
MTL::CommandBuffer *MDCommandBuffer::command_buffer() {
131
DEV_ASSERT(state_begin);
132
if (commandBuffer.get() == nullptr) {
133
commandBuffer = NS::RetainPtr(queue->commandBuffer());
134
if (use_barriers) {
135
commandBuffer->useResidencySet(_frame_state.rs.get());
136
}
137
}
138
return commandBuffer.get();
139
}
140
141
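// Emits any queue-stage barrier that pipeline_barrier() recorded for the stage this encoder
// serves (render, compute, or blit), then clears that stage's pending state.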
void MDCommandBuffer::_encode_barrier(MTL::CommandEncoder *p_enc) {
142
DEV_ASSERT(p_enc);
143
144
static const MTL::Stages empty_stages[STAGE_MAX] = { 0, 0, 0 };
145
if (memcmp(&pending_before_queue_stages, empty_stages, sizeof(pending_before_queue_stages)) == 0) {
146
return;
147
}
148
149
int stage = STAGE_MAX;
150
// Determine encoder type by checking if it's the current active encoder.
151
if (render.encoder.get() == p_enc && pending_after_stages[STAGE_RENDER] != 0) {
152
stage = STAGE_RENDER;
153
} else if (compute.encoder.get() == p_enc && pending_after_stages[STAGE_COMPUTE] != 0) {
154
stage = STAGE_COMPUTE;
155
} else if (blit.encoder.get() == p_enc && pending_after_stages[STAGE_BLIT] != 0) {
156
stage = STAGE_BLIT;
157
}
158
159
if (stage == STAGE_MAX) {
160
return;
161
}
162
163
p_enc->barrierAfterQueueStages(pending_after_stages[stage], pending_before_queue_stages[stage]);
164
pending_before_queue_stages[stage] = 0;
165
pending_after_stages[stage] = 0;
166
}
167
168
void MDCommandBuffer::pipeline_barrier(BitField<RDD::PipelineStageBits> p_src_stages,
169
BitField<RDD::PipelineStageBits> p_dst_stages,
170
VectorView<RDD::MemoryAccessBarrier> p_memory_barriers,
171
VectorView<RDD::BufferBarrier> p_buffer_barriers,
172
VectorView<RDD::TextureBarrier> p_texture_barriers,
173
VectorView<RDD::AccelerationStructureBarrier> p_acceleration_structure_barriers) {
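// Barriers are handled in two parts: an immediate memory barrier on the currently active encoder
// (when its stages match), plus pending queue-stage barriers, keyed by destination stage, that the
// next encoder for that stage emits via _encode_barrier().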
174
MTL::Stages after_stages = convert_src_pipeline_stages_to_metal(p_src_stages);
175
if (after_stages == 0) {
176
return;
177
}
178
179
MTL::Stages before_stages = convert_dst_pipeline_stages_to_metal(p_dst_stages);
180
if (before_stages == 0) {
181
return;
182
}
183
184
// Encode intra-encoder memory barrier if an encoder is active for matching stages.
185
if (render.encoder.get() != nullptr) {
186
MTL::RenderStages render_after = static_cast<MTL::RenderStages>(after_stages & (MTL::StageVertex | MTL::StageFragment));
187
MTL::RenderStages render_before = static_cast<MTL::RenderStages>(before_stages & (MTL::StageVertex | MTL::StageFragment));
188
if (render_after != 0 && render_before != 0) {
189
render.encoder->memoryBarrier(MTL::BarrierScopeBuffers | MTL::BarrierScopeTextures, render_after, render_before);
190
}
191
} else if (compute.encoder.get() != nullptr) {
192
if (after_stages & MTL::StageDispatch) {
193
compute.encoder->memoryBarrier(MTL::BarrierScopeBuffers | MTL::BarrierScopeTextures);
194
}
195
}
196
// Blit encoder has no memory barrier API.
197
198
// Also cache for inter-pass barriers based on DESTINATION stages,
199
// since barrierAfterQueueStages is called on the encoder that must wait.
200
if (before_stages & (MTL::StageVertex | MTL::StageFragment)) {
201
pending_after_stages[STAGE_RENDER] |= after_stages;
202
pending_before_queue_stages[STAGE_RENDER] |= before_stages;
203
}
204
205
if (before_stages & MTL::StageDispatch) {
206
pending_after_stages[STAGE_COMPUTE] |= after_stages;
207
pending_before_queue_stages[STAGE_COMPUTE] |= before_stages;
208
}
209
210
if (before_stages & MTL::StageBlit) {
211
pending_after_stages[STAGE_BLIT] |= after_stages;
212
pending_before_queue_stages[STAGE_BLIT] |= before_stages;
213
}
214
}
215
216
void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) {
217
MDPipeline *p = (MDPipeline *)(p_pipeline.id);
218
219
// End current encoder if it is a compute encoder or blit encoder,
220
// as, unlike render, they do not have a defined end boundary in the RDD.
221
if (type == MDCommandBufferStateType::Compute) {
222
_end_compute_dispatch();
223
} else if (type == MDCommandBufferStateType::Blit) {
224
_end_blit();
225
}
226
227
if (p->type == MDPipelineType::Render) {
228
DEV_ASSERT(type == MDCommandBufferStateType::Render);
229
MDRenderPipeline *rp = (MDRenderPipeline *)p;
230
231
if (render.encoder.get() == nullptr) {
232
// This error would happen if the render pass failed.
233
ERR_FAIL_NULL_MSG(render.desc.get(), "Render pass descriptor is null.");
234
235
// This condition occurs when there are no attachments at the time render_next_subpass() is called,
236
// and is due to the SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS flag.
237
render.desc->setDefaultRasterSampleCount(static_cast<NS::UInteger>(rp->sample_count));
238
239
render.encoder = NS::RetainPtr(command_buffer()->renderCommandEncoder(render.desc.get()));
240
_encode_barrier(render.encoder.get());
241
}
242
243
if (render.pipeline != rp) {
244
render.dirty.set_flag((RenderState::DirtyFlag)(RenderState::DIRTY_PIPELINE | RenderState::DIRTY_RASTER));
245
// Mark all uniforms as dirty, as variants of a shader pipeline may have a different entry point ABI,
246
// due to setting force_active_argument_buffer_resources = true for spirv_cross::CompilerMSL::Options.
247
// As a result, uniform sets with the same layout will generate redundant binding warnings when
248
// capturing a Metal frame in Xcode.
249
//
250
// If we don't mark as dirty, then some bindings will generate a validation error.
251
// binding_cache.clear();
252
render.mark_uniforms_dirty();
253
254
if (render.pipeline != nullptr && render.pipeline->depth_stencil != rp->depth_stencil) {
255
render.dirty.set_flag(RenderState::DIRTY_DEPTH);
256
}
257
if (rp->raster_state.blend.enabled) {
258
render.dirty.set_flag(RenderState::DIRTY_BLEND);
259
}
260
render.pipeline = rp;
261
}
262
} else if (p->type == MDPipelineType::Compute) {
263
DEV_ASSERT(type == MDCommandBufferStateType::None);
264
type = MDCommandBufferStateType::Compute;
265
266
if (compute.pipeline != p) {
267
compute.dirty.set_flag(ComputeState::DIRTY_PIPELINE);
268
binding_cache.clear();
269
compute.mark_uniforms_dirty();
270
compute.pipeline = (MDComputePipeline *)p;
271
}
272
}
273
}
274
275
void MDCommandBuffer::mark_push_constants_dirty() {
276
switch (type) {
277
case MDCommandBufferStateType::Render:
278
render.dirty.set_flag(RenderState::DirtyFlag::DIRTY_PUSH);
279
break;
280
case MDCommandBufferStateType::Compute:
281
compute.dirty.set_flag(ComputeState::DirtyFlag::DIRTY_PUSH);
282
break;
283
default:
284
break;
285
}
286
}
287
288
MTL::BlitCommandEncoder *MDCommandBuffer::_ensure_blit_encoder() {
289
switch (type) {
290
case MDCommandBufferStateType::None:
291
break;
292
case MDCommandBufferStateType::Render:
293
render_end_pass();
294
break;
295
case MDCommandBufferStateType::Compute:
296
_end_compute_dispatch();
297
break;
298
case MDCommandBufferStateType::Blit:
299
return blit.encoder.get();
300
}
301
302
type = MDCommandBufferStateType::Blit;
303
blit.encoder = NS::RetainPtr(command_buffer()->blitCommandEncoder());
304
_encode_barrier(blit.encoder.get());
305
306
return blit.encoder.get();
307
}
308
309
void MDCommandBuffer::resolve_texture(RDD::TextureID p_src_texture, RDD::TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, RDD::TextureID p_dst_texture, RDD::TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) {
310
MTL::Texture *src_tex = rid::get<MTL::Texture>(p_src_texture);
311
MTL::Texture *dst_tex = rid::get<MTL::Texture>(p_dst_texture);
312
313
NS::SharedPtr<MTL::RenderPassDescriptor> mtlRPD = NS::TransferPtr(MTL::RenderPassDescriptor::alloc()->init());
314
MTL::RenderPassColorAttachmentDescriptor *mtlColorAttDesc = mtlRPD->colorAttachments()->object(0);
315
mtlColorAttDesc->setLoadAction(MTL::LoadActionLoad);
316
mtlColorAttDesc->setStoreAction(MTL::StoreActionMultisampleResolve);
317
318
mtlColorAttDesc->setTexture(src_tex);
319
mtlColorAttDesc->setResolveTexture(dst_tex);
320
mtlColorAttDesc->setLevel(p_src_mipmap);
321
mtlColorAttDesc->setSlice(p_src_layer);
322
mtlColorAttDesc->setResolveLevel(p_dst_mipmap);
323
mtlColorAttDesc->setResolveSlice(p_dst_layer);
324
MTL::RenderCommandEncoder *enc = get_new_render_encoder_with_descriptor(mtlRPD.get());
325
enc->setLabel(MTLSTR("Resolve Image"));
326
enc->endEncoding();
327
}
328
329
void MDCommandBuffer::clear_color_texture(RDD::TextureID p_texture, RDD::TextureLayout p_texture_layout, const Color &p_color, const RDD::TextureSubresourceRange &p_subresources) {
330
MTL::Texture *src_tex = rid::get<MTL::Texture>(p_texture);
331
332
if (src_tex->parentTexture()) {
333
// Clear via the parent texture rather than the view.
334
src_tex = src_tex->parentTexture();
335
}
336
337
PixelFormats &pf = device_driver->get_pixel_formats();
338
339
if (pf.isDepthFormat(src_tex->pixelFormat()) || pf.isStencilFormat(src_tex->pixelFormat())) {
340
ERR_FAIL_MSG("invalid: depth or stencil texture format");
341
}
342
343
NS::SharedPtr<MTL::RenderPassDescriptor> desc = NS::TransferPtr(MTL::RenderPassDescriptor::alloc()->init());
344
345
if (p_subresources.aspect.has_flag(RDD::TEXTURE_ASPECT_COLOR_BIT)) {
346
MTL::RenderPassColorAttachmentDescriptor *caDesc = desc->colorAttachments()->object(0);
347
caDesc->setTexture(src_tex);
348
caDesc->setLoadAction(MTL::LoadActionClear);
349
caDesc->setStoreAction(MTL::StoreActionStore);
350
caDesc->setClearColor(MTL::ClearColor(p_color.r, p_color.g, p_color.b, p_color.a));
351
352
// Extract the mipmap levels that are to be updated.
353
uint32_t mipLvlStart = p_subresources.base_mipmap;
354
uint32_t mipLvlCnt = p_subresources.mipmap_count;
355
uint32_t mipLvlEnd = mipLvlStart + mipLvlCnt;
356
357
uint32_t levelCount = src_tex->mipmapLevelCount();
358
359
// Extract the cube or array layers (slices) that are to be updated.
360
bool is3D = src_tex->textureType() == MTL::TextureType3D;
361
uint32_t layerStart = is3D ? 0 : p_subresources.base_layer;
362
uint32_t layerCnt = p_subresources.layer_count;
363
uint32_t layerEnd = layerStart + layerCnt;
364
365
MetalFeatures const &features = device_driver->get_device_properties().features;
366
367
// Iterate across mipmap levels and layers, and perform an empty render to clear each.
368
for (uint32_t mipLvl = mipLvlStart; mipLvl < mipLvlEnd; mipLvl++) {
369
ERR_FAIL_INDEX_MSG(mipLvl, levelCount, "mip level out of range");
370
371
caDesc->setLevel(mipLvl);
372
373
// If a 3D image, we need to get the depth for each level.
374
if (is3D) {
375
layerCnt = mipmapLevelSizeFromTexture(src_tex, mipLvl).depth;
376
layerEnd = layerStart + layerCnt;
377
}
378
379
if ((features.layeredRendering && src_tex->sampleCount() == 1) || features.multisampleLayeredRendering) {
380
// We can clear all layers at once.
381
if (is3D) {
382
caDesc->setDepthPlane(layerStart);
383
} else {
384
caDesc->setSlice(layerStart);
385
}
386
desc->setRenderTargetArrayLength(layerCnt);
387
MTL::RenderCommandEncoder *enc = get_new_render_encoder_with_descriptor(desc.get());
388
enc->setLabel(MTLSTR("Clear Image"));
389
enc->endEncoding();
390
} else {
391
for (uint32_t layer = layerStart; layer < layerEnd; layer++) {
392
if (is3D) {
393
caDesc->setDepthPlane(layer);
394
} else {
395
caDesc->setSlice(layer);
396
}
397
MTL::RenderCommandEncoder *enc = get_new_render_encoder_with_descriptor(desc.get());
398
enc->setLabel(MTLSTR("Clear Image"));
399
enc->endEncoding();
400
}
401
}
402
}
403
}
404
}
405
406
void MDCommandBuffer::clear_buffer(RDD::BufferID p_buffer, uint64_t p_offset, uint64_t p_size) {
407
MTL::BlitCommandEncoder *blit_enc = _ensure_blit_encoder();
408
const RDM::BufferInfo *buffer = (const RDM::BufferInfo *)p_buffer.id;
409
410
blit_enc->fillBuffer(buffer->metal_buffer.get(), NS::Range(p_offset, p_size), 0);
411
}
412
413
void MDCommandBuffer::clear_depth_stencil_texture(RDD::TextureID p_texture, RDD::TextureLayout p_texture_layout, float p_depth, uint8_t p_stencil, const RDD::TextureSubresourceRange &p_subresources) {
414
MTL::Texture *src_tex = rid::get<MTL::Texture>(p_texture);
415
416
if (src_tex->parentTexture()) {
417
// Clear via the parent texture rather than the view.
418
src_tex = src_tex->parentTexture();
419
}
420
421
PixelFormats &pf = device_driver->get_pixel_formats();
422
423
bool is_depth_format = pf.isDepthFormat(src_tex->pixelFormat());
424
bool is_stencil_format = pf.isStencilFormat(src_tex->pixelFormat());
425
426
if (!is_depth_format && !is_stencil_format) {
427
ERR_FAIL_MSG("invalid: color texture format");
428
}
429
430
bool clear_depth = is_depth_format && p_subresources.aspect.has_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT);
431
bool clear_stencil = is_stencil_format && p_subresources.aspect.has_flag(RDD::TEXTURE_ASPECT_STENCIL_BIT);
432
433
if (clear_depth || clear_stencil) {
434
NS::SharedPtr<MTL::RenderPassDescriptor> desc = NS::TransferPtr(MTL::RenderPassDescriptor::alloc()->init());
435
436
MTL::RenderPassDepthAttachmentDescriptor *daDesc = desc->depthAttachment();
437
if (clear_depth) {
438
daDesc->setTexture(src_tex);
439
daDesc->setLoadAction(MTL::LoadActionClear);
440
daDesc->setStoreAction(MTL::StoreActionStore);
441
daDesc->setClearDepth(p_depth);
442
}
443
444
MTL::RenderPassStencilAttachmentDescriptor *saDesc = desc->stencilAttachment();
445
if (clear_stencil) {
446
saDesc->setTexture(src_tex);
447
saDesc->setLoadAction(MTL::LoadActionClear);
448
saDesc->setStoreAction(MTL::StoreActionStore);
449
saDesc->setClearStencil(p_stencil);
450
}
451
452
// Extract the mipmap levels that are to be updated.
453
uint32_t mipLvlStart = p_subresources.base_mipmap;
454
uint32_t mipLvlCnt = p_subresources.mipmap_count;
455
uint32_t mipLvlEnd = mipLvlStart + mipLvlCnt;
456
457
uint32_t levelCount = src_tex->mipmapLevelCount();
458
459
// Extract the cube or array layers (slices) that are to be updated.
460
bool is3D = src_tex->textureType() == MTL::TextureType3D;
461
uint32_t layerStart = is3D ? 0 : p_subresources.base_layer;
462
uint32_t layerCnt = p_subresources.layer_count;
463
uint32_t layerEnd = layerStart + layerCnt;
464
465
MetalFeatures const &features = device_driver->get_device_properties().features;
466
467
// Iterate across mipmap levels and layers, and perform an empty render to clear each.
468
for (uint32_t mipLvl = mipLvlStart; mipLvl < mipLvlEnd; mipLvl++) {
469
ERR_FAIL_INDEX_MSG(mipLvl, levelCount, "mip level out of range");
470
471
if (clear_depth) {
472
daDesc->setLevel(mipLvl);
473
}
474
if (clear_stencil) {
475
saDesc->setLevel(mipLvl);
476
}
477
478
// If a 3D image, we need to get the depth for each level.
479
if (is3D) {
480
layerCnt = mipmapLevelSizeFromTexture(src_tex, mipLvl).depth;
481
layerEnd = layerStart + layerCnt;
482
}
483
484
if ((features.layeredRendering && src_tex->sampleCount() == 1) || features.multisampleLayeredRendering) {
485
// We can clear all layers at once.
486
if (is3D) {
487
if (clear_depth) {
488
daDesc->setDepthPlane(layerStart);
489
}
490
if (clear_stencil) {
491
saDesc->setDepthPlane(layerStart);
492
}
493
} else {
494
if (clear_depth) {
495
daDesc->setSlice(layerStart);
496
}
497
if (clear_stencil) {
498
saDesc->setSlice(layerStart);
499
}
500
}
501
desc->setRenderTargetArrayLength(layerCnt);
502
MTL::RenderCommandEncoder *enc = get_new_render_encoder_with_descriptor(desc.get());
503
enc->setLabel(MTLSTR("Clear Image"));
504
enc->endEncoding();
505
} else {
506
for (uint32_t layer = layerStart; layer < layerEnd; layer++) {
507
if (is3D) {
508
if (clear_depth) {
509
daDesc->setDepthPlane(layer);
510
}
511
if (clear_stencil) {
512
saDesc->setDepthPlane(layer);
513
}
514
} else {
515
if (clear_depth) {
516
daDesc->setSlice(layer);
517
}
518
if (clear_stencil) {
519
saDesc->setSlice(layer);
520
}
521
}
522
MTL::RenderCommandEncoder *enc = get_new_render_encoder_with_descriptor(desc.get());
523
enc->setLabel(MTLSTR("Clear Image"));
524
enc->endEncoding();
525
}
526
}
527
}
528
}
529
}
530
531
void MDCommandBuffer::copy_buffer(RDD::BufferID p_src_buffer, RDD::BufferID p_dst_buffer, VectorView<RDD::BufferCopyRegion> p_regions) {
532
const RDM::BufferInfo *src = (const RDM::BufferInfo *)p_src_buffer.id;
533
const RDM::BufferInfo *dst = (const RDM::BufferInfo *)p_dst_buffer.id;
534
535
MTL::BlitCommandEncoder *enc = _ensure_blit_encoder();
536
537
for (uint32_t i = 0; i < p_regions.size(); i++) {
538
RDD::BufferCopyRegion region = p_regions[i];
539
enc->copyFromBuffer(src->metal_buffer.get(), region.src_offset,
540
dst->metal_buffer.get(), region.dst_offset, region.size);
541
}
542
}
543
544
void MDCommandBuffer::copy_texture(RDD::TextureID p_src_texture, RDD::TextureID p_dst_texture, VectorView<RDD::TextureCopyRegion> p_regions) {
545
MTL::Texture *src = rid::get<MTL::Texture>(p_src_texture);
546
MTL::Texture *dst = rid::get<MTL::Texture>(p_dst_texture);
547
548
MTL::BlitCommandEncoder *enc = _ensure_blit_encoder();
549
PixelFormats &pf = device_driver->get_pixel_formats();
550
551
MTL::PixelFormat src_fmt = src->pixelFormat();
552
bool src_is_compressed = pf.getFormatType(src_fmt) == MTLFormatType::Compressed;
553
MTL::PixelFormat dst_fmt = dst->pixelFormat();
554
bool dst_is_compressed = pf.getFormatType(dst_fmt) == MTLFormatType::Compressed;
555
556
// Validate copy.
557
if (src->sampleCount() != dst->sampleCount() || pf.getBytesPerBlock(src_fmt) != pf.getBytesPerBlock(dst_fmt)) {
558
ERR_FAIL_MSG("Cannot copy between incompatible pixel formats, such as formats of different pixel sizes, or between images with different sample counts.");
559
}
560
561
// If source and destination have different formats and at least one is compressed, a temporary buffer is required.
562
bool need_tmp_buffer = (src_fmt != dst_fmt) && (src_is_compressed || dst_is_compressed);
563
if (need_tmp_buffer) {
564
ERR_FAIL_MSG("not implemented: copy with intermediate buffer");
565
}
566
567
if (src_fmt != dst_fmt) {
568
// Map the source pixel format to the dst through a texture view on the source texture.
569
src = src->newTextureView(dst_fmt);
570
}
571
572
for (uint32_t i = 0; i < p_regions.size(); i++) {
573
RDD::TextureCopyRegion region = p_regions[i];
574
575
MTL::Size extent = MTLSizeFromVector3i(region.size);
576
577
// If copies can be performed using direct texture-texture copying, do so.
578
uint32_t src_level = region.src_subresources.mipmap;
579
uint32_t src_base_layer = region.src_subresources.base_layer;
580
MTL::Size src_extent = mipmapLevelSizeFromTexture(src, src_level);
581
uint32_t dst_level = region.dst_subresources.mipmap;
582
uint32_t dst_base_layer = region.dst_subresources.base_layer;
583
MTL::Size dst_extent = mipmapLevelSizeFromTexture(dst, dst_level);
584
585
// All layers may be copied at once, if the extent completely covers both images.
586
if (src_extent == extent && dst_extent == extent) {
587
enc->copyFromTexture(src, src_base_layer, src_level,
588
dst, dst_base_layer, dst_level,
589
region.src_subresources.layer_count, 1);
590
} else {
591
MTL::Origin src_origin = MTLOriginFromVector3i(region.src_offset);
592
MTL::Size src_size = clampMTLSize(extent, src_origin, src_extent);
593
uint32_t layer_count = 0;
594
if ((src->textureType() == MTL::TextureType3D) != (dst->textureType() == MTL::TextureType3D)) {
595
// In this case, the number of layers to copy is in extent.depth. Use that value,
596
// then clamp the depth, so we don't try to copy more than Metal will allow.
597
layer_count = extent.depth;
598
src_size.depth = 1;
599
} else {
600
layer_count = region.src_subresources.layer_count;
601
}
602
MTL::Origin dst_origin = MTLOriginFromVector3i(region.dst_offset);
603
604
for (uint32_t layer = 0; layer < layer_count; layer++) {
605
// We can copy between a 3D and a 2D image easily. Just copy between
606
// one slice of the 2D image and one plane of the 3D image at a time.
607
if ((src->textureType() == MTL::TextureType3D) == (dst->textureType() == MTL::TextureType3D)) {
608
enc->copyFromTexture(src, src_base_layer + layer, src_level, src_origin, src_size,
609
dst, dst_base_layer + layer, dst_level, dst_origin);
610
} else if (src->textureType() == MTL::TextureType3D) {
611
enc->copyFromTexture(src, src_base_layer, src_level,
612
MTL::Origin(src_origin.x, src_origin.y, src_origin.z + layer), src_size,
613
dst, dst_base_layer + layer, dst_level, dst_origin);
614
} else {
615
DEV_ASSERT(dst->textureType() == MTL::TextureType3D);
616
enc->copyFromTexture(src, src_base_layer + layer, src_level, src_origin, src_size,
617
dst, dst_base_layer, dst_level,
618
MTL::Origin(dst_origin.x, dst_origin.y, dst_origin.z + layer));
619
}
620
}
621
}
622
}
623
}
624
625
void MDCommandBuffer::copy_buffer_to_texture(RDD::BufferID p_src_buffer, RDD::TextureID p_dst_texture, VectorView<RDD::BufferTextureCopyRegion> p_regions) {
626
_copy_texture_buffer(CopySource::Buffer, p_dst_texture, p_src_buffer, p_regions);
627
}
628
629
void MDCommandBuffer::copy_texture_to_buffer(RDD::TextureID p_src_texture, RDD::BufferID p_dst_buffer, VectorView<RDD::BufferTextureCopyRegion> p_regions) {
630
_copy_texture_buffer(CopySource::Texture, p_src_texture, p_dst_buffer, p_regions);
631
}
632
633
void MDCommandBuffer::_copy_texture_buffer(CopySource p_source,
634
RDD::TextureID p_texture,
635
RDD::BufferID p_buffer,
636
VectorView<RDD::BufferTextureCopyRegion> p_regions) {
637
const RDM::BufferInfo *buffer = (const RDM::BufferInfo *)p_buffer.id;
638
MTL::Texture *texture = rid::get<MTL::Texture>(p_texture);
639
640
MTL::BlitCommandEncoder *enc = _ensure_blit_encoder();
641
642
PixelFormats &pf = device_driver->get_pixel_formats();
643
MTL::PixelFormat mtlPixFmt = texture->pixelFormat();
644
645
MTL::BlitOption options = MTL::BlitOptionNone;
646
if (pf.isPVRTCFormat(mtlPixFmt)) {
647
options |= MTL::BlitOptionRowLinearPVRTC;
648
}
649
650
for (uint32_t i = 0; i < p_regions.size(); i++) {
651
RDD::BufferTextureCopyRegion region = p_regions[i];
652
653
uint32_t mip_level = region.texture_subresource.mipmap;
654
MTL::Origin txt_origin = MTL::Origin(region.texture_offset.x, region.texture_offset.y, region.texture_offset.z);
655
MTL::Size src_extent = mipmapLevelSizeFromTexture(texture, mip_level);
656
MTL::Size txt_size = clampMTLSize(MTL::Size(region.texture_region_size.x, region.texture_region_size.y, region.texture_region_size.z),
657
txt_origin,
658
src_extent);
659
660
uint32_t buffImgWd = region.texture_region_size.x;
661
uint32_t buffImgHt = region.texture_region_size.y;
662
663
NS::UInteger bytesPerRow = pf.getBytesPerRow(mtlPixFmt, buffImgWd);
664
NS::UInteger bytesPerImg = pf.getBytesPerLayer(mtlPixFmt, bytesPerRow, buffImgHt);
665
666
MTL::BlitOption blit_options = options;
667
668
if (pf.isDepthFormat(mtlPixFmt) && pf.isStencilFormat(mtlPixFmt)) {
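// When copying only one aspect of a combined depth/stencil format, the buffer holds just that
// aspect, so adjust the row/image strides and request the matching aspect via the blit options.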
669
// Don't reduce depths of 32-bit depth/stencil formats.
670
if (region.texture_subresource.aspect == RDD::TEXTURE_ASPECT_DEPTH) {
671
if (pf.getBytesPerTexel(mtlPixFmt) != 4) {
672
bytesPerRow -= buffImgWd;
673
bytesPerImg -= buffImgWd * buffImgHt;
674
}
675
blit_options |= MTL::BlitOptionDepthFromDepthStencil;
676
} else if (region.texture_subresource.aspect == RDD::TEXTURE_ASPECT_STENCIL) {
677
// The stencil component is always 1 byte per pixel.
678
bytesPerRow = buffImgWd;
679
bytesPerImg = buffImgWd * buffImgHt;
680
blit_options |= MTL::BlitOptionStencilFromDepthStencil;
681
}
682
}
683
684
if (!isArrayTexture(texture->textureType())) {
685
bytesPerImg = 0;
686
}
687
688
if (p_source == CopySource::Buffer) {
689
enc->copyFromBuffer(buffer->metal_buffer.get(), region.buffer_offset, bytesPerRow, bytesPerImg, txt_size,
690
texture, region.texture_subresource.layer, mip_level, txt_origin, blit_options);
691
} else {
692
enc->copyFromTexture(texture, region.texture_subresource.layer, mip_level, txt_origin, txt_size,
693
buffer->metal_buffer.get(), region.buffer_offset, bytesPerRow, bytesPerImg, blit_options);
694
}
695
}
696
}
697
698
MTL::RenderCommandEncoder *MDCommandBuffer::get_new_render_encoder_with_descriptor(MTL::RenderPassDescriptor *p_desc) {
699
switch (type) {
700
case MDCommandBufferStateType::None:
701
break;
702
case MDCommandBufferStateType::Render:
703
render_end_pass();
704
break;
705
case MDCommandBufferStateType::Compute:
706
_end_compute_dispatch();
707
break;
708
case MDCommandBufferStateType::Blit:
709
_end_blit();
710
break;
711
}
712
713
MTL::RenderCommandEncoder *enc = command_buffer()->renderCommandEncoder(p_desc);
714
_encode_barrier(enc);
715
return enc;
716
}
717
718
#pragma mark - Render Commands
719
720
void MDCommandBuffer::render_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) {
721
DEV_ASSERT(type == MDCommandBufferStateType::Render);
722
723
if (uint32_t new_size = p_first_set_index + p_set_count; render.uniform_sets.size() < new_size) {
724
uint32_t s = render.uniform_sets.size();
725
render.uniform_sets.resize(new_size);
726
// Set intermediate values to null.
727
std::fill(&render.uniform_sets[s], render.uniform_sets.end().operator->(), nullptr);
728
}
729
730
const MDShader *shader = (const MDShader *)p_shader.id;
731
DynamicOffsetLayout layout = shader->dynamic_offset_layout;
732
733
// Clear bits for sets being bound, then OR new values.
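// Each dynamic offset entry occupies 4 bits of the packed 32-bit word, so a set owns
// count * 4 bits starting at its shift (see DynamicOffsetLayout).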
734
for (uint32_t i = 0; i < p_set_count && render.dynamic_offsets != 0; i++) {
735
uint32_t set_index = p_first_set_index + i;
736
uint32_t count = layout.get_count(set_index);
737
if (count > 0) {
738
uint32_t shift = layout.get_offset_index_shift(set_index);
739
uint32_t mask = ((1u << (count * 4u)) - 1u) << shift;
740
render.dynamic_offsets &= ~mask; // Clear this set's bits.
741
}
742
}
743
render.dynamic_offsets |= p_dynamic_offsets;
744
745
for (size_t i = 0; i < p_set_count; ++i) {
746
MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id);
747
748
uint32_t index = p_first_set_index + i;
749
if (render.uniform_sets[index] != set || layout.get_count(index) > 0) {
750
render.dirty.set_flag(RenderState::DIRTY_UNIFORMS);
751
render.uniform_set_mask |= 1ULL << index;
752
render.uniform_sets[index] = set;
753
}
754
}
755
}
756
757
void MDCommandBuffer::render_clear_attachments(VectorView<RDD::AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects) {
758
DEV_ASSERT(type == MDCommandBufferStateType::Render);
759
760
const MDSubpass &subpass = render.get_subpass();
761
762
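// Each clear rect is drawn as two triangles (6 vertices), replicated for every view in the subpass.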
uint32_t vertex_count = p_rects.size() * 6 * subpass.view_count;
763
simd::float4 *vertices = ALLOCA_ARRAY(simd::float4, vertex_count);
764
simd::float4 clear_colors[ClearAttKey::ATTACHMENT_COUNT];
765
766
Size2i size = render.frameBuffer->size;
767
Rect2i render_area = render.clip_to_render_area({ { 0, 0 }, size });
768
size = Size2i(render_area.position.x + render_area.size.width, render_area.position.y + render_area.size.height);
769
_populate_vertices(vertices, size, p_rects);
770
771
ClearAttKey key;
772
key.sample_count = render.pass->get_sample_count();
773
if (subpass.view_count > 1) {
774
key.enable_layered_rendering();
775
}
776
777
float depth_value = 0;
778
uint32_t stencil_value = 0;
779
780
for (uint32_t i = 0; i < p_attachment_clears.size(); i++) {
781
RDD::AttachmentClear const &attClear = p_attachment_clears[i];
782
uint32_t attachment_index;
783
if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_COLOR_BIT)) {
784
attachment_index = attClear.color_attachment;
785
} else {
786
attachment_index = subpass.depth_stencil_reference.attachment;
787
}
788
789
MDAttachment const &mda = render.pass->attachments[attachment_index];
790
if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_COLOR_BIT)) {
791
key.set_color_format(attachment_index, mda.format);
792
clear_colors[attachment_index] = {
793
attClear.value.color.r,
794
attClear.value.color.g,
795
attClear.value.color.b,
796
attClear.value.color.a
797
};
798
}
799
800
if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT)) {
801
key.set_depth_format(mda.format);
802
depth_value = attClear.value.depth;
803
}
804
805
if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_STENCIL_BIT)) {
806
key.set_stencil_format(mda.format);
807
stencil_value = attClear.value.stencil;
808
}
809
}
810
clear_colors[ClearAttKey::DEPTH_INDEX] = {
811
depth_value,
812
depth_value,
813
depth_value,
814
depth_value
815
};
816
817
MTL::RenderCommandEncoder *enc = render.encoder.get();
818
819
MDResourceCache &cache = device_driver->get_resource_cache();
820
821
enc->pushDebugGroup(MTLSTR("ClearAttachments"));
822
enc->setRenderPipelineState(cache.get_clear_render_pipeline_state(key, nullptr));
823
enc->setDepthStencilState(cache.get_depth_stencil_state(
824
key.is_depth_enabled(),
825
key.is_stencil_enabled()));
826
enc->setStencilReferenceValue(stencil_value);
827
enc->setCullMode(MTL::CullModeNone);
828
enc->setTriangleFillMode(MTL::TriangleFillModeFill);
829
enc->setDepthBias(0, 0, 0);
830
enc->setViewport(MTL::Viewport{ 0, 0, (double)size.width, (double)size.height, 0.0, 1.0 });
831
enc->setScissorRect(MTL::ScissorRect{ 0, 0, (NS::UInteger)size.width, (NS::UInteger)size.height });
832
833
enc->setVertexBytes(clear_colors, sizeof(clear_colors), 0);
834
enc->setFragmentBytes(clear_colors, sizeof(clear_colors), 0);
835
enc->setVertexBytes(vertices, vertex_count * sizeof(vertices[0]), device_driver->get_metal_buffer_index_for_vertex_attribute_binding(VERT_CONTENT_BUFFER_INDEX));
836
837
enc->drawPrimitives(MTL::PrimitiveTypeTriangle, (NS::UInteger)0, vertex_count);
838
enc->popDebugGroup();
839
840
render.dirty.set_flag((RenderState::DirtyFlag)(RenderState::DIRTY_PIPELINE | RenderState::DIRTY_DEPTH | RenderState::DIRTY_RASTER));
841
binding_cache.clear();
842
render.mark_uniforms_dirty({ 0 }); // Mark index 0 dirty, if there is already a binding for index 0.
843
render.mark_viewport_dirty();
844
render.mark_scissors_dirty();
845
render.mark_vertex_dirty();
846
render.mark_blend_dirty();
847
}
848
849
void MDCommandBuffer::_render_set_dirty_state() {
850
_render_bind_uniform_sets();
851
852
if (render.dirty.has_flag(RenderState::DIRTY_PUSH)) {
853
if (push_constant_binding != UINT32_MAX) {
854
render.encoder->setVertexBytes(push_constant_data, push_constant_data_len, push_constant_binding);
855
render.encoder->setFragmentBytes(push_constant_data, push_constant_data_len, push_constant_binding);
856
}
857
}
858
859
MDSubpass const &subpass = render.get_subpass();
860
if (subpass.view_count > 1) {
861
uint32_t view_range[2] = { 0, subpass.view_count };
862
render.encoder->setVertexBytes(view_range, sizeof(view_range), VIEW_MASK_BUFFER_INDEX);
863
render.encoder->setFragmentBytes(view_range, sizeof(view_range), VIEW_MASK_BUFFER_INDEX);
864
}
865
866
if (render.dirty.has_flag(RenderState::DIRTY_PIPELINE)) {
867
render.encoder->setRenderPipelineState(render.pipeline->state.get());
868
}
869
870
if (render.dirty.has_flag(RenderState::DIRTY_VIEWPORT)) {
871
render.encoder->setViewports(reinterpret_cast<const MTL::Viewport *>(render.viewports.ptr()), render.viewports.size());
872
}
873
874
if (render.dirty.has_flag(RenderState::DIRTY_DEPTH)) {
875
render.encoder->setDepthStencilState(render.pipeline->depth_stencil.get());
876
}
877
878
if (render.dirty.has_flag(RenderState::DIRTY_RASTER)) {
879
render.pipeline->raster_state.apply(render.encoder.get());
880
}
881
882
if (render.dirty.has_flag(RenderState::DIRTY_SCISSOR) && !render.scissors.is_empty()) {
883
size_t len = render.scissors.size();
884
MTL::ScissorRect *rects = ALLOCA_ARRAY(MTL::ScissorRect, len);
885
for (size_t i = 0; i < len; i++) {
886
rects[i] = render.clip_to_render_area(render.scissors[i]);
887
}
888
render.encoder->setScissorRects(rects, len);
889
}
890
891
if (render.dirty.has_flag(RenderState::DIRTY_BLEND) && render.blend_constants.has_value()) {
892
render.encoder->setBlendColor(render.blend_constants->r, render.blend_constants->g, render.blend_constants->b, render.blend_constants->a);
893
}
894
895
if (render.dirty.has_flag(RenderState::DIRTY_VERTEX)) {
896
uint32_t p_binding_count = render.vertex_buffers.size();
897
if (p_binding_count > 0) {
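// Vertex attribute bindings map to Metal buffer indices in descending order, so the highest
// binding yields the first (lowest) Metal buffer index of the range.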
898
uint32_t first = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(p_binding_count - 1);
899
render.encoder->setVertexBuffers(render.vertex_buffers.ptr(), render.vertex_offsets.ptr(), NS::Range(first, p_binding_count));
900
}
901
}
902
903
if (!use_barriers) {
904
render.resource_tracker.encode(render.encoder.get());
905
}
906
907
render.dirty.clear();
908
}
909
910
void ResourceTracker::merge_from(const ::ResourceUsageMap &p_from) {
911
for (KeyValue<StageResourceUsage, ::ResourceVector> const &keyval : p_from) {
912
ResourceVector *resources = _current.getptr(keyval.key);
913
if (resources == nullptr) {
914
resources = &_current.insert(keyval.key, ResourceVector())->value;
915
}
916
resources->reserve(resources->size() + keyval.value.size());
917
918
MTL::Resource *const *keyval_ptr = (MTL::Resource *const *)(void *)keyval.value.ptr();
919
920
// Helper to check if a resource needs to be added based on previous usage.
921
auto should_add_resource = [this, usage = keyval.key](MTL::Resource *res) -> bool {
922
ResourceUsageEntry *existing = _previous.getptr(res);
923
if (existing == nullptr) {
924
_previous.insert(res, usage);
925
return true;
926
}
927
if (existing->usage != usage) {
928
existing->usage |= usage;
929
return true;
930
}
931
return false;
932
};
933
934
// 2-way merge of sorted resource lists.
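// Walk both sorted lists in lockstep, inserting resources from the incoming list that are new
// this pass or whose usage mask changed.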
935
uint32_t i = 0, j = 0;
936
while (i < resources->size() && j < keyval.value.size()) {
937
MTL::Resource *current_res = resources->ptr()[i];
938
MTL::Resource *new_res = keyval_ptr[j];
939
940
if (current_res < new_res) {
941
i++;
942
} else if (current_res > new_res) {
943
if (should_add_resource(new_res)) {
944
resources->insert(i, new_res);
945
}
946
i++;
947
j++;
948
} else {
949
i++;
950
j++;
951
}
952
}
953
954
// Append any remaining resources from the input.
955
for (; j < keyval.value.size(); j++) {
956
if (should_add_resource(keyval_ptr[j])) {
957
resources->push_back(keyval_ptr[j]);
958
}
959
}
960
}
961
}
962
963
void ResourceTracker::encode(MTL::RenderCommandEncoder *p_enc) {
964
for (KeyValue<StageResourceUsage, ResourceVector> const &keyval : _current) {
965
if (keyval.value.is_empty()) {
966
continue;
967
}
968
969
MTL::ResourceUsage vert_usage = (MTL::ResourceUsage)resource_usage_for_stage(keyval.key, RDD::ShaderStage::SHADER_STAGE_VERTEX);
970
MTL::ResourceUsage frag_usage = (MTL::ResourceUsage)resource_usage_for_stage(keyval.key, RDD::ShaderStage::SHADER_STAGE_FRAGMENT);
971
const MTL::Resource **resources = (const MTL::Resource **)(void *)keyval.value.ptr();
972
NS::UInteger count = keyval.value.size();
973
if (vert_usage == frag_usage) {
974
p_enc->useResources(resources, count, vert_usage, MTL::RenderStageVertex | MTL::RenderStageFragment);
975
} else {
976
if (vert_usage != 0) {
977
p_enc->useResources(resources, count, vert_usage, MTL::RenderStageVertex);
978
}
979
if (frag_usage != 0) {
980
p_enc->useResources(resources, count, frag_usage, MTL::RenderStageFragment);
981
}
982
}
983
}
984
985
// Keep the keys for now and clear the vectors to reduce churn.
986
for (KeyValue<StageResourceUsage, ResourceVector> &v : _current) {
987
v.value.clear();
988
}
989
}
990
991
void ResourceTracker::encode(MTL::ComputeCommandEncoder *p_enc) {
992
for (KeyValue<StageResourceUsage, ResourceVector> const &keyval : _current) {
993
if (keyval.value.is_empty()) {
994
continue;
995
}
996
MTL::ResourceUsage usage = (MTL::ResourceUsage)resource_usage_for_stage(keyval.key, RDD::ShaderStage::SHADER_STAGE_COMPUTE);
997
if (usage != 0) {
998
const MTL::Resource **resources = (const MTL::Resource **)(void *)keyval.value.ptr();
999
p_enc->useResources(resources, keyval.value.size(), usage);
1000
}
1001
}
1002
1003
// Keep the keys for now and clear the vectors to reduce churn.
1004
for (KeyValue<StageResourceUsage, ResourceVector> &v : _current) {
1005
v.value.clear();
1006
}
1007
}
1008
1009
void ResourceTracker::reset() {
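// Resources that remain unused for RESOURCE_UNUSED_CLEANUP_COUNT consecutive resets are evicted
// from the tracking map.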
1010
// Keep the keys for now, as they are likely to be used repeatedly.
1011
for (KeyValue<MTL::Resource *, ResourceUsageEntry> &v : _previous) {
1012
if (v.value.usage == ResourceUnused) {
1013
v.value.unused++;
1014
if (v.value.unused >= RESOURCE_UNUSED_CLEANUP_COUNT) {
1015
_scratch.push_back(v.key);
1016
}
1017
} else {
1018
v.value = ResourceUnused;
1019
v.value.unused = 0;
1020
}
1021
}
1022
1023
// Clean up resources that weren't used in the last pass.
1024
for (MTL::Resource *res : _scratch) {
1025
_previous.erase(res);
1026
}
1027
_scratch.clear();
1028
}
1029
1030
void MDCommandBuffer::_render_bind_uniform_sets() {
1031
DEV_ASSERT(type == MDCommandBufferStateType::Render);
1032
if (!render.dirty.has_flag(RenderState::DIRTY_UNIFORMS)) {
1033
return;
1034
}
1035
1036
render.dirty.clear_flag(RenderState::DIRTY_UNIFORMS);
1037
uint64_t set_uniforms = render.uniform_set_mask;
1038
render.uniform_set_mask = 0;
1039
1040
MDRenderShader *shader = render.pipeline->shader;
1041
const uint32_t dynamic_offsets = render.dynamic_offsets;
1042
1043
while (set_uniforms != 0) {
1044
// Find the index of the next set bit.
1045
uint32_t index = (uint32_t)__builtin_ctzll(set_uniforms);
1046
// Clear the set bit.
1047
set_uniforms &= (set_uniforms - 1);
1048
MDUniformSet *set = render.uniform_sets[index];
1049
if (set == nullptr || index >= (uint32_t)shader->sets.size()) {
1050
continue;
1051
}
1052
if (shader->uses_argument_buffers) {
1053
_bind_uniforms_argument_buffers(set, shader, index, dynamic_offsets);
1054
} else {
1055
DirectEncoder de(render.encoder.get(), binding_cache, DirectEncoder::RENDER);
1056
_bind_uniforms_direct(set, shader, de, index, dynamic_offsets);
1057
}
1058
}
1059
}
1060
1061
void MDCommandBuffer::render_begin_pass(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_frameBuffer, RDD::CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView<RDD::RenderPassClearValue> p_clear_values) {
1062
DEV_ASSERT(command_buffer() != nullptr);
1063
end();
1064
1065
MDRenderPass *pass = (MDRenderPass *)(p_render_pass.id);
1066
MDFrameBuffer *fb = (MDFrameBuffer *)(p_frameBuffer.id);
1067
1068
type = MDCommandBufferStateType::Render;
1069
render.pass = pass;
1070
render.current_subpass = UINT32_MAX;
1071
render.render_area = p_rect;
1072
render.clear_values.resize(p_clear_values.size());
1073
for (uint32_t i = 0; i < p_clear_values.size(); i++) {
1074
render.clear_values[i] = p_clear_values[i];
1075
}
1076
render.is_rendering_entire_area = (p_rect.position == Point2i(0, 0)) && p_rect.size == fb->size;
1077
render.frameBuffer = fb;
1078
render_next_subpass();
1079
}
1080
1081
void MDCommandBuffer::render_next_subpass() {
1082
DEV_ASSERT(command_buffer() != nullptr);
1083
1084
if (render.current_subpass == UINT32_MAX) {
1085
render.current_subpass = 0;
1086
} else {
1087
_end_render_pass();
1088
render.current_subpass++;
1089
}
1090
1091
MDFrameBuffer const &fb = *render.frameBuffer;
1092
MDRenderPass const &pass = *render.pass;
1093
MDSubpass const &subpass = render.get_subpass();
1094
1095
NS::SharedPtr<MTL::RenderPassDescriptor> desc = NS::TransferPtr(MTL::RenderPassDescriptor::alloc()->init());
1096
1097
if (subpass.view_count > 1) {
1098
desc->setRenderTargetArrayLength(subpass.view_count);
1099
}
1100
1101
PixelFormats &pf = device_driver->get_pixel_formats();
1102
1103
uint32_t attachmentCount = 0;
1104
for (uint32_t i = 0; i < subpass.color_references.size(); i++) {
1105
uint32_t idx = subpass.color_references[i].attachment;
1106
if (idx == RDD::AttachmentReference::UNUSED) {
1107
continue;
1108
}
1109
1110
attachmentCount += 1;
1111
MTL::RenderPassColorAttachmentDescriptor *ca = desc->colorAttachments()->object(i);
1112
1113
uint32_t resolveIdx = subpass.resolve_references.is_empty() ? RDD::AttachmentReference::UNUSED : subpass.resolve_references[i].attachment;
1114
bool has_resolve = resolveIdx != RDD::AttachmentReference::UNUSED;
1115
bool can_resolve = true;
1116
if (resolveIdx != RDD::AttachmentReference::UNUSED) {
1117
MTL::Texture *resolve_tex = fb.get_texture(resolveIdx);
1118
can_resolve = flags::all(pf.getCapabilities(resolve_tex->pixelFormat()), kMTLFmtCapsResolve);
1119
if (can_resolve) {
1120
ca->setResolveTexture(resolve_tex);
1121
} else {
1122
CRASH_NOW_MSG("unimplemented: using a texture format that is not supported for resolve");
1123
}
1124
}
1125
1126
MDAttachment const &attachment = pass.attachments[idx];
1127
1128
MTL::Texture *tex = fb.get_texture(idx);
1129
ERR_FAIL_NULL_MSG(tex, "Frame buffer color texture is null.");
1130
1131
if ((attachment.type & MDAttachmentType::Color)) {
1132
if (attachment.configureDescriptor(ca, pf, subpass, tex, render.is_rendering_entire_area, has_resolve, can_resolve, false)) {
1133
Color clearColor = render.clear_values[idx].color;
1134
ca->setClearColor(MTL::ClearColor(clearColor.r, clearColor.g, clearColor.b, clearColor.a));
1135
}
1136
}
1137
}
1138
1139
if (subpass.depth_stencil_reference.attachment != RDD::AttachmentReference::UNUSED) {
1140
attachmentCount += 1;
1141
uint32_t idx = subpass.depth_stencil_reference.attachment;
1142
MDAttachment const &attachment = pass.attachments[idx];
1143
MTL::Texture *tex = fb.get_texture(idx);
1144
ERR_FAIL_NULL_MSG(tex, "Frame buffer depth / stencil texture is null.");
1145
if (attachment.type & MDAttachmentType::Depth) {
1146
MTL::RenderPassDepthAttachmentDescriptor *da = desc->depthAttachment();
1147
if (attachment.configureDescriptor(da, pf, subpass, tex, render.is_rendering_entire_area, false, false, false)) {
1148
da->setClearDepth(render.clear_values[idx].depth);
1149
}
1150
}
1151
1152
if (attachment.type & MDAttachmentType::Stencil) {
1153
MTL::RenderPassStencilAttachmentDescriptor *sa = desc->stencilAttachment();
1154
if (attachment.configureDescriptor(sa, pf, subpass, tex, render.is_rendering_entire_area, false, false, true)) {
1155
sa->setClearStencil(render.clear_values[idx].stencil);
1156
}
1157
}
1158
}
1159
1160
desc->setRenderTargetWidth(MAX((NS::UInteger)MIN(render.render_area.position.x + render.render_area.size.width, fb.size.width), 1u));
1161
desc->setRenderTargetHeight(MAX((NS::UInteger)MIN(render.render_area.position.y + render.render_area.size.height, fb.size.height), 1u));
1162
1163
if (attachmentCount == 0) {
1164
// If there are no attachments, delay the creation of the encoder,
1165
// so we can use a matching sample count for the pipeline, by setting
1166
// the defaultRasterSampleCount from the pipeline's sample count.
1167
render.desc = desc;
1168
} else {
1169
render.encoder = NS::RetainPtr(command_buffer()->renderCommandEncoder(desc.get()));
1170
_encode_barrier(render.encoder.get());
1171
1172
if (!render.is_rendering_entire_area) {
1173
_render_clear_render_area();
1174
}
1175
// With a new encoder, all state is dirty.
1176
render.dirty.set_flag(RenderState::DIRTY_ALL);
1177
}
1178
}
1179
1180
void MDCommandBuffer::render_draw(uint32_t p_vertex_count,
1181
uint32_t p_instance_count,
1182
uint32_t p_base_vertex,
1183
uint32_t p_first_instance) {
1184
DEV_ASSERT(type == MDCommandBufferStateType::Render);
1185
ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer.");
1186
1187
_render_set_dirty_state();
1188
1189
MDSubpass const &subpass = render.get_subpass();
1190
if (subpass.view_count > 1) {
1191
p_instance_count *= subpass.view_count;
1192
}
1193
1194
DEV_ASSERT(render.dirty == 0);
1195
1196
MTL::RenderCommandEncoder *enc = render.encoder.get();
1197
enc->drawPrimitives(render.pipeline->raster_state.render_primitive, p_base_vertex, p_vertex_count, p_instance_count, p_first_instance);
1198
}
1199
1200
void MDCommandBuffer::render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) {
1201
DEV_ASSERT(type == MDCommandBufferStateType::Render);
1202
1203
render.vertex_buffers.resize(p_binding_count);
1204
render.vertex_offsets.resize(p_binding_count);
1205
1206
// Are the existing buffer bindings the same?
1207
bool same = true;
1208
1209
// Reverse the buffers, as their bindings are assigned in descending order.
1210
for (uint32_t i = 0; i < p_binding_count; i += 1) {
1211
const RenderingDeviceDriverMetal::BufferInfo *buf_info = (const RenderingDeviceDriverMetal::BufferInfo *)p_buffers[p_binding_count - i - 1].id;
1212
1213
NS::UInteger dynamic_offset = 0;
1214
if (buf_info->is_dynamic()) {
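// Dynamic buffers encode a 2-bit frame index per binding in p_dynamic_offsets; the resulting byte
// offset selects that frame's slice of the buffer (size_bytes is presumably the per-frame slice size).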
1215
const MetalBufferDynamicInfo *dyn_buf = (const MetalBufferDynamicInfo *)buf_info;
1216
uint64_t frame_idx = p_dynamic_offsets & 0x3;
1217
p_dynamic_offsets >>= 2;
1218
dynamic_offset = frame_idx * dyn_buf->size_bytes;
1219
}
1220
if (render.vertex_buffers[i] != buf_info->metal_buffer.get()) {
1221
render.vertex_buffers[i] = buf_info->metal_buffer.get();
1222
same = false;
1223
}
1224
1225
render.vertex_offsets[i] = dynamic_offset + p_offsets[p_binding_count - i - 1];
1226
}
1227
1228
if (render.encoder.get() != nullptr) {
1229
uint32_t first = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(p_binding_count - 1);
1230
if (same) {
1231
NS::UInteger *offset_ptr = render.vertex_offsets.ptr();
1232
for (uint32_t i = first; i < first + p_binding_count; i++) {
1233
render.encoder->setVertexBufferOffset(*offset_ptr, i);
1234
offset_ptr++;
1235
}
1236
} else {
1237
render.encoder->setVertexBuffers(render.vertex_buffers.ptr(), render.vertex_offsets.ptr(), NS::Range(first, p_binding_count));
1238
}
1239
render.dirty.clear_flag(RenderState::DIRTY_VERTEX);
1240
} else {
1241
render.dirty.set_flag(RenderState::DIRTY_VERTEX);
1242
}
1243
}
1244
1245
void MDCommandBuffer::render_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint64_t p_offset) {
1246
DEV_ASSERT(type == MDCommandBufferStateType::Render);
1247
1248
const RenderingDeviceDriverMetal::BufferInfo *buffer = (const RenderingDeviceDriverMetal::BufferInfo *)p_buffer.id;
1249
1250
render.index_buffer = buffer->metal_buffer.get();
1251
render.index_type = p_format == RDD::IndexBufferFormat::INDEX_BUFFER_FORMAT_UINT16 ? MTL::IndexTypeUInt16 : MTL::IndexTypeUInt32;
1252
render.index_offset = p_offset;
1253
}
1254
1255
void MDCommandBuffer::render_draw_indexed(uint32_t p_index_count,
1256
uint32_t p_instance_count,
1257
uint32_t p_first_index,
1258
int32_t p_vertex_offset,
1259
uint32_t p_first_instance) {
1260
DEV_ASSERT(type == MDCommandBufferStateType::Render);
1261
ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer.");
1262
1263
_render_set_dirty_state();
1264
1265
MDSubpass const &subpass = render.get_subpass();
1266
if (subpass.view_count > 1) {
1267
p_instance_count *= subpass.view_count;
1268
}
1269
1270
MTL::RenderCommandEncoder *enc = render.encoder.get();
1271
1272
uint32_t index_offset = render.index_offset;
1273
index_offset += p_first_index * (render.index_type == MTL::IndexTypeUInt16 ? sizeof(uint16_t) : sizeof(uint32_t));
1274
1275
enc->drawIndexedPrimitives(render.pipeline->raster_state.render_primitive, p_index_count, render.index_type, render.index_buffer, index_offset, p_instance_count, p_vertex_offset, p_first_instance);
1276
}
1277
1278
void MDCommandBuffer::render_draw_indexed_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {
1279
DEV_ASSERT(type == MDCommandBufferStateType::Render);
1280
ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer.");
1281
1282
_render_set_dirty_state();
1283
1284
MTL::RenderCommandEncoder *enc = render.encoder.get();
1285
1286
const RenderingDeviceDriverMetal::BufferInfo *indirect_buffer = (const RenderingDeviceDriverMetal::BufferInfo *)p_indirect_buffer.id;
1287
NS::UInteger indirect_offset = p_offset;
1288
1289
for (uint32_t i = 0; i < p_draw_count; i++) {
1290
enc->drawIndexedPrimitives(render.pipeline->raster_state.render_primitive, render.index_type, render.index_buffer, 0, indirect_buffer->metal_buffer.get(), indirect_offset);
1291
indirect_offset += p_stride;
1292
}
1293
}
1294
1295
void MDCommandBuffer::render_draw_indexed_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) {
1296
ERR_FAIL_MSG("not implemented");
1297
}
1298
1299
void MDCommandBuffer::render_draw_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {
1300
DEV_ASSERT(type == MDCommandBufferStateType::Render);
1301
ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer.");
1302
1303
_render_set_dirty_state();
1304
1305
MTL::RenderCommandEncoder *enc = render.encoder.get();
1306
1307
const RenderingDeviceDriverMetal::BufferInfo *indirect_buffer = (const RenderingDeviceDriverMetal::BufferInfo *)p_indirect_buffer.id;
1308
NS::UInteger indirect_offset = p_offset;
1309
1310
for (uint32_t i = 0; i < p_draw_count; i++) {
1311
enc->drawPrimitives(render.pipeline->raster_state.render_primitive, indirect_buffer->metal_buffer.get(), indirect_offset);
1312
indirect_offset += p_stride;
1313
}
1314
}
1315
1316
void MDCommandBuffer::render_draw_indirect_count(RDD::BufferID p_indirect_buffer, uint64_t p_offset, RDD::BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) {
1317
ERR_FAIL_MSG("not implemented");
1318
}
1319
1320
void MDCommandBuffer::render_end_pass() {
1321
DEV_ASSERT(type == MDCommandBufferStateType::Render);
1322
1323
render.end_encoding();
1324
render.reset();
1325
reset();
1326
}
1327
1328
#pragma mark - RenderState
1329
1330
void MDCommandBuffer::RenderState::reset() {
1331
pass = nullptr;
1332
frameBuffer = nullptr;
1333
pipeline = nullptr;
1334
current_subpass = UINT32_MAX;
1335
render_area = {};
1336
is_rendering_entire_area = false;
1337
desc.reset();
1338
encoder.reset();
1339
index_buffer = nullptr;
1340
index_type = MTL::IndexTypeUInt16;
1341
dirty = DIRTY_NONE;
1342
uniform_sets.clear();
1343
dynamic_offsets = 0;
1344
uniform_set_mask = 0;
1345
clear_values.clear();
1346
viewports.clear();
1347
scissors.clear();
1348
blend_constants.reset();
1349
bzero(vertex_buffers.ptr(), sizeof(MTL::Buffer *) * vertex_buffers.size());
1350
vertex_buffers.clear();
1351
bzero(vertex_offsets.ptr(), sizeof(NS::UInteger) * vertex_offsets.size());
1352
vertex_offsets.clear();
1353
resource_tracker.reset();
1354
}
1355
1356
void MDCommandBuffer::RenderState::end_encoding() {
1357
if (encoder.get() == nullptr) {
1358
return;
1359
}
1360
1361
encoder->endEncoding();
1362
encoder.reset();
1363
}
1364
1365
#pragma mark - ComputeState
1366
1367
void MDCommandBuffer::ComputeState::end_encoding() {
1368
if (encoder.get() == nullptr) {
1369
return;
1370
}
1371
1372
encoder->endEncoding();
1373
encoder.reset();
1374
}

#pragma mark - Compute

void MDCommandBuffer::_compute_set_dirty_state() {
	if (compute.dirty.has_flag(ComputeState::DIRTY_PIPELINE)) {
		compute.encoder = NS::RetainPtr(command_buffer()->computeCommandEncoder(MTL::DispatchTypeConcurrent));
		_encode_barrier(compute.encoder.get());
		compute.encoder->setComputePipelineState(compute.pipeline->state.get());
	}

	_compute_bind_uniform_sets();

	if (compute.dirty.has_flag(ComputeState::DIRTY_PUSH)) {
		if (push_constant_binding != UINT32_MAX) {
			compute.encoder->setBytes(push_constant_data, push_constant_data_len, push_constant_binding);
		}
	}

	if (!use_barriers) {
		compute.resource_tracker.encode(compute.encoder.get());
	}

	compute.dirty.clear();
}
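
// Illustrative note: compute state changes are recorded through the DIRTY_*
// flags and only flushed here, immediately before a dispatch. In this path the
// encoder is (re)created when DIRTY_PIPELINE is set rather than at bind time,
// so repeated pipeline/uniform rebinds cost nothing until work is encoded.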

void MDCommandBuffer::_compute_bind_uniform_sets() {
	DEV_ASSERT(type == MDCommandBufferStateType::Compute);
	if (!compute.dirty.has_flag(ComputeState::DIRTY_UNIFORMS)) {
		return;
	}

	compute.dirty.clear_flag(ComputeState::DIRTY_UNIFORMS);
	uint64_t set_uniforms = compute.uniform_set_mask;
	compute.uniform_set_mask = 0;

	MDComputeShader *shader = compute.pipeline->shader;
	const uint32_t dynamic_offsets = compute.dynamic_offsets;

	while (set_uniforms != 0) {
		// Find the index of the next set bit.
		uint32_t index = (uint32_t)__builtin_ctzll(set_uniforms);
		// Clear the set bit.
		set_uniforms &= (set_uniforms - 1);
		MDUniformSet *set = compute.uniform_sets[index];
		if (set == nullptr || index >= (uint32_t)shader->sets.size()) {
			continue;
		}
		if (shader->uses_argument_buffers) {
			_bind_uniforms_argument_buffers_compute(set, shader, index, dynamic_offsets);
		} else {
			DirectEncoder de(compute.encoder.get(), binding_cache, DirectEncoder::COMPUTE);
			_bind_uniforms_direct(set, shader, de, index, dynamic_offsets);
		}
	}
}
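
// Illustrative sketch of the bit peeling above (hypothetical mask value):
// with uniform_set_mask == 0b1010, only sets 1 and 3 are visited.
//
//   uint64_t bits = 0b1010;
//   while (bits != 0) {
//       uint32_t index = (uint32_t)__builtin_ctzll(bits); // 1, then 3.
//       bits &= (bits - 1); // Clear the lowest set bit.
//   }
//
// This skips untouched set slots instead of scanning all 64 of them.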

void MDCommandBuffer::ComputeState::reset() {
	pipeline = nullptr;
	encoder.reset();
	dirty = DIRTY_NONE;
	uniform_sets.clear();
	dynamic_offsets = 0;
	uniform_set_mask = 0;
	resource_tracker.reset();
}

void MDCommandBuffer::compute_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) {
	DEV_ASSERT(type == MDCommandBufferStateType::Compute);

	if (uint32_t new_size = p_first_set_index + p_set_count; compute.uniform_sets.size() < new_size) {
		uint32_t s = compute.uniform_sets.size();
		compute.uniform_sets.resize(new_size);
		// Set intermediate values to null.
		std::fill(&compute.uniform_sets[s], compute.uniform_sets.end().operator->(), nullptr);
	}

	const MDShader *shader = (const MDShader *)p_shader.id;
	DynamicOffsetLayout layout = shader->dynamic_offset_layout;

	// Clear bits for sets being bound, then OR new values.
	for (uint32_t i = 0; i < p_set_count && compute.dynamic_offsets != 0; i++) {
		uint32_t set_index = p_first_set_index + i;
		uint32_t count = layout.get_count(set_index);
		if (count > 0) {
			uint32_t shift = layout.get_offset_index_shift(set_index);
			uint32_t mask = ((1u << (count * 4u)) - 1u) << shift;
			compute.dynamic_offsets &= ~mask; // Clear this set's bits
		}
	}
	compute.dynamic_offsets |= p_dynamic_offsets;

	for (size_t i = 0; i < p_set_count; ++i) {
		MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id);

		uint32_t index = p_first_set_index + i;
		if (compute.uniform_sets[index] != set || layout.get_count(index) > 0) {
			compute.dirty.set_flag(ComputeState::DIRTY_UNIFORMS);
			compute.uniform_set_mask |= 1ULL << index;
			compute.uniform_sets[index] = set;
		}
	}
}
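
// Illustrative sketch of the 4-bit packing above (hypothetical layout: a set
// whose dynamic offsets start at bit 8 and which has two dynamic uniforms):
//
//   uint32_t count = 2;
//   uint32_t shift = 8;
//   uint32_t mask = ((1u << (count * 4u)) - 1u) << shift; // 0x0000ff00
//   dynamic_offsets &= ~mask;              // Drop the stale frame indexes.
//   dynamic_offsets |= p_dynamic_offsets;  // Merge the newly supplied ones.
//
// Each dynamic uniform contributes one 4-bit frame index, so a single 32-bit
// word can carry up to eight dynamic offsets across the bound sets.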

void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
	DEV_ASSERT(type == MDCommandBufferStateType::Compute);

	_compute_set_dirty_state();

	MTL::Size size = MTL::Size(p_x_groups, p_y_groups, p_z_groups);

	MTL::ComputeCommandEncoder *enc = compute.encoder.get();
	enc->dispatchThreadgroups(size, compute.pipeline->compute_state.local);
}
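
// Illustrative note: dispatchThreadgroups() takes the number of threadgroups,
// not the number of threads. The per-group size comes from the pipeline's
// compute_state.local, so with a hypothetical local size of (16, 16, 1),
// dispatching (8, 8, 1) groups launches 128 x 128 x 1 threads in total.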

void MDCommandBuffer::compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset) {
	DEV_ASSERT(type == MDCommandBufferStateType::Compute);

	_compute_set_dirty_state();

	const RenderingDeviceDriverMetal::BufferInfo *indirectBuffer = (const RenderingDeviceDriverMetal::BufferInfo *)p_indirect_buffer.id;

	MTL::ComputeCommandEncoder *enc = compute.encoder.get();
	enc->dispatchThreadgroups(indirectBuffer->metal_buffer.get(), p_offset, compute.pipeline->compute_state.local);
}

void MDCommandBuffer::reset() {
	push_constant_binding = UINT32_MAX;
	push_constant_data_len = 0;
	type = MDCommandBufferStateType::None;
	binding_cache.clear();
}

void MDCommandBuffer::_end_compute_dispatch() {
	DEV_ASSERT(type == MDCommandBufferStateType::Compute);

	compute.end_encoding();
	compute.reset();
	reset();
}

void MDCommandBuffer::_end_blit() {
	DEV_ASSERT(type == MDCommandBufferStateType::Blit);

	blit.encoder->endEncoding();
	blit.reset();
	reset();
}

MDComputeShader::MDComputeShader(CharString p_name,
		Vector<UniformSet> p_sets,
		bool p_uses_argument_buffers,
		std::shared_ptr<MDLibrary> p_kernel) :
		MDShader(p_name, p_sets, p_uses_argument_buffers), kernel(std::move(p_kernel)) {
}

MDRenderShader::MDRenderShader(CharString p_name,
		Vector<UniformSet> p_sets,
		bool p_needs_view_mask_buffer,
		bool p_uses_argument_buffers,
		std::shared_ptr<MDLibrary> p_vert, std::shared_ptr<MDLibrary> p_frag) :
		MDShader(p_name, p_sets, p_uses_argument_buffers),
		needs_view_mask_buffer(p_needs_view_mask_buffer),
		vert(std::move(p_vert)),
		frag(std::move(p_frag)) {
}

void DirectEncoder::set(MTL::Texture **p_textures, NS::Range p_range) {
	if (cache.update(p_range, p_textures)) {
		switch (mode) {
			case RENDER: {
				MTL::RenderCommandEncoder *enc = static_cast<MTL::RenderCommandEncoder *>(encoder);
				enc->setVertexTextures(p_textures, p_range);
				enc->setFragmentTextures(p_textures, p_range);
			} break;
			case COMPUTE: {
				MTL::ComputeCommandEncoder *enc = static_cast<MTL::ComputeCommandEncoder *>(encoder);
				enc->setTextures(p_textures, p_range);
			} break;
		}
	}
}

void DirectEncoder::set(MTL::Buffer **p_buffers, const NS::UInteger *p_offsets, NS::Range p_range) {
	if (cache.update(p_range, p_buffers, p_offsets)) {
		switch (mode) {
			case RENDER: {
				MTL::RenderCommandEncoder *enc = static_cast<MTL::RenderCommandEncoder *>(encoder);
				enc->setVertexBuffers(p_buffers, p_offsets, p_range);
				enc->setFragmentBuffers(p_buffers, p_offsets, p_range);
			} break;
			case COMPUTE: {
				MTL::ComputeCommandEncoder *enc = static_cast<MTL::ComputeCommandEncoder *>(encoder);
				enc->setBuffers(p_buffers, p_offsets, p_range);
			} break;
		}
	}
}

void DirectEncoder::set(MTL::Buffer *p_buffer, NS::UInteger p_offset, uint32_t p_index) {
	if (cache.update(p_buffer, p_offset, p_index)) {
		switch (mode) {
			case RENDER: {
				MTL::RenderCommandEncoder *enc = static_cast<MTL::RenderCommandEncoder *>(encoder);
				enc->setVertexBuffer(p_buffer, p_offset, p_index);
				enc->setFragmentBuffer(p_buffer, p_offset, p_index);
			} break;
			case COMPUTE: {
				MTL::ComputeCommandEncoder *enc = static_cast<MTL::ComputeCommandEncoder *>(encoder);
				enc->setBuffer(p_buffer, p_offset, p_index);
			} break;
		}
	}
}

void DirectEncoder::set(MTL::SamplerState **p_samplers, NS::Range p_range) {
	if (cache.update(p_range, p_samplers)) {
		switch (mode) {
			case RENDER: {
				MTL::RenderCommandEncoder *enc = static_cast<MTL::RenderCommandEncoder *>(encoder);
				enc->setVertexSamplerStates(p_samplers, p_range);
				enc->setFragmentSamplerStates(p_samplers, p_range);
			} break;
			case COMPUTE: {
				MTL::ComputeCommandEncoder *enc = static_cast<MTL::ComputeCommandEncoder *>(encoder);
				enc->setSamplerStates(p_samplers, p_range);
			} break;
		}
	}
}
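
// Illustrative sketch (hypothetical caller): DirectEncoder hides whether the
// underlying encoder is a render or a compute encoder, so the shared binding
// code in _bind_uniforms_direct() stays encoder-agnostic. A render-side caller
// would presumably be driven the same way as the compute path shown earlier:
//
//   DirectEncoder de(render.encoder.get(), binding_cache, DirectEncoder::RENDER);
//   de.set(textures, { 0, count }); // Binds both vertex and fragment stages.
//
// The cache.update() checks let repeated identical bindings skip the Metal
// calls entirely.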

GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability-new")

void MDCommandBuffer::_bind_uniforms_argument_buffers(MDUniformSet *p_set, MDShader *p_shader, uint32_t p_set_index, uint32_t p_dynamic_offsets) {
	DEV_ASSERT(p_shader->uses_argument_buffers);
	DEV_ASSERT(render.encoder.get() != nullptr);

	MTL::RenderCommandEncoder *enc = render.encoder.get();
	render.resource_tracker.merge_from(p_set->usage_to_resources);

	const UniformSet &shader_set = p_shader->sets[p_set_index];

	// Check if this set has dynamic uniforms.
	if (!shader_set.dynamic_uniforms.is_empty()) {
		// Allocate from the ring buffer.
		uint32_t buffer_size = p_set->arg_buffer_data.size();
		MDRingBuffer::Allocation alloc = allocate_arg_buffer(buffer_size);

		// Copy the base argument buffer data.
		memcpy(alloc.ptr, p_set->arg_buffer_data.ptr(), buffer_size);

		// Update dynamic buffer GPU addresses.
		uint64_t *ptr = (uint64_t *)alloc.ptr;
		DynamicOffsetLayout layout = p_shader->dynamic_offset_layout;
		uint32_t dynamic_index = 0;

		for (uint32_t i : shader_set.dynamic_uniforms) {
			RDD::BoundUniform const &uniform = p_set->uniforms[i];
			const UniformInfo &ui = shader_set.uniforms[i];
			const UniformInfo::Indexes &idx = ui.arg_buffer;

			uint32_t shift = layout.get_offset_index_shift(p_set_index, dynamic_index);
			dynamic_index++;
			uint32_t frame_idx = (p_dynamic_offsets >> shift) & 0xf;

			const MetalBufferDynamicInfo *buf_info = (const MetalBufferDynamicInfo *)uniform.ids[0].id;
			uint64_t gpu_address = buf_info->metal_buffer.get()->gpuAddress() + frame_idx * buf_info->size_bytes;
			*(uint64_t *)(ptr + idx.buffer) = gpu_address;
		}

		enc->setVertexBuffer(alloc.buffer, alloc.offset, p_set_index);
		enc->setFragmentBuffer(alloc.buffer, alloc.offset, p_set_index);
	} else {
		enc->setVertexBuffer(p_set->arg_buffer.get(), 0, p_set_index);
		enc->setFragmentBuffer(p_set->arg_buffer.get(), 0, p_set_index);
	}
}
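
// Illustrative sketch of the patching above (hypothetical values: the dynamic
// buffer sits at argument-buffer slot 3 and the packed frame index is 2):
//
//   uint64_t base = buf_info->metal_buffer.get()->gpuAddress();
//   ptr[3] = base + 2 * buf_info->size_bytes;
//
// Only the 64-bit address slots of dynamic buffers are rewritten in the
// transient ring-buffer copy; every other entry keeps the data copied from
// p_set->arg_buffer_data, so the prebuilt argument buffer never needs to be
// rebuilt per frame.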

void MDCommandBuffer::_bind_uniforms_direct(MDUniformSet *p_set, MDShader *p_shader, DirectEncoder p_enc, uint32_t p_set_index, uint32_t p_dynamic_offsets) {
	DEV_ASSERT(!p_shader->uses_argument_buffers);

	UniformSet const &set = p_shader->sets[p_set_index];
	DynamicOffsetLayout layout = p_shader->dynamic_offset_layout;
	uint32_t dynamic_index = 0;

	for (uint32_t i = 0; i < MIN(p_set->uniforms.size(), set.uniforms.size()); i++) {
		RDD::BoundUniform const &uniform = p_set->uniforms[i];
		const UniformInfo &ui = set.uniforms[i];
		const UniformInfo::Indexes &indexes = ui.slot;

		uint32_t frame_idx;
		if (uniform.is_dynamic()) {
			uint32_t shift = layout.get_offset_index_shift(p_set_index, dynamic_index);
			dynamic_index++;
			frame_idx = (p_dynamic_offsets >> shift) & 0xf;
		} else {
			frame_idx = 0;
		}

		switch (uniform.type) {
			case RDD::UNIFORM_TYPE_SAMPLER: {
				size_t count = uniform.ids.size();
				MTL::SamplerState **objects = ALLOCA_ARRAY(MTL::SamplerState *, count);
				for (size_t j = 0; j < count; j += 1) {
					objects[j] = rid::get<MTL::SamplerState>(uniform.ids[j]);
				}
				NS::Range sampler_range = { indexes.sampler, count };
				p_enc.set(objects, sampler_range);
			} break;
			case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {
				size_t count = uniform.ids.size() / 2;
				MTL::Texture **textures = ALLOCA_ARRAY(MTL::Texture *, count);
				MTL::SamplerState **samplers = ALLOCA_ARRAY(MTL::SamplerState *, count);
				for (uint32_t j = 0; j < count; j += 1) {
					samplers[j] = rid::get<MTL::SamplerState>(uniform.ids[j * 2 + 0]);
					textures[j] = rid::get<MTL::Texture>(uniform.ids[j * 2 + 1]);
				}
				NS::Range sampler_range = { indexes.sampler, count };
				NS::Range texture_range = { indexes.texture, count };
				p_enc.set(samplers, sampler_range);
				p_enc.set(textures, texture_range);
			} break;
			case RDD::UNIFORM_TYPE_TEXTURE: {
				size_t count = uniform.ids.size();
				MTL::Texture **objects = ALLOCA_ARRAY(MTL::Texture *, count);
				for (size_t j = 0; j < count; j += 1) {
					objects[j] = rid::get<MTL::Texture>(uniform.ids[j]);
				}
				NS::Range texture_range = { indexes.texture, count };
				p_enc.set(objects, texture_range);
			} break;
			case RDD::UNIFORM_TYPE_IMAGE: {
				size_t count = uniform.ids.size();
				MTL::Texture **objects = ALLOCA_ARRAY(MTL::Texture *, count);
				for (size_t j = 0; j < count; j += 1) {
					objects[j] = rid::get<MTL::Texture>(uniform.ids[j]);
				}
				NS::Range texture_range = { indexes.texture, count };
				p_enc.set(objects, texture_range);

				if (indexes.buffer != UINT32_MAX) {
					// Emulated atomic image access.
					MTL::Buffer **bufs = ALLOCA_ARRAY(MTL::Buffer *, count);
					for (size_t j = 0; j < count; j += 1) {
						MTL::Texture *obj = objects[j];
						MTL::Texture *tex = obj->parentTexture() ? obj->parentTexture() : obj;
						bufs[j] = tex->buffer();
					}
					NS::UInteger *offs = ALLOCA_ARRAY(NS::UInteger, count);
					bzero(offs, sizeof(NS::UInteger) * count);
					NS::Range buffer_range = { indexes.buffer, count };
					p_enc.set(bufs, offs, buffer_range);
				}
			} break;
			case RDD::UNIFORM_TYPE_TEXTURE_BUFFER: {
				ERR_PRINT("not implemented: UNIFORM_TYPE_TEXTURE_BUFFER");
			} break;
			case RDD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: {
				ERR_PRINT("not implemented: UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER");
			} break;
			case RDD::UNIFORM_TYPE_IMAGE_BUFFER: {
				CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER");
			} break;
			case RDD::UNIFORM_TYPE_UNIFORM_BUFFER:
			case RDD::UNIFORM_TYPE_STORAGE_BUFFER: {
				const RDM::BufferInfo *buf_info = (const RDM::BufferInfo *)uniform.ids[0].id;
				p_enc.set(buf_info->metal_buffer.get(), 0, indexes.buffer);
			} break;
			case RDD::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC:
			case RDD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {
				const MetalBufferDynamicInfo *buf_info = (const MetalBufferDynamicInfo *)uniform.ids[0].id;
				p_enc.set(buf_info->metal_buffer.get(), frame_idx * buf_info->size_bytes, indexes.buffer);
			} break;
			case RDD::UNIFORM_TYPE_INPUT_ATTACHMENT: {
				size_t count = uniform.ids.size();
				MTL::Texture **objects = ALLOCA_ARRAY(MTL::Texture *, count);
				for (size_t j = 0; j < count; j += 1) {
					objects[j] = rid::get<MTL::Texture>(uniform.ids[j]);
				}
				NS::Range texture_range = { indexes.texture, count };
				p_enc.set(objects, texture_range);
			} break;
			default: {
				DEV_ASSERT(false);
			}
		}
	}
}
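
// Illustrative note: for *_BUFFER_DYNAMIC uniforms bound through the direct
// path, the packed 4-bit frame index selects one slice of a buffer that holds
// one copy of the data per frame in flight. With a hypothetical
// size_bytes == 256 and frame_idx == 2, the same MTL::Buffer is bound at byte
// offset 512 rather than being rebound to a different buffer.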

void MDCommandBuffer::_bind_uniforms_argument_buffers_compute(MDUniformSet *p_set, MDShader *p_shader, uint32_t p_set_index, uint32_t p_dynamic_offsets) {
	DEV_ASSERT(p_shader->uses_argument_buffers);
	DEV_ASSERT(compute.encoder.get() != nullptr);

	MTL::ComputeCommandEncoder *enc = compute.encoder.get();
	compute.resource_tracker.merge_from(p_set->usage_to_resources);

	const UniformSet &shader_set = p_shader->sets[p_set_index];

	// Check if this set has dynamic uniforms.
	if (!shader_set.dynamic_uniforms.is_empty()) {
		// Allocate from the ring buffer.
		uint32_t buffer_size = p_set->arg_buffer_data.size();
		MDRingBuffer::Allocation alloc = allocate_arg_buffer(buffer_size);

		// Copy the base argument buffer data.
		memcpy(alloc.ptr, p_set->arg_buffer_data.ptr(), buffer_size);

		// Update dynamic buffer GPU addresses.
		uint64_t *ptr = (uint64_t *)alloc.ptr;
		DynamicOffsetLayout layout = p_shader->dynamic_offset_layout;
		uint32_t dynamic_index = 0;

		for (uint32_t i : shader_set.dynamic_uniforms) {
			RDD::BoundUniform const &uniform = p_set->uniforms[i];
			const UniformInfo &ui = shader_set.uniforms[i];
			const UniformInfo::Indexes &idx = ui.arg_buffer;

			uint32_t shift = layout.get_offset_index_shift(p_set_index, dynamic_index);
			dynamic_index++;
			uint32_t frame_idx = (p_dynamic_offsets >> shift) & 0xf;

			const MetalBufferDynamicInfo *buf_info = (const MetalBufferDynamicInfo *)uniform.ids[0].id;
			uint64_t gpu_address = buf_info->metal_buffer.get()->gpuAddress() + frame_idx * buf_info->size_bytes;
			*(uint64_t *)(ptr + idx.buffer) = gpu_address;
		}

		enc->setBuffer(alloc.buffer, alloc.offset, p_set_index);
	} else {
		enc->setBuffer(p_set->arg_buffer.get(), 0, p_set_index);
	}
}

GODOT_CLANG_WARNING_POP
