Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
bevyengine
GitHub Repository: bevyengine/bevy
Path: blob/main/crates/bevy_pbr/src/cluster/gpu.rs
30636 views
1
//! Clustering of lights and other clusterable objects on GPU.
2
//!
3
//! GPU light clustering uses the hardware rasterizer for compute purposes as a
4
//! way to automatically distribute workloads within 2D axis-aligned bounding
5
//! boxes without actually rendering any pixels. The algorithm is as follows,
6
//! with each step corresponding to a raster or compute command:
7
//!
8
//! 1. *Z slicing*: We have a 3D cluster froxel grid of size W×H×D and seek to
9
//! rasterize D axis-aligned quads, each of size W×H, representing the range of
10
//! each clusterable object. In this compute phase, we generate D indirect
11
//! instances for each clusterable object for the subsequent indirect draws.
12
//!
13
//! 2. *Count rasterization*: We use instanced indirect drawing to rasterize
14
//! each quad generated in step 1 to a viewport of size W×H, with color
15
//! writes disabled. Each rasterized fragment represents a cluster-object
16
//! pair. In the fragment shader, we check to see if the object
17
//! intersects the cluster, and, if it does, we atomically bump a counter
18
//! corresponding to the number of objects of the given type intersecting
19
//! the cluster in question. We don't record the ID of the object in this
20
//! phase; we simply count the number of objects.
21
//!
22
//! 3. *Local allocation*: Now that we know the number of objects of each
23
//! type in each cluster, we can proceed to allocate space in the
24
//! clustered object buffer for each clustered object list. To do this,
25
//! we need to perform a [*prefix sum*] operation so that each list is
26
//! tightly packed with the others. For example, if adjacent clusters
27
//! have 2, 5, and 3 objects, they'll be allocated at offsets 0, 2, and 7
28
//! respectively. This *local* step uses a [Hillis-Steele scan] in shared
29
//! memory to compute the prefix sum of each chunk of 256 clusters. We
30
//! can't go beyond 256 clusters in this local step because 256 is the
31
//! maximum workgroup size in `wgpu`.
32
//!
33
//! 4. *Global allocation*: To deal with the fact that we can't calculate
34
//! prefix sums beyond 256 clusters in step 3, we employ this second step
35
//! that does a sequential loop over every 256-cluster chunk, propagating
36
//! the prefix sum. At the end of this step, every list of clustered
37
//! objects is allocated.
38
//!
39
//! 5. *Populate rasterization*: Finally, we issue an instanced indirect
40
//! draw command using the same parameters as step (2). We test each
41
//! cluster-object pair for intersection, and, if the test passes, we
42
//! record the ID of each clustered object into the correct space in the
43
//! list, using a scratch pad buffer of atomics to store the position of
44
//! the next object in each list.
45
//!
46
//! [*prefix sum*]: https://en.wikipedia.org/wiki/Prefix_sum
47
//!
48
//! [Hillis-Steele scan]: https://en.wikipedia.org/wiki/Prefix_sum#Algorithm_1:_Shorter_span,_more_parallel
49
50
use alloc::sync::Arc;
51
use std::sync::Mutex;
52
53
use bevy_app::{App, Plugin};
54
use bevy_asset::{embedded_asset, load_embedded_asset, AssetServer, Handle};
55
use bevy_camera::Camera;
56
use bevy_color::Color;
57
use bevy_core_pipeline::{prepass::node::early_prepass, Core3d, Core3dSystems};
58
use bevy_derive::{Deref, DerefMut};
59
use bevy_ecs::{
60
component::Component,
61
entity::Entity,
62
query::With,
63
resource::Resource,
64
schedule::IntoScheduleConfigs as _,
65
system::{Commands, Query, Res, ResMut},
66
world::{FromWorld, World},
67
};
68
use bevy_light::{
69
cluster::{Clusters, GlobalClusterGpuSettings, GlobalClusterSettings},
70
EnvironmentMapLight, IrradianceVolume,
71
};
72
use bevy_material::descriptor::{
73
BindGroupLayoutDescriptor, CachedComputePipelineId, CachedRenderPipelineId,
74
ComputePipelineDescriptor, FragmentState, RenderPipelineDescriptor, VertexState,
75
};
76
use bevy_math::{vec2, Vec2};
77
use bevy_mesh::{VertexBufferLayout, VertexFormat};
78
use bevy_render::{
79
diagnostic::RecordDiagnostics as _,
80
extract_resource::{ExtractResource, ExtractResourcePlugin},
81
render_resource::{
82
binding_types,
83
encase::internal::{CreateFrom as _, Reader},
84
BindGroup, BindGroupEntry, BindGroupLayoutEntries, Buffer, BufferBindingType,
85
BufferDescriptor, BufferInitDescriptor, BufferUsages, ColorTargetState, ColorWrites,
86
CommandEncoder, ComputePassDescriptor, ComputePipeline, Extent3d, IndexFormat, LoadOp,
87
MapMode, Operations, PipelineCache, RenderPassColorAttachment, RenderPassDescriptor,
88
RenderPipeline, ShaderStages, ShaderType, SpecializedComputePipeline,
89
SpecializedComputePipelines, SpecializedRenderPipeline, SpecializedRenderPipelines,
90
StorageBuffer, StoreOp, TextureDescriptor, TextureDimension, TextureFormat, TextureUsages,
91
UninitBufferVec, VertexAttribute, VertexStepMode,
92
},
93
renderer::{RenderContext, RenderDevice, RenderQueue, ViewQuery},
94
sync_world::{MainEntity, MainEntityHashMap, MainEntityHashSet, RenderEntity},
95
texture::{CachedTexture, TextureCache},
96
view::{ExtractedView, ViewUniform, ViewUniformOffset, ViewUniforms},
97
GpuResourceAppExt, MainWorld, Render, RenderApp, RenderSystems,
98
};
99
use bevy_shader::{load_shader_library, Shader, ShaderDefVal};
100
use bevy_utils::default;
101
use bytemuck::{Pod, Zeroable};
102
use tracing::{error, trace, warn};
103
104
use crate::{
105
cluster::{
106
GpuClusterOffsetAndCounts, GpuClusterOffsetsAndCountsStorage,
107
GpuClusterableObjectIndexListsStorage, ViewClusterBuffers,
108
},
109
decal::clustered::{DecalsBuffer, RenderClusteredDecal, RenderClusteredDecals},
110
gpu_clustering_is_enabled, ExtractedClusterConfig, GlobalClusterableObjectMeta,
111
GpuClusteredLight, GpuLights, LightMeta, LightProbesBuffer, LightProbesUniform,
112
RenderViewLightProbes, ViewClusterBindings, ViewLightProbesUniformOffset,
113
ViewLightsUniformOffset,
114
};
115
116
/// Workgroup size used by the `cluster_allocate.wgsl` shader.
const ALLOCATION_WORKGROUP_SIZE: u32 = 256;
/// Workgroup size used by the `cluster_z_slice.wgsl` shader.
const Z_SLICING_WORKGROUP_SIZE: u32 = 64;
120
121
/// A plugin that enables GPU clustering of lights and other objects.
122
pub struct GpuClusteringPlugin;
123
124
impl Plugin for GpuClusteringPlugin {
125
fn build(&self, app: &mut App) {
126
load_shader_library!(app, "cluster.wgsl");
127
embedded_asset!(app, "cluster_z_slice.wgsl");
128
embedded_asset!(app, "cluster_raster.wgsl");
129
embedded_asset!(app, "cluster_allocate.wgsl");
130
131
app.add_plugins(ExtractResourcePlugin::<
132
GlobalClusterSettings,
133
GpuClusteringPlugin,
134
>::default());
135
}
136
137
fn finish(&self, app: &mut App) {
138
let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
139
return;
140
};
141
142
// Bail out if we have no storage buffers. This is the case when we have
143
// `WGPU_SETTINGS_PRIO="webgl2"`.
144
let render_device = render_app.world().resource::<RenderDevice>();
145
if render_device.limits().max_storage_buffers_per_shader_stage == 0 {
146
return;
147
}
148
149
render_app
150
.init_gpu_resource::<SpecializedRenderPipelines<ClusteringRasterPipeline>>()
151
.init_gpu_resource::<SpecializedComputePipelines<ClusteringZSlicingPipeline>>()
152
.init_gpu_resource::<SpecializedComputePipelines<ClusteringAllocationPipeline>>()
153
.init_gpu_resource::<RenderViewClusteringReadbackData>()
154
.init_gpu_resource::<GpuClusteringMeshBuffers>()
155
.init_gpu_resource::<ClusteringRasterPipeline>()
156
.init_gpu_resource::<ClusteringZSlicingPipeline>()
157
.init_gpu_resource::<ClusteringAllocationPipeline>()
158
.add_systems(
159
Render,
160
(prepare_clustering_pipelines, prepare_cluster_dummy_textures)
161
.in_set(RenderSystems::Prepare)
162
.run_if(gpu_clustering_is_enabled),
163
)
164
.add_systems(
165
Render,
166
(
167
prepare_clusters_for_gpu_clustering,
168
upload_view_gpu_clustering_buffers,
169
)
170
.chain()
171
.in_set(RenderSystems::PrepareResources)
172
.run_if(gpu_clustering_is_enabled),
173
)
174
.add_systems(
175
Render,
176
prepare_clustering_bind_groups
177
.in_set(RenderSystems::PrepareBindGroups)
178
.run_if(gpu_clustering_is_enabled),
179
)
180
.add_systems(
181
Core3d,
182
cluster_on_gpu
183
.before(early_prepass)
184
.in_set(Core3dSystems::Prepass)
185
.run_if(gpu_clustering_is_enabled),
186
);
187
}
188
}
189
190
/// The texture that we bind when performing the raster passes.
191
///
192
/// We don't actually write to this texture; it exists only so that we can set a
193
/// viewport.
194
#[derive(Component, Deref, DerefMut)]
195
pub struct ViewClusteringDummyTexture(CachedTexture);
196
197
/// The bind groups for each pass of GPU clustering.
198
#[derive(Component)]
199
pub struct ViewClusteringBindGroups {
200
/// The bind group for the Z-slicing compute pass.
201
clustering_bind_group_z_slicing_pass: BindGroup,
202
/// The bind group for the count rasterization pass.
203
clustering_bind_group_count_pass: BindGroup,
204
/// The bind group for both local and global allocation passes.
205
clustering_bind_group_allocate_pass: BindGroup,
206
/// The bind group for the populate rasterization pass.
207
clustering_bind_group_populate_pass: BindGroup,
208
}
209
210
/// The GPU representation of a single Z-slice of a clusterable object.
211
///
212
/// A Z-slice is an axis-aligned bounding box representing the potential
213
/// bounding box of a clusterable object in a single Z slice of the froxel grid.
214
#[derive(Clone, Copy, Default, PartialEq, Eq, Hash, ShaderType, Pod, Zeroable)]
215
#[repr(C)]
216
pub struct ClusterableObjectZSlice {
217
/// The index of the object to be clustered.
218
pub object_index: u32,
219
/// The type of the object to be clustered.
220
///
221
/// This is one of the `CLUSTERABLE_OBJECT_TYPE_` constants in
222
/// `cluster.wgsl`.
223
pub object_type: u32,
224
/// The Z coordinate of the froxels that this slice covers.
225
pub z_slice: u32,
226
}
227
228
/// Metadata stored on GPU that's global to all clusters for a view.
229
#[derive(Clone, Copy, Default, ShaderType, Pod, Zeroable)]
230
#[repr(C)]
231
pub struct ClusterMetadata {
232
/// The indirect draw parameters for the raster passes.
233
indirect_draw_params: ClusterRasterIndirectDrawParams,
234
235
/// The total number of clustered lights, set by the CPU.
236
clustered_light_count: u32,
237
/// The total number of reflection probes, set by the CPU.
238
reflection_probe_count: u32,
239
/// The total number of irradiance volumes, set by the CPU.
240
irradiance_volume_count: u32,
241
/// The total number of clustered decals, set by the CPU.
242
decal_count: u32,
243
244
/// The current maximum size of the Z-slice list.
245
z_slice_list_capacity: u32,
246
247
/// The current size of the clustered object index list.
248
///
249
/// This is set to 0 by the CPU, and the GPU updates it with the computed
250
/// value.
251
index_list_capacity: u32,
252
253
/// The farthest depth that any clustered object AABB has extended to this
254
/// frame.
255
///
256
/// This is set to 0 by the CPU, and the GPU updates it with the computed
257
/// value.
258
///
259
/// This is a float encoded by `f32_bits_to_sortable_u32`. Decode with `sortable_u32_to_f32_bits`.
260
farthest_z: u32,
261
}
262
263
/// Indirect draw parameters for the raster dispatch phase, built partially by
264
/// the CPU and partially by the GPU.
265
///
266
/// These must conform to the format that `wgpu` demands, so this structure
267
/// layout must not be modified.
268
#[derive(Clone, Copy, Default, ShaderType, Pod, Zeroable)]
269
#[repr(C)]
270
pub struct ClusterRasterIndirectDrawParams {
271
index_count: u32,
272
273
/// Represents the total number of Z slices.
274
///
275
/// This field is the one that the GPU modifies.
276
instance_count: u32,
277
278
first_index: u32,
279
base_vertex: u32,
280
first_instance: u32,
281
}
282
283
/// A component, stored on [`ExtractedView`], that stores buffers needed to
284
/// perform GPU clustering for that view.
285
#[derive(Component)]
286
pub struct ViewGpuClusteringBuffers {
287
/// The buffer that holds the Z slices for each clusterable object.
288
///
289
/// The `cluster_z_slice.wgsl` shader fills this buffer out, and the raster
290
/// passes read it.
291
pub z_slices_buffer: UninitBufferVec<ClusterableObjectZSlice>,
292
/// The buffer that holds the scratchpad offsets and counts for each
293
/// clusterable object.
294
///
295
/// The populate pass uses this to coordinate where to write indices for
296
/// each clusterable object. The allocation pass zeroes it out.
297
scratchpad_offsets_and_counts_buffer: UninitBufferVec<GpuClusterOffsetAndCounts>,
298
/// The buffer that stores the [`ClusterMetadata`].
299
///
300
/// Since this buffer is small, [`StorageBuffer`] is fine to use.
301
cluster_metadata_buffer: StorageBuffer<ClusterMetadata>,
302
}
303
304
impl ViewGpuClusteringBuffers {
305
/// Creates a new, empty set of [`ViewGpuClusteringBuffers`] for a single
306
/// view.
307
pub(crate) fn new() -> ViewGpuClusteringBuffers {
308
let mut cluster_metadata_buffer = StorageBuffer::from(ClusterMetadata::default());
309
cluster_metadata_buffer.add_usages(BufferUsages::COPY_SRC | BufferUsages::INDIRECT);
310
cluster_metadata_buffer.set_label(Some("clustering Z slicing metadata buffer"));
311
312
ViewGpuClusteringBuffers {
313
cluster_metadata_buffer,
314
z_slices_buffer: UninitBufferVec::new(BufferUsages::STORAGE | BufferUsages::COPY_DST),
315
scratchpad_offsets_and_counts_buffer: UninitBufferVec::new(
316
BufferUsages::STORAGE | BufferUsages::COPY_DST,
317
),
318
}
319
}
320
}
321
322
/// Stores data associated with reading back clustering statistics from GPU to
323
/// CPU for all views.
324
#[derive(Resource, Default)]
325
pub(crate) struct RenderViewClusteringReadbackData {
326
/// The data for each view.
327
///
328
/// This is locked behind a mutex so that the buffer readback callbacks,
329
/// which execute concurrently, can access it alongside the render world.
330
views: MainEntityHashMap<Arc<Mutex<ViewClusteringReadbackData>>>,
331
}
332
333
/// Data associated with reading back clustering statistics for a single view.
334
struct ViewClusteringReadbackData {
335
/// The current capacity of the Z slice list.
336
///
337
/// This starts out at the default size as specified by the allocation and
338
/// can grow based on the results of GPU readback.
339
z_slice_list_capacity: usize,
340
/// The current capacity of the clustered object index list.
341
///
342
/// This starts out at the default size as specified by the allocation and
343
/// can grow based on the results of GPU readback.
344
max_index_list_capacity: usize,
345
/// Buffers corresponding to GPU readback operations in progress.
346
metadata_staging_pending_buffers: Vec<Buffer>,
347
/// Buffers corresponding to GPU readback operations that are finished.
348
///
349
/// These buffers are ready for reuse.
350
metadata_staging_free_buffers: Vec<Buffer>,
351
/// Statistics about GPU clustering that the GPU calculated last frame.
352
last_frame_statistics: Option<ViewClusteringLastFrameStatistics>,
353
}
354
355
/// Clustering statistics computed by the GPU during the previous frame.
struct ViewClusteringLastFrameStatistics {
    /// How much of the index list was actually used.
    ///
    /// When this exceeds the index list's capacity, the CPU resizes the index
    /// list buffer.
    index_list_size: u32,
    /// Greatest depth among the axis-aligned bounding boxes of the
    /// clusterable objects in view.
    farthest_z: f32,
}
366
367
impl ViewClusteringReadbackData {
368
/// Creates a new [`ViewClusteringReadbackData`] for a view.
369
///
370
/// The [`Self::z_slice_list_capacity`] and
371
/// [`Self::max_index_list_capacity`] are calculated based on the initial
372
/// capacities that the application set in the [`GlobalClusterGpuSettings`].
373
fn new(settings: &GlobalClusterGpuSettings) -> ViewClusteringReadbackData {
374
ViewClusteringReadbackData {
375
z_slice_list_capacity: settings.initial_z_slice_list_capacity,
376
max_index_list_capacity: settings.initial_index_list_capacity,
377
metadata_staging_pending_buffers: vec![],
378
metadata_staging_free_buffers: vec![],
379
last_frame_statistics: None,
380
}
381
}
382
383
fn get_or_create_staging_buffer(&mut self, render_device: &RenderDevice) -> Buffer {
384
let staging_buffer = self.metadata_staging_free_buffers.pop().unwrap_or_else(|| {
385
render_device.create_buffer(&BufferDescriptor {
386
label: Some("clustering metadata staging buffer"),
387
size: ClusterMetadata::min_size().into(),
388
usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
389
mapped_at_creation: false,
390
})
391
});
392
self.metadata_staging_pending_buffers
393
.push(staging_buffer.clone());
394
staging_buffer
395
}
396
397
/// Updates this [`ViewClusteringReadbackData`] with new information from
398
/// the given metadata read back from the GPU.
399
fn update_from_metadata(&mut self, gpu_clustering_metadata: &ClusterMetadata) {
400
// Schedule a resize of the Z slice list if the GPU overflowed.
401
if self.z_slice_list_capacity
402
< gpu_clustering_metadata.indirect_draw_params.instance_count as usize
403
{
404
let new_capacity = gpu_clustering_metadata
405
.indirect_draw_params
406
.instance_count
407
.next_power_of_two();
408
warn!(
409
"Resizing the view clustering Z slice list from a capacity of {0} elements to \
410
a capacity of {1} elements. The scene lighting may have been corrupted for a \
411
few frames. To avoid this, set the `gpu_clustering.z_slice_list_capacity` field \
412
on the `GlobalClusterSettings` resource to at least {1}.",
413
self.z_slice_list_capacity, new_capacity
414
);
415
self.z_slice_list_capacity = new_capacity as usize;
416
}
417
418
// Schedule a resize of the index slice list if the GPU overflowed.
419
if self.max_index_list_capacity < gpu_clustering_metadata.index_list_capacity as usize {
420
let new_capacity = gpu_clustering_metadata
421
.index_list_capacity
422
.next_power_of_two();
423
warn!(
424
"Resizing the view clustering index list from a capacity of {0} elements to a \
425
capacity of {1} elements. The scene lighting may have been corrupted for a \
426
few frames. To avoid this, set the `gpu_clustering.index_list_capacity` field on \
427
the `GlobalClusterSettings` resource to at least {1}.",
428
self.max_index_list_capacity, new_capacity
429
);
430
self.max_index_list_capacity = new_capacity as usize;
431
}
432
433
// Record the statistics we just received.
434
self.last_frame_statistics = Some(ViewClusteringLastFrameStatistics {
435
index_list_size: gpu_clustering_metadata.index_list_capacity,
436
farthest_z: f32::from_bits(sortable_u32_to_f32_bits(
437
gpu_clustering_metadata.farthest_z,
438
)),
439
});
440
}
441
}
442
443
/// Inverse of `f32_bits_to_sortable_u32` (from `cluster_z_slice.wgsl`):
/// converts a sortable u32 back into the raw bits of the original f32.
///
/// Encoding flips only the sign bit of positive floats and every bit of
/// negative floats. The sign bit of the *encoded* value is therefore the
/// inverse of the original sign, and tells us which of the two flips to undo.
fn sortable_u32_to_f32_bits(bits: u32) -> u32 {
    const SIGN_BIT: u32 = 0x8000_0000;

    if bits & SIGN_BIT != 0 {
        // Encoded sign bit set: the original was positive, so only the sign
        // bit was flipped.
        bits ^ SIGN_BIT
    } else {
        // Encoded sign bit clear: the original was negative, so all bits were
        // flipped.
        !bits
    }
}
454
455
/// Global data relating to the `cluster_raster.wgsl` shader.
456
#[derive(Resource)]
457
pub struct ClusteringRasterPipeline {
458
/// The bind group layout for group 0 for the count (first) pass.
459
pub bind_group_layout_count_pass: BindGroupLayoutDescriptor,
460
/// The bind group layout for group 0 for the populate (second) pass.
461
pub bind_group_layout_populate_pass: BindGroupLayoutDescriptor,
462
/// A handle to the shader itself.
463
pub shader: Handle<Shader>,
464
}
465
466
/// Global data relating to the `cluster_z_slice.wgsl` shader.
467
#[derive(Resource)]
468
pub struct ClusteringZSlicingPipeline {
469
/// The bind group layout for group 0.
470
pub bind_group_layout: BindGroupLayoutDescriptor,
471
/// A handle to the shader itself.
472
pub shader: Handle<Shader>,
473
}
474
475
/// Global data relating to the `cluster_allocate.wgsl` shader.
476
#[derive(Resource)]
477
pub struct ClusteringAllocationPipeline {
478
/// The bind group layout of group 0 for both shader invocations.
479
pub bind_group_layout: BindGroupLayoutDescriptor,
480
/// A handle to the `cluster_allocate.wgsl` shader itself.
481
pub shader: Handle<Shader>,
482
}
483
484
/// Key selecting a specialization of the `cluster_raster.wgsl` shader.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct ClusteringRasterPipelineKey {
    /// `true` selects the populate (second) pass; `false` selects the count
    /// (first) pass.
    populate_pass: bool,
}
492
493
/// Key selecting a specialization of the `cluster_allocate.wgsl` shader.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct ClusteringAllocationPipelineKey {
    /// `true` selects the global (second) pass; `false` selects the local
    /// (first) pass.
    global_pass: bool,
}
501
502
impl FromWorld for ClusteringRasterPipeline {
503
fn from_world(world: &mut World) -> Self {
504
let asset_server = world.resource::<AssetServer>();
505
506
let mut bind_group_layout_entries_count_pass = vec![
507
// @group(0) @binding(0) var<storage> z_slices:
508
// array<ClusterableObjectZSlice>;
509
binding_types::storage_buffer_read_only::<ClusterableObjectZSlice>(false)
510
.build(0, ShaderStages::VERTEX_FRAGMENT),
511
// @group(0) @binding(1) var<storage, read_write> index_lists:
512
// ClusterableObjectIndexLists;
513
binding_types::storage_buffer::<GpuClusterableObjectIndexListsStorage>(false)
514
.build(1, ShaderStages::FRAGMENT),
515
// @group(0) @binding(2) var<storage> clustered_lights:
516
// ClusteredLights;
517
binding_types::storage_buffer_read_only::<GpuClusteredLight>(false)
518
.build(2, ShaderStages::VERTEX_FRAGMENT),
519
// @group(0) @binding(3) var<uniform> light_probes: LightProbes;
520
binding_types::uniform_buffer::<LightProbesUniform>(true)
521
.build(3, ShaderStages::VERTEX_FRAGMENT),
522
// @group(0) @binding(4) var<storage> clustered_decals:
523
// ClusteredDecals;
524
binding_types::storage_buffer_read_only::<RenderClusteredDecal>(false)
525
.build(4, ShaderStages::VERTEX_FRAGMENT),
526
// @group(0) @binding(5) var<uniform> lights: Lights;
527
binding_types::uniform_buffer::<GpuLights>(true)
528
.build(5, ShaderStages::VERTEX_FRAGMENT),
529
// @group(0) @binding(6) var<uniform> view: View;
530
binding_types::uniform_buffer::<ViewUniform>(true)
531
.build(6, ShaderStages::VERTEX_FRAGMENT),
532
];
533
534
let mut bind_group_layout_entries_populate_pass =
535
bind_group_layout_entries_count_pass.clone();
536
537
// @group(0) @binding(7) var<storage, read_write> offsets_and_counts:
538
// ClusterOffsetsAndCountsAtomic;
539
bind_group_layout_entries_count_pass.push(
540
binding_types::storage_buffer::<GpuClusterOffsetsAndCountsStorage>(false)
541
.build(7, ShaderStages::FRAGMENT),
542
);
543
544
// @group(0) @binding(7) var<storage> offsets_and_counts:
545
// ClusterOffsetsAndCounts;
546
bind_group_layout_entries_populate_pass.push(
547
binding_types::storage_buffer_read_only::<GpuClusterOffsetsAndCountsStorage>(false)
548
.build(7, ShaderStages::FRAGMENT),
549
);
550
// @group(0) @binding(8) var<storage, read_write>
551
// scratchpad_offsets_and_counts: ClusterOffsetsAndCountsAtomic;
552
bind_group_layout_entries_populate_pass.push(
553
binding_types::storage_buffer::<GpuClusterOffsetsAndCountsStorage>(false)
554
.build(8, ShaderStages::FRAGMENT),
555
);
556
557
let bind_group_layout_count_pass = BindGroupLayoutDescriptor::new(
558
"clustering count pass bind group layout",
559
&bind_group_layout_entries_count_pass,
560
);
561
let bind_group_layout_populate_pass = BindGroupLayoutDescriptor::new(
562
"clustering populate pass bind group layout",
563
&bind_group_layout_entries_populate_pass,
564
);
565
566
let shader = load_embedded_asset!(asset_server, "cluster_raster.wgsl");
567
568
ClusteringRasterPipeline {
569
bind_group_layout_count_pass,
570
bind_group_layout_populate_pass,
571
shader,
572
}
573
}
574
}
575
576
impl SpecializedRenderPipeline for ClusteringRasterPipeline {
577
type Key = ClusteringRasterPipelineKey;
578
579
fn specialize(&self, key: Self::Key) -> RenderPipelineDescriptor {
580
let mut fragment_shader_defs = vec![];
581
if key.populate_pass {
582
fragment_shader_defs.push(ShaderDefVal::from("POPULATE_PASS"));
583
} else {
584
fragment_shader_defs.push(ShaderDefVal::from("COUNT_PASS"));
585
}
586
587
let mut vertex_shader_defs = fragment_shader_defs.clone();
588
vertex_shader_defs.push(ShaderDefVal::from("VERTEX_SHADER"));
589
590
RenderPipelineDescriptor {
591
label: if key.populate_pass {
592
Some("clustering populate pipeline".into())
593
} else {
594
Some("clustering count pipeline".into())
595
},
596
layout: vec![if key.populate_pass {
597
self.bind_group_layout_populate_pass.clone()
598
} else {
599
self.bind_group_layout_count_pass.clone()
600
}],
601
immediate_size: 0,
602
vertex: VertexState {
603
shader: self.shader.clone(),
604
shader_defs: vertex_shader_defs,
605
entry_point: Some("vertex_main".into()),
606
buffers: vec![VertexBufferLayout {
607
array_stride: size_of::<Vec2>() as u64,
608
step_mode: VertexStepMode::Vertex,
609
attributes: vec![VertexAttribute {
610
format: VertexFormat::Float32x2,
611
offset: 0,
612
shader_location: 0,
613
}],
614
}],
615
},
616
fragment: Some(FragmentState {
617
shader: self.shader.clone(),
618
shader_defs: fragment_shader_defs,
619
entry_point: Some("fragment_main".into()),
620
targets: vec![Some(ColorTargetState {
621
format: TextureFormat::R8Unorm,
622
blend: None,
623
// Disable writing.
624
write_mask: ColorWrites::empty(),
625
})],
626
}),
627
..default()
628
}
629
}
630
}
631
632
impl FromWorld for ClusteringZSlicingPipeline {
633
fn from_world(world: &mut World) -> Self {
634
let asset_server = world.resource::<AssetServer>();
635
636
let bind_group_layout = BindGroupLayoutDescriptor::new(
637
"clustering Z slicing pass bind group layout",
638
&BindGroupLayoutEntries::sequential(
639
ShaderStages::COMPUTE,
640
(
641
// @group(0) @binding(0) var<storage, read_write>
642
// cluster_metadata: ClusterMetadata;
643
binding_types::storage_buffer::<ClusterMetadata>(false),
644
// @group(0) @binding(1) var<storage, read_write> z_slices:
645
// array<ClusterableObjectZSlice>;
646
binding_types::storage_buffer::<ClusterableObjectZSlice>(false),
647
// @group(0) @binding(2) var<storage> clustered_lights:
648
// ClusteredLights;
649
binding_types::storage_buffer_read_only::<GpuClusteredLight>(false),
650
// @group(0) @binding(3) var<uniform> light_probes:
651
// LightProbes;
652
binding_types::uniform_buffer::<LightProbesUniform>(true),
653
// @group(0) @binding(4) var<storage> clustered_decals:
654
// ClusteredDecals;
655
binding_types::storage_buffer_read_only::<RenderClusteredDecal>(false),
656
// @group(0) @binding(5) var<uniform> lights: Lights;
657
binding_types::uniform_buffer::<GpuLights>(true),
658
// @group(0) @binding(6) var<uniform> view: View;
659
binding_types::uniform_buffer::<ViewUniform>(true),
660
),
661
),
662
);
663
664
let shader = load_embedded_asset!(asset_server, "cluster_z_slice.wgsl");
665
666
ClusteringZSlicingPipeline {
667
bind_group_layout,
668
shader,
669
}
670
}
671
}
672
673
impl SpecializedComputePipeline for ClusteringZSlicingPipeline {
674
type Key = ();
675
676
fn specialize(&self, _: Self::Key) -> ComputePipelineDescriptor {
677
ComputePipelineDescriptor {
678
label: Some("clustering Z slicing pipeline".into()),
679
layout: vec![self.bind_group_layout.clone()],
680
shader: self.shader.clone(),
681
shader_defs: vec![],
682
entry_point: Some("z_slice_main".into()),
683
zero_initialize_workgroup_memory: true,
684
..default()
685
}
686
}
687
}
688
689
impl FromWorld for ClusteringAllocationPipeline {
690
fn from_world(world: &mut World) -> Self {
691
let asset_server = world.resource::<AssetServer>();
692
693
let bind_group_layout = BindGroupLayoutDescriptor::new(
694
"clustering allocation pass bind group layout",
695
&BindGroupLayoutEntries::sequential(
696
ShaderStages::COMPUTE,
697
(
698
// @group(0) @binding(0) var<storage, read_write>
699
// offsets_and_counts: ClusterOffsetsAndCounts;
700
binding_types::storage_buffer::<GpuClusterOffsetsAndCountsStorage>(false),
701
// @group(0) @binding(1) var<uniform> lights: Lights;
702
binding_types::uniform_buffer::<GpuLights>(true),
703
// @group(0) @binding(2) var<storage, read_write>
704
// clustering_metadata: ClusterMetadata;
705
binding_types::storage_buffer::<ClusterMetadata>(false),
706
// @group(0) @binding(3) var<storage, read_write>
707
// scratchpad_offsets_and_counts: ClusterOffsetsAndCounts;
708
binding_types::storage_buffer::<GpuClusterOffsetsAndCountsStorage>(false),
709
),
710
),
711
);
712
713
let shader = load_embedded_asset!(asset_server, "cluster_allocate.wgsl");
714
715
ClusteringAllocationPipeline {
716
bind_group_layout,
717
shader,
718
}
719
}
720
}
721
722
impl SpecializedComputePipeline for ClusteringAllocationPipeline {
723
type Key = ClusteringAllocationPipelineKey;
724
725
fn specialize(&self, key: Self::Key) -> ComputePipelineDescriptor {
726
ComputePipelineDescriptor {
727
label: if key.global_pass {
728
Some("clustering allocation global pass pipeline".into())
729
} else {
730
Some("clustering allocation local pass pipeline".into())
731
},
732
layout: vec![self.bind_group_layout.clone()],
733
shader: self.shader.clone(),
734
shader_defs: vec![],
735
entry_point: if key.global_pass {
736
Some("allocate_global_main".into())
737
} else {
738
Some("allocate_local_main".into())
739
},
740
zero_initialize_workgroup_memory: true,
741
..default()
742
}
743
}
744
}
745
746
/// The vertices of the quad that we rasterize to represent a clusterable object
747
/// Z slice.
748
static GPU_CLUSTERING_VERTICES: [Vec2; 4] = [
749
vec2(0.0, 0.0),
750
vec2(1.0, 0.0),
751
vec2(0.0, 1.0),
752
vec2(1.0, 1.0),
753
];
754
755
/// Index list for the quad rasterized for each clusterable object Z slice:
/// two triangles, `(0, 1, 2)` and `(1, 3, 2)`.
static GPU_CLUSTERING_INDICES: [u32; 6] = [0, 1, 2, 1, 3, 2];
758
759
/// The buffers that store the vertices and indices for the quad that we
760
/// rasterize to represent each clusterable object Z slice.
761
#[derive(Resource)]
762
struct GpuClusteringMeshBuffers {
763
/// The vertex buffer containing the 4 vertices of a quad.
764
vertex_buffer: Buffer,
765
/// The index buffer containing the 6 indices of a quad.
766
index_buffer: Buffer,
767
}
768
769
impl FromWorld for GpuClusteringMeshBuffers {
770
fn from_world(world: &mut World) -> Self {
771
let render_device = world.resource::<RenderDevice>();
772
GpuClusteringMeshBuffers {
773
vertex_buffer: render_device.create_buffer_with_data(&BufferInitDescriptor {
774
label: Some("GPU clustering vertex buffer"),
775
contents: bytemuck::bytes_of(&GPU_CLUSTERING_VERTICES),
776
usage: BufferUsages::COPY_DST | BufferUsages::VERTEX,
777
}),
778
index_buffer: render_device.create_buffer_with_data(&BufferInitDescriptor {
779
label: Some("GPU clustering index buffer"),
780
contents: bytemuck::bytes_of(&GPU_CLUSTERING_INDICES),
781
usage: BufferUsages::COPY_DST | BufferUsages::INDEX,
782
}),
783
}
784
}
785
}
786
787
/// The IDs of each pipeline used for GPU clustering for a single view.
788
#[derive(Component)]
789
pub struct ViewGpuClusteringPipelineIds {
790
/// The compute pipeline for the Z slicing compute pass (pass 1).
791
clustering_z_slicing_pipeline_id: CachedComputePipelineId,
792
/// The compute pipeline for the count raster pass (pass 2).
793
clustering_count_pipeline_id: CachedRenderPipelineId,
794
/// The compute pipeline for the local allocation compute pass (pass 3).
795
clustering_allocation_local_pipeline_id: CachedComputePipelineId,
796
/// The compute pipeline for the global allocation compute pass (pass 4).
797
clustering_allocation_global_pipeline_id: CachedComputePipelineId,
798
/// The compute pipeline for the populate raster pass (pass 5).
799
clustering_populate_pipeline_id: CachedRenderPipelineId,
800
}
801
802
/// The render command building system that performs GPU clustering on each
803
/// view.
804
fn cluster_on_gpu(
805
view_query: ViewQuery<(
806
&MainEntity,
807
Option<&ViewGpuClusteringBuffers>,
808
Option<&ViewGpuClusteringPipelineIds>,
809
Option<&ViewClusteringDummyTexture>,
810
Option<&ViewClusteringBindGroups>,
811
Option<&ViewLightProbesUniformOffset>,
812
Option<&ViewLightsUniformOffset>,
813
Option<&ViewUniformOffset>,
814
Option<&ExtractedClusterConfig>,
815
)>,
816
pipeline_cache: Res<PipelineCache>,
817
clustering_mesh_buffers: Res<GpuClusteringMeshBuffers>,
818
render_view_clustering_readback_data: Res<RenderViewClusteringReadbackData>,
819
mut render_context: RenderContext,
820
) {
821
let (
822
view_main_entity,
823
Some(view_gpu_clustering_buffers),
824
Some(view_gpu_clustering_pipeline_ids),
825
Some(view_clustering_dummy_texture),
826
Some(view_clustering_bind_groups),
827
Some(view_light_probes_uniform_offset),
828
Some(view_lights_uniform_offset),
829
Some(view_uniform_offset),
830
Some(extracted_cluster_config),
831
) = view_query.into_inner()
832
else {
833
trace!("Failed to match view query; not clustering");
834
return;
835
};
836
837
let Some(view_clustering_readback_data) = render_view_clustering_readback_data
838
.views
839
.get(view_main_entity)
840
else {
841
return;
842
};
843
844
let (
845
Some(clustering_z_slicing_compute_pipeline),
846
Some(clustering_count_render_pipeline),
847
Some(clustering_allocate_local_compute_pipeline),
848
Some(clustering_allocate_global_compute_pipeline),
849
Some(clustering_populate_render_pipeline),
850
) = (
851
pipeline_cache.get_compute_pipeline(
852
view_gpu_clustering_pipeline_ids.clustering_z_slicing_pipeline_id,
853
),
854
pipeline_cache
855
.get_render_pipeline(view_gpu_clustering_pipeline_ids.clustering_count_pipeline_id),
856
pipeline_cache.get_compute_pipeline(
857
view_gpu_clustering_pipeline_ids.clustering_allocation_local_pipeline_id,
858
),
859
pipeline_cache.get_compute_pipeline(
860
view_gpu_clustering_pipeline_ids.clustering_allocation_global_pipeline_id,
861
),
862
pipeline_cache
863
.get_render_pipeline(view_gpu_clustering_pipeline_ids.clustering_populate_pipeline_id),
864
)
865
else {
866
trace!("One or more clustering pipelines not found; not clustering");
867
return;
868
};
869
870
let diagnostics = render_context.diagnostic_recorder();
871
let diagnostics = diagnostics.as_deref();
872
let time_span = diagnostics.time_span(render_context.command_encoder(), "clustering");
873
874
// Fetch a staging buffer for us to perform readback with.
875
let Ok(staging_buffer) = view_clustering_readback_data
876
.lock()
877
.map(|mut data| data.get_or_create_staging_buffer(render_context.render_device()))
878
else {
879
error!("Failed to fetch staging buffer; not clustering.");
880
return;
881
};
882
883
let command_encoder = render_context.command_encoder();
884
command_encoder.push_debug_group("clustering");
885
886
// Pass 1: Z slicing.
887
run_clustering_z_slicing_pass(
888
command_encoder,
889
clustering_z_slicing_compute_pipeline,
890
&view_clustering_bind_groups.clustering_bind_group_z_slicing_pass,
891
&view_gpu_clustering_buffers.cluster_metadata_buffer,
892
view_light_probes_uniform_offset,
893
view_lights_uniform_offset,
894
view_uniform_offset,
895
);
896
897
// Pass 2: Count raster.
898
run_clustering_rasterization_pass(
899
command_encoder,
900
clustering_count_render_pipeline,
901
&view_clustering_bind_groups.clustering_bind_group_count_pass,
902
view_gpu_clustering_buffers,
903
view_light_probes_uniform_offset,
904
view_lights_uniform_offset,
905
view_uniform_offset,
906
view_clustering_dummy_texture,
907
extracted_cluster_config,
908
&clustering_mesh_buffers,
909
false,
910
);
911
912
// Pass 3: local allocation.
913
run_clustering_allocation_pass(
914
command_encoder,
915
clustering_allocate_local_compute_pipeline,
916
view_clustering_bind_groups,
917
view_lights_uniform_offset,
918
extracted_cluster_config,
919
false,
920
);
921
922
// Pass 4: global allocation.
923
run_clustering_allocation_pass(
924
command_encoder,
925
clustering_allocate_global_compute_pipeline,
926
view_clustering_bind_groups,
927
view_lights_uniform_offset,
928
extracted_cluster_config,
929
true,
930
);
931
932
// Pass 5: populate raster.
933
run_clustering_rasterization_pass(
934
command_encoder,
935
clustering_populate_render_pipeline,
936
&view_clustering_bind_groups.clustering_bind_group_populate_pass,
937
view_gpu_clustering_buffers,
938
view_light_probes_uniform_offset,
939
view_lights_uniform_offset,
940
view_uniform_offset,
941
view_clustering_dummy_texture,
942
extracted_cluster_config,
943
&clustering_mesh_buffers,
944
true,
945
);
946
947
// Schedule a readback of the readback data.
948
schedule_readback_staging(
949
command_encoder,
950
view_gpu_clustering_buffers,
951
&staging_buffer,
952
);
953
schedule_readback_buffer_map(
954
command_encoder,
955
view_clustering_readback_data.clone(),
956
&staging_buffer,
957
);
958
959
command_encoder.pop_debug_group();
960
time_span.end(render_context.command_encoder());
961
962
/// Runs the Z slicing pass (step 1).
963
fn run_clustering_z_slicing_pass(
964
command_encoder: &mut CommandEncoder,
965
clustering_z_slicing_pipeline: &ComputePipeline,
966
clustering_z_slicing_bind_group: &BindGroup,
967
clustering_cluster_metadata_buffer: &StorageBuffer<ClusterMetadata>,
968
view_light_probes_uniform_offset: &ViewLightProbesUniformOffset,
969
view_lights_uniform_offset: &ViewLightsUniformOffset,
970
view_uniform_offset: &ViewUniformOffset,
971
) {
972
let mut compute_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor {
973
label: Some("clustering Z slicing pass"),
974
..default()
975
});
976
compute_pass.set_pipeline(clustering_z_slicing_pipeline);
977
compute_pass.set_bind_group(
978
0,
979
Some(&**clustering_z_slicing_bind_group),
980
&[
981
**view_light_probes_uniform_offset,
982
view_lights_uniform_offset.offset,
983
view_uniform_offset.offset,
984
],
985
);
986
987
let clustering_cluster_metadata = clustering_cluster_metadata_buffer.get();
988
let clusterable_object_count = clustering_cluster_metadata.clustered_light_count
989
+ clustering_cluster_metadata.reflection_probe_count
990
+ clustering_cluster_metadata.irradiance_volume_count
991
+ clustering_cluster_metadata.decal_count;
992
993
let workgroup_count = clusterable_object_count.div_ceil(Z_SLICING_WORKGROUP_SIZE);
994
compute_pass.dispatch_workgroups(workgroup_count, 1, 1);
995
}
996
997
/// Runs either the count or populate rasterization pass (steps 2 and 5
998
/// respectively) for a single view.
999
///
1000
/// The `populate_pass` parameter specifies whether this is a count pass
1001
/// (false) or a populate pass (true).
1002
fn run_clustering_rasterization_pass(
1003
command_encoder: &mut CommandEncoder,
1004
clustering_render_pipeline: &RenderPipeline,
1005
clustering_bind_group: &BindGroup,
1006
view_gpu_clustering_buffers: &ViewGpuClusteringBuffers,
1007
view_light_probes_uniform_offset: &ViewLightProbesUniformOffset,
1008
view_lights_uniform_offset: &ViewLightsUniformOffset,
1009
view_uniform_offset: &ViewUniformOffset,
1010
view_clustering_dummy_texture: &ViewClusteringDummyTexture,
1011
extracted_cluster_config: &ExtractedClusterConfig,
1012
clustering_mesh_buffers: &GpuClusteringMeshBuffers,
1013
populate_pass: bool,
1014
) {
1015
let Some(cluster_metadata_buffer) =
1016
view_gpu_clustering_buffers.cluster_metadata_buffer.buffer()
1017
else {
1018
error!("Z slicing metadata buffer was never uploaded");
1019
return;
1020
};
1021
1022
let mut render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor {
1023
label: if populate_pass {
1024
Some("clustering populate pass")
1025
} else {
1026
Some("clustering count pass")
1027
},
1028
color_attachments: &[Some(RenderPassColorAttachment {
1029
view: &view_clustering_dummy_texture.default_view,
1030
depth_slice: None,
1031
resolve_target: None,
1032
ops: Operations {
1033
// Do nothing to the color buffer. We only care about using
1034
// the rasterizer for fragment scheduling; we're not going
1035
// to actually paint any pixels.
1036
load: LoadOp::Clear(Color::BLACK.to_linear().into()),
1037
store: StoreOp::Discard,
1038
},
1039
})],
1040
depth_stencil_attachment: None,
1041
..default()
1042
});
1043
render_pass.set_pipeline(clustering_render_pipeline);
1044
render_pass.set_bind_group(
1045
0,
1046
Some(&**clustering_bind_group),
1047
&[
1048
**view_light_probes_uniform_offset,
1049
view_lights_uniform_offset.offset,
1050
view_uniform_offset.offset,
1051
],
1052
);
1053
1054
// Since we rounded up the dummy texture size to prevent thrashing, we
1055
// need to use an explicit viewport here so that we only render to the
1056
// correct portion.
1057
render_pass.set_viewport(
1058
0.0,
1059
0.0,
1060
extracted_cluster_config.dimensions.x as f32,
1061
extracted_cluster_config.dimensions.y as f32,
1062
0.0,
1063
1.0,
1064
);
1065
1066
render_pass.set_vertex_buffer(0, *clustering_mesh_buffers.vertex_buffer.slice(..));
1067
render_pass.set_index_buffer(
1068
*clustering_mesh_buffers.index_buffer.slice(..),
1069
IndexFormat::Uint32,
1070
);
1071
render_pass.draw_indexed_indirect(cluster_metadata_buffer, 0);
1072
}
1073
1074
/// Runs either the local or global allocation pass (steps 3 and 4
1075
/// respectively) for GPU clustering for a single view.
1076
///
1077
/// The `global_pass` parameter specifies whether this is the local pass
1078
/// (false) or the global pass (true).
1079
fn run_clustering_allocation_pass(
1080
command_encoder: &mut CommandEncoder,
1081
clustering_allocation_pipeline: &ComputePipeline,
1082
view_clustering_bind_groups: &ViewClusteringBindGroups,
1083
view_lights_uniform_offset: &ViewLightsUniformOffset,
1084
extracted_cluster_config: &ExtractedClusterConfig,
1085
global_pass: bool,
1086
) {
1087
let mut compute_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor {
1088
label: if global_pass {
1089
Some("clustering allocation global pass")
1090
} else {
1091
Some("clustering allocation local pass")
1092
},
1093
..default()
1094
});
1095
compute_pass.set_pipeline(clustering_allocation_pipeline);
1096
compute_pass.set_bind_group(
1097
0,
1098
Some(&*view_clustering_bind_groups.clustering_bind_group_allocate_pass),
1099
&[view_lights_uniform_offset.offset],
1100
);
1101
1102
// The global pass has only one workgroup because it runs sequentially
1103
// over chunks, while the local pass has a number of workgroups equal to
1104
// the number of chunks because it runs in parallel over them.
1105
let workgroup_count = if global_pass {
1106
1
1107
} else {
1108
extracted_cluster_config
1109
.dimensions
1110
.element_product()
1111
.div_ceil(ALLOCATION_WORKGROUP_SIZE)
1112
};
1113
compute_pass.dispatch_workgroups(workgroup_count, 1, 1);
1114
}
1115
1116
/// Schedules the staging part of readback of the data from GPU.
1117
fn schedule_readback_staging(
1118
command_encoder: &mut CommandEncoder,
1119
view_gpu_clustering_buffers: &ViewGpuClusteringBuffers,
1120
staging_buffer: &Buffer,
1121
) {
1122
match view_gpu_clustering_buffers.cluster_metadata_buffer.buffer() {
1123
None => {
1124
// This should never happen. It shouldn't have been possible to
1125
// create the necessary bind groups without this buffer's being
1126
// present.
1127
error!("No clustering Z slicing metadata buffer found");
1128
}
1129
Some(metadata_buffer) => {
1130
// Copy the metadata buffer to the staging buffer so we can read
1131
// it back.
1132
command_encoder.copy_buffer_to_buffer(
1133
metadata_buffer,
1134
0,
1135
staging_buffer,
1136
0,
1137
Some(u64::from(ClusterMetadata::min_size())),
1138
);
1139
}
1140
}
1141
}
1142
1143
/// Schedules the buffer map operation part of the readback of the data from
1144
/// GPU.
1145
fn schedule_readback_buffer_map(
1146
command_encoder: &mut CommandEncoder,
1147
view_clustering_readback_data: Arc<Mutex<ViewClusteringReadbackData>>,
1148
staging_buffer: &Buffer,
1149
) {
1150
let captured_staging_buffer = staging_buffer.clone();
1151
command_encoder.map_buffer_on_submit(staging_buffer, MapMode::Read, .., move |result| {
1152
if result.is_err() {
1153
return;
1154
};
1155
1156
let mut view_clustering_readback_data = view_clustering_readback_data.lock().unwrap();
1157
1158
{
1159
// Use `encase` to populate a `ClusterMetadata`.
1160
let buffer_view = captured_staging_buffer.slice(..).get_mapped_range();
1161
let Ok(mut buffer_reader) =
1162
Reader::new::<ClusterMetadata>(buffer_view[..].to_vec(), 0)
1163
else {
1164
return;
1165
};
1166
let gpu_clustering_metadata = ClusterMetadata::create_from(&mut buffer_reader);
1167
1168
// Update readback data.
1169
view_clustering_readback_data.update_from_metadata(&gpu_clustering_metadata);
1170
}
1171
1172
// `wgpu` will error if we didn't drop the buffer view at this
1173
// point, which is why we use a separate block above.
1174
captured_staging_buffer.unmap();
1175
1176
// Recycle the staging buffer.
1177
view_clustering_readback_data
1178
.metadata_staging_free_buffers
1179
.push(captured_staging_buffer);
1180
});
1181
}
1182
}
1183
1184
/// Prepares bind groups for each of the shaders involved in GPU clustering.
1185
fn prepare_clustering_bind_groups(
1186
mut commands: Commands,
1187
views_query: Query<
1188
(Entity, &ViewGpuClusteringBuffers, &ViewClusterBindings),
1189
With<ExtractedView>,
1190
>,
1191
render_device: Res<RenderDevice>,
1192
clustering_z_slicing_pipeline: Res<ClusteringZSlicingPipeline>,
1193
clustering_raster_pipeline: Res<ClusteringRasterPipeline>,
1194
clustering_allocation_pipeline: Res<ClusteringAllocationPipeline>,
1195
global_clusterable_object_meta: Res<GlobalClusterableObjectMeta>,
1196
pipeline_cache: Res<PipelineCache>,
1197
light_probes_buffer: Res<LightProbesBuffer>,
1198
decals_buffer: Res<DecalsBuffer>,
1199
light_meta: Res<LightMeta>,
1200
view_uniforms: Res<ViewUniforms>,
1201
) {
1202
let (
1203
Some(gpu_clustered_lights_binding),
1204
Some(light_probes_binding),
1205
Some(decals_buffer),
1206
Some(lights_binding),
1207
Some(view_binding),
1208
) = (
1209
global_clusterable_object_meta
1210
.gpu_clustered_lights
1211
.binding(),
1212
light_probes_buffer.binding(),
1213
decals_buffer.buffer(),
1214
light_meta.view_gpu_lights.binding(),
1215
view_uniforms.uniforms.binding(),
1216
)
1217
else {
1218
return;
1219
};
1220
1221
// Create separate bind groups for each view.
1222
for (view_entity, view_gpu_clustering_buffers, view_cluster_bindings) in &views_query {
1223
let ViewClusterBuffers::Storage {
1224
clusterable_object_index_lists: ref maybe_clusterable_object_index_lists,
1225
cluster_offsets_and_counts: ref maybe_cluster_offsets_and_counts,
1226
} = view_cluster_bindings.buffers
1227
else {
1228
continue;
1229
};
1230
1231
let (
1232
Some(z_slices_buffer),
1233
Some(cluster_metadata_buffer),
1234
Some(scratchpad_offsets_and_counts_buffer),
1235
Some(clusterable_object_index_lists),
1236
Some(cluster_offsets_and_counts),
1237
) = (
1238
view_gpu_clustering_buffers.z_slices_buffer.buffer(),
1239
view_gpu_clustering_buffers.cluster_metadata_buffer.buffer(),
1240
view_gpu_clustering_buffers
1241
.scratchpad_offsets_and_counts_buffer
1242
.buffer(),
1243
maybe_clusterable_object_index_lists.buffer(),
1244
maybe_cluster_offsets_and_counts.buffer(),
1245
)
1246
else {
1247
continue;
1248
};
1249
1250
let clustering_bind_group_entries_z_slicing_pass = [
1251
// @group(0) @binding(0) var<storage, read_write>
1252
// cluster_metadata: ClusterMetadata;
1253
BindGroupEntry {
1254
binding: 0,
1255
resource: cluster_metadata_buffer.as_entire_binding(),
1256
},
1257
// @group(0) @binding(1) var<storage, read_write> z_slices:
1258
// array<ClusterableObjectZSlice>;
1259
BindGroupEntry {
1260
binding: 1,
1261
resource: z_slices_buffer.as_entire_binding(),
1262
},
1263
// @group(0) @binding(2) var<storage> clustered_lights:
1264
// ClusteredLights;
1265
BindGroupEntry {
1266
binding: 2,
1267
resource: gpu_clustered_lights_binding.clone(),
1268
},
1269
// @group(0) @binding(3) var<uniform> light_probes: LightProbes;
1270
BindGroupEntry {
1271
binding: 3,
1272
resource: light_probes_binding.clone(),
1273
},
1274
// @group(0) @binding(4) var<storage> clustered_decals:
1275
// ClusteredDecals;
1276
BindGroupEntry {
1277
binding: 4,
1278
resource: decals_buffer.as_entire_binding(),
1279
},
1280
// @group(0) @binding(5) var<uniform> lights: Lights;
1281
BindGroupEntry {
1282
binding: 5,
1283
resource: lights_binding.clone(),
1284
},
1285
// @group(0) @binding(6) var<uniform> view: View;
1286
BindGroupEntry {
1287
binding: 6,
1288
resource: view_binding.clone(),
1289
},
1290
];
1291
1292
let mut clustering_bind_group_entries_count_pass: Vec<BindGroupEntry> = vec![
1293
// @group(0) @binding(0) var<storage> z_slices:
1294
// array<ClusterableObjectZSlice>;
1295
BindGroupEntry {
1296
binding: 0,
1297
resource: z_slices_buffer.as_entire_binding(),
1298
},
1299
// @group(0) @binding(1) var<storage, read_write> index_lists:
1300
// ClusterableObjectIndexLists;
1301
BindGroupEntry {
1302
binding: 1,
1303
resource: clusterable_object_index_lists.as_entire_binding(),
1304
},
1305
// @group(0) @binding(2) var<storage> clustered_lights:
1306
// ClusteredLights;
1307
BindGroupEntry {
1308
binding: 2,
1309
resource: gpu_clustered_lights_binding.clone(),
1310
},
1311
// @group(0) @binding(3) var<uniform> light_probes: LightProbes;
1312
BindGroupEntry {
1313
binding: 3,
1314
resource: light_probes_binding.clone(),
1315
},
1316
// @group(0) @binding(4) var<storage> clustered_decals:
1317
// ClusteredDecals;
1318
BindGroupEntry {
1319
binding: 4,
1320
resource: decals_buffer.as_entire_binding(),
1321
},
1322
// @group(0) @binding(5) var<uniform> lights: Lights;
1323
BindGroupEntry {
1324
binding: 5,
1325
resource: lights_binding.clone(),
1326
},
1327
// @group(0) @binding(6) var<uniform> view: View;
1328
BindGroupEntry {
1329
binding: 6,
1330
resource: view_binding.clone(),
1331
},
1332
];
1333
1334
let mut clustering_bind_group_entries_populate_pass =
1335
clustering_bind_group_entries_count_pass.clone();
1336
1337
clustering_bind_group_entries_count_pass.push(
1338
// @group(0) @binding(7) var<storage, read_write>
1339
// offsets_and_counts: ClusterOffsetsAndCounts;
1340
BindGroupEntry {
1341
binding: 7,
1342
resource: cluster_offsets_and_counts.as_entire_binding(),
1343
},
1344
);
1345
1346
clustering_bind_group_entries_populate_pass.push(
1347
// @group(0) @binding(7) var<storage>
1348
// offsets_and_counts: ClusterOffsetsAndCounts;
1349
BindGroupEntry {
1350
binding: 7,
1351
resource: cluster_offsets_and_counts.as_entire_binding(),
1352
},
1353
);
1354
clustering_bind_group_entries_populate_pass.push(
1355
// @group(0) @binding(8) var<storage, read_write>
1356
// scratchpad_offsets_and_counts: ClusterOffsetsAndCountsAtomic;
1357
BindGroupEntry {
1358
binding: 8,
1359
resource: scratchpad_offsets_and_counts_buffer.as_entire_binding(),
1360
},
1361
);
1362
1363
let clustering_bind_group_entries_allocation_pass: [BindGroupEntry; _] = [
1364
// @group(0) @binding(0) var<storage, read_write>
1365
// offsets_and_counts: ClusterOffsetsAndCounts;
1366
BindGroupEntry {
1367
binding: 0,
1368
resource: cluster_offsets_and_counts.as_entire_binding(),
1369
},
1370
// @group(0) @binding(1) var<uniform> lights: Lights;
1371
BindGroupEntry {
1372
binding: 1,
1373
resource: lights_binding.clone(),
1374
},
1375
// @group(0) @binding(2) var<storage, read_write>
1376
// clustering_metadata: ClusterMetadata;
1377
BindGroupEntry {
1378
binding: 2,
1379
resource: cluster_metadata_buffer.as_entire_binding(),
1380
},
1381
// @group(0) @binding(3) var<storage, read_write>
1382
// scratchpad_offsets_and_counts: ClusterOffsetsAndCounts;
1383
BindGroupEntry {
1384
binding: 3,
1385
resource: scratchpad_offsets_and_counts_buffer.as_entire_binding(),
1386
},
1387
];
1388
1389
let clustering_bind_group_z_slicing_pass = render_device.create_bind_group(
1390
"clustering Z slicing pass bind group",
1391
&pipeline_cache.get_bind_group_layout(&clustering_z_slicing_pipeline.bind_group_layout),
1392
&clustering_bind_group_entries_z_slicing_pass,
1393
);
1394
let clustering_bind_group_count_pass = render_device.create_bind_group(
1395
"clustering count pass bind group",
1396
&pipeline_cache
1397
.get_bind_group_layout(&clustering_raster_pipeline.bind_group_layout_count_pass),
1398
&clustering_bind_group_entries_count_pass,
1399
);
1400
let clustering_bind_group_allocate_pass = render_device.create_bind_group(
1401
"clustering allocate pass bind group",
1402
&pipeline_cache
1403
.get_bind_group_layout(&clustering_allocation_pipeline.bind_group_layout),
1404
&clustering_bind_group_entries_allocation_pass,
1405
);
1406
let clustering_bind_group_populate_pass = render_device.create_bind_group(
1407
"clustering populate pass bind group",
1408
&pipeline_cache
1409
.get_bind_group_layout(&clustering_raster_pipeline.bind_group_layout_populate_pass),
1410
&clustering_bind_group_entries_populate_pass,
1411
);
1412
1413
commands
1414
.entity(view_entity)
1415
.insert(ViewClusteringBindGroups {
1416
clustering_bind_group_z_slicing_pass,
1417
clustering_bind_group_count_pass,
1418
clustering_bind_group_allocate_pass,
1419
clustering_bind_group_populate_pass,
1420
});
1421
}
1422
}
1423
1424
/// Creates the dummy textures that we use to establish a viewport for the
1425
/// rasterization phases of GPU clustering.
1426
///
1427
/// We don't actually write to these textures, but they need to exist so that a
1428
/// viewport of the appropriate size can be set.
1429
fn prepare_cluster_dummy_textures(
1430
mut commands: Commands,
1431
views_query: Query<(Entity, &ExtractedClusterConfig), With<ExtractedView>>,
1432
render_device: Res<RenderDevice>,
1433
mut texture_cache: ResMut<TextureCache>,
1434
) {
1435
for (view_entity, view_cluster_config) in &views_query {
1436
let dummy_texture = texture_cache.get(
1437
&render_device,
1438
TextureDescriptor {
1439
label: Some("clustering dummy texture"),
1440
// We round these up to the nearest multiple of 32 to guard
1441
// against the risk of thrashing between different sizes,
1442
// especially if the auto-resize feature is on.
1443
size: Extent3d {
1444
width: view_cluster_config.dimensions.x.next_multiple_of(32),
1445
height: view_cluster_config.dimensions.y.next_multiple_of(32),
1446
depth_or_array_layers: 1,
1447
},
1448
mip_level_count: 1,
1449
sample_count: 1,
1450
dimension: TextureDimension::D2,
1451
format: TextureFormat::R8Unorm,
1452
usage: TextureUsages::RENDER_ATTACHMENT | TextureUsages::COPY_DST,
1453
view_formats: &[],
1454
},
1455
);
1456
commands
1457
.entity(view_entity)
1458
.insert(ViewClusteringDummyTexture(dummy_texture));
1459
}
1460
}
1461
1462
/// Prepares the compute and raster pipelines for the various shader invocations
1463
/// in GPU clustering for each view.
1464
fn prepare_clustering_pipelines(
1465
mut commands: Commands,
1466
views_query: Query<Entity, With<ExtractedView>>,
1467
pipeline_cache: Res<PipelineCache>,
1468
mut clustering_z_slicing_pipelines: ResMut<
1469
SpecializedComputePipelines<ClusteringZSlicingPipeline>,
1470
>,
1471
mut clustering_raster_pipelines: ResMut<SpecializedRenderPipelines<ClusteringRasterPipeline>>,
1472
mut clustering_allocation_pipelines: ResMut<
1473
SpecializedComputePipelines<ClusteringAllocationPipeline>,
1474
>,
1475
clustering_z_slicing_pipeline: Res<ClusteringZSlicingPipeline>,
1476
clustering_raster_pipeline: Res<ClusteringRasterPipeline>,
1477
clustering_allocation_pipeline: Res<ClusteringAllocationPipeline>,
1478
) {
1479
for view_entity in &views_query {
1480
let clustering_z_slicing_pipeline_id = clustering_z_slicing_pipelines.specialize(
1481
&pipeline_cache,
1482
&clustering_z_slicing_pipeline,
1483
(),
1484
);
1485
let clustering_count_pipeline_id = clustering_raster_pipelines.specialize(
1486
&pipeline_cache,
1487
&clustering_raster_pipeline,
1488
ClusteringRasterPipelineKey {
1489
populate_pass: false,
1490
},
1491
);
1492
let clustering_local_allocation_pipeline_id = clustering_allocation_pipelines.specialize(
1493
&pipeline_cache,
1494
&clustering_allocation_pipeline,
1495
ClusteringAllocationPipelineKey { global_pass: false },
1496
);
1497
let clustering_global_allocation_pipeline_id = clustering_allocation_pipelines.specialize(
1498
&pipeline_cache,
1499
&clustering_allocation_pipeline,
1500
ClusteringAllocationPipelineKey { global_pass: true },
1501
);
1502
let clustering_populate_pipeline_id = clustering_raster_pipelines.specialize(
1503
&pipeline_cache,
1504
&clustering_raster_pipeline,
1505
ClusteringRasterPipelineKey {
1506
populate_pass: true,
1507
},
1508
);
1509
1510
commands
1511
.entity(view_entity)
1512
.insert(ViewGpuClusteringPipelineIds {
1513
clustering_z_slicing_pipeline_id,
1514
clustering_count_pipeline_id,
1515
clustering_allocation_local_pipeline_id: clustering_local_allocation_pipeline_id,
1516
clustering_allocation_global_pipeline_id: clustering_global_allocation_pipeline_id,
1517
clustering_populate_pipeline_id,
1518
});
1519
}
1520
}
1521
1522
/// Uploads the buffers needed to perform GPU clustering to the GPU.
1523
fn upload_view_gpu_clustering_buffers(
1524
mut views_query: Query<&mut ViewGpuClusteringBuffers>,
1525
render_device: Res<RenderDevice>,
1526
render_queue: Res<RenderQueue>,
1527
) {
1528
for mut view_gpu_clustering_buffers in &mut views_query {
1529
view_gpu_clustering_buffers
1530
.z_slices_buffer
1531
.write_buffer(&render_device);
1532
1533
view_gpu_clustering_buffers
1534
.cluster_metadata_buffer
1535
.write_buffer(&render_device, &render_queue);
1536
1537
// Make sure the scratchpad buffer is nonempty, and upload it.
1538
if view_gpu_clustering_buffers
1539
.scratchpad_offsets_and_counts_buffer
1540
.is_empty()
1541
{
1542
view_gpu_clustering_buffers
1543
.scratchpad_offsets_and_counts_buffer
1544
.add();
1545
}
1546
view_gpu_clustering_buffers
1547
.scratchpad_offsets_and_counts_buffer
1548
.write_buffer(&render_device);
1549
}
1550
}
1551
1552
/// Extracts information needed for GPU clustering from each view in the render
1553
/// world, and synchronizes statistics back from the render world to the main
1554
/// world if needed.
1555
pub fn extract_clusters_for_gpu_clustering(
1556
mut commands: Commands,
1557
mut main_world: ResMut<MainWorld>,
1558
render_view_clustering_index_list_sizes: Res<RenderViewClusteringReadbackData>,
1559
) {
1560
let mut views = main_world.query::<(Entity, RenderEntity, &mut Clusters, &Camera)>();
1561
1562
for (main_view_entity, render_view_entity, mut clusters, camera) in
1563
views.iter_mut(&mut main_world)
1564
{
1565
let mut entity_commands = commands
1566
.get_entity(render_view_entity)
1567
.expect("Clusters entity wasn't synced.");
1568
if !camera.is_active {
1569
entity_commands.remove::<ExtractedClusterConfig>();
1570
continue;
1571
}
1572
1573
entity_commands.insert(ExtractedClusterConfig::from(&*clusters));
1574
1575
// Read back statistics from the render world to the main world if we
1576
// have some.
1577
// The clustering systems in the main world will pick them up and adjust
1578
// cluster settings if necessary.
1579
if let Some(view_clustering_buffer_size_data) = render_view_clustering_index_list_sizes
1580
.views
1581
.get(&MainEntity::from(main_view_entity))
1582
{
1583
let view_clustering_buffer_size_data = view_clustering_buffer_size_data.lock().unwrap();
1584
if let Some(last_frame_statistics) =
1585
&view_clustering_buffer_size_data.last_frame_statistics
1586
{
1587
clusters.last_frame_farthest_z = Some(last_frame_statistics.farthest_z);
1588
clusters.last_frame_total_cluster_index_count =
1589
Some(last_frame_statistics.index_list_size as usize);
1590
}
1591
}
1592
}
1593
1594
let global_cluster_settings = main_world.resource::<GlobalClusterSettings>();
1595
commands.insert_resource(global_cluster_settings.clone());
1596
}
1597
1598
/// Creates associated buffers necessary to perform GPU clustering for all
1599
/// views.
1600
pub(crate) fn prepare_clusters_for_gpu_clustering(
1601
mut commands: Commands,
1602
views_query: Query<(
1603
Entity,
1604
&MainEntity,
1605
&ExtractedClusterConfig,
1606
Option<&RenderViewLightProbes<EnvironmentMapLight>>,
1607
Option<&RenderViewLightProbes<IrradianceVolume>>,
1608
)>,
1609
render_clustered_decals: Res<RenderClusteredDecals>,
1610
render_device: Res<RenderDevice>,
1611
render_queue: Res<RenderQueue>,
1612
global_clusterable_object_meta: Res<GlobalClusterableObjectMeta>,
1613
global_cluster_settings: Res<GlobalClusterSettings>,
1614
mut render_view_clustering_index_list_sizes: ResMut<RenderViewClusteringReadbackData>,
1615
) {
1616
let render_device = render_device.into_inner();
1617
1618
let Some(ref global_cluster_settings_gpu) = global_cluster_settings.gpu_clustering else {
1619
error!("`prepare_clusters_for_gpu_clustering() called when not GPU clustering");
1620
return;
1621
};
1622
1623
let gpu_clustered_lights_storage = &global_clusterable_object_meta.gpu_clustered_lights;
1624
1625
let mut all_view_main_entities = MainEntityHashSet::default();
1626
1627
for (
1628
view_entity,
1629
view_main_entity,
1630
extracted_cluster_config,
1631
maybe_environment_maps,
1632
maybe_irradiance_volumes,
1633
) in &views_query
1634
{
1635
// Allocate the cluster array.
1636
let mut view_clusters_bindings =
1637
ViewClusterBindings::new(BufferBindingType::Storage { read_only: false });
1638
view_clusters_bindings.clear();
1639
let cluster_count = extracted_cluster_config.dimensions.x as usize
1640
* extracted_cluster_config.dimensions.y as usize
1641
* extracted_cluster_config.dimensions.z as usize;
1642
view_clusters_bindings.reserve_clusters(cluster_count);
1643
1644
all_view_main_entities.insert(*view_main_entity);
1645
1646
// Create the readback data.
1647
let Ok(view_clustering_buffer_size_data) = render_view_clustering_index_list_sizes
1648
.views
1649
.entry(*view_main_entity)
1650
.or_insert_with(|| {
1651
Arc::new(Mutex::new(ViewClusteringReadbackData::new(
1652
global_cluster_settings_gpu,
1653
)))
1654
})
1655
.lock()
1656
else {
1657
warn!("Failed to acquire lock for view clustering buffer size data; skipping buffer creation for view: {}", view_entity.to_bits());
1658
continue;
1659
};
1660
1661
let mut view_gpu_clustering_buffers = ViewGpuClusteringBuffers::new();
1662
1663
// Count the number of each type of clusterable object that we have.
1664
let clustered_light_count = gpu_clustered_lights_storage.data.len() as u32;
1665
let reflection_probe_count = match maybe_environment_maps {
1666
Some(view_reflection_probes) => view_reflection_probes.len() as u32,
1667
None => 0,
1668
};
1669
let irradiance_volume_count = match maybe_irradiance_volumes {
1670
Some(view_irradiance_volumes) => view_irradiance_volumes.len() as u32,
1671
None => 0,
1672
};
1673
let decal_count = render_clustered_decals.len() as u32;
1674
1675
// Initialize the metadata.
1676
*view_gpu_clustering_buffers
1677
.cluster_metadata_buffer
1678
.get_mut() = ClusterMetadata {
1679
indirect_draw_params: ClusterRasterIndirectDrawParams {
1680
index_count: 6,
1681
// This will be filled in by the GPU.
1682
instance_count: 0,
1683
first_index: 0,
1684
base_vertex: 0,
1685
first_instance: 0,
1686
},
1687
clustered_light_count,
1688
reflection_probe_count,
1689
irradiance_volume_count,
1690
decal_count,
1691
index_list_capacity: view_clustering_buffer_size_data.max_index_list_capacity as u32,
1692
z_slice_list_capacity: view_clustering_buffer_size_data.z_slice_list_capacity as u32,
1693
farthest_z: 0,
1694
};
1695
1696
// Allocate Z slices.
1697
if view_gpu_clustering_buffers.z_slices_buffer.len()
1698
< view_clustering_buffer_size_data.z_slice_list_capacity
1699
{
1700
view_gpu_clustering_buffers.z_slices_buffer.add_multiple(
1701
view_clustering_buffer_size_data.z_slice_list_capacity
1702
- view_gpu_clustering_buffers.z_slices_buffer.len(),
1703
);
1704
}
1705
1706
// Make room for the appropriate number of indices.
1707
view_clusters_bindings
1708
.reserve_indices(view_clustering_buffer_size_data.max_index_list_capacity);
1709
view_clusters_bindings.write_buffers(render_device, &render_queue);
1710
1711
// Allocate scratchpad offsets and counts.
1712
view_gpu_clustering_buffers
1713
.scratchpad_offsets_and_counts_buffer
1714
.add_multiple(cluster_count);
1715
1716
commands
1717
.entity(view_entity)
1718
.insert((view_clusters_bindings, view_gpu_clustering_buffers));
1719
}
1720
1721
// Clear out clustering allocations corresponding to views that don't exist
1722
// any longer.
1723
render_view_clustering_index_list_sizes
1724
.views
1725
.retain(|view_main_entity, _| all_view_main_entities.contains(view_main_entity));
1726
}
1727
1728
impl ExtractResource<GpuClusteringPlugin> for GlobalClusterSettings {
    type Source = GlobalClusterSettings;

    /// Produces the extracted resource by cloning the source settings
    /// unchanged.
    fn extract_resource(source: &Self::Source) -> Self {
        Clone::clone(source)
    }
}
1735
1736