// bevyengine/bevy: crates/bevy_render/src/batching/gpu_preprocessing.rs
//! Batching functionality when GPU preprocessing is in use.

use core::{any::TypeId, marker::PhantomData, mem};

use bevy_app::{App, Plugin};
use bevy_derive::{Deref, DerefMut};
use bevy_ecs::{
    prelude::Entity,
    query::{Has, With},
    resource::Resource,
    schedule::IntoScheduleConfigs as _,
    system::{Query, Res, ResMut, StaticSystemParam},
    world::{FromWorld, World},
};
use bevy_encase_derive::ShaderType;
use bevy_math::UVec4;
use bevy_platform::collections::{hash_map::Entry, HashMap, HashSet};
use bevy_utils::{default, TypeIdMap};
use bytemuck::{Pod, Zeroable};
use encase::{internal::WriteInto, ShaderSize};
use indexmap::IndexMap;
use nonmax::NonMaxU32;
use tracing::{error, info};
use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features};

use crate::{
    experimental::occlusion_culling::OcclusionCulling,
    render_phase::{
        BinnedPhaseItem, BinnedRenderPhaseBatch, BinnedRenderPhaseBatchSet,
        BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, PhaseItem,
        PhaseItemBatchSetKey as _, PhaseItemExtraIndex, RenderBin, SortedPhaseItem,
        SortedRenderPhase, UnbatchableBinnedEntityIndices, ViewBinnedRenderPhases,
        ViewSortedRenderPhases,
    },
    render_resource::{Buffer, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
    renderer::{RenderAdapter, RenderAdapterInfo, RenderDevice, RenderQueue, WgpuWrapper},
    sync_world::MainEntity,
    view::{ExtractedView, NoIndirectDrawing, RetainedViewEntity},
    Render, RenderApp, RenderDebugFlags, RenderSystems,
};

use super::{BatchMeta, GetBatchData, GetFullBatchData};

#[derive(Default)]
pub struct BatchingPlugin {
    /// Debugging flags that can optionally be set when constructing the renderer.
    pub debug_flags: RenderDebugFlags,
}

impl Plugin for BatchingPlugin {
    fn build(&self, app: &mut App) {
        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };

        render_app
            .insert_resource(IndirectParametersBuffers::new(
                self.debug_flags
                    .contains(RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS),
            ))
            .add_systems(
                Render,
                write_indirect_parameters_buffers.in_set(RenderSystems::PrepareResourcesFlush),
            )
            .add_systems(
                Render,
                clear_indirect_parameters_buffers.in_set(RenderSystems::ManageViews),
            );
    }

    fn finish(&self, app: &mut App) {
        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };

        render_app.init_resource::<GpuPreprocessingSupport>();
    }
}

/// Records whether GPU preprocessing and/or GPU culling are supported on the
/// device.
///
/// No GPU preprocessing is supported on WebGL because of the lack of compute
/// shader support. GPU preprocessing is supported on DirectX 12, but due to [a
/// `wgpu` limitation] GPU culling is not.
///
/// [a `wgpu` limitation]: https://github.com/gfx-rs/wgpu/issues/2471
#[derive(Clone, Copy, PartialEq, Resource)]
pub struct GpuPreprocessingSupport {
    /// The maximum amount of GPU preprocessing available on this platform.
    pub max_supported_mode: GpuPreprocessingMode,
}

impl GpuPreprocessingSupport {
    /// Returns true if this GPU preprocessing support level isn't `None`.
    #[inline]
    pub fn is_available(&self) -> bool {
        self.max_supported_mode != GpuPreprocessingMode::None
    }

    /// Returns the given GPU preprocessing mode, capped to the maximum mode
    /// that this platform supports.
    pub fn min(&self, mode: GpuPreprocessingMode) -> GpuPreprocessingMode {
        match (self.max_supported_mode, mode) {
            (GpuPreprocessingMode::None, _) | (_, GpuPreprocessingMode::None) => {
                GpuPreprocessingMode::None
            }
            (mode, GpuPreprocessingMode::Culling) | (GpuPreprocessingMode::Culling, mode) => mode,
            (GpuPreprocessingMode::PreprocessingOnly, GpuPreprocessingMode::PreprocessingOnly) => {
                GpuPreprocessingMode::PreprocessingOnly
            }
        }
    }

    /// Returns true if GPU culling is supported on this platform.
    pub fn is_culling_supported(&self) -> bool {
        self.max_supported_mode == GpuPreprocessingMode::Culling
    }
}
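
// A minimal sketch (hypothetical helper, not part of this module's API)
// showing how `min` caps a requested preprocessing mode to what the device
// supports.
fn _gpu_preprocessing_mode_for_view(
    support: &GpuPreprocessingSupport,
    wants_culling: bool,
) -> GpuPreprocessingMode {
    let requested = if wants_culling {
        GpuPreprocessingMode::Culling
    } else {
        GpuPreprocessingMode::PreprocessingOnly
    };
    // A `Culling` request on a device that only supports `PreprocessingOnly`
    // is capped down; `None` on either side always wins.
    support.min(requested)
}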

/// The amount of GPU preprocessing (compute and indirect draw) that we do.
#[derive(Clone, Copy, PartialEq)]
pub enum GpuPreprocessingMode {
    /// No GPU preprocessing is in use at all.
    ///
    /// This is used when GPU compute isn't available.
    None,

    /// GPU preprocessing is in use, but GPU culling isn't.
    ///
    /// This is used when the [`NoIndirectDrawing`] component is present on the
    /// camera.
    PreprocessingOnly,

    /// Both GPU preprocessing and GPU culling are in use.
    ///
    /// This is used by default.
    Culling,
}

/// The GPU buffers holding the data needed to render batches.
///
/// For example, in the 3D PBR pipeline this holds the `MeshUniform`s, which
/// serve as the `BD` type parameter in that pipeline.
///
/// We have a separate *buffer data input* type (`BDI`) here, which a compute
/// shader is expected to expand to the full buffer data (`BD`) type. GPU
/// uniform building is generally faster and uses less system-RAM-to-VRAM bus
/// bandwidth, but it's only implemented for some pipelines (for example, not
/// in the 2D pipeline at present) and only when compute shaders are available.
#[derive(Resource)]
pub struct BatchedInstanceBuffers<BD, BDI>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
    BDI: Pod + Default,
{
    /// The uniform data inputs for the current frame.
    ///
    /// These are uploaded during the extraction phase.
    pub current_input_buffer: InstanceInputUniformBuffer<BDI>,

    /// The uniform data inputs for the previous frame.
    ///
    /// The indices don't generally line up between `current_input_buffer`
    /// and `previous_input_buffer`, because, among other reasons, entities
    /// can spawn or despawn between frames. Instead, each current buffer
    /// data input uniform is expected to contain the index of the
    /// corresponding buffer data input uniform in this list.
    pub previous_input_buffer: InstanceInputUniformBuffer<BDI>,

    /// The data needed to render batches for each phase.
    ///
    /// The keys of this map are the type IDs of each phase: e.g. `Opaque3d`,
    /// `AlphaMask3d`, etc.
    pub phase_instance_buffers: TypeIdMap<UntypedPhaseBatchedInstanceBuffers<BD>>,
}

impl<BD, BDI> Default for BatchedInstanceBuffers<BD, BDI>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
    BDI: Pod + Sync + Send + Default + 'static,
{
    fn default() -> Self {
        BatchedInstanceBuffers {
            current_input_buffer: InstanceInputUniformBuffer::new(),
            previous_input_buffer: InstanceInputUniformBuffer::new(),
            phase_instance_buffers: HashMap::default(),
        }
    }
}

/// The GPU buffers holding the data needed to render batches for a single
/// phase.
///
/// This is the version of the structure that has a type parameter, which
/// enables Bevy's scheduler to run the batching operations for the different
/// phases in parallel.
///
/// See the documentation for [`BatchedInstanceBuffers`] for more information.
#[derive(Resource)]
pub struct PhaseBatchedInstanceBuffers<PI, BD>
where
    PI: PhaseItem,
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    /// The buffers for this phase.
    pub buffers: UntypedPhaseBatchedInstanceBuffers<BD>,
    phantom: PhantomData<PI>,
}

impl<PI, BD> Default for PhaseBatchedInstanceBuffers<PI, BD>
where
    PI: PhaseItem,
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    fn default() -> Self {
        PhaseBatchedInstanceBuffers {
            buffers: UntypedPhaseBatchedInstanceBuffers::default(),
            phantom: PhantomData,
        }
    }
}

/// The GPU buffers holding the data needed to render batches for a single
/// phase, without a type parameter for that phase.
///
/// Since this structure doesn't have a type parameter, it can be placed in
/// [`BatchedInstanceBuffers::phase_instance_buffers`].
pub struct UntypedPhaseBatchedInstanceBuffers<BD>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    /// A storage area for the buffer data that the GPU compute shader is
    /// expected to write to.
    ///
    /// There will be one entry for each index.
    pub data_buffer: UninitBufferVec<BD>,

    /// The index of the buffer data in the current input buffer that
    /// corresponds to each instance.
    ///
    /// This is keyed off each view. Each view has a separate buffer.
    pub work_item_buffers: HashMap<RetainedViewEntity, PreprocessWorkItemBuffers>,

    /// A buffer that holds the number of indexed meshes that weren't visible
    /// in the previous frame, when GPU occlusion culling is in use.
    ///
    /// There's one set of [`LatePreprocessWorkItemIndirectParameters`] per
    /// view. Bevy uses this value to determine how many threads to dispatch to
    /// check meshes that weren't visible last frame to see if they became
    /// newly visible this frame.
    pub late_indexed_indirect_parameters_buffer:
        RawBufferVec<LatePreprocessWorkItemIndirectParameters>,

    /// A buffer that holds the number of non-indexed meshes that weren't
    /// visible in the previous frame, when GPU occlusion culling is in use.
    ///
    /// There's one set of [`LatePreprocessWorkItemIndirectParameters`] per
    /// view. Bevy uses this value to determine how many threads to dispatch to
    /// check meshes that weren't visible last frame to see if they became
    /// newly visible this frame.
    pub late_non_indexed_indirect_parameters_buffer:
        RawBufferVec<LatePreprocessWorkItemIndirectParameters>,
}

/// Holds the GPU buffer of instance input data, which is the data about each
/// mesh instance that the CPU provides.
///
/// `BDI` is the *buffer data input* type, which the GPU mesh preprocessing
/// shader is expected to expand to the full *buffer data* type.
pub struct InstanceInputUniformBuffer<BDI>
where
    BDI: Pod + Default,
{
    /// The buffer containing the data that will be uploaded to the GPU.
    buffer: RawBufferVec<BDI>,

    /// Indices of slots that are free within the buffer.
    ///
    /// When adding data, we overwrite these slots before growing the buffer
    /// itself.
    free_uniform_indices: Vec<u32>,
}

impl<BDI> InstanceInputUniformBuffer<BDI>
where
    BDI: Pod + Default,
{
    /// Creates a new, empty buffer.
    pub fn new() -> InstanceInputUniformBuffer<BDI> {
        InstanceInputUniformBuffer {
            buffer: RawBufferVec::new(BufferUsages::STORAGE),
            free_uniform_indices: vec![],
        }
    }

    /// Clears the buffer and the free-slot list out.
    pub fn clear(&mut self) {
        self.buffer.clear();
        self.free_uniform_indices.clear();
    }

    /// Returns the [`RawBufferVec`] corresponding to this input uniform buffer.
    #[inline]
    pub fn buffer(&self) -> &RawBufferVec<BDI> {
        &self.buffer
    }

    /// Adds a new piece of buffered data to the uniform buffer and returns its
    /// index.
    pub fn add(&mut self, element: BDI) -> u32 {
        match self.free_uniform_indices.pop() {
            Some(uniform_index) => {
                self.buffer.values_mut()[uniform_index as usize] = element;
                uniform_index
            }
            None => self.buffer.push(element) as u32,
        }
    }

    /// Removes a piece of buffered data from the uniform buffer.
    ///
    /// This simply marks the data as free.
    pub fn remove(&mut self, uniform_index: u32) {
        self.free_uniform_indices.push(uniform_index);
    }

    /// Returns the piece of buffered data at the given index.
    ///
    /// Returns [`None`] if the index is out of bounds or the data has been
    /// removed.
    pub fn get(&self, uniform_index: u32) -> Option<BDI> {
        if (uniform_index as usize) >= self.buffer.len()
            || self.free_uniform_indices.contains(&uniform_index)
        {
            None
        } else {
            Some(self.get_unchecked(uniform_index))
        }
    }

    /// Returns the piece of buffered data at the given index.
    /// Can return data that has previously been removed.
    ///
    /// # Panics
    ///
    /// Panics if `uniform_index` is not in bounds of [`Self::buffer`].
    pub fn get_unchecked(&self, uniform_index: u32) -> BDI {
        self.buffer.values()[uniform_index as usize]
    }

    /// Stores a piece of buffered data at the given index.
    ///
    /// # Panics
    ///
    /// Panics if `uniform_index` is not in bounds of [`Self::buffer`].
    pub fn set(&mut self, uniform_index: u32, element: BDI) {
        self.buffer.values_mut()[uniform_index as usize] = element;
    }

    /// Ensures that the buffers are nonempty, which the GPU requires before an
    /// upload can take place.
    pub fn ensure_nonempty(&mut self) {
        if self.buffer.is_empty() {
            self.buffer.push(default());
        }
    }

    /// Returns the number of instances in this buffer.
    pub fn len(&self) -> usize {
        self.buffer.len()
    }

    /// Returns true if this buffer contains no instances.
    pub fn is_empty(&self) -> bool {
        self.buffer.is_empty()
    }

    /// Consumes this [`InstanceInputUniformBuffer`] and returns the raw buffer
    /// ready to be uploaded to the GPU.
    pub fn into_buffer(self) -> RawBufferVec<BDI> {
        self.buffer
    }
}

impl<BDI> Default for InstanceInputUniformBuffer<BDI>
where
    BDI: Pod + Default,
{
    fn default() -> Self {
        Self::new()
    }
}
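
// A minimal sketch of the free-list behavior above: `remove` only marks a
// slot as free, and the next `add` reuses that slot instead of growing the
// buffer. (`u32` stands in for a real buffer data input type.)
fn _instance_input_free_list_demo() {
    let mut buffer = InstanceInputUniformBuffer::<u32>::new();
    let first = buffer.add(10); // occupies slot 0
    let second = buffer.add(20); // occupies slot 1
    buffer.remove(first); // slot 0 is now free
    assert_eq!(buffer.get(first), None); // freed data reads back as `None`
    let reused = buffer.add(30); // reuses slot 0 rather than pushing
    assert_eq!(reused, first);
    assert_eq!(buffer.get(second), Some(20));
    assert_eq!(buffer.len(), 2); // the underlying buffer never grew past 2
}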

/// The buffer of GPU preprocessing work items for a single view.
#[cfg_attr(
    not(target_arch = "wasm32"),
    expect(
        clippy::large_enum_variant,
        reason = "See https://github.com/bevyengine/bevy/issues/19220"
    )
)]
pub enum PreprocessWorkItemBuffers {
    /// The work items we use if we aren't using indirect drawing.
    ///
    /// Because we don't have to separate indexed from non-indexed meshes in
    /// direct mode, we only have a single buffer here.
    Direct(RawBufferVec<PreprocessWorkItem>),

    /// The buffer of work items we use if we are using indirect drawing.
    ///
    /// We need to separate out indexed meshes from non-indexed meshes in this
    /// case because the indirect parameters for these two types of meshes have
    /// different sizes.
    Indirect {
        /// The buffer of work items corresponding to indexed meshes.
        indexed: RawBufferVec<PreprocessWorkItem>,
        /// The buffer of work items corresponding to non-indexed meshes.
        non_indexed: RawBufferVec<PreprocessWorkItem>,
        /// The work item buffers we use when GPU occlusion culling is in use.
        gpu_occlusion_culling: Option<GpuOcclusionCullingWorkItemBuffers>,
    },
}

/// The work item buffers we use when GPU occlusion culling is in use.
pub struct GpuOcclusionCullingWorkItemBuffers {
    /// The buffer of work items corresponding to indexed meshes.
    pub late_indexed: UninitBufferVec<PreprocessWorkItem>,
    /// The buffer of work items corresponding to non-indexed meshes.
    pub late_non_indexed: UninitBufferVec<PreprocessWorkItem>,
    /// The offset into the
    /// [`UntypedPhaseBatchedInstanceBuffers::late_indexed_indirect_parameters_buffer`]
    /// where this view's indirect dispatch counts for indexed meshes live.
    pub late_indirect_parameters_indexed_offset: u32,
    /// The offset into the
    /// [`UntypedPhaseBatchedInstanceBuffers::late_non_indexed_indirect_parameters_buffer`]
    /// where this view's indirect dispatch counts for non-indexed meshes live.
    pub late_indirect_parameters_non_indexed_offset: u32,
}

/// A GPU-side data structure that stores the number of workgroups to dispatch
/// for the second phase of GPU occlusion culling.
///
/// The late mesh preprocessing phase checks meshes that weren't visible last
/// frame to determine if they're potentially visible this frame.
#[derive(Clone, Copy, ShaderType, Pod, Zeroable)]
#[repr(C)]
pub struct LatePreprocessWorkItemIndirectParameters {
    /// The number of workgroups to dispatch.
    ///
    /// This will be equal to `work_item_count / 64`, rounded *up*.
    dispatch_x: u32,
    /// The number of workgroups along the abstract Y axis to dispatch: always
    /// 1.
    dispatch_y: u32,
    /// The number of workgroups along the abstract Z axis to dispatch: always
    /// 1.
    dispatch_z: u32,
    /// The actual number of work items.
    ///
    /// The GPU indirect dispatch doesn't read this, but it's used internally
    /// to determine the actual number of work items that exist in the late
    /// preprocessing work item buffer.
    work_item_count: u32,
    /// Padding to 64-byte boundaries for some hardware.
    pad: UVec4,
}

impl Default for LatePreprocessWorkItemIndirectParameters {
    fn default() -> LatePreprocessWorkItemIndirectParameters {
        LatePreprocessWorkItemIndirectParameters {
            dispatch_x: 0,
            dispatch_y: 1,
            dispatch_z: 1,
            work_item_count: 0,
            pad: default(),
        }
    }
}
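
// A sketch (hypothetical helper) of the invariant documented above:
// `dispatch_x` is `work_item_count / 64`, rounded up, while Y and Z stay 1.
fn _late_dispatch_parameters(work_item_count: u32) -> LatePreprocessWorkItemIndirectParameters {
    LatePreprocessWorkItemIndirectParameters {
        // e.g. 65 work items -> 2 workgroups of 64 threads each.
        dispatch_x: work_item_count.div_ceil(64),
        work_item_count,
        // `dispatch_y`/`dispatch_z` stay 1 and `pad` stays zeroed.
        ..Default::default()
    }
}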

/// Returns the set of work item buffers for the given view, first creating it
/// if necessary.
///
/// Bevy uses work item buffers to tell the mesh preprocessing compute shader
/// which meshes are to be drawn.
///
/// You may need to call this function if you're implementing your own custom
/// render phases. See the `specialized_mesh_pipeline` example.
pub fn get_or_create_work_item_buffer<'a, I>(
    work_item_buffers: &'a mut HashMap<RetainedViewEntity, PreprocessWorkItemBuffers>,
    view: RetainedViewEntity,
    no_indirect_drawing: bool,
    enable_gpu_occlusion_culling: bool,
) -> &'a mut PreprocessWorkItemBuffers
where
    I: 'static,
{
    let preprocess_work_item_buffers = match work_item_buffers.entry(view) {
        Entry::Occupied(occupied_entry) => occupied_entry.into_mut(),
        Entry::Vacant(vacant_entry) => {
            if no_indirect_drawing {
                vacant_entry.insert(PreprocessWorkItemBuffers::Direct(RawBufferVec::new(
                    BufferUsages::STORAGE,
                )))
            } else {
                vacant_entry.insert(PreprocessWorkItemBuffers::Indirect {
                    indexed: RawBufferVec::new(BufferUsages::STORAGE),
                    non_indexed: RawBufferVec::new(BufferUsages::STORAGE),
                    // We fill this in below if `enable_gpu_occlusion_culling`
                    // is set.
                    gpu_occlusion_culling: None,
                })
            }
        }
    };

    // Initialize the GPU occlusion culling buffers if necessary.
    if let PreprocessWorkItemBuffers::Indirect {
        ref mut gpu_occlusion_culling,
        ..
    } = *preprocess_work_item_buffers
    {
        match (
            enable_gpu_occlusion_culling,
            gpu_occlusion_culling.is_some(),
        ) {
            (false, false) | (true, true) => {}
            (false, true) => {
                *gpu_occlusion_culling = None;
            }
            (true, false) => {
                *gpu_occlusion_culling = Some(GpuOcclusionCullingWorkItemBuffers {
                    late_indexed: UninitBufferVec::new(BufferUsages::STORAGE),
                    late_non_indexed: UninitBufferVec::new(BufferUsages::STORAGE),
                    late_indirect_parameters_indexed_offset: 0,
                    late_indirect_parameters_non_indexed_offset: 0,
                });
            }
        }
    }

    preprocess_work_item_buffers
}

/// Initializes work item buffers for a phase in preparation for a new frame.
pub fn init_work_item_buffers(
    work_item_buffers: &mut PreprocessWorkItemBuffers,
    late_indexed_indirect_parameters_buffer: &'_ mut RawBufferVec<
        LatePreprocessWorkItemIndirectParameters,
    >,
    late_non_indexed_indirect_parameters_buffer: &'_ mut RawBufferVec<
        LatePreprocessWorkItemIndirectParameters,
    >,
) {
    // Add the offsets for indirect parameters that the late phase of mesh
    // preprocessing writes to.
    if let PreprocessWorkItemBuffers::Indirect {
        gpu_occlusion_culling:
            Some(GpuOcclusionCullingWorkItemBuffers {
                ref mut late_indirect_parameters_indexed_offset,
                ref mut late_indirect_parameters_non_indexed_offset,
                ..
            }),
        ..
    } = *work_item_buffers
    {
        *late_indirect_parameters_indexed_offset = late_indexed_indirect_parameters_buffer
            .push(LatePreprocessWorkItemIndirectParameters::default())
            as u32;
        *late_indirect_parameters_non_indexed_offset = late_non_indexed_indirect_parameters_buffer
            .push(LatePreprocessWorkItemIndirectParameters::default())
            as u32;
    }
}

impl PreprocessWorkItemBuffers {
    /// Adds a new work item to the appropriate buffer.
    ///
    /// `indexed` specifies whether the work item corresponds to an indexed
    /// mesh.
    pub fn push(&mut self, indexed: bool, preprocess_work_item: PreprocessWorkItem) {
        match *self {
            PreprocessWorkItemBuffers::Direct(ref mut buffer) => {
                buffer.push(preprocess_work_item);
            }
            PreprocessWorkItemBuffers::Indirect {
                indexed: ref mut indexed_buffer,
                non_indexed: ref mut non_indexed_buffer,
                ref mut gpu_occlusion_culling,
            } => {
                if indexed {
                    indexed_buffer.push(preprocess_work_item);
                } else {
                    non_indexed_buffer.push(preprocess_work_item);
                }

                if let Some(ref mut gpu_occlusion_culling) = *gpu_occlusion_culling {
                    if indexed {
                        gpu_occlusion_culling.late_indexed.add();
                    } else {
                        gpu_occlusion_culling.late_non_indexed.add();
                    }
                }
            }
        }
    }

    /// Clears out the GPU work item buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        match *self {
            PreprocessWorkItemBuffers::Direct(ref mut buffer) => {
                buffer.clear();
            }
            PreprocessWorkItemBuffers::Indirect {
                indexed: ref mut indexed_buffer,
                non_indexed: ref mut non_indexed_buffer,
                ref mut gpu_occlusion_culling,
            } => {
                indexed_buffer.clear();
                non_indexed_buffer.clear();

                if let Some(ref mut gpu_occlusion_culling) = *gpu_occlusion_culling {
                    gpu_occlusion_culling.late_indexed.clear();
                    gpu_occlusion_culling.late_non_indexed.clear();
                    gpu_occlusion_culling.late_indirect_parameters_indexed_offset = 0;
                    gpu_occlusion_culling.late_indirect_parameters_non_indexed_offset = 0;
                }
            }
        }
    }
}
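
// A minimal sketch of the push flow above for a custom phase: fetch (or
// create) the view's buffers, then queue one work item. The `()` phase type,
// the flag values, and the indices here are illustrative only.
fn _queue_one_work_item(
    work_item_buffers: &mut HashMap<RetainedViewEntity, PreprocessWorkItemBuffers>,
    view: RetainedViewEntity,
    input_index: u32,
    output_index: u32,
) {
    let buffers = get_or_create_work_item_buffer::<()>(
        work_item_buffers,
        view,
        /*no_indirect_drawing=*/ true,
        /*enable_gpu_occlusion_culling=*/ false,
    );
    // In direct mode the second field is the output index; in indirect mode
    // it would instead be an indirect parameters index.
    buffers.push(
        /*indexed=*/ true,
        PreprocessWorkItem {
            input_index,
            output_or_indirect_parameters_index: output_index,
        },
    );
}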

/// One invocation of the preprocessing shader: i.e. one mesh instance in a
/// view.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct PreprocessWorkItem {
    /// The index of the batch input data in the input buffer that the shader
    /// reads from.
    pub input_index: u32,

    /// In direct mode, the index of the mesh uniform; in indirect mode, the
    /// index of the [`IndirectParametersGpuMetadata`].
    ///
    /// In indirect mode, this is the index of the
    /// [`IndirectParametersGpuMetadata`] in the
    /// `IndirectParametersBuffers::indexed_metadata` or
    /// `IndirectParametersBuffers::non_indexed_metadata`.
    pub output_or_indirect_parameters_index: u32,
}

/// The `wgpu` indirect parameters structure that specifies a GPU draw command.
///
/// This is the variant for indexed meshes. We generate the instances of this
/// structure in the `build_indirect_params.wgsl` compute shader.
#[derive(Clone, Copy, Debug, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersIndexed {
    /// The number of indices that this mesh has.
    pub index_count: u32,
    /// The number of instances we are to draw.
    pub instance_count: u32,
    /// The offset of the first index for this mesh in the index buffer slab.
    pub first_index: u32,
    /// The offset of the first vertex for this mesh in the vertex buffer slab.
    pub base_vertex: u32,
    /// The index of the first mesh instance in the `MeshUniform` buffer.
    pub first_instance: u32,
}
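
// A sketch of one indexed indirect draw command with illustrative values:
// draw 2 instances of a 36-index mesh whose `MeshUniform`s start at slot 5.
// (In practice the `build_indirect_params.wgsl` shader writes these on GPU.)
fn _example_indexed_draw_command() -> IndirectParametersIndexed {
    IndirectParametersIndexed {
        index_count: 36,
        instance_count: 2,
        first_index: 0,
        base_vertex: 0,
        first_instance: 5,
    }
}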

/// The `wgpu` indirect parameters structure that specifies a GPU draw command.
///
/// This is the variant for non-indexed meshes. We generate the instances of
/// this structure in the `build_indirect_params.wgsl` compute shader.
#[derive(Clone, Copy, Debug, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersNonIndexed {
    /// The number of vertices that this mesh has.
    pub vertex_count: u32,
    /// The number of instances we are to draw.
    pub instance_count: u32,
    /// The offset of the first vertex for this mesh in the vertex buffer slab.
    pub base_vertex: u32,
    /// The index of the first mesh instance in the `Mesh` buffer.
    pub first_instance: u32,
}

/// A structure, initialized on CPU and read on GPU, that contains metadata
/// about each batch.
///
/// Each batch will have one instance of this structure.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersCpuMetadata {
    /// The index of the first instance of this mesh in the array of
    /// `MeshUniform`s.
    ///
    /// Note that this is the *first* output index in this batch. Since each
    /// instance of this structure refers to arbitrarily many instances, the
    /// `MeshUniform`s corresponding to this batch span the indices
    /// `base_output_index..(base_output_index + instance_count)`.
    pub base_output_index: u32,

    /// The index of the batch set that this batch belongs to in the
    /// [`IndirectBatchSet`] buffer.
    ///
    /// A *batch set* is a set of meshes that may be multi-drawn together.
    /// Multiple batches (and therefore multiple instances of
    /// [`IndirectParametersGpuMetadata`] structures) can be part of the same
    /// batch set.
    pub batch_set_index: u32,
}
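
// A sketch of the span documented above: a batch with `base_output_index` 40
// and 3 instances covers `MeshUniform`s 40..43. (Hypothetical helper.)
fn _batch_output_range(
    metadata: &IndirectParametersCpuMetadata,
    instance_count: u32,
) -> core::ops::Range<u32> {
    metadata.base_output_index..(metadata.base_output_index + instance_count)
}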

/// A structure, written and read on the GPU, that records how many instances
/// of each mesh are actually to be drawn.
///
/// The GPU mesh preprocessing shader increments the
/// [`Self::early_instance_count`] and [`Self::late_instance_count`] as it
/// determines that meshes are visible. The indirect parameter building shader
/// reads this metadata in order to construct the indirect draw parameters.
///
/// Each batch will have one instance of this structure.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersGpuMetadata {
    /// The index of the first mesh in this batch in the array of
    /// `MeshInputUniform`s.
    pub mesh_index: u32,

    /// The number of instances that were judged visible last frame.
    ///
    /// The CPU sets this value to 0, and the GPU mesh preprocessing shader
    /// increments it as it culls mesh instances.
    pub early_instance_count: u32,

    /// The number of instances that have been judged potentially visible this
    /// frame that weren't in the last frame's potentially visible set.
    ///
    /// The CPU sets this value to 0, and the GPU mesh preprocessing shader
    /// increments it as it culls mesh instances.
    pub late_instance_count: u32,
}

/// A structure, shared between CPU and GPU, that holds the number of on-GPU
/// indirect draw commands for each *batch set*.
///
/// A *batch set* is a set of meshes that may be multi-drawn together.
///
/// If the current hardware and driver support `multi_draw_indirect_count`, the
/// indirect parameters building shader increments
/// [`Self::indirect_parameters_count`] as it generates indirect parameters. The
/// `multi_draw_indirect_count` command reads
/// [`Self::indirect_parameters_count`] in order to determine how many commands
/// belong to each batch set.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectBatchSet {
    /// The number of indirect parameter commands (i.e. batches) in this batch
    /// set.
    ///
    /// The CPU sets this value to 0 before uploading this structure to GPU. The
    /// indirect parameters building shader increments this value as it creates
    /// indirect parameters. Then the `multi_draw_indirect_count` command reads
    /// this value in order to determine how many indirect draw commands to
    /// process.
    pub indirect_parameters_count: u32,

    /// The offset within the `IndirectParametersBuffers::indexed_data` or
    /// `IndirectParametersBuffers::non_indexed_data` of the first indirect draw
    /// command for this batch set.
    ///
    /// The CPU fills out this value.
    pub indirect_parameters_base: u32,
}

/// The buffers containing all the information that indirect draw commands
/// (`multi_draw_indirect`, `multi_draw_indirect_count`) use to draw the scene.
///
/// In addition to the indirect draw buffers themselves, this structure contains
/// the buffers that store [`IndirectParametersGpuMetadata`], which are the
/// structures that culling writes to so that the indirect parameter building
/// pass can determine how many meshes are actually to be drawn.
///
/// These buffers will remain empty if indirect drawing isn't in use.
#[derive(Resource, Deref, DerefMut)]
pub struct IndirectParametersBuffers {
    /// A mapping from a phase type ID to the indirect parameters buffers for
    /// that phase.
    ///
    /// Examples of phase type IDs are `Opaque3d` and `AlphaMask3d`.
    #[deref]
    pub buffers: TypeIdMap<UntypedPhaseIndirectParametersBuffers>,
    /// If true, this sets the `COPY_SRC` flag on indirect draw parameters so
    /// that they can be read back to CPU.
    ///
    /// This is a debugging feature that may reduce performance. It primarily
    /// exists for the `occlusion_culling` example.
    pub allow_copies_from_indirect_parameter_buffers: bool,
}

impl IndirectParametersBuffers {
    /// Initializes a new [`IndirectParametersBuffers`] resource.
    pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> IndirectParametersBuffers {
        IndirectParametersBuffers {
            buffers: TypeIdMap::default(),
            allow_copies_from_indirect_parameter_buffers,
        }
    }
}

/// The buffers containing all the information that indirect draw commands use
/// to draw the scene, for a single phase.
///
/// This is the version of the structure that has a type parameter, so that the
/// batching for different phases can run in parallel.
///
/// See the [`IndirectParametersBuffers`] documentation for more information.
#[derive(Resource)]
pub struct PhaseIndirectParametersBuffers<PI>
where
    PI: PhaseItem,
{
    /// The indirect draw buffers for the phase.
    pub buffers: UntypedPhaseIndirectParametersBuffers,
    phantom: PhantomData<PI>,
}

impl<PI> PhaseIndirectParametersBuffers<PI>
where
    PI: PhaseItem,
{
    pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> Self {
        PhaseIndirectParametersBuffers {
            buffers: UntypedPhaseIndirectParametersBuffers::new(
                allow_copies_from_indirect_parameter_buffers,
            ),
            phantom: PhantomData,
        }
    }
}

/// The buffers containing all the information that indirect draw commands use
/// to draw the scene, for a single phase.
///
/// This is the version of the structure that doesn't have a type parameter, so
/// that it can be inserted into [`IndirectParametersBuffers::buffers`].
///
/// See the [`IndirectParametersBuffers`] documentation for more information.
pub struct UntypedPhaseIndirectParametersBuffers {
    /// Information that indirect draw commands use to draw indexed meshes in
    /// the scene.
    pub indexed: MeshClassIndirectParametersBuffers<IndirectParametersIndexed>,
    /// Information that indirect draw commands use to draw non-indexed meshes
    /// in the scene.
    pub non_indexed: MeshClassIndirectParametersBuffers<IndirectParametersNonIndexed>,
}

impl UntypedPhaseIndirectParametersBuffers {
    /// Creates the indirect parameters buffers.
    pub fn new(
        allow_copies_from_indirect_parameter_buffers: bool,
    ) -> UntypedPhaseIndirectParametersBuffers {
        UntypedPhaseIndirectParametersBuffers {
            non_indexed: MeshClassIndirectParametersBuffers::new(
                allow_copies_from_indirect_parameter_buffers,
            ),
            indexed: MeshClassIndirectParametersBuffers::new(
                allow_copies_from_indirect_parameter_buffers,
            ),
        }
    }

    /// Reserves space for `count` new batches.
    ///
    /// The `indexed` parameter specifies whether the meshes that these batches
    /// correspond to are indexed or not.
    pub fn allocate(&mut self, indexed: bool, count: u32) -> u32 {
        if indexed {
            self.indexed.allocate(count)
        } else {
            self.non_indexed.allocate(count)
        }
    }

    /// Returns the number of batches currently allocated.
    ///
    /// The `indexed` parameter specifies whether the meshes that these batches
    /// correspond to are indexed or not.
    fn batch_count(&self, indexed: bool) -> usize {
        if indexed {
            self.indexed.batch_count()
        } else {
            self.non_indexed.batch_count()
        }
    }

    /// Returns the number of batch sets currently allocated.
    ///
    /// The `indexed` parameter specifies whether the meshes that these batch
    /// sets correspond to are indexed or not.
    pub fn batch_set_count(&self, indexed: bool) -> usize {
        if indexed {
            self.indexed.batch_sets.len()
        } else {
            self.non_indexed.batch_sets.len()
        }
    }

    /// Adds a new batch set to `indexed.batch_sets` or
    /// `non_indexed.batch_sets` as appropriate.
    ///
    /// `indexed` specifies whether the meshes that these batch sets correspond
    /// to are indexed or not. `indirect_parameters_base` specifies the offset
    /// within `indexed.data` or `non_indexed.data` of the first batch in this
    /// batch set.
    #[inline]
    pub fn add_batch_set(&mut self, indexed: bool, indirect_parameters_base: u32) {
        if indexed {
            self.indexed.batch_sets.push(IndirectBatchSet {
                indirect_parameters_base,
                indirect_parameters_count: 0,
            });
        } else {
            self.non_indexed.batch_sets.push(IndirectBatchSet {
                indirect_parameters_base,
                indirect_parameters_count: 0,
            });
        }
    }

    /// Returns the index that a newly-added batch set will have.
    ///
    /// The `indexed` parameter specifies whether the meshes in such a batch
    /// set are indexed or not.
    pub fn get_next_batch_set_index(&self, indexed: bool) -> Option<NonMaxU32> {
        NonMaxU32::new(self.batch_set_count(indexed) as u32)
    }

    /// Clears out the buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        self.indexed.clear();
        self.non_indexed.clear();
    }
}
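
// A minimal sketch of the allocation flow above: reserve one indexed batch,
// then register a batch set whose commands start at the returned offset.
fn _allocate_one_indexed_batch_set() {
    let mut buffers = UntypedPhaseIndirectParametersBuffers::new(false);
    // Reserves a slot in the indexed `data`, `cpu_metadata`, and
    // `gpu_metadata` buffers, returning the offset of the new slot.
    let base = buffers.allocate(/*indexed=*/ true, 1);
    // The batch set starts at that offset with a zero command count; the
    // indirect parameters building shader increments the count on GPU.
    buffers.add_batch_set(/*indexed=*/ true, base);
    assert_eq!(buffers.batch_set_count(true), 1);
}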

/// The buffers containing all the information that indirect draw commands use
/// to draw the scene, for a single mesh class (indexed or non-indexed), for a
/// single phase.
pub struct MeshClassIndirectParametersBuffers<IP>
where
    IP: Clone + ShaderSize + WriteInto,
{
    /// The GPU buffer that stores the indirect draw parameters for the meshes.
    ///
    /// The indirect parameters building shader writes to this buffer, while the
    /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
    /// it to perform the draws.
    data: UninitBufferVec<IP>,

    /// The GPU buffer that holds the data used to construct indirect draw
    /// parameters for meshes.
    ///
    /// The CPU writes to this buffer, and the indirect parameters building
    /// shader reads this buffer to construct the indirect draw parameters.
    cpu_metadata: RawBufferVec<IndirectParametersCpuMetadata>,

    /// The GPU buffer that holds data built by the GPU used to construct
    /// indirect draw parameters for meshes.
    ///
    /// The GPU mesh preprocessing shader writes to this buffer, and the
    /// indirect parameters building shader reads this buffer to construct the
    /// indirect draw parameters.
    gpu_metadata: UninitBufferVec<IndirectParametersGpuMetadata>,

    /// The GPU buffer that holds the number of indirect draw commands for each
    /// phase of each view, for meshes.
    ///
    /// The indirect parameters building shader writes to this buffer, and the
    /// `multi_draw_indirect_count` command reads from it in order to know how
    /// many indirect draw commands to process.
    batch_sets: RawBufferVec<IndirectBatchSet>,
}

impl<IP> MeshClassIndirectParametersBuffers<IP>
where
    IP: Clone + ShaderSize + WriteInto,
{
    fn new(
        allow_copies_from_indirect_parameter_buffers: bool,
    ) -> MeshClassIndirectParametersBuffers<IP> {
        let mut indirect_parameter_buffer_usages = BufferUsages::STORAGE | BufferUsages::INDIRECT;
        if allow_copies_from_indirect_parameter_buffers {
            indirect_parameter_buffer_usages |= BufferUsages::COPY_SRC;
        }

        MeshClassIndirectParametersBuffers {
            data: UninitBufferVec::new(indirect_parameter_buffer_usages),
            cpu_metadata: RawBufferVec::new(BufferUsages::STORAGE),
            gpu_metadata: UninitBufferVec::new(BufferUsages::STORAGE),
            batch_sets: RawBufferVec::new(indirect_parameter_buffer_usages),
        }
    }

    /// Returns the GPU buffer that stores the indirect draw parameters for
    /// the meshes in this mesh class.
    ///
    /// The indirect parameters building shader writes to this buffer, while the
    /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
    /// it to perform the draws.
    #[inline]
    pub fn data_buffer(&self) -> Option<&Buffer> {
        self.data.buffer()
    }

    /// Returns the GPU buffer that holds the CPU-constructed data used to
    /// construct indirect draw parameters for meshes.
    ///
    /// The CPU writes to this buffer, and the indirect parameters building
    /// shader reads this buffer to construct the indirect draw parameters.
    #[inline]
    pub fn cpu_metadata_buffer(&self) -> Option<&Buffer> {
        self.cpu_metadata.buffer()
    }

    /// Returns the GPU buffer that holds the GPU-constructed data used to
    /// construct indirect draw parameters for meshes.
    ///
    /// The GPU mesh preprocessing shader writes to this buffer, and the
    /// indirect parameters building shader reads this buffer to construct the
    /// indirect draw parameters.
    #[inline]
    pub fn gpu_metadata_buffer(&self) -> Option<&Buffer> {
        self.gpu_metadata.buffer()
    }

    /// Returns the GPU buffer that holds the number of indirect draw commands
    /// for each phase of each view.
    ///
    /// The indirect parameters building shader writes to this buffer, and the
    /// `multi_draw_indirect_count` command reads from it in order to know how
    /// many indirect draw commands to process.
    #[inline]
    pub fn batch_sets_buffer(&self) -> Option<&Buffer> {
        self.batch_sets.buffer()
    }

    /// Reserves space for `count` new batches.
    ///
    /// This allocates in the [`Self::cpu_metadata`], [`Self::gpu_metadata`],
    /// and [`Self::data`] buffers.
    fn allocate(&mut self, count: u32) -> u32 {
        let length = self.data.len();
        self.cpu_metadata.reserve_internal(count as usize);
        self.gpu_metadata.add_multiple(count as usize);
        for _ in 0..count {
            self.data.add();
            self.cpu_metadata
                .push(IndirectParametersCpuMetadata::default());
        }
        length as u32
    }

    /// Sets the [`IndirectParametersCpuMetadata`] for the mesh at the given
    /// index.
    pub fn set(&mut self, index: u32, value: IndirectParametersCpuMetadata) {
        self.cpu_metadata.set(index, value);
    }

    /// Returns the number of batches corresponding to meshes that are currently
    /// allocated.
    #[inline]
    pub fn batch_count(&self) -> usize {
        self.data.len()
    }

    /// Clears out all the buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        self.data.clear();
        self.cpu_metadata.clear();
        self.gpu_metadata.clear();
        self.batch_sets.clear();
    }
}

impl Default for IndirectParametersBuffers {
    fn default() -> Self {
        // By default, we don't allow GPU indirect parameter mapping, since
        // that's a debugging option.
        Self::new(false)
    }
}

impl FromWorld for GpuPreprocessingSupport {
    fn from_world(world: &mut World) -> Self {
        let adapter = world.resource::<RenderAdapter>();
        let device = world.resource::<RenderDevice>();

        // Filter Android drivers that are incompatible with GPU preprocessing:
        // - We filter out Adreno 730 and earlier GPUs (except 720, as it's newer
        //   than 730).
        // - We filter out Mali GPUs with driver versions lower than 48.
        fn is_non_supported_android_device(adapter_info: &RenderAdapterInfo) -> bool {
            crate::get_adreno_model(adapter_info).is_some_and(|model| model != 720 && model <= 730)
                || crate::get_mali_driver_version(adapter_info).is_some_and(|version| version < 48)
        }

        let culling_feature_support = device.features().contains(
            Features::INDIRECT_FIRST_INSTANCE
                | Features::MULTI_DRAW_INDIRECT
                | Features::PUSH_CONSTANTS,
        );
        // Depth downsampling for occlusion culling requires 12 storage textures.
        let limit_support = device.limits().max_storage_textures_per_shader_stage >= 12 &&
            // Even if the adapter supports compute, we might be simulating a lack of
            // compute via device limits (see `WgpuSettingsPriority::WebGL2` and
            // `wgpu::Limits::downlevel_webgl2_defaults()`). This will have set all the
            // `max_compute_*` limits to zero, so we arbitrarily pick one as a canary.
            device.limits().max_compute_workgroup_storage_size != 0;

        let downlevel_support = adapter
            .get_downlevel_capabilities()
            .flags
            .contains(DownlevelFlags::COMPUTE_SHADERS);

        let adapter_info = RenderAdapterInfo(WgpuWrapper::new(adapter.get_info()));

        let max_supported_mode = if device.limits().max_compute_workgroup_size_x == 0
            || is_non_supported_android_device(&adapter_info)
            || adapter_info.backend == wgpu::Backend::Gl
        {
            info!(
                "GPU preprocessing is not supported on this device. \
                Falling back to CPU preprocessing.",
            );
            GpuPreprocessingMode::None
        } else if !(culling_feature_support && limit_support && downlevel_support) {
            info!("Some GPU preprocessing features are limited on this device.");
            GpuPreprocessingMode::PreprocessingOnly
        } else {
            info!("GPU preprocessing is fully supported on this device.");
            GpuPreprocessingMode::Culling
        };

        GpuPreprocessingSupport { max_supported_mode }
    }
}

impl<BD, BDI> BatchedInstanceBuffers<BD, BDI>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
    BDI: Pod + Sync + Send + Default + 'static,
{
    /// Creates new buffers.
    pub fn new() -> Self {
        Self::default()
    }

    /// Clears out the buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        for phase_instance_buffer in self.phase_instance_buffers.values_mut() {
            phase_instance_buffer.clear();
        }
    }
}

impl<BD> UntypedPhaseBatchedInstanceBuffers<BD>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    pub fn new() -> Self {
        UntypedPhaseBatchedInstanceBuffers {
            data_buffer: UninitBufferVec::new(BufferUsages::STORAGE),
            work_item_buffers: HashMap::default(),
            late_indexed_indirect_parameters_buffer: RawBufferVec::new(
                BufferUsages::STORAGE | BufferUsages::INDIRECT,
            ),
            late_non_indexed_indirect_parameters_buffer: RawBufferVec::new(
                BufferUsages::STORAGE | BufferUsages::INDIRECT,
            ),
        }
    }

    /// Returns the binding of the buffer that contains the per-instance data.
    ///
    /// This buffer needs to be filled in via a compute shader.
    pub fn instance_data_binding(&self) -> Option<BindingResource<'_>> {
        self.data_buffer
            .buffer()
            .map(|buffer| buffer.as_entire_binding())
    }

    /// Clears out the buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        self.data_buffer.clear();
        self.late_indexed_indirect_parameters_buffer.clear();
        self.late_non_indexed_indirect_parameters_buffer.clear();

        // Clear each individual set of buffers, but don't depopulate the hash
        // table. We want to avoid reallocating these vectors every frame.
        for view_work_item_buffers in self.work_item_buffers.values_mut() {
            view_work_item_buffers.clear();
        }
    }
}

impl<BD> Default for UntypedPhaseBatchedInstanceBuffers<BD>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    fn default() -> Self {
        Self::new()
    }
}

/// Information about a render batch that we're building up during a sorted
/// render phase.
struct SortedRenderBatch<F>
where
    F: GetBatchData,
{
    /// The index of the first phase item in this batch in the list of phase
    /// items.
    phase_item_start_index: u32,

    /// The index of the first instance in this batch in the instance buffer.
    instance_start_index: u32,

    /// True if the mesh in question has an index buffer; false otherwise.
    indexed: bool,

    /// The index of the indirect parameters for this batch in the
    /// [`IndirectParametersBuffers`].
    ///
    /// If CPU culling is being used, then this will be `None`.
    indirect_parameters_index: Option<NonMaxU32>,

    /// Metadata that can be used to determine whether an instance can be placed
    /// into this batch.
    ///
    /// If `None`, the item inside is unbatchable.
    meta: Option<BatchMeta<F::CompareData>>,
}

impl<F> SortedRenderBatch<F>
where
    F: GetBatchData,
{
    /// Finalizes this batch and updates the [`SortedRenderPhase`] with the
    /// appropriate indices.
    ///
    /// `instance_end_index` is the index of the last instance in this batch
    /// plus one.
    fn flush<I>(
        self,
        instance_end_index: u32,
        phase: &mut SortedRenderPhase<I>,
        phase_indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers,
    ) where
        I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
    {
        let (batch_range, batch_extra_index) =
            phase.items[self.phase_item_start_index as usize].batch_range_and_extra_index_mut();
        *batch_range = self.instance_start_index..instance_end_index;
        *batch_extra_index = match self.indirect_parameters_index {
            Some(indirect_parameters_index) => PhaseItemExtraIndex::IndirectParametersIndex {
                range: u32::from(indirect_parameters_index)
                    ..(u32::from(indirect_parameters_index) + 1),
                batch_set_index: None,
            },
            None => PhaseItemExtraIndex::None,
        };
        if let Some(indirect_parameters_index) = self.indirect_parameters_index {
            phase_indirect_parameters_buffers
                .add_batch_set(self.indexed, indirect_parameters_index.into());
        }
    }
}

/// A system that runs early in extraction and clears out all the
/// [`BatchedInstanceBuffers`] for the frame.
///
/// We have to run this during extraction because, if GPU preprocessing is in
/// use, the extraction phase will write to the mesh input uniform buffers
/// directly, so the buffers need to be cleared before then.
pub fn clear_batched_gpu_instance_buffers<GFBD>(
    gpu_batched_instance_buffers: Option<
        ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
    >,
) where
    GFBD: GetFullBatchData,
{
    // Don't clear the entire table, because that would delete the buffers, and
    // we want to reuse those allocations.
    if let Some(mut gpu_batched_instance_buffers) = gpu_batched_instance_buffers {
        gpu_batched_instance_buffers.clear();
    }
}

/// A system that removes GPU preprocessing work item buffers that correspond to
/// deleted [`ExtractedView`]s.
///
/// This is a separate system from [`clear_batched_gpu_instance_buffers`]
/// because [`ExtractedView`]s aren't created until after the extraction phase
/// is completed.
pub fn delete_old_work_item_buffers<GFBD>(
    mut gpu_batched_instance_buffers: ResMut<
        BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>,
    >,
    extracted_views: Query<&ExtractedView>,
) where
    GFBD: GetFullBatchData,
{
    let retained_view_entities: HashSet<_> = extracted_views
        .iter()
        .map(|extracted_view| extracted_view.retained_view_entity)
        .collect();
    for phase_instance_buffers in gpu_batched_instance_buffers
        .phase_instance_buffers
        .values_mut()
    {
        phase_instance_buffers
            .work_item_buffers
            .retain(|retained_view_entity, _| {
                retained_view_entities.contains(retained_view_entity)
            });
    }
}

/// Batch the items in a sorted render phase, when GPU instance buffer building
/// is in use. This means comparing metadata needed to draw each phase item and
/// trying to combine the draws into a batch.
pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
    mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<I, GFBD::BufferData>>,
    mut phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<I>>,
    mut sorted_render_phases: ResMut<ViewSortedRenderPhases<I>>,
    mut views: Query<(
        &ExtractedView,
        Has<NoIndirectDrawing>,
        Has<OcclusionCulling>,
    )>,
    system_param_item: StaticSystemParam<GFBD::Param>,
) where
    I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
    GFBD: GetFullBatchData,
{
    // We only process GPU-built batch data in this function.
    let UntypedPhaseBatchedInstanceBuffers {
        ref mut data_buffer,
        ref mut work_item_buffers,
        ref mut late_indexed_indirect_parameters_buffer,
        ref mut late_non_indexed_indirect_parameters_buffer,
    } = phase_batched_instance_buffers.buffers;

    for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views {
        let Some(phase) = sorted_render_phases.get_mut(&extracted_view.retained_view_entity) else {
            continue;
        };

        // Create the work item buffer if necessary.
        let work_item_buffer = get_or_create_work_item_buffer::<I>(
            work_item_buffers,
            extracted_view.retained_view_entity,
            no_indirect_drawing,
            gpu_occlusion_culling,
        );

        // Initialize those work item buffers in preparation for this new frame.
        init_work_item_buffers(
            work_item_buffer,
            late_indexed_indirect_parameters_buffer,
            late_non_indexed_indirect_parameters_buffer,
        );

        // Walk through the list of phase items, building up batches as we go.
        let mut batch: Option<SortedRenderBatch<GFBD>> = None;

        for current_index in 0..phase.items.len() {
            // Get the index of the input data, and comparison metadata, for
            // this entity.
            let item = &phase.items[current_index];
            let entity = item.main_entity();
            let item_is_indexed = item.indexed();
            let current_batch_input_index =
                GFBD::get_index_and_compare_data(&system_param_item, entity);

            // Unpack that index and metadata. Note that it's possible for the
            // index and/or metadata to be absent, which signifies that this
            // entity is unbatchable. In that case, we break the batch here.
            // If the index isn't present, the item isn't part of this
            // pipeline, so we skip it.
            let Some((current_input_index, current_meta)) = current_batch_input_index else {
                // Break a batch if we need to.
                if let Some(batch) = batch.take() {
                    batch.flush(
                        data_buffer.len() as u32,
                        phase,
                        &mut phase_indirect_parameters_buffers.buffers,
                    );
                }

                continue;
            };
            let current_meta =
                current_meta.map(|meta| BatchMeta::new(&phase.items[current_index], meta));

            // Determine if this entity can be included in the batch we're
            // building up.
            let can_batch = batch.as_ref().is_some_and(|batch| {
                // `None` for metadata indicates that the items are unbatchable.
                match (&current_meta, &batch.meta) {
                    (Some(current_meta), Some(batch_meta)) => current_meta == batch_meta,
                    (_, _) => false,
                }
            });

            // Make space in the data buffer for this instance.
            let output_index = data_buffer.add() as u32;

            // If we can't batch, break the existing batch and make a new one.
            if !can_batch {
                // Break a batch if we need to.
                if let Some(batch) = batch.take() {
                    batch.flush(
                        output_index,
                        phase,
                        &mut phase_indirect_parameters_buffers.buffers,
                    );
                }

                let indirect_parameters_index = if no_indirect_drawing {
                    None
                } else if item_is_indexed {
                    Some(
                        phase_indirect_parameters_buffers
                            .buffers
                            .indexed
                            .allocate(1),
                    )
                } else {
                    Some(
                        phase_indirect_parameters_buffers
                            .buffers
                            .non_indexed
                            .allocate(1),
                    )
                };

                // Start a new batch.
                if let Some(indirect_parameters_index) = indirect_parameters_index {
                    GFBD::write_batch_indirect_parameters_metadata(
                        item_is_indexed,
                        output_index,
                        None,
                        &mut phase_indirect_parameters_buffers.buffers,
                        indirect_parameters_index,
                    );
                };

                batch = Some(SortedRenderBatch {
                    phase_item_start_index: current_index as u32,
                    instance_start_index: output_index,
                    indexed: item_is_indexed,
                    indirect_parameters_index: indirect_parameters_index.and_then(NonMaxU32::new),
                    meta: current_meta,
                });
            }

            // Add a new preprocessing work item so that the preprocessing
            // shader will copy the per-instance data over.
            if let Some(batch) = batch.as_ref() {
                work_item_buffer.push(
                    item_is_indexed,
                    PreprocessWorkItem {
                        input_index: current_input_index.into(),
                        output_or_indirect_parameters_index: match (
                            no_indirect_drawing,
                            batch.indirect_parameters_index,
                        ) {
                            (true, _) => output_index,
                            (false, Some(indirect_parameters_index)) => {
                                indirect_parameters_index.into()
                            }
                            (false, None) => 0,
                        },
                    },
                );
            }
        }

        // Flush the final batch if necessary.
        if let Some(batch) = batch.take() {
            batch.flush(
                data_buffer.len() as u32,
                phase,
                &mut phase_indirect_parameters_buffers.buffers,
            );
        }
    }
}
1504
1505
/// Creates batches for a render phase that uses bins.
1506
pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
1507
mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<BPI, GFBD::BufferData>>,
1508
phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<BPI>>,
1509
mut binned_render_phases: ResMut<ViewBinnedRenderPhases<BPI>>,
1510
mut views: Query<
1511
(
1512
&ExtractedView,
1513
Has<NoIndirectDrawing>,
1514
Has<OcclusionCulling>,
1515
),
1516
With<ExtractedView>,
1517
>,
1518
param: StaticSystemParam<GFBD::Param>,
1519
) where
1520
BPI: BinnedPhaseItem,
1521
GFBD: GetFullBatchData,
1522
{
    let system_param_item = param.into_inner();

    let phase_indirect_parameters_buffers = phase_indirect_parameters_buffers.into_inner();

    let UntypedPhaseBatchedInstanceBuffers {
        ref mut data_buffer,
        ref mut work_item_buffers,
        ref mut late_indexed_indirect_parameters_buffer,
        ref mut late_non_indexed_indirect_parameters_buffer,
    } = phase_batched_instance_buffers.buffers;

    for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views {
        let Some(phase) = binned_render_phases.get_mut(&extracted_view.retained_view_entity) else {
            continue;
        };

        // Create the work item buffer if necessary; otherwise, just mark it as
        // used this frame.
        let work_item_buffer = get_or_create_work_item_buffer::<BPI>(
            work_item_buffers,
            extracted_view.retained_view_entity,
            no_indirect_drawing,
            gpu_occlusion_culling,
        );

        // Initialize those work item buffers in preparation for this new frame.
        init_work_item_buffers(
            work_item_buffer,
            late_indexed_indirect_parameters_buffer,
            late_non_indexed_indirect_parameters_buffer,
        );

        // Prepare multidrawables.

        if let (
            &mut BinnedRenderPhaseBatchSets::MultidrawIndirect(ref mut batch_sets),
            &mut PreprocessWorkItemBuffers::Indirect {
                indexed: ref mut indexed_work_item_buffer,
                non_indexed: ref mut non_indexed_work_item_buffer,
                gpu_occlusion_culling: ref mut gpu_occlusion_culling_buffers,
            },
        ) = (&mut phase.batch_sets, &mut *work_item_buffer)
        {
            let mut output_index = data_buffer.len() as u32;

            // Initialize the state for both indexed and non-indexed meshes.
            let mut indexed_preparer: MultidrawableBatchSetPreparer<BPI, GFBD> =
                MultidrawableBatchSetPreparer::new(
                    phase_indirect_parameters_buffers.buffers.batch_count(true) as u32,
                    phase_indirect_parameters_buffers
                        .buffers
                        .indexed
                        .batch_sets
                        .len() as u32,
                );
            let mut non_indexed_preparer: MultidrawableBatchSetPreparer<BPI, GFBD> =
                MultidrawableBatchSetPreparer::new(
                    phase_indirect_parameters_buffers.buffers.batch_count(false) as u32,
                    phase_indirect_parameters_buffers
                        .buffers
                        .non_indexed
                        .batch_sets
                        .len() as u32,
                );

            // Prepare each batch set.
            for (batch_set_key, bins) in &phase.multidrawable_meshes {
                if batch_set_key.indexed() {
                    indexed_preparer.prepare_multidrawable_binned_batch_set(
                        bins,
                        &mut output_index,
                        data_buffer,
                        indexed_work_item_buffer,
                        &mut phase_indirect_parameters_buffers.buffers.indexed,
                        batch_sets,
                    );
                } else {
                    non_indexed_preparer.prepare_multidrawable_binned_batch_set(
                        bins,
                        &mut output_index,
                        data_buffer,
                        non_indexed_work_item_buffer,
                        &mut phase_indirect_parameters_buffers.buffers.non_indexed,
                        batch_sets,
                    );
                }
            }

            // Reserve space in the occlusion culling buffers, if necessary.
            if let Some(gpu_occlusion_culling_buffers) = gpu_occlusion_culling_buffers {
                gpu_occlusion_culling_buffers
                    .late_indexed
                    .add_multiple(indexed_preparer.work_item_count);
                gpu_occlusion_culling_buffers
                    .late_non_indexed
                    .add_multiple(non_indexed_preparer.work_item_count);
            }
        }

        // Prepare batchables.

        for (key, bin) in &phase.batchable_meshes {
            let mut batch: Option<BinnedRenderPhaseBatch> = None;
            for (&main_entity, &input_index) in bin.entities() {
                let output_index = data_buffer.add() as u32;

                match batch {
                    Some(ref mut batch) => {
                        batch.instance_range.end = output_index + 1;

                        // Append to the current batch.
                        //
                        // If we're in indirect mode, then we write the first
                        // output index of this batch, so that we have a
                        // tightly-packed buffer if GPU culling discards some of
                        // the instances. Otherwise, we can just write the
                        // output index directly.
                        work_item_buffer.push(
                            key.0.indexed(),
                            PreprocessWorkItem {
                                input_index: *input_index,
                                output_or_indirect_parameters_index: match (
                                    no_indirect_drawing,
                                    &batch.extra_index,
                                ) {
                                    (true, _) => output_index,
                                    (
                                        false,
                                        PhaseItemExtraIndex::IndirectParametersIndex {
                                            range: indirect_parameters_range,
                                            ..
                                        },
                                    ) => indirect_parameters_range.start,
                                    (false, &PhaseItemExtraIndex::DynamicOffset(_))
                                    | (false, &PhaseItemExtraIndex::None) => 0,
                                },
                            },
                        );
                    }

                    None if !no_indirect_drawing => {
                        // Start a new batch, in indirect mode.
                        let indirect_parameters_index = phase_indirect_parameters_buffers
                            .buffers
                            .allocate(key.0.indexed(), 1);
                        let batch_set_index = phase_indirect_parameters_buffers
                            .buffers
                            .get_next_batch_set_index(key.0.indexed());

                        GFBD::write_batch_indirect_parameters_metadata(
                            key.0.indexed(),
                            output_index,
                            batch_set_index,
                            &mut phase_indirect_parameters_buffers.buffers,
                            indirect_parameters_index,
                        );
                        work_item_buffer.push(
                            key.0.indexed(),
                            PreprocessWorkItem {
                                input_index: *input_index,
                                output_or_indirect_parameters_index: indirect_parameters_index,
                            },
                        );
                        batch = Some(BinnedRenderPhaseBatch {
                            representative_entity: (Entity::PLACEHOLDER, main_entity),
                            instance_range: output_index..output_index + 1,
                            extra_index: PhaseItemExtraIndex::IndirectParametersIndex {
                                range: indirect_parameters_index..(indirect_parameters_index + 1),
                                batch_set_index: None,
                            },
                        });
                    }

                    None => {
                        // Start a new batch, in direct mode.
                        work_item_buffer.push(
                            key.0.indexed(),
                            PreprocessWorkItem {
                                input_index: *input_index,
                                output_or_indirect_parameters_index: output_index,
                            },
                        );
                        batch = Some(BinnedRenderPhaseBatch {
                            representative_entity: (Entity::PLACEHOLDER, main_entity),
                            instance_range: output_index..output_index + 1,
                            extra_index: PhaseItemExtraIndex::None,
                        });
                    }
                }
            }

            if let Some(batch) = batch {
                match phase.batch_sets {
                    BinnedRenderPhaseBatchSets::DynamicUniforms(_) => {
                        error!("Dynamic uniform batch sets shouldn't be used here");
                    }
                    BinnedRenderPhaseBatchSets::Direct(ref mut vec) => {
                        vec.push(batch);
                    }
                    BinnedRenderPhaseBatchSets::MultidrawIndirect(ref mut vec) => {
                        // The Bevy renderer will never mark a mesh as batchable
                        // but not multidrawable if multidraw is in use.
                        // However, custom render pipelines might do so, such as
                        // the `specialized_mesh_pipeline` example.
                        vec.push(BinnedRenderPhaseBatchSet {
                            first_batch: batch,
                            batch_count: 1,
                            bin_key: key.1.clone(),
                            index: phase_indirect_parameters_buffers
                                .buffers
                                .batch_set_count(key.0.indexed())
                                as u32,
                        });
                    }
                }
            }
        }

        // Prepare unbatchables.
        for (key, unbatchables) in &mut phase.unbatchable_meshes {
            // Allocate the indirect parameters if necessary.
            let mut indirect_parameters_offset = if no_indirect_drawing {
                None
            } else if key.0.indexed() {
                Some(
                    phase_indirect_parameters_buffers
                        .buffers
                        .indexed
                        .allocate(unbatchables.entities.len() as u32),
                )
            } else {
                Some(
                    phase_indirect_parameters_buffers
                        .buffers
                        .non_indexed
                        .allocate(unbatchables.entities.len() as u32),
                )
            };

            for main_entity in unbatchables.entities.keys() {
                let Some(input_index) = GFBD::get_binned_index(&system_param_item, *main_entity)
                else {
                    continue;
                };
                let output_index = data_buffer.add() as u32;

                if let Some(ref mut indirect_parameters_index) = indirect_parameters_offset {
                    // We're in indirect mode, so add an indirect parameters
                    // index.
                    GFBD::write_batch_indirect_parameters_metadata(
                        key.0.indexed(),
                        output_index,
                        None,
                        &mut phase_indirect_parameters_buffers.buffers,
                        *indirect_parameters_index,
                    );
                    work_item_buffer.push(
                        key.0.indexed(),
                        PreprocessWorkItem {
                            input_index: input_index.into(),
                            output_or_indirect_parameters_index: *indirect_parameters_index,
                        },
                    );
                    unbatchables
                        .buffer_indices
                        .add(UnbatchableBinnedEntityIndices {
                            instance_index: *indirect_parameters_index,
                            extra_index: PhaseItemExtraIndex::IndirectParametersIndex {
                                range: *indirect_parameters_index..(*indirect_parameters_index + 1),
                                batch_set_index: None,
                            },
                        });
                    phase_indirect_parameters_buffers
                        .buffers
                        .add_batch_set(key.0.indexed(), *indirect_parameters_index);
                    *indirect_parameters_index += 1;
                } else {
                    work_item_buffer.push(
                        key.0.indexed(),
                        PreprocessWorkItem {
                            input_index: input_index.into(),
                            output_or_indirect_parameters_index: output_index,
                        },
                    );
                    unbatchables
                        .buffer_indices
                        .add(UnbatchableBinnedEntityIndices {
                            instance_index: output_index,
                            extra_index: PhaseItemExtraIndex::None,
                        });
                }
            }
        }
    }
}

/// The state that [`batch_and_prepare_binned_render_phase`] uses to construct
/// multidrawable batch sets.
///
/// The [`batch_and_prepare_binned_render_phase`] system maintains two of these:
/// one for indexed meshes and one for non-indexed meshes.
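///
/// Keeping the two mesh classes separate lets each class's indirect parameters
/// be packed contiguously into its own buffer.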
struct MultidrawableBatchSetPreparer<BPI, GFBD>
where
    BPI: BinnedPhaseItem,
    GFBD: GetFullBatchData,
{
    /// The offset in the indirect parameters buffer at which the next indirect
    /// parameters will be written.
    indirect_parameters_index: u32,
    /// The number of batch sets we've built so far for this mesh class.
    batch_set_index: u32,
    /// The number of work items we've emitted so far for this mesh class.
    work_item_count: usize,
    phantom: PhantomData<(BPI, GFBD)>,
}

impl<BPI, GFBD> MultidrawableBatchSetPreparer<BPI, GFBD>
where
    BPI: BinnedPhaseItem,
    GFBD: GetFullBatchData,
{
    /// Creates a new [`MultidrawableBatchSetPreparer`] that will start writing
    /// indirect parameters and batch sets at the given indices.
    #[inline]
    fn new(initial_indirect_parameters_index: u32, initial_batch_set_index: u32) -> Self {
        MultidrawableBatchSetPreparer {
            indirect_parameters_index: initial_indirect_parameters_index,
            batch_set_index: initial_batch_set_index,
            work_item_count: 0,
            phantom: PhantomData,
        }
    }

    /// Creates batch sets and writes the GPU data needed to draw all visible
    /// entities of one mesh class in the given batch set.
    ///
    /// The *mesh class* represents whether the mesh has indices or not.
    #[inline]
    fn prepare_multidrawable_binned_batch_set<IP>(
        &mut self,
        bins: &IndexMap<BPI::BinKey, RenderBin>,
        output_index: &mut u32,
        data_buffer: &mut UninitBufferVec<GFBD::BufferData>,
        indexed_work_item_buffer: &mut RawBufferVec<PreprocessWorkItem>,
        mesh_class_buffers: &mut MeshClassIndirectParametersBuffers<IP>,
        batch_sets: &mut Vec<BinnedRenderPhaseBatchSet<BPI::BinKey>>,
    ) where
        IP: Clone + ShaderSize + WriteInto,
    {
        let current_indexed_batch_set_index = self.batch_set_index;
        let current_output_index = *output_index;

        let indirect_parameters_base = self.indirect_parameters_index;

        // We're going to write the first entity into the batch set. Do this
        // here so that we can preload the bin into cache as a side effect.
        let Some((first_bin_key, first_bin)) = bins.iter().next() else {
            return;
        };
        let first_bin_len = first_bin.entities().len();
        let first_bin_entity = first_bin
            .entities()
            .keys()
            .next()
            .copied()
            .unwrap_or(MainEntity::from(Entity::PLACEHOLDER));

        // Traverse the batch set, processing each bin.
        for bin in bins.values() {
            // Record the first output index for this batch, as well as its own
            // index.
            mesh_class_buffers
                .cpu_metadata
                .push(IndirectParametersCpuMetadata {
                    base_output_index: *output_index,
                    batch_set_index: self.batch_set_index,
                });

            // Traverse the bin, pushing `PreprocessWorkItem`s for each entity
            // within it. This is a hot loop, so make it as fast as possible.
            for &input_index in bin.entities().values() {
                indexed_work_item_buffer.push(PreprocessWorkItem {
                    input_index: *input_index,
                    output_or_indirect_parameters_index: self.indirect_parameters_index,
                });
            }

            // Reserve space for the appropriate number of entities in the data
            // buffer. Also, advance the output index and work item count.
            let bin_entity_count = bin.entities().len();
            data_buffer.add_multiple(bin_entity_count);
            *output_index += bin_entity_count as u32;
            self.work_item_count += bin_entity_count;

            self.indirect_parameters_index += 1;
        }

        // Reserve space for the bins in this batch set in the GPU buffers.
        let bin_count = bins.len();
        mesh_class_buffers.gpu_metadata.add_multiple(bin_count);
        mesh_class_buffers.data.add_multiple(bin_count);

        // Write the information the GPU will need about this batch set.
        mesh_class_buffers.batch_sets.push(IndirectBatchSet {
            indirect_parameters_base,
            indirect_parameters_count: 0,
        });
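        // (`indirect_parameters_count` starts out zero here; the GPU fills it
        // in during preprocessing, once culling determines how many batches in
        // this set actually draw.)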

        self.batch_set_index += 1;

        // Record the batch set. The render node later processes this record to
        // render the batches.
        batch_sets.push(BinnedRenderPhaseBatchSet {
            first_batch: BinnedRenderPhaseBatch {
                representative_entity: (Entity::PLACEHOLDER, first_bin_entity),
                instance_range: current_output_index..(current_output_index + first_bin_len as u32),
                extra_index: PhaseItemExtraIndex::maybe_indirect_parameters_index(NonMaxU32::new(
                    indirect_parameters_base,
                )),
            },
            bin_key: (*first_bin_key).clone(),
            batch_count: self.indirect_parameters_index - indirect_parameters_base,
            index: current_indexed_batch_set_index,
        });
    }
}

/// A system that gathers up the per-phase GPU buffers and inserts them into the
/// [`BatchedInstanceBuffers`] and [`IndirectParametersBuffers`] tables.
///
/// This runs after the [`batch_and_prepare_binned_render_phase`] or
/// [`batch_and_prepare_sorted_render_phase`] systems. It takes the per-phase
/// [`PhaseBatchedInstanceBuffers`] and [`PhaseIndirectParametersBuffers`]
/// resources and inserts them into the global [`BatchedInstanceBuffers`] and
/// [`IndirectParametersBuffers`] tables.
///
/// This system exists so that the [`batch_and_prepare_binned_render_phase`] and
/// [`batch_and_prepare_sorted_render_phase`] systems can run in parallel with
/// one another. If those two systems manipulated [`BatchedInstanceBuffers`] and
/// [`IndirectParametersBuffers`] directly, then they wouldn't be able to run in
/// parallel.
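///
/// The allocation reuse both tables rely on is the standard
/// `mem::take`-and-reclaim idiom. A minimal sketch with plain `std` types (the
/// names here are illustrative, not part of this module):
///
/// ```
/// use core::mem;
/// use std::collections::HashMap;
///
/// fn publish(per_phase: &mut Vec<u32>, global: &mut HashMap<&'static str, Vec<u32>>) {
///     // Move this phase's buffer into the global table, leaving an empty
///     // one behind...
///     let buffer = mem::take(per_phase);
///     // ...and reclaim last frame's buffer, cleared, so that its allocation
///     // can be reused next frame.
///     if let Some(mut old) = global.insert("phase", buffer) {
///         old.clear();
///         *per_phase = old;
///     }
/// }
/// ```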
pub fn collect_buffers_for_phase<PI, GFBD>(
    mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<PI, GFBD::BufferData>>,
    mut phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<PI>>,
    mut batched_instance_buffers: ResMut<
        BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>,
    >,
    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
) where
    PI: PhaseItem,
    GFBD: GetFullBatchData + Send + Sync + 'static,
{
    // Insert the `PhaseBatchedInstanceBuffers` into the global table. Replace
    // the contents of the per-phase resource with the old batched instance
    // buffers in order to reuse allocations.
    let untyped_phase_batched_instance_buffers =
        mem::take(&mut phase_batched_instance_buffers.buffers);
    if let Some(mut old_untyped_phase_batched_instance_buffers) = batched_instance_buffers
        .phase_instance_buffers
        .insert(TypeId::of::<PI>(), untyped_phase_batched_instance_buffers)
    {
        old_untyped_phase_batched_instance_buffers.clear();
        phase_batched_instance_buffers.buffers = old_untyped_phase_batched_instance_buffers;
    }

    // Insert the `PhaseIndirectParametersBuffers` into the global table.
    // Replace the contents of the per-phase resource with the old indirect
    // parameters buffers in order to reuse allocations.
    let untyped_phase_indirect_parameters_buffers = mem::replace(
        &mut phase_indirect_parameters_buffers.buffers,
        UntypedPhaseIndirectParametersBuffers::new(
            indirect_parameters_buffers.allow_copies_from_indirect_parameter_buffers,
        ),
    );
    if let Some(mut old_untyped_phase_indirect_parameters_buffers) = indirect_parameters_buffers
        .insert(
            TypeId::of::<PI>(),
            untyped_phase_indirect_parameters_buffers,
        )
    {
        old_untyped_phase_indirect_parameters_buffers.clear();
        phase_indirect_parameters_buffers.buffers = old_untyped_phase_indirect_parameters_buffers;
    }
}

/// A system that writes all instance buffers to the GPU.
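///
/// This covers the current and previous frames' instance input buffers, as
/// well as each phase's data buffer, work item buffers, and late indirect
/// parameters buffers.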
pub fn write_batched_instance_buffers<GFBD>(
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
) where
    GFBD: GetFullBatchData,
{
    let BatchedInstanceBuffers {
        current_input_buffer,
        previous_input_buffer,
        phase_instance_buffers,
    } = gpu_array_buffer.into_inner();

    current_input_buffer
        .buffer
        .write_buffer(&render_device, &render_queue);
    previous_input_buffer
        .buffer
        .write_buffer(&render_device, &render_queue);

    for phase_instance_buffers in phase_instance_buffers.values_mut() {
        let UntypedPhaseBatchedInstanceBuffers {
            ref mut data_buffer,
            ref mut work_item_buffers,
            ref mut late_indexed_indirect_parameters_buffer,
            ref mut late_non_indexed_indirect_parameters_buffer,
        } = *phase_instance_buffers;

        data_buffer.write_buffer(&render_device);
        late_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue);
        late_non_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue);

        for phase_work_item_buffers in work_item_buffers.values_mut() {
            match *phase_work_item_buffers {
                PreprocessWorkItemBuffers::Direct(ref mut buffer_vec) => {
                    buffer_vec.write_buffer(&render_device, &render_queue);
                }
                PreprocessWorkItemBuffers::Indirect {
                    ref mut indexed,
                    ref mut non_indexed,
                    ref mut gpu_occlusion_culling,
                } => {
                    indexed.write_buffer(&render_device, &render_queue);
                    non_indexed.write_buffer(&render_device, &render_queue);

                    if let Some(GpuOcclusionCullingWorkItemBuffers {
                        ref mut late_indexed,
                        ref mut late_non_indexed,
                        late_indirect_parameters_indexed_offset: _,
                        late_indirect_parameters_non_indexed_offset: _,
                    }) = *gpu_occlusion_culling
                    {
                        if !late_indexed.is_empty() {
                            late_indexed.write_buffer(&render_device);
                        }
                        if !late_non_indexed.is_empty() {
                            late_non_indexed.write_buffer(&render_device);
                        }
                    }
                }
            }
        }
    }
}

/// A system that clears out the per-phase indirect parameters buffers at the
/// start of each frame.
pub fn clear_indirect_parameters_buffers(
    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
) {
    for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() {
        phase_indirect_parameters_buffers.clear();
    }
}

/// A system that writes each phase's indirect parameters buffers to the GPU.
pub fn write_indirect_parameters_buffers(
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
) {
    for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() {
        phase_indirect_parameters_buffers
            .indexed
            .data
            .write_buffer(&render_device);
        phase_indirect_parameters_buffers
            .non_indexed
            .data
            .write_buffer(&render_device);

        phase_indirect_parameters_buffers
            .indexed
            .cpu_metadata
            .write_buffer(&render_device, &render_queue);
        phase_indirect_parameters_buffers
            .non_indexed
            .cpu_metadata
            .write_buffer(&render_device, &render_queue);

        phase_indirect_parameters_buffers
            .non_indexed
            .gpu_metadata
            .write_buffer(&render_device);
        phase_indirect_parameters_buffers
            .indexed
            .gpu_metadata
            .write_buffer(&render_device);

        phase_indirect_parameters_buffers
            .indexed
            .batch_sets
            .write_buffer(&render_device, &render_queue);
        phase_indirect_parameters_buffers
            .non_indexed
            .batch_sets
            .write_buffer(&render_device, &render_queue);
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn instance_buffer_correct_behavior() {
        let mut instance_buffer = InstanceInputUniformBuffer::new();

        let index = instance_buffer.add(2);
        instance_buffer.remove(index);
        // Removal frees the slot without zeroing the underlying storage:
        // `get_unchecked` still sees the stale value, while `get` reports the
        // slot as vacant.
        assert_eq!(instance_buffer.get_unchecked(index), 2);
        assert_eq!(instance_buffer.get(index), None);

        // Adding a new element reuses the freed slot, so the buffer doesn't
        // grow.
        instance_buffer.add(5);
        assert_eq!(instance_buffer.buffer().len(), 1);
    }
}