//! Batching functionality when GPU preprocessing is in use.

use core::{any::TypeId, marker::PhantomData, mem};

use bevy_app::{App, Plugin};
use bevy_derive::{Deref, DerefMut};
use bevy_ecs::{
    prelude::Entity,
    query::{Has, With},
    resource::Resource,
    schedule::IntoScheduleConfigs as _,
    system::{Query, Res, ResMut, StaticSystemParam},
    world::{FromWorld, World},
};
use bevy_encase_derive::ShaderType;
use bevy_log::{error, info};
use bevy_math::UVec4;
use bevy_platform::collections::{hash_map::Entry, HashMap, HashSet};
use bevy_tasks::ComputeTaskPool;
use bevy_utils::{default, TypeIdMap};
use bytemuck::{Pod, Zeroable};
use encase::{internal::WriteInto, ShaderSize};
use indexmap::IndexMap;
use nonmax::NonMaxU32;
use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features};

use crate::{
    occlusion_culling::OcclusionCulling,
    render_phase::{
        BinnedPhaseItem, BinnedRenderPhaseBatch, BinnedRenderPhaseBatchSet,
        BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, PhaseItem,
        PhaseItemBatchSetKey as _, PhaseItemExtraIndex, RenderBin, SortedPhaseItem,
        SortedRenderPhase, UnbatchableBinnedEntityIndices, ViewBinnedRenderPhases,
        ViewSortedRenderPhases,
    },
    render_resource::{Buffer, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
    renderer::{RenderAdapter, RenderAdapterInfo, RenderDevice, RenderQueue, WgpuWrapper},
    sync_world::MainEntity,
    view::{ExtractedView, NoIndirectDrawing, RetainedViewEntity},
    Render, RenderApp, RenderDebugFlags, RenderSystems,
};

use super::{BatchMeta, GetBatchData, GetFullBatchData};

#[derive(Default)]
pub struct BatchingPlugin {
    /// Debugging flags that can optionally be set when constructing the renderer.
    pub debug_flags: RenderDebugFlags,
}

impl Plugin for BatchingPlugin {
    fn build(&self, app: &mut App) {
        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };

        render_app
            .insert_resource(IndirectParametersBuffers::new(
                self.debug_flags
                    .contains(RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS),
            ))
            .add_systems(
                Render,
                write_indirect_parameters_buffers.in_set(RenderSystems::PrepareResourcesFlush),
            )
            .add_systems(
                Render,
                clear_indirect_parameters_buffers.in_set(RenderSystems::ManageViews),
            );
    }

    fn finish(&self, app: &mut App) {
        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };

        render_app.init_resource::<GpuPreprocessingSupport>();
    }
}

/// Records whether GPU preprocessing and/or GPU culling are supported on the
/// device.
///
/// No GPU preprocessing is supported on WebGL because of the lack of compute
/// shader support. GPU preprocessing is supported on DirectX 12, but due to [a
/// `wgpu` limitation] GPU culling is not.
///
/// [a `wgpu` limitation]: https://github.com/gfx-rs/wgpu/issues/2471
#[derive(Clone, Copy, PartialEq, Resource)]
pub struct GpuPreprocessingSupport {
    /// The maximum amount of GPU preprocessing available on this platform.
    pub max_supported_mode: GpuPreprocessingMode,
}

impl GpuPreprocessingSupport {
    /// Returns true if this GPU preprocessing support level isn't `None`.
    #[inline]
    pub fn is_available(&self) -> bool {
        self.max_supported_mode != GpuPreprocessingMode::None
    }

    /// Returns the given GPU preprocessing mode, capped to the current
    /// preprocessing mode.
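    ///
    /// A sketch of the expected capping behavior (illustrative only, so the
    /// example is marked `ignore` and not compiled as a doctest):
    ///
    /// ```rust,ignore
    /// let support = GpuPreprocessingSupport {
    ///     max_supported_mode: GpuPreprocessingMode::PreprocessingOnly,
    /// };
    /// // `Culling` is capped down to the supported level...
    /// assert!(support.min(GpuPreprocessingMode::Culling)
    ///     == GpuPreprocessingMode::PreprocessingOnly);
    /// // ...and `None` always wins.
    /// assert!(support.min(GpuPreprocessingMode::None) == GpuPreprocessingMode::None);
    /// ```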
    pub fn min(&self, mode: GpuPreprocessingMode) -> GpuPreprocessingMode {
        match (self.max_supported_mode, mode) {
            (GpuPreprocessingMode::None, _) | (_, GpuPreprocessingMode::None) => {
                GpuPreprocessingMode::None
            }
            (mode, GpuPreprocessingMode::Culling) | (GpuPreprocessingMode::Culling, mode) => mode,
            (GpuPreprocessingMode::PreprocessingOnly, GpuPreprocessingMode::PreprocessingOnly) => {
                GpuPreprocessingMode::PreprocessingOnly
            }
        }
    }

    /// Returns true if GPU culling is supported on this platform.
    pub fn is_culling_supported(&self) -> bool {
        self.max_supported_mode == GpuPreprocessingMode::Culling
    }
}

/// The amount of GPU preprocessing (compute and indirect draw) that we do.
#[derive(Clone, Copy, PartialEq)]
pub enum GpuPreprocessingMode {
    /// No GPU preprocessing is in use at all.
    ///
    /// This is used when GPU compute isn't available.
    None,

    /// GPU preprocessing is in use, but GPU culling isn't.
    ///
    /// This is used when the [`NoIndirectDrawing`] component is present on the
    /// camera.
    PreprocessingOnly,

    /// Both GPU preprocessing and GPU culling are in use.
    ///
    /// This is used by default.
    Culling,
}

/// The GPU buffers holding the data needed to render batches.
///
/// For example, in the 3D PBR pipeline this holds `MeshUniform`s, which are the
/// `BD` type parameter in that mode.
///
/// We have a separate *buffer data input* type (`BDI`) here, which a compute
/// shader is expected to expand to the full buffer data (`BD`) type. GPU
/// uniform building is generally faster and uses less system-RAM-to-VRAM bus
/// bandwidth, but it's only implemented for some pipelines (for example, not
/// currently in the 2D pipeline) and only when compute shaders are available.
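///
/// A minimal sketch of the `BDI` → `BD` relationship (the field layouts here
/// are hypothetical, not the real `MeshInputUniform`/`MeshUniform`
/// definitions):
///
/// ```rust,ignore
/// // The compact input that the CPU uploads each frame (`BDI`)...
/// struct MyInputUniform {
///     transform: [f32; 12],
///     previous_input_index: u32,
/// }
///
/// // ...which a compute shader expands into the full per-instance data (`BD`).
/// struct MyUniform {
///     world_from_local: [f32; 16],
///     previous_world_from_local: [f32; 16],
/// }
/// ```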
#[derive(Resource)]
pub struct BatchedInstanceBuffers<BD, BDI>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
    BDI: Pod + Default,
{
    /// The uniform data inputs for the current frame.
    ///
    /// These are uploaded during the extraction phase.
    pub current_input_buffer: InstanceInputUniformBuffer<BDI>,

    /// The uniform data inputs for the previous frame.
    ///
    /// The indices don't generally line up between `current_input_buffer`
    /// and `previous_input_buffer`, because, among other reasons, entities
    /// can spawn or despawn between frames. Instead, each current buffer
    /// data input uniform is expected to contain the index of the
    /// corresponding buffer data input uniform in this list.
    pub previous_input_buffer: InstanceInputUniformBuffer<BDI>,

    /// The data needed to render buffers for each phase.
    ///
    /// The keys of this map are the type IDs of each phase: e.g. `Opaque3d`,
    /// `AlphaMask3d`, etc.
    pub phase_instance_buffers: TypeIdMap<UntypedPhaseBatchedInstanceBuffers<BD>>,
}

impl<BD, BDI> Default for BatchedInstanceBuffers<BD, BDI>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
    BDI: Pod + Sync + Send + Default + 'static,
{
    fn default() -> Self {
        BatchedInstanceBuffers {
            current_input_buffer: InstanceInputUniformBuffer::new(),
            previous_input_buffer: InstanceInputUniformBuffer::new(),
            phase_instance_buffers: HashMap::default(),
        }
    }
}

/// The GPU buffers holding the data needed to render batches for a single
/// phase.
///
/// These are split out per phase so that we can run the phases in parallel.
/// This is the version of the structure that has a type parameter, which
/// enables Bevy's scheduler to run the batching operations for the different
/// phases in parallel.
///
/// See the documentation for [`BatchedInstanceBuffers`] for more information.
#[derive(Resource)]
pub struct PhaseBatchedInstanceBuffers<PI, BD>
where
    PI: PhaseItem,
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    /// The buffers for this phase.
    pub buffers: UntypedPhaseBatchedInstanceBuffers<BD>,
    phantom: PhantomData<PI>,
}

impl<PI, BD> Default for PhaseBatchedInstanceBuffers<PI, BD>
where
    PI: PhaseItem,
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    fn default() -> Self {
        PhaseBatchedInstanceBuffers {
            buffers: UntypedPhaseBatchedInstanceBuffers::default(),
            phantom: PhantomData,
        }
    }
}

/// The GPU buffers holding the data needed to render batches for a single
/// phase, without a type parameter for that phase.
///
/// Since this structure doesn't have a type parameter, it can be placed in
/// [`BatchedInstanceBuffers::phase_instance_buffers`].
pub struct UntypedPhaseBatchedInstanceBuffers<BD>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    /// A storage area for the buffer data that the GPU compute shader is
    /// expected to write to.
    ///
    /// There will be one entry for each index.
    pub data_buffer: UninitBufferVec<BD>,

    /// The index of the buffer data in the current input buffer that
    /// corresponds to each instance.
    ///
    /// This is keyed off each view. Each view has a separate buffer.
    pub work_item_buffers: HashMap<RetainedViewEntity, PreprocessWorkItemBuffers>,

    /// A buffer that holds the number of indexed meshes that weren't visible in
    /// the previous frame, when GPU occlusion culling is in use.
    ///
    /// There's one set of [`LatePreprocessWorkItemIndirectParameters`] per
    /// view. Bevy uses this value to determine how many threads to dispatch to
    /// check meshes that weren't visible last frame to see if they became
    /// newly visible this frame.
    pub late_indexed_indirect_parameters_buffer:
        RawBufferVec<LatePreprocessWorkItemIndirectParameters>,

    /// A buffer that holds the number of non-indexed meshes that weren't
    /// visible in the previous frame, when GPU occlusion culling is in use.
    ///
    /// There's one set of [`LatePreprocessWorkItemIndirectParameters`] per
    /// view. Bevy uses this value to determine how many threads to dispatch to
    /// check meshes that weren't visible last frame to see if they became
    /// newly visible this frame.
    pub late_non_indexed_indirect_parameters_buffer:
        RawBufferVec<LatePreprocessWorkItemIndirectParameters>,
}

/// Holds the GPU buffer of instance input data, which is the data about each
/// mesh instance that the CPU provides.
///
/// `BDI` is the *buffer data input* type, which the GPU mesh preprocessing
/// shader is expected to expand to the full *buffer data* type.
pub struct InstanceInputUniformBuffer<BDI>
where
    BDI: Pod + Default,
{
    /// The buffer containing the data that will be uploaded to the GPU.
    buffer: RawBufferVec<BDI>,

    /// Indices of slots that are free within the buffer.
    ///
    /// When adding data, we preferentially overwrite these slots first before
    /// growing the buffer itself.
    free_uniform_indices: Vec<u32>,
}

impl<BDI> InstanceInputUniformBuffer<BDI>
where
    BDI: Pod + Default,
{
    /// Creates a new, empty buffer.
    pub fn new() -> InstanceInputUniformBuffer<BDI> {
        InstanceInputUniformBuffer {
            buffer: RawBufferVec::new(BufferUsages::STORAGE),
            free_uniform_indices: vec![],
        }
    }

    /// Clears the buffer and the list of free slots out.
    pub fn clear(&mut self) {
        self.buffer.clear();
        self.free_uniform_indices.clear();
    }

    /// Returns the [`RawBufferVec`] corresponding to this input uniform buffer.
    #[inline]
    pub fn buffer(&self) -> &RawBufferVec<BDI> {
        &self.buffer
    }

    /// Adds a new piece of buffered data to the uniform buffer and returns its
    /// index.
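    ///
    /// A sketch of the free-list reuse this provides (illustrative; marked
    /// `ignore` so it isn't compiled as a doctest):
    ///
    /// ```rust,ignore
    /// let mut buffer = InstanceInputUniformBuffer::<u32>::new();
    /// let index = buffer.add(42);        // appends at index 0
    /// buffer.remove(index);              // marks slot 0 as free
    /// assert_eq!(buffer.add(43), index); // reuses slot 0 instead of growing
    /// ```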
    pub fn add(&mut self, element: BDI) -> u32 {
        match self.free_uniform_indices.pop() {
            Some(uniform_index) => {
                self.buffer.values_mut()[uniform_index as usize] = element;
                uniform_index
            }
            None => self.buffer.push(element) as u32,
        }
    }

    /// Removes a piece of buffered data from the uniform buffer.
    ///
    /// This simply marks the data as free.
    pub fn remove(&mut self, uniform_index: u32) {
        self.free_uniform_indices.push(uniform_index);
    }

    /// Returns the piece of buffered data at the given index.
    ///
    /// Returns [`None`] if the index is out of bounds or the data has been
    /// removed.
    pub fn get(&self, uniform_index: u32) -> Option<BDI> {
        if (uniform_index as usize) >= self.buffer.len()
            || self.free_uniform_indices.contains(&uniform_index)
        {
            None
        } else {
            Some(self.get_unchecked(uniform_index))
        }
    }

    /// Returns the piece of buffered data at the given index.
    /// Can return data that has previously been removed.
    ///
    /// # Panics
    /// Panics if `uniform_index` is not in bounds of [`Self::buffer`].
    pub fn get_unchecked(&self, uniform_index: u32) -> BDI {
        self.buffer.values()[uniform_index as usize]
    }

    /// Stores a piece of buffered data at the given index.
    ///
    /// # Panics
    /// Panics if `uniform_index` is not in bounds of [`Self::buffer`].
    pub fn set(&mut self, uniform_index: u32, element: BDI) {
        self.buffer.values_mut()[uniform_index as usize] = element;
    }

    /// Ensures that the buffer is nonempty, which the GPU requires before an
    /// upload can take place.
    pub fn ensure_nonempty(&mut self) {
        if self.buffer.is_empty() {
            self.buffer.push(default());
        }
    }

    /// Returns the number of instances in this buffer.
    pub fn len(&self) -> usize {
        self.buffer.len()
    }

    /// Returns true if this buffer contains no instances.
    pub fn is_empty(&self) -> bool {
        self.buffer.is_empty()
    }

    /// Consumes this [`InstanceInputUniformBuffer`] and returns the raw buffer
    /// ready to be uploaded to the GPU.
    pub fn into_buffer(self) -> RawBufferVec<BDI> {
        self.buffer
    }
}

impl<BDI> Default for InstanceInputUniformBuffer<BDI>
where
    BDI: Pod + Default,
{
    fn default() -> Self {
        Self::new()
    }
}

/// The buffer of GPU preprocessing work items for a single view.
#[cfg_attr(
    not(target_arch = "wasm32"),
    expect(
        clippy::large_enum_variant,
        reason = "See https://github.com/bevyengine/bevy/issues/19220"
    )
)]
pub enum PreprocessWorkItemBuffers {
    /// The work items we use if we aren't using indirect drawing.
    ///
    /// Because we don't have to separate indexed from non-indexed meshes in
    /// direct mode, we only have a single buffer here.
    Direct(RawBufferVec<PreprocessWorkItem>),

    /// The buffer of work items we use if we are using indirect drawing.
    ///
    /// We need to separate out indexed meshes from non-indexed meshes in this
    /// case because the indirect parameters for these two types of meshes have
    /// different sizes.
    Indirect {
        /// The buffer of work items corresponding to indexed meshes.
        indexed: RawBufferVec<PreprocessWorkItem>,
        /// The buffer of work items corresponding to non-indexed meshes.
        non_indexed: RawBufferVec<PreprocessWorkItem>,
        /// The work item buffers we use when GPU occlusion culling is in use.
        gpu_occlusion_culling: Option<GpuOcclusionCullingWorkItemBuffers>,
    },
}

/// The work item buffers we use when GPU occlusion culling is in use.
pub struct GpuOcclusionCullingWorkItemBuffers {
    /// The buffer of work items corresponding to indexed meshes.
    pub late_indexed: UninitBufferVec<PreprocessWorkItem>,
    /// The buffer of work items corresponding to non-indexed meshes.
    pub late_non_indexed: UninitBufferVec<PreprocessWorkItem>,
    /// The offset into the
    /// [`UntypedPhaseBatchedInstanceBuffers::late_indexed_indirect_parameters_buffer`]
    /// where this view's indirect dispatch counts for indexed meshes live.
    pub late_indirect_parameters_indexed_offset: u32,
    /// The offset into the
    /// [`UntypedPhaseBatchedInstanceBuffers::late_non_indexed_indirect_parameters_buffer`]
    /// where this view's indirect dispatch counts for non-indexed meshes live.
    pub late_indirect_parameters_non_indexed_offset: u32,
}

/// A GPU-side data structure that stores the number of workgroups to dispatch
/// for the second phase of GPU occlusion culling.
///
/// The late mesh preprocessing phase checks meshes that weren't visible last
/// frame to determine if they're potentially visible this frame.
#[derive(Clone, Copy, ShaderType, Pod, Zeroable)]
#[repr(C)]
pub struct LatePreprocessWorkItemIndirectParameters {
    /// The number of workgroups to dispatch.
    ///
    /// This will be equal to `work_item_count / 64`, rounded *up*.
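    /// For example, 130 work items dispatch `ceil(130 / 64) = 3` workgroups
    /// (following the 64-thread workgroup size stated above).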
    dispatch_x: u32,
    /// The number of workgroups along the abstract Y axis to dispatch: always
    /// 1.
    dispatch_y: u32,
    /// The number of workgroups along the abstract Z axis to dispatch: always
    /// 1.
    dispatch_z: u32,
    /// The actual number of work items.
    ///
    /// The GPU indirect dispatch doesn't read this, but it's used internally to
    /// determine the actual number of work items that exist in the late
    /// preprocessing work item buffer.
    work_item_count: u32,
    /// Padding to 64-byte boundaries for some hardware.
    pad: UVec4,
}

impl Default for LatePreprocessWorkItemIndirectParameters {
    fn default() -> LatePreprocessWorkItemIndirectParameters {
        LatePreprocessWorkItemIndirectParameters {
            dispatch_x: 0,
            dispatch_y: 1,
            dispatch_z: 1,
            work_item_count: 0,
            pad: default(),
        }
    }
}

/// Returns the set of work item buffers for the given view, first creating it
/// if necessary.
///
/// Bevy uses work item buffers to tell the mesh preprocessing compute shader
/// which meshes are to be drawn.
///
/// You may need to call this function if you're implementing your own custom
/// render phases. See the `specialized_mesh_pipeline` example.
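///
/// A hedged sketch of a typical call site (the surrounding variable names are
/// assumptions; compare the real call in `batch_and_prepare_sorted_render_phase`
/// below):
///
/// ```rust,ignore
/// let work_item_buffer = get_or_create_work_item_buffer::<MyPhaseItem>(
///     &mut phase_buffers.work_item_buffers,
///     extracted_view.retained_view_entity,
///     no_indirect_drawing,
///     gpu_occlusion_culling,
/// );
/// ```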
pub fn get_or_create_work_item_buffer<'a, I>(
    work_item_buffers: &'a mut HashMap<RetainedViewEntity, PreprocessWorkItemBuffers>,
    view: RetainedViewEntity,
    no_indirect_drawing: bool,
    enable_gpu_occlusion_culling: bool,
) -> &'a mut PreprocessWorkItemBuffers
where
    I: 'static,
{
    let preprocess_work_item_buffers = match work_item_buffers.entry(view) {
        Entry::Occupied(occupied_entry) => occupied_entry.into_mut(),
        Entry::Vacant(vacant_entry) => {
            if no_indirect_drawing {
                vacant_entry.insert(PreprocessWorkItemBuffers::Direct(RawBufferVec::new(
                    BufferUsages::STORAGE,
                )))
            } else {
                vacant_entry.insert(PreprocessWorkItemBuffers::Indirect {
                    indexed: RawBufferVec::new(BufferUsages::STORAGE),
                    non_indexed: RawBufferVec::new(BufferUsages::STORAGE),
                    // We fill this in below if `enable_gpu_occlusion_culling`
                    // is set.
                    gpu_occlusion_culling: None,
                })
            }
        }
    };

    // Initialize the GPU occlusion culling buffers if necessary.
    if let PreprocessWorkItemBuffers::Indirect {
        ref mut gpu_occlusion_culling,
        ..
    } = *preprocess_work_item_buffers
    {
        match (
            enable_gpu_occlusion_culling,
            gpu_occlusion_culling.is_some(),
        ) {
            (false, false) | (true, true) => {}
            (false, true) => {
                *gpu_occlusion_culling = None;
            }
            (true, false) => {
                *gpu_occlusion_culling = Some(GpuOcclusionCullingWorkItemBuffers {
                    late_indexed: UninitBufferVec::new(BufferUsages::STORAGE),
                    late_non_indexed: UninitBufferVec::new(BufferUsages::STORAGE),
                    late_indirect_parameters_indexed_offset: 0,
                    late_indirect_parameters_non_indexed_offset: 0,
                });
            }
        }
    }

    preprocess_work_item_buffers
}

/// Initializes work item buffers for a phase in preparation for a new frame.
pub fn init_work_item_buffers(
    work_item_buffers: &mut PreprocessWorkItemBuffers,
    late_indexed_indirect_parameters_buffer: &'_ mut RawBufferVec<
        LatePreprocessWorkItemIndirectParameters,
    >,
    late_non_indexed_indirect_parameters_buffer: &'_ mut RawBufferVec<
        LatePreprocessWorkItemIndirectParameters,
    >,
) {
    // Add the offsets for indirect parameters that the late phase of mesh
    // preprocessing writes to.
    if let PreprocessWorkItemBuffers::Indirect {
        gpu_occlusion_culling:
            Some(GpuOcclusionCullingWorkItemBuffers {
                ref mut late_indirect_parameters_indexed_offset,
                ref mut late_indirect_parameters_non_indexed_offset,
                ..
            }),
        ..
    } = *work_item_buffers
    {
        *late_indirect_parameters_indexed_offset = late_indexed_indirect_parameters_buffer
            .push(LatePreprocessWorkItemIndirectParameters::default())
            as u32;
        *late_indirect_parameters_non_indexed_offset = late_non_indexed_indirect_parameters_buffer
            .push(LatePreprocessWorkItemIndirectParameters::default())
            as u32;
    }
}

impl PreprocessWorkItemBuffers {
    /// Adds a new work item to the appropriate buffer.
    ///
    /// `indexed` specifies whether the work item corresponds to an indexed
    /// mesh.
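    ///
    /// A sketch of a push in indirect mode (field values are illustrative;
    /// marked `ignore` so it isn't compiled as a doctest):
    ///
    /// ```rust,ignore
    /// // Lands in the `indexed` buffer because the first argument is `true`.
    /// buffers.push(true, PreprocessWorkItem {
    ///     input_index: 7,
    ///     output_or_indirect_parameters_index: 0,
    /// });
    /// ```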
    pub fn push(&mut self, indexed: bool, preprocess_work_item: PreprocessWorkItem) {
        match *self {
            PreprocessWorkItemBuffers::Direct(ref mut buffer) => {
                buffer.push(preprocess_work_item);
            }
            PreprocessWorkItemBuffers::Indirect {
                indexed: ref mut indexed_buffer,
                non_indexed: ref mut non_indexed_buffer,
                ref mut gpu_occlusion_culling,
            } => {
                if indexed {
                    indexed_buffer.push(preprocess_work_item);
                } else {
                    non_indexed_buffer.push(preprocess_work_item);
                }

                if let Some(ref mut gpu_occlusion_culling) = *gpu_occlusion_culling {
                    if indexed {
                        gpu_occlusion_culling.late_indexed.add();
                    } else {
                        gpu_occlusion_culling.late_non_indexed.add();
                    }
                }
            }
        }
    }

    /// Clears out the GPU work item buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        match *self {
            PreprocessWorkItemBuffers::Direct(ref mut buffer) => {
                buffer.clear();
            }
            PreprocessWorkItemBuffers::Indirect {
                indexed: ref mut indexed_buffer,
                non_indexed: ref mut non_indexed_buffer,
                ref mut gpu_occlusion_culling,
            } => {
                indexed_buffer.clear();
                non_indexed_buffer.clear();

                if let Some(ref mut gpu_occlusion_culling) = *gpu_occlusion_culling {
                    gpu_occlusion_culling.late_indexed.clear();
                    gpu_occlusion_culling.late_non_indexed.clear();
                    gpu_occlusion_culling.late_indirect_parameters_indexed_offset = 0;
                    gpu_occlusion_culling.late_indirect_parameters_non_indexed_offset = 0;
                }
            }
        }
    }
}

/// One invocation of the preprocessing shader: i.e. one mesh instance in a
/// view.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct PreprocessWorkItem {
    /// The index of the batch input data in the input buffer that the shader
    /// reads from.
    pub input_index: u32,

    /// In direct mode, the index of the mesh uniform; in indirect mode, the
    /// index of the [`IndirectParametersGpuMetadata`].
    ///
    /// In indirect mode, this is the index of the
    /// [`IndirectParametersGpuMetadata`] in the
    /// `IndirectParametersBuffers::indexed_metadata` or
    /// `IndirectParametersBuffers::non_indexed_metadata`.
    pub output_or_indirect_parameters_index: u32,
}

/// The `wgpu` indirect parameters structure that specifies a GPU draw command.
///
/// This is the variant for indexed meshes. We generate the instances of this
/// structure in the `build_indirect_params.wgsl` compute shader.
#[derive(Clone, Copy, Debug, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersIndexed {
    /// The number of indices that this mesh has.
    pub index_count: u32,
    /// The number of instances we are to draw.
    pub instance_count: u32,
    /// The offset of the first index for this mesh in the index buffer slab.
    pub first_index: u32,
    /// The offset of the first vertex for this mesh in the vertex buffer slab.
    pub base_vertex: u32,
    /// The index of the first mesh instance in the `MeshUniform` buffer.
    pub first_instance: u32,
}

/// The `wgpu` indirect parameters structure that specifies a GPU draw command.
///
/// This is the variant for non-indexed meshes. We generate the instances of
/// this structure in the `build_indirect_params.wgsl` compute shader.
#[derive(Clone, Copy, Debug, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersNonIndexed {
    /// The number of vertices that this mesh has.
    pub vertex_count: u32,
    /// The number of instances we are to draw.
    pub instance_count: u32,
    /// The offset of the first vertex for this mesh in the vertex buffer slab.
    pub base_vertex: u32,
    /// The index of the first mesh instance in the `Mesh` buffer.
    pub first_instance: u32,
}

/// A structure, initialized on CPU and read on GPU, that contains metadata
/// about each batch.
///
/// Each batch will have one instance of this structure.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersCpuMetadata {
    /// The index of the first instance of this mesh in the array of
    /// `MeshUniform`s.
    ///
    /// Note that this is the *first* output index in this batch. Since each
    /// instance of this structure can refer to arbitrarily many instances, the
    /// `MeshUniform`s corresponding to this batch span the indices
    /// `base_output_index..(base_output_index + instance_count)`.
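    ///
    /// For example, a batch with `base_output_index` 8 and an instance count
    /// of 3 occupies indices `8..11`.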
    pub base_output_index: u32,

    /// The index of the batch set that this batch belongs to in the
    /// [`IndirectBatchSet`] buffer.
    ///
    /// A *batch set* is a set of meshes that may be multi-drawn together.
    /// Multiple batches (and therefore multiple instances of
    /// [`IndirectParametersGpuMetadata`] structures) can be part of the same
    /// batch set.
    pub batch_set_index: u32,
}

/// A structure, written and read by the GPU, that records how many instances
/// of each mesh are actually to be drawn.
///
/// The GPU mesh preprocessing shader increments the
/// [`Self::early_instance_count`] and [`Self::late_instance_count`] as it
/// determines that meshes are visible. The indirect parameter building shader
/// reads this metadata in order to construct the indirect draw parameters.
///
/// Each batch will have one instance of this structure.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersGpuMetadata {
    /// The index of the first mesh in this batch in the array of
    /// `MeshInputUniform`s.
    pub mesh_index: u32,

    /// The number of instances that were judged visible last frame.
    ///
    /// The CPU sets this value to 0, and the GPU mesh preprocessing shader
    /// increments it as it culls mesh instances.
    pub early_instance_count: u32,

    /// The number of instances that have been judged potentially visible this
    /// frame that weren't in the last frame's potentially visible set.
    ///
    /// The CPU sets this value to 0, and the GPU mesh preprocessing shader
    /// increments it as it culls mesh instances.
    pub late_instance_count: u32,
}

/// A structure, shared between CPU and GPU, that holds the number of on-GPU
/// indirect draw commands for each *batch set*.
///
/// A *batch set* is a set of meshes that may be multi-drawn together.
///
/// If the current hardware and driver support `multi_draw_indirect_count`, the
/// indirect parameters building shader increments
/// [`Self::indirect_parameters_count`] as it generates indirect parameters. The
/// `multi_draw_indirect_count` command reads
/// [`Self::indirect_parameters_count`] in order to determine how many commands
/// belong to each batch set.
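///
/// A hedged sketch of the consuming draw on the `wgpu` side (the buffer
/// bindings and offsets here are illustrative, not Bevy's actual call site):
///
/// ```rust,ignore
/// // Requires wgpu's MULTI_DRAW_INDIRECT_COUNT feature.
/// render_pass.multi_draw_indexed_indirect_count(
///     indirect_data_buffer,  // the indirect draw parameters built on GPU
///     0,                     // byte offset: `indirect_parameters_base`
///     batch_sets_buffer,     // holds `indirect_parameters_count`
///     0,                     // byte offset of the count for this batch set
///     max_draw_count,        // upper bound on draws in this batch set
/// );
/// ```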
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectBatchSet {
    /// The number of indirect parameter commands (i.e. batches) in this batch
    /// set.
    ///
    /// The CPU sets this value to 0 before uploading this structure to GPU. The
    /// indirect parameters building shader increments this value as it creates
    /// indirect parameters. Then the `multi_draw_indirect_count` command reads
    /// this value in order to determine how many indirect draw commands to
    /// process.
    pub indirect_parameters_count: u32,

    /// The offset within the `IndirectParametersBuffers::indexed_data` or
    /// `IndirectParametersBuffers::non_indexed_data` of the first indirect draw
    /// command for this batch set.
    ///
    /// The CPU fills out this value.
    pub indirect_parameters_base: u32,
}

/// The buffers containing all the information that indirect draw commands
/// (`multi_draw_indirect`, `multi_draw_indirect_count`) use to draw the scene.
///
/// In addition to the indirect draw buffers themselves, this structure contains
/// the buffers that store [`IndirectParametersGpuMetadata`], which are the
/// structures that culling writes to so that the indirect parameter building
/// pass can determine how many meshes are actually to be drawn.
///
/// These buffers will remain empty if indirect drawing isn't in use.
#[derive(Resource, Deref, DerefMut)]
pub struct IndirectParametersBuffers {
    /// A mapping from a phase type ID to the indirect parameters buffers for
    /// that phase.
    ///
    /// Examples of phase type IDs are `Opaque3d` and `AlphaMask3d`.
    #[deref]
    pub buffers: TypeIdMap<UntypedPhaseIndirectParametersBuffers>,
    /// If true, this sets the `COPY_SRC` flag on indirect draw parameters so
    /// that they can be read back to CPU.
    ///
    /// This is a debugging feature that may reduce performance. It primarily
    /// exists for the `occlusion_culling` example.
    pub allow_copies_from_indirect_parameter_buffers: bool,
}

impl IndirectParametersBuffers {
    /// Initializes a new [`IndirectParametersBuffers`] resource.
    pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> IndirectParametersBuffers {
        IndirectParametersBuffers {
            buffers: TypeIdMap::default(),
            allow_copies_from_indirect_parameter_buffers,
        }
    }
}

/// The buffers containing all the information that indirect draw commands use
/// to draw the scene, for a single phase.
///
/// This is the version of the structure that has a type parameter, so that the
/// batching for different phases can run in parallel.
///
/// See the [`IndirectParametersBuffers`] documentation for more information.
#[derive(Resource)]
pub struct PhaseIndirectParametersBuffers<PI>
where
    PI: PhaseItem,
{
    /// The indirect draw buffers for the phase.
    pub buffers: UntypedPhaseIndirectParametersBuffers,
    phantom: PhantomData<PI>,
}

impl<PI> PhaseIndirectParametersBuffers<PI>
where
    PI: PhaseItem,
{
    pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> Self {
        PhaseIndirectParametersBuffers {
            buffers: UntypedPhaseIndirectParametersBuffers::new(
                allow_copies_from_indirect_parameter_buffers,
            ),
            phantom: PhantomData,
        }
    }
}

/// The buffers containing all the information that indirect draw commands use
/// to draw the scene, for a single phase.
///
/// This is the version of the structure that doesn't have a type parameter, so
/// that it can be inserted into [`IndirectParametersBuffers::buffers`].
///
/// See the [`IndirectParametersBuffers`] documentation for more information.
pub struct UntypedPhaseIndirectParametersBuffers {
    /// Information that indirect draw commands use to draw indexed meshes in
    /// the scene.
    pub indexed: MeshClassIndirectParametersBuffers<IndirectParametersIndexed>,
    /// Information that indirect draw commands use to draw non-indexed meshes
    /// in the scene.
    pub non_indexed: MeshClassIndirectParametersBuffers<IndirectParametersNonIndexed>,
}

impl UntypedPhaseIndirectParametersBuffers {
    /// Creates the indirect parameters buffers.
    pub fn new(
        allow_copies_from_indirect_parameter_buffers: bool,
    ) -> UntypedPhaseIndirectParametersBuffers {
        UntypedPhaseIndirectParametersBuffers {
            non_indexed: MeshClassIndirectParametersBuffers::new(
                allow_copies_from_indirect_parameter_buffers,
            ),
            indexed: MeshClassIndirectParametersBuffers::new(
                allow_copies_from_indirect_parameter_buffers,
            ),
        }
    }

    /// Reserves space for `count` new batches.
    ///
    /// The `indexed` parameter specifies whether the meshes that these batches
    /// correspond to are indexed or not.
    pub fn allocate(&mut self, indexed: bool, count: u32) -> u32 {
        if indexed {
            self.indexed.allocate(count)
        } else {
            self.non_indexed.allocate(count)
        }
    }

    /// Returns the number of batches currently allocated.
    ///
    /// The `indexed` parameter specifies whether the meshes that these batches
    /// correspond to are indexed or not.
    fn batch_count(&self, indexed: bool) -> usize {
        if indexed {
            self.indexed.batch_count()
        } else {
            self.non_indexed.batch_count()
        }
    }

    /// Returns the number of batch sets currently allocated.
    ///
    /// The `indexed` parameter specifies whether the meshes that these batch
    /// sets correspond to are indexed or not.
    pub fn batch_set_count(&self, indexed: bool) -> usize {
        if indexed {
            self.indexed.batch_sets.len()
        } else {
            self.non_indexed.batch_sets.len()
        }
    }

    /// Adds a new batch set to `Self::indexed_batch_sets` or
    /// `Self::non_indexed_batch_sets` as appropriate.
    ///
    /// `indexed` specifies whether the meshes that these batch sets correspond
    /// to are indexed or not. `indirect_parameters_base` specifies the offset
    /// within `Self::indexed_data` or `Self::non_indexed_data` of the first
    /// batch in this batch set.
    #[inline]
    pub fn add_batch_set(&mut self, indexed: bool, indirect_parameters_base: u32) {
        if indexed {
            self.indexed.batch_sets.push(IndirectBatchSet {
                indirect_parameters_base,
                indirect_parameters_count: 0,
            });
        } else {
            self.non_indexed.batch_sets.push(IndirectBatchSet {
                indirect_parameters_base,
                indirect_parameters_count: 0,
            });
        }
    }

    /// Returns the index that a newly-added batch set will have.
    ///
    /// The `indexed` parameter specifies whether the meshes in such a batch set
    /// are indexed or not.
    pub fn get_next_batch_set_index(&self, indexed: bool) -> Option<NonMaxU32> {
        NonMaxU32::new(self.batch_set_count(indexed) as u32)
    }

    /// Clears out the buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        self.indexed.clear();
        self.non_indexed.clear();
    }
}

/// The buffers containing all the information that indirect draw commands use
/// to draw the scene, for a single mesh class (indexed or non-indexed), for a
/// single phase.
pub struct MeshClassIndirectParametersBuffers<IP>
where
    IP: Clone + ShaderSize + WriteInto,
{
    /// The GPU buffer that stores the indirect draw parameters for the meshes.
    ///
    /// The indirect parameters building shader writes to this buffer, while the
    /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
    /// it to perform the draws.
    data: UninitBufferVec<IP>,

    /// The GPU buffer that holds the data used to construct indirect draw
    /// parameters for meshes.
    ///
    /// The GPU mesh preprocessing shader writes to this buffer, and the
    /// indirect parameters building shader reads this buffer to construct the
    /// indirect draw parameters.
    cpu_metadata: RawBufferVec<IndirectParametersCpuMetadata>,

    /// The GPU buffer that holds data built by the GPU used to construct
    /// indirect draw parameters for meshes.
    ///
    /// The GPU mesh preprocessing shader writes to this buffer, and the
    /// indirect parameters building shader reads this buffer to construct the
    /// indirect draw parameters.
    gpu_metadata: UninitBufferVec<IndirectParametersGpuMetadata>,

    /// The GPU buffer that holds the number of indirect draw commands for each
    /// phase of each view, for meshes.
    ///
    /// The indirect parameters building shader writes to this buffer, and the
    /// `multi_draw_indirect_count` command reads from it in order to know how
    /// many indirect draw commands to process.
    batch_sets: RawBufferVec<IndirectBatchSet>,
}

impl<IP> MeshClassIndirectParametersBuffers<IP>
where
    IP: Clone + ShaderSize + WriteInto,
{
    fn new(
        allow_copies_from_indirect_parameter_buffers: bool,
    ) -> MeshClassIndirectParametersBuffers<IP> {
        let mut indirect_parameter_buffer_usages = BufferUsages::STORAGE | BufferUsages::INDIRECT;
        if allow_copies_from_indirect_parameter_buffers {
            indirect_parameter_buffer_usages |= BufferUsages::COPY_SRC;
        }

        MeshClassIndirectParametersBuffers {
            data: UninitBufferVec::new(indirect_parameter_buffer_usages),
            cpu_metadata: RawBufferVec::new(BufferUsages::STORAGE),
            gpu_metadata: UninitBufferVec::new(BufferUsages::STORAGE),
            batch_sets: RawBufferVec::new(indirect_parameter_buffer_usages),
        }
    }

    /// Returns the GPU buffer that stores the indirect draw parameters for
    /// the meshes in this class (indexed or non-indexed).
    ///
    /// The indirect parameters building shader writes to this buffer, while the
    /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
    /// it to perform the draws.
    #[inline]
    pub fn data_buffer(&self) -> Option<&Buffer> {
        self.data.buffer()
    }

    /// Returns the GPU buffer that holds the CPU-constructed data used to
    /// construct indirect draw parameters for meshes.
    ///
    /// The CPU writes to this buffer, and the indirect parameters building
    /// shader reads this buffer to construct the indirect draw parameters.
    #[inline]
    pub fn cpu_metadata_buffer(&self) -> Option<&Buffer> {
        self.cpu_metadata.buffer()
    }

    /// Returns the GPU buffer that holds the GPU-constructed data used to
    /// construct indirect draw parameters for meshes.
    ///
    /// The GPU mesh preprocessing shader writes to this buffer, and the
    /// indirect parameters building shader reads this buffer to construct the
    /// indirect draw parameters.
    #[inline]
    pub fn gpu_metadata_buffer(&self) -> Option<&Buffer> {
        self.gpu_metadata.buffer()
    }

    /// Returns the GPU buffer that holds the number of indirect draw commands
    /// for each phase of each view.
    ///
    /// The indirect parameters building shader writes to this buffer, and the
    /// `multi_draw_indirect_count` command reads from it in order to know how
    /// many indirect draw commands to process.
    #[inline]
    pub fn batch_sets_buffer(&self) -> Option<&Buffer> {
        self.batch_sets.buffer()
    }

    /// Reserves space for `count` new batches.
    ///
    /// This allocates in the [`Self::cpu_metadata`], [`Self::gpu_metadata`],
    /// and [`Self::data`] buffers.
    fn allocate(&mut self, count: u32) -> u32 {
        let length = self.data.len();
        self.cpu_metadata.reserve_internal(count as usize);
        self.gpu_metadata.add_multiple(count as usize);
        for _ in 0..count {
            self.data.add();
            self.cpu_metadata
                .push(IndirectParametersCpuMetadata::default());
        }
        length as u32
    }

    /// Sets the [`IndirectParametersCpuMetadata`] for the mesh at the given
    /// index.
    pub fn set(&mut self, index: u32, value: IndirectParametersCpuMetadata) {
        self.cpu_metadata.set(index, value);
    }

    /// Returns the number of batches corresponding to meshes that are currently
    /// allocated.
    #[inline]
    pub fn batch_count(&self) -> usize {
        self.data.len()
    }

    /// Clears out all the buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        self.data.clear();
        self.cpu_metadata.clear();
        self.gpu_metadata.clear();
        self.batch_sets.clear();
    }
}

impl Default for IndirectParametersBuffers {
    fn default() -> Self {
        // By default, we don't allow GPU indirect parameter mapping, since
        // that's a debugging option.
        Self::new(false)
    }
}

impl FromWorld for GpuPreprocessingSupport {
    fn from_world(world: &mut World) -> Self {
        let adapter = world.resource::<RenderAdapter>();
        let device = world.resource::<RenderDevice>();

        // Filter Android drivers that are incompatible with GPU preprocessing:
        // - We filter out Adreno 730 and earlier GPUs (except 720, as it's newer
        //   than 730).
        // - We filter out Mali GPUs with driver versions lower than 48.
        fn is_non_supported_android_device(adapter_info: &RenderAdapterInfo) -> bool {
            crate::get_adreno_model(adapter_info).is_some_and(|model| model != 720 && model <= 730)
                || crate::get_mali_driver_version(adapter_info).is_some_and(|version| version < 48)
        }

        let culling_feature_support = device
            .features()
            .contains(Features::INDIRECT_FIRST_INSTANCE | Features::IMMEDIATES);
        // Depth downsampling for occlusion culling requires 12 textures
        let limit_support = device.limits().max_storage_textures_per_shader_stage >= 12 &&
            // Even if the adapter supports compute, we might be simulating a lack of
            // compute via device limits (see `WgpuSettingsPriority::WebGL2` and
            // `wgpu::Limits::downlevel_webgl2_defaults()`). This will have set all the
            // `max_compute_*` limits to zero, so we arbitrarily pick one as a canary.
            device.limits().max_compute_workgroup_storage_size != 0;

        let downlevel_support = adapter
            .get_downlevel_capabilities()
            .flags
            .contains(DownlevelFlags::COMPUTE_SHADERS);

        let adapter_info = RenderAdapterInfo(WgpuWrapper::new(adapter.get_info()));

        let max_supported_mode = if device.limits().max_compute_workgroup_size_x == 0
            || is_non_supported_android_device(&adapter_info)
            || adapter_info.backend == wgpu::Backend::Gl
        {
            info!(
                "GPU preprocessing is not supported on this device. \
                 Falling back to CPU preprocessing.",
            );
            GpuPreprocessingMode::None
        } else if !(culling_feature_support && limit_support && downlevel_support) {
info!("Some GPU preprocessing are limited on this device.");
            GpuPreprocessingMode::PreprocessingOnly
        } else {
            info!("GPU preprocessing is fully supported on this device.");
            GpuPreprocessingMode::Culling
        };

        GpuPreprocessingSupport { max_supported_mode }
    }
}

impl<BD, BDI> BatchedInstanceBuffers<BD, BDI>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
    BDI: Pod + Sync + Send + Default + 'static,
{
    /// Creates new buffers.
    pub fn new() -> Self {
        Self::default()
    }

    /// Clears out the buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        for phase_instance_buffer in self.phase_instance_buffers.values_mut() {
            phase_instance_buffer.clear();
        }
    }
}

impl<BD> UntypedPhaseBatchedInstanceBuffers<BD>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    pub fn new() -> Self {
        UntypedPhaseBatchedInstanceBuffers {
            data_buffer: UninitBufferVec::new(BufferUsages::STORAGE),
            work_item_buffers: HashMap::default(),
            late_indexed_indirect_parameters_buffer: RawBufferVec::new(
                BufferUsages::STORAGE | BufferUsages::INDIRECT,
            ),
            late_non_indexed_indirect_parameters_buffer: RawBufferVec::new(
                BufferUsages::STORAGE | BufferUsages::INDIRECT,
            ),
        }
    }

    /// Returns the binding of the buffer that contains the per-instance data.
    ///
    /// This buffer needs to be filled in via a compute shader.
    pub fn instance_data_binding(&self) -> Option<BindingResource<'_>> {
        self.data_buffer
            .buffer()
            .map(|buffer| buffer.as_entire_binding())
    }

    /// Clears out the buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        self.data_buffer.clear();
        self.late_indexed_indirect_parameters_buffer.clear();
        self.late_non_indexed_indirect_parameters_buffer.clear();

        // Clear each individual set of buffers, but don't depopulate the hash
        // table. We want to avoid reallocating these vectors every frame.
        for view_work_item_buffers in self.work_item_buffers.values_mut() {
            view_work_item_buffers.clear();
        }
    }
}

impl<BD> Default for UntypedPhaseBatchedInstanceBuffers<BD>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    fn default() -> Self {
        Self::new()
    }
}

/// Information about a render batch that we're building up during a sorted
/// render phase.
struct SortedRenderBatch<F>
where
    F: GetBatchData,
{
    /// The index of the first phase item in this batch in the list of phase
    /// items.
    phase_item_start_index: u32,

    /// The index of the first instance in this batch in the instance buffer.
    instance_start_index: u32,

    /// True if the mesh in question has an index buffer; false otherwise.
    indexed: bool,

    /// The index of the indirect parameters for this batch in the
    /// [`IndirectParametersBuffers`].
    ///
    /// If CPU culling is being used, then this will be `None`.
    indirect_parameters_index: Option<NonMaxU32>,

    /// Metadata that can be used to determine whether an instance can be placed
    /// into this batch.
    ///
    /// If `None`, the item inside is unbatchable.
    meta: Option<BatchMeta<F::CompareData>>,
}

impl<F> SortedRenderBatch<F>
where
    F: GetBatchData,
{
    /// Finalizes this batch and updates the [`SortedRenderPhase`] with the
    /// appropriate indices.
    ///
    /// `instance_end_index` is the index of the last instance in this batch
    /// plus one.
    fn flush<I>(
        self,
        instance_end_index: u32,
        phase: &mut SortedRenderPhase<I>,
        phase_indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers,
    ) where
        I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
    {
        let (batch_range, batch_extra_index) =
            phase.items[self.phase_item_start_index as usize].batch_range_and_extra_index_mut();
        *batch_range = self.instance_start_index..instance_end_index;
        *batch_extra_index = match self.indirect_parameters_index {
            Some(indirect_parameters_index) => PhaseItemExtraIndex::IndirectParametersIndex {
                range: u32::from(indirect_parameters_index)
                    ..(u32::from(indirect_parameters_index) + 1),
                batch_set_index: None,
            },
            None => PhaseItemExtraIndex::None,
        };
        if let Some(indirect_parameters_index) = self.indirect_parameters_index {
            phase_indirect_parameters_buffers
                .add_batch_set(self.indexed, indirect_parameters_index.into());
        }
    }
}

/// A system that runs early in extraction and clears out all the
/// [`BatchedInstanceBuffers`] for the frame.
///
/// We have to run this during extraction because, if GPU preprocessing is in
/// use, the extraction phase will write to the mesh input uniform buffers
/// directly, so the buffers need to be cleared before then.
pub fn clear_batched_gpu_instance_buffers<GFBD>(
    gpu_batched_instance_buffers: Option<
        ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
    >,
) where
    GFBD: GetFullBatchData,
{
    // Don't clear the entire table, because that would delete the buffers, and
    // we want to reuse those allocations.
    if let Some(mut gpu_batched_instance_buffers) = gpu_batched_instance_buffers {
        gpu_batched_instance_buffers.clear();
    }
}

/// A system that removes GPU preprocessing work item buffers that correspond to
/// deleted [`ExtractedView`]s.
///
/// This is a separate system from [`clear_batched_gpu_instance_buffers`]
/// because [`ExtractedView`]s aren't created until after the extraction phase
/// is completed.
pub fn delete_old_work_item_buffers<GFBD>(
    mut gpu_batched_instance_buffers: ResMut<
        BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>,
    >,
    extracted_views: Query<&ExtractedView>,
) where
    GFBD: GetFullBatchData,
{
    let retained_view_entities: HashSet<_> = extracted_views
        .iter()
        .map(|extracted_view| extracted_view.retained_view_entity)
        .collect();
    for phase_instance_buffers in gpu_batched_instance_buffers
        .phase_instance_buffers
        .values_mut()
    {
        phase_instance_buffers
            .work_item_buffers
            .retain(|retained_view_entity, _| {
                retained_view_entities.contains(retained_view_entity)
            });
    }
}

/// Batch the items in a sorted render phase, when GPU instance buffer building
/// is in use. This means comparing metadata needed to draw each phase item and
/// trying to combine the draws into a batch.
pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
    mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<I, GFBD::BufferData>>,
    mut phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<I>>,
    mut sorted_render_phases: ResMut<ViewSortedRenderPhases<I>>,
    mut views: Query<(
        &ExtractedView,
        Has<NoIndirectDrawing>,
        Has<OcclusionCulling>,
    )>,
    system_param_item: StaticSystemParam<GFBD::Param>,
) where
    I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
    GFBD: GetFullBatchData,
{
    // We only process GPU-built batch data in this function.
    let UntypedPhaseBatchedInstanceBuffers {
        ref mut data_buffer,
        ref mut work_item_buffers,
        ref mut late_indexed_indirect_parameters_buffer,
        ref mut late_non_indexed_indirect_parameters_buffer,
    } = phase_batched_instance_buffers.buffers;

    for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views {
        let Some(phase) = sorted_render_phases.get_mut(&extracted_view.retained_view_entity) else {
            continue;
        };

        // Create the work item buffer if necessary.
        let work_item_buffer = get_or_create_work_item_buffer::<I>(
            work_item_buffers,
            extracted_view.retained_view_entity,
            no_indirect_drawing,
            gpu_occlusion_culling,
        );

        // Initialize those work item buffers in preparation for this new frame.
        init_work_item_buffers(
            work_item_buffer,
            late_indexed_indirect_parameters_buffer,
            late_non_indexed_indirect_parameters_buffer,
        );

        // Walk through the list of phase items, building up batches as we go.
        let mut batch: Option<SortedRenderBatch<GFBD>> = None;

        for current_index in 0..phase.items.len() {
            // Get the index of the input data, and comparison metadata, for
            // this entity.
            let item = &phase.items[current_index];
            let entity = item.main_entity();
            let item_is_indexed = item.indexed();
            let current_batch_input_index =
                GFBD::get_index_and_compare_data(&system_param_item, entity);

            // Unpack that index and metadata. Note that it's possible for the
            // index and/or metadata not to be present, which signifies that
            // this entity is unbatchable. In that case, we break the batch
            // here. If the index isn't present, the item isn't part of this
            // pipeline and so will be skipped.
1389
let Some((current_input_index, current_meta)) = current_batch_input_index else {
1390
// Break a batch if we need to.
1391
if let Some(batch) = batch.take() {
1392
batch.flush(
1393
data_buffer.len() as u32,
1394
phase,
1395
&mut phase_indirect_parameters_buffers.buffers,
1396
);
1397
}
1398
1399
continue;
1400
};
1401
let current_meta =
1402
current_meta.map(|meta| BatchMeta::new(&phase.items[current_index], meta));
1403
1404
// Determine if this entity can be included in the batch we're
1405
// building up.
1406
let can_batch = batch.as_ref().is_some_and(|batch| {
1407
// `None` for metadata indicates that the items are unbatchable.
1408
match (&current_meta, &batch.meta) {
1409
(Some(current_meta), Some(batch_meta)) => current_meta == batch_meta,
1410
(_, _) => false,
1411
}
1412
});
1413
1414
// Make space in the data buffer for this instance.
1415
let output_index = data_buffer.add() as u32;
1416
1417
// If we can't batch, break the existing batch and make a new one.
1418
if !can_batch {
1419
// Break a batch if we need to.
1420
if let Some(batch) = batch.take() {
1421
batch.flush(
1422
output_index,
1423
phase,
1424
&mut phase_indirect_parameters_buffers.buffers,
1425
);
1426
}
1427
1428
let indirect_parameters_index = if no_indirect_drawing {
1429
None
1430
} else if item_is_indexed {
1431
Some(
1432
phase_indirect_parameters_buffers
1433
.buffers
1434
.indexed
1435
.allocate(1),
1436
)
1437
} else {
1438
Some(
1439
phase_indirect_parameters_buffers
1440
.buffers
1441
.non_indexed
1442
.allocate(1),
1443
)
1444
};
1445
1446
// Start a new batch.
1447
if let Some(indirect_parameters_index) = indirect_parameters_index {
1448
GFBD::write_batch_indirect_parameters_metadata(
1449
item_is_indexed,
1450
output_index,
1451
None,
1452
&mut phase_indirect_parameters_buffers.buffers,
1453
indirect_parameters_index,
1454
);
1455
};
1456
1457
batch = Some(SortedRenderBatch {
1458
phase_item_start_index: current_index as u32,
1459
instance_start_index: output_index,
1460
indexed: item_is_indexed,
1461
indirect_parameters_index: indirect_parameters_index.and_then(NonMaxU32::new),
1462
meta: current_meta,
1463
});
1464
}
1465
1466
// Add a new preprocessing work item so that the preprocessing
1467
// shader will copy the per-instance data over.
1468
if let Some(batch) = batch.as_ref() {
1469
work_item_buffer.push(
1470
item_is_indexed,
1471
PreprocessWorkItem {
1472
input_index: current_input_index.into(),
1473
output_or_indirect_parameters_index: match (
1474
no_indirect_drawing,
1475
batch.indirect_parameters_index,
1476
) {
1477
(true, _) => output_index,
1478
(false, Some(indirect_parameters_index)) => {
1479
indirect_parameters_index.into()
1480
}
1481
(false, None) => 0,
1482
},
1483
},
1484
);
1485
}
1486
}
1487
1488
// Flush the final batch if necessary.
1489
if let Some(batch) = batch.take() {
1490
batch.flush(
1491
data_buffer.len() as u32,
1492
phase,
1493
&mut phase_indirect_parameters_buffers.buffers,
1494
);
1495
}
1496
}
1497
}
1498
1499
/// Creates batches for a render phase that uses bins.
pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
    mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<BPI, GFBD::BufferData>>,
    phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<BPI>>,
    mut binned_render_phases: ResMut<ViewBinnedRenderPhases<BPI>>,
    mut views: Query<
        (
            &ExtractedView,
            Has<NoIndirectDrawing>,
            Has<OcclusionCulling>,
        ),
        With<ExtractedView>,
    >,
    param: StaticSystemParam<GFBD::Param>,
) where
    BPI: BinnedPhaseItem,
    GFBD: GetFullBatchData,
{
    let system_param_item = param.into_inner();

    let phase_indirect_parameters_buffers = phase_indirect_parameters_buffers.into_inner();

    let UntypedPhaseBatchedInstanceBuffers {
        ref mut data_buffer,
        ref mut work_item_buffers,
        ref mut late_indexed_indirect_parameters_buffer,
        ref mut late_non_indexed_indirect_parameters_buffer,
    } = phase_batched_instance_buffers.buffers;

    for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views {
        let Some(phase) = binned_render_phases.get_mut(&extracted_view.retained_view_entity) else {
            continue;
        };

        // Create the work item buffer if necessary; otherwise, just mark it as
        // used this frame.
        let work_item_buffer = get_or_create_work_item_buffer::<BPI>(
            work_item_buffers,
            extracted_view.retained_view_entity,
            no_indirect_drawing,
            gpu_occlusion_culling,
        );

        // Initialize those work item buffers in preparation for this new frame.
        init_work_item_buffers(
            work_item_buffer,
            late_indexed_indirect_parameters_buffer,
            late_non_indexed_indirect_parameters_buffer,
        );

        // Prepare multidrawables.
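        //
        // Multidrawable meshes are grouped into batch sets, each of which can
        // be drawn with a single multi-draw indirect call.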
        if let (
            &mut BinnedRenderPhaseBatchSets::MultidrawIndirect(ref mut batch_sets),
            &mut PreprocessWorkItemBuffers::Indirect {
                indexed: ref mut indexed_work_item_buffer,
                non_indexed: ref mut non_indexed_work_item_buffer,
                gpu_occlusion_culling: ref mut gpu_occlusion_culling_buffers,
            },
        ) = (&mut phase.batch_sets, &mut *work_item_buffer)
        {
            let mut output_index = data_buffer.len() as u32;

            // Initialize the state for both indexed and non-indexed meshes.
            let mut indexed_preparer: MultidrawableBatchSetPreparer<BPI, GFBD> =
                MultidrawableBatchSetPreparer::new(
                    phase_indirect_parameters_buffers.buffers.batch_count(true) as u32,
                    phase_indirect_parameters_buffers
                        .buffers
                        .indexed
                        .batch_sets
                        .len() as u32,
                );
            let mut non_indexed_preparer: MultidrawableBatchSetPreparer<BPI, GFBD> =
                MultidrawableBatchSetPreparer::new(
                    phase_indirect_parameters_buffers.buffers.batch_count(false) as u32,
                    phase_indirect_parameters_buffers
                        .buffers
                        .non_indexed
                        .batch_sets
                        .len() as u32,
                );

            // Prepare each batch set.
            for (batch_set_key, bins) in &phase.multidrawable_meshes {
                if batch_set_key.indexed() {
                    indexed_preparer.prepare_multidrawable_binned_batch_set(
                        bins,
                        &mut output_index,
                        data_buffer,
                        indexed_work_item_buffer,
                        &mut phase_indirect_parameters_buffers.buffers.indexed,
                        batch_sets,
                    );
                } else {
                    non_indexed_preparer.prepare_multidrawable_binned_batch_set(
                        bins,
                        &mut output_index,
                        data_buffer,
                        non_indexed_work_item_buffer,
                        &mut phase_indirect_parameters_buffers.buffers.non_indexed,
                        batch_sets,
                    );
                }
            }

            // Reserve space in the occlusion culling buffers, if necessary.
            if let Some(gpu_occlusion_culling_buffers) = gpu_occlusion_culling_buffers {
                gpu_occlusion_culling_buffers
                    .late_indexed
                    .add_multiple(indexed_preparer.work_item_count);
                gpu_occlusion_culling_buffers
                    .late_non_indexed
                    .add_multiple(non_indexed_preparer.work_item_count);
            }
        }

        // Prepare batchables.
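        //
        // Each bin of batchable meshes becomes a single batch whose instance
        // range grows as entities are appended to it.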
        for (key, bin) in &phase.batchable_meshes {
            let mut batch: Option<BinnedRenderPhaseBatch> = None;
            for (&main_entity, &input_index) in bin.entities() {
                let output_index = data_buffer.add() as u32;

                match batch {
                    Some(ref mut batch) => {
                        batch.instance_range.end = output_index + 1;

                        // Append to the current batch.
                        //
                        // If we're in indirect mode, then we write the first
                        // output index of this batch, so that we have a
                        // tightly-packed buffer if GPU culling discards some of
                        // the instances. Otherwise, we can just write the
                        // output index directly.
                        work_item_buffer.push(
                            key.0.indexed(),
                            PreprocessWorkItem {
                                input_index: *input_index,
                                output_or_indirect_parameters_index: match (
                                    no_indirect_drawing,
                                    &batch.extra_index,
                                ) {
                                    (true, _) => output_index,
                                    (
                                        false,
                                        PhaseItemExtraIndex::IndirectParametersIndex {
                                            range: indirect_parameters_range,
                                            ..
                                        },
                                    ) => indirect_parameters_range.start,
                                    (false, &PhaseItemExtraIndex::DynamicOffset(_))
                                    | (false, &PhaseItemExtraIndex::None) => 0,
                                },
                            },
                        );
                    }

                    None if !no_indirect_drawing => {
                        // Start a new batch, in indirect mode.
                        let indirect_parameters_index = phase_indirect_parameters_buffers
                            .buffers
                            .allocate(key.0.indexed(), 1);
                        let batch_set_index = phase_indirect_parameters_buffers
                            .buffers
                            .get_next_batch_set_index(key.0.indexed());

                        GFBD::write_batch_indirect_parameters_metadata(
                            key.0.indexed(),
                            output_index,
                            batch_set_index,
                            &mut phase_indirect_parameters_buffers.buffers,
                            indirect_parameters_index,
                        );
                        work_item_buffer.push(
                            key.0.indexed(),
                            PreprocessWorkItem {
                                input_index: *input_index,
                                output_or_indirect_parameters_index: indirect_parameters_index,
                            },
                        );
                        batch = Some(BinnedRenderPhaseBatch {
                            representative_entity: (Entity::PLACEHOLDER, main_entity),
                            instance_range: output_index..output_index + 1,
                            extra_index: PhaseItemExtraIndex::IndirectParametersIndex {
                                range: indirect_parameters_index..(indirect_parameters_index + 1),
                                batch_set_index: None,
                            },
                        });
                    }

                    None => {
                        // Start a new batch, in direct mode.
                        work_item_buffer.push(
                            key.0.indexed(),
                            PreprocessWorkItem {
                                input_index: *input_index,
                                output_or_indirect_parameters_index: output_index,
                            },
                        );
                        batch = Some(BinnedRenderPhaseBatch {
                            representative_entity: (Entity::PLACEHOLDER, main_entity),
                            instance_range: output_index..output_index + 1,
                            extra_index: PhaseItemExtraIndex::None,
                        });
                    }
                }
            }

            if let Some(batch) = batch {
                match phase.batch_sets {
                    BinnedRenderPhaseBatchSets::DynamicUniforms(_) => {
                        error!("Dynamic uniform batch sets shouldn't be used here");
                    }
                    BinnedRenderPhaseBatchSets::Direct(ref mut vec) => {
                        vec.push(batch);
                    }
                    BinnedRenderPhaseBatchSets::MultidrawIndirect(ref mut vec) => {
                        // The Bevy renderer will never mark a mesh as batchable
                        // but not multidrawable if multidraw is in use.
                        // However, custom render pipelines might do so, such as
                        // the `specialized_mesh_pipeline` example.
                        vec.push(BinnedRenderPhaseBatchSet {
                            first_batch: batch,
                            batch_count: 1,
                            bin_key: key.1.clone(),
                            index: phase_indirect_parameters_buffers
                                .buffers
                                .batch_set_count(key.0.indexed())
                                as u32,
                        });
                    }
                }
            }
        }

        // Prepare unbatchables.
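        //
        // Every unbatchable entity receives its own draw and, in indirect
        // mode, its own indirect parameters slot and batch set.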
        for (key, unbatchables) in &mut phase.unbatchable_meshes {
            // Allocate the indirect parameters if necessary.
            let mut indirect_parameters_offset = if no_indirect_drawing {
                None
            } else if key.0.indexed() {
                Some(
                    phase_indirect_parameters_buffers
                        .buffers
                        .indexed
                        .allocate(unbatchables.entities.len() as u32),
                )
            } else {
                Some(
                    phase_indirect_parameters_buffers
                        .buffers
                        .non_indexed
                        .allocate(unbatchables.entities.len() as u32),
                )
            };

            for main_entity in unbatchables.entities.keys() {
                let Some(input_index) = GFBD::get_binned_index(&system_param_item, *main_entity)
                else {
                    continue;
                };
                let output_index = data_buffer.add() as u32;

                if let Some(ref mut indirect_parameters_index) = indirect_parameters_offset {
                    // We're in indirect mode, so add an indirect parameters
                    // index.
                    GFBD::write_batch_indirect_parameters_metadata(
                        key.0.indexed(),
                        output_index,
                        None,
                        &mut phase_indirect_parameters_buffers.buffers,
                        *indirect_parameters_index,
                    );
                    work_item_buffer.push(
                        key.0.indexed(),
                        PreprocessWorkItem {
                            input_index: input_index.into(),
                            output_or_indirect_parameters_index: *indirect_parameters_index,
                        },
                    );
                    unbatchables
                        .buffer_indices
                        .add(UnbatchableBinnedEntityIndices {
                            instance_index: *indirect_parameters_index,
                            extra_index: PhaseItemExtraIndex::IndirectParametersIndex {
                                range: *indirect_parameters_index..(*indirect_parameters_index + 1),
                                batch_set_index: None,
                            },
                        });
                    phase_indirect_parameters_buffers
                        .buffers
                        .add_batch_set(key.0.indexed(), *indirect_parameters_index);
                    *indirect_parameters_index += 1;
                } else {
                    work_item_buffer.push(
                        key.0.indexed(),
                        PreprocessWorkItem {
                            input_index: input_index.into(),
                            output_or_indirect_parameters_index: output_index,
                        },
                    );
                    unbatchables
                        .buffer_indices
                        .add(UnbatchableBinnedEntityIndices {
                            instance_index: output_index,
                            extra_index: PhaseItemExtraIndex::None,
                        });
                }
            }
        }
    }
}

/// The state that [`batch_and_prepare_binned_render_phase`] uses to construct
/// multidrawable batch sets.
///
/// The [`batch_and_prepare_binned_render_phase`] system maintains two of these:
/// one for indexed meshes and one for non-indexed meshes.
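///
/// Keeping separate indexed and non-indexed state mirrors the split between
/// the indexed and non-indexed indirect parameters buffers that this state
/// indexes into.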
struct MultidrawableBatchSetPreparer<BPI, GFBD>
where
    BPI: BinnedPhaseItem,
    GFBD: GetFullBatchData,
{
    /// The offset in the indirect parameters buffer at which the next indirect
    /// parameters will be written.
    indirect_parameters_index: u32,
    /// The number of batch sets we've built so far for this mesh class.
    batch_set_index: u32,
    /// The number of work items we've emitted so far for this mesh class.
    work_item_count: usize,
    phantom: PhantomData<(BPI, GFBD)>,
}

impl<BPI, GFBD> MultidrawableBatchSetPreparer<BPI, GFBD>
where
    BPI: BinnedPhaseItem,
    GFBD: GetFullBatchData,
{
    /// Creates a new [`MultidrawableBatchSetPreparer`] that will start writing
    /// indirect parameters and batch sets at the given indices.
    #[inline]
    fn new(initial_indirect_parameters_index: u32, initial_batch_set_index: u32) -> Self {
        MultidrawableBatchSetPreparer {
            indirect_parameters_index: initial_indirect_parameters_index,
            batch_set_index: initial_batch_set_index,
            work_item_count: 0,
            phantom: PhantomData,
        }
    }

    /// Creates batch sets and writes the GPU data needed to draw all visible
    /// entities of one mesh class in the given batch set.
    ///
    /// The *mesh class* represents whether the mesh has indices or not.
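    ///
    /// For each bin in the batch set, this pushes one
    /// [`IndirectParametersCpuMetadata`] record and one [`PreprocessWorkItem`]
    /// per entity, and then records a single [`IndirectBatchSet`] covering all
    /// of the bins.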
    #[inline]
    fn prepare_multidrawable_binned_batch_set<IP>(
        &mut self,
        bins: &IndexMap<BPI::BinKey, RenderBin>,
        output_index: &mut u32,
        data_buffer: &mut UninitBufferVec<GFBD::BufferData>,
        indexed_work_item_buffer: &mut RawBufferVec<PreprocessWorkItem>,
        mesh_class_buffers: &mut MeshClassIndirectParametersBuffers<IP>,
        batch_sets: &mut Vec<BinnedRenderPhaseBatchSet<BPI::BinKey>>,
    ) where
        IP: Clone + ShaderSize + WriteInto,
    {
        let current_indexed_batch_set_index = self.batch_set_index;
        let current_output_index = *output_index;

        let indirect_parameters_base = self.indirect_parameters_index;

        // We're going to write the first entity into the batch set. Do this
        // here so that we can preload the bin into cache as a side effect.
        let Some((first_bin_key, first_bin)) = bins.iter().next() else {
            return;
        };
        let first_bin_len = first_bin.entities().len();
        let first_bin_entity = first_bin
            .entities()
            .keys()
            .next()
            .copied()
            .unwrap_or(MainEntity::from(Entity::PLACEHOLDER));

        // Traverse the batch set, processing each bin.
        for bin in bins.values() {
            // Record the first output index for this batch, as well as its own
            // index.
            mesh_class_buffers
                .cpu_metadata
                .push(IndirectParametersCpuMetadata {
                    base_output_index: *output_index,
                    batch_set_index: self.batch_set_index,
                });

            // Traverse the bin, pushing `PreprocessWorkItem`s for each entity
            // within it. This is a hot loop, so make it as fast as possible.
            for &input_index in bin.entities().values() {
                indexed_work_item_buffer.push(PreprocessWorkItem {
                    input_index: *input_index,
                    output_or_indirect_parameters_index: self.indirect_parameters_index,
                });
            }

            // Reserve space for the appropriate number of entities in the data
            // buffer. Also, advance the output index and work item count.
            let bin_entity_count = bin.entities().len();
            data_buffer.add_multiple(bin_entity_count);
            *output_index += bin_entity_count as u32;
            self.work_item_count += bin_entity_count;

            self.indirect_parameters_index += 1;
        }

        // Reserve space for the bins in this batch set in the GPU buffers.
        let bin_count = bins.len();
        mesh_class_buffers.gpu_metadata.add_multiple(bin_count);
        mesh_class_buffers.data.add_multiple(bin_count);

        // Write the information the GPU will need about this batch set.
        mesh_class_buffers.batch_sets.push(IndirectBatchSet {
            indirect_parameters_base,
            indirect_parameters_count: 0,
        });

        self.batch_set_index += 1;

        // Record the batch set. The render node later processes this record to
        // render the batches.
        batch_sets.push(BinnedRenderPhaseBatchSet {
            first_batch: BinnedRenderPhaseBatch {
                representative_entity: (Entity::PLACEHOLDER, first_bin_entity),
                instance_range: current_output_index..(current_output_index + first_bin_len as u32),
                extra_index: PhaseItemExtraIndex::maybe_indirect_parameters_index(NonMaxU32::new(
                    indirect_parameters_base,
                )),
            },
            bin_key: (*first_bin_key).clone(),
            batch_count: self.indirect_parameters_index - indirect_parameters_base,
            index: current_indexed_batch_set_index,
        });
    }
}

/// A system that gathers up the per-phase GPU buffers and inserts them into the
/// [`BatchedInstanceBuffers`] and [`IndirectParametersBuffers`] tables.
///
/// This runs after the [`batch_and_prepare_binned_render_phase`] or
/// [`batch_and_prepare_sorted_render_phase`] systems. It takes the per-phase
/// [`PhaseBatchedInstanceBuffers`] and [`PhaseIndirectParametersBuffers`]
/// resources and inserts them into the global [`BatchedInstanceBuffers`] and
/// [`IndirectParametersBuffers`] tables.
///
/// This system exists so that the [`batch_and_prepare_binned_render_phase`]
/// and [`batch_and_prepare_sorted_render_phase`] systems can run in parallel
/// with one another. If those two systems manipulated
/// [`BatchedInstanceBuffers`] and [`IndirectParametersBuffers`] directly, then
/// they wouldn't be able to run in parallel.
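///
/// A minimal registration sketch for a custom phase, where `MyPhaseItem` and
/// `MyBatchData` are hypothetical stand-ins for a real binned phase item and
/// [`GetFullBatchData`] implementation:
///
/// ```ignore
/// render_app.add_systems(
///     Render,
///     collect_buffers_for_phase::<MyPhaseItem, MyBatchData>
///         .after(batch_and_prepare_binned_render_phase::<MyPhaseItem, MyBatchData>),
/// );
/// ```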
pub fn collect_buffers_for_phase<PI, GFBD>(
    mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<PI, GFBD::BufferData>>,
    mut phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<PI>>,
    mut batched_instance_buffers: ResMut<
        BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>,
    >,
    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
) where
    PI: PhaseItem,
    GFBD: GetFullBatchData + Send + Sync + 'static,
{
    // Insert the `PhaseBatchedInstanceBuffers` into the global table. Replace
    // the contents of the per-phase resource with the old batched instance
    // buffers in order to reuse allocations.
    let untyped_phase_batched_instance_buffers =
        mem::take(&mut phase_batched_instance_buffers.buffers);
    if let Some(mut old_untyped_phase_batched_instance_buffers) = batched_instance_buffers
        .phase_instance_buffers
        .insert(TypeId::of::<PI>(), untyped_phase_batched_instance_buffers)
    {
        old_untyped_phase_batched_instance_buffers.clear();
        phase_batched_instance_buffers.buffers = old_untyped_phase_batched_instance_buffers;
    }

    // Insert the `PhaseIndirectParametersBuffers` into the global table.
    // Replace the contents of the per-phase resource with the old indirect
    // parameters buffers in order to reuse allocations.
    let untyped_phase_indirect_parameters_buffers = mem::replace(
        &mut phase_indirect_parameters_buffers.buffers,
        UntypedPhaseIndirectParametersBuffers::new(
            indirect_parameters_buffers.allow_copies_from_indirect_parameter_buffers,
        ),
    );
    if let Some(mut old_untyped_phase_indirect_parameters_buffers) = indirect_parameters_buffers
        .insert(
            TypeId::of::<PI>(),
            untyped_phase_indirect_parameters_buffers,
        )
    {
        old_untyped_phase_indirect_parameters_buffers.clear();
        phase_indirect_parameters_buffers.buffers = old_untyped_phase_indirect_parameters_buffers;
    }
}

/// A system that writes all instance buffers to the GPU.
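///
/// Each buffer write is spawned as a separate task on the
/// [`ComputeTaskPool`] so that the uploads can proceed in parallel.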
pub fn write_batched_instance_buffers<GFBD>(
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
) where
    GFBD: GetFullBatchData,
{
    let BatchedInstanceBuffers {
        current_input_buffer,
        previous_input_buffer,
        phase_instance_buffers,
    } = gpu_array_buffer.into_inner();

    let render_device = &*render_device;
    let render_queue = &*render_queue;

    ComputeTaskPool::get().scope(|scope| {
        scope.spawn(async {
            let _span = bevy_log::info_span!("write_current_input_buffers").entered();
            current_input_buffer
                .buffer
                .write_buffer(render_device, render_queue);
        });
        scope.spawn(async {
            let _span = bevy_log::info_span!("write_previous_input_buffers").entered();
            previous_input_buffer
                .buffer
                .write_buffer(render_device, render_queue);
        });

        for phase_instance_buffers in phase_instance_buffers.values_mut() {
            let UntypedPhaseBatchedInstanceBuffers {
                ref mut data_buffer,
                ref mut work_item_buffers,
                ref mut late_indexed_indirect_parameters_buffer,
                ref mut late_non_indexed_indirect_parameters_buffer,
            } = *phase_instance_buffers;

            scope.spawn(async {
                let _span = bevy_log::info_span!("write_phase_instance_buffers").entered();
                data_buffer.write_buffer(render_device);
                late_indexed_indirect_parameters_buffer.write_buffer(render_device, render_queue);
                late_non_indexed_indirect_parameters_buffer
                    .write_buffer(render_device, render_queue);
            });

            for phase_work_item_buffers in work_item_buffers.values_mut() {
                scope.spawn(async {
                    let _span = bevy_log::info_span!("write_work_item_buffers").entered();
                    match *phase_work_item_buffers {
                        PreprocessWorkItemBuffers::Direct(ref mut buffer_vec) => {
                            buffer_vec.write_buffer(render_device, render_queue);
                        }
                        PreprocessWorkItemBuffers::Indirect {
                            ref mut indexed,
                            ref mut non_indexed,
                            ref mut gpu_occlusion_culling,
                        } => {
                            indexed.write_buffer(render_device, render_queue);
                            non_indexed.write_buffer(render_device, render_queue);

                            if let Some(GpuOcclusionCullingWorkItemBuffers {
                                ref mut late_indexed,
                                ref mut late_non_indexed,
                                late_indirect_parameters_indexed_offset: _,
                                late_indirect_parameters_non_indexed_offset: _,
                            }) = *gpu_occlusion_culling
                            {
                                if !late_indexed.is_empty() {
                                    late_indexed.write_buffer(render_device);
                                }
                                if !late_non_indexed.is_empty() {
                                    late_non_indexed.write_buffer(render_device);
                                }
                            }
                        }
                    }
                });
            }
        }
    });
}

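/// A system that clears out the indirect parameters buffers for every phase
/// in preparation for a new frame.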
pub fn clear_indirect_parameters_buffers(
    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
) {
    for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() {
        phase_indirect_parameters_buffers.clear();
    }
}

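/// A system that writes the indirect parameters buffers for every phase to
/// the GPU, spawning one upload task per buffer on the [`ComputeTaskPool`].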
pub fn write_indirect_parameters_buffers(
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
) {
    let render_device = &*render_device;
    let render_queue = &*render_queue;
    ComputeTaskPool::get().scope(|scope| {
        for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() {
            scope.spawn(async {
                let _span = bevy_log::info_span!("indexed_data").entered();
                phase_indirect_parameters_buffers
                    .indexed
                    .data
                    .write_buffer(render_device);
            });
            scope.spawn(async {
                let _span = bevy_log::info_span!("non_indexed_data").entered();
                phase_indirect_parameters_buffers
                    .non_indexed
                    .data
                    .write_buffer(render_device);
            });

            scope.spawn(async {
                let _span = bevy_log::info_span!("indexed_cpu_metadata").entered();
                phase_indirect_parameters_buffers
                    .indexed
                    .cpu_metadata
                    .write_buffer(render_device, render_queue);
            });
            scope.spawn(async {
                let _span = bevy_log::info_span!("non_indexed_cpu_metadata").entered();
                phase_indirect_parameters_buffers
                    .non_indexed
                    .cpu_metadata
                    .write_buffer(render_device, render_queue);
            });

            scope.spawn(async {
                let _span = bevy_log::info_span!("non_indexed_gpu_metadata").entered();
                phase_indirect_parameters_buffers
                    .non_indexed
                    .gpu_metadata
                    .write_buffer(render_device);
            });
            scope.spawn(async {
                let _span = bevy_log::info_span!("indexed_gpu_metadata").entered();
                phase_indirect_parameters_buffers
                    .indexed
                    .gpu_metadata
                    .write_buffer(render_device);
            });

            scope.spawn(async {
                let _span = bevy_log::info_span!("indexed_batch_sets").entered();
                phase_indirect_parameters_buffers
                    .indexed
                    .batch_sets
                    .write_buffer(render_device, render_queue);
            });
            scope.spawn(async {
                let _span = bevy_log::info_span!("non_indexed_batch_sets").entered();
                phase_indirect_parameters_buffers
                    .non_indexed
                    .batch_sets
                    .write_buffer(render_device, render_queue);
            });
        }
    });
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn instance_buffer_correct_behavior() {
        let mut instance_buffer = InstanceInputUniformBuffer::new();

        let index = instance_buffer.add(2);
        instance_buffer.remove(index);
        assert_eq!(instance_buffer.get_unchecked(index), 2);
        assert_eq!(instance_buffer.get(index), None);

        instance_buffer.add(5);
        assert_eq!(instance_buffer.buffer().len(), 1);
    }
}