// Path: blob/main/crates/bevy_render/src/batching/gpu_preprocessing.rs
// 9325 views
//! Batching functionality when GPU preprocessing is in use.

use core::{any::TypeId, marker::PhantomData, mem};

use bevy_app::{App, Plugin};
use bevy_derive::{Deref, DerefMut};
use bevy_ecs::{
    prelude::Entity,
    query::{Has, With},
    resource::Resource,
    schedule::IntoScheduleConfigs as _,
    system::{Query, Res, ResMut, StaticSystemParam},
    world::{FromWorld, World},
};
use bevy_encase_derive::ShaderType;
use bevy_log::{error, info};
use bevy_math::UVec4;
use bevy_platform::collections::{hash_map::Entry, HashMap, HashSet};
use bevy_tasks::ComputeTaskPool;
use bevy_utils::{default, TypeIdMap};
use bytemuck::{Pod, Zeroable};
use encase::{internal::WriteInto, ShaderSize};
use indexmap::IndexMap;
use nonmax::NonMaxU32;
use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features};

use crate::{
    occlusion_culling::OcclusionCulling,
    render_phase::{
        BinnedPhaseItem, BinnedRenderPhaseBatch, BinnedRenderPhaseBatchSet,
        BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, PhaseItem,
        PhaseItemBatchSetKey as _, PhaseItemExtraIndex, RenderBin, SortedPhaseItem,
        SortedRenderPhase, UnbatchableBinnedEntityIndices, ViewBinnedRenderPhases,
        ViewSortedRenderPhases,
    },
    render_resource::{Buffer, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
    renderer::{RenderAdapter, RenderAdapterInfo, RenderDevice, RenderQueue, WgpuWrapper},
    sync_world::MainEntity,
    view::{ExtractedView, NoIndirectDrawing, RetainedViewEntity},
    Render, RenderApp, RenderDebugFlags, RenderSystems,
};

use super::{BatchMeta, GetBatchData, GetFullBatchData};

/// Registers the GPU batching resources and systems on the render app.
#[derive(Default)]
pub struct BatchingPlugin {
    /// Debugging flags that can optionally be set when constructing the renderer.
    pub debug_flags: RenderDebugFlags,
}

impl Plugin for BatchingPlugin {
    fn build(&self, app: &mut App) {
        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };

        render_app
            .insert_resource(IndirectParametersBuffers::new(
                self.debug_flags
                    .contains(RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS),
            ))
            .add_systems(
                Render,
                write_indirect_parameters_buffers.in_set(RenderSystems::PrepareResourcesFlush),
            )
            .add_systems(
                Render,
                clear_indirect_parameters_buffers.in_set(RenderSystems::ManageViews),
            );
    }

    fn finish(&self, app: &mut App) {
        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };

        // Deferred to `finish` because it reads device/adapter state that is
        // only available after renderer initialization.
        render_app.init_resource::<GpuPreprocessingSupport>();
    }
}

/// Records whether GPU preprocessing and/or GPU culling are supported on the
/// device.
///
/// No GPU preprocessing is supported on WebGL because of the lack of compute
/// shader support. GPU preprocessing is supported on DirectX 12, but due to [a
/// `wgpu` limitation] GPU culling is not.
///
/// [a `wgpu` limitation]: https://github.com/gfx-rs/wgpu/issues/2471
#[derive(Clone, Copy, PartialEq, Resource)]
pub struct GpuPreprocessingSupport {
    /// The maximum amount of GPU preprocessing available on this platform.
    pub max_supported_mode: GpuPreprocessingMode,
}

impl GpuPreprocessingSupport {
    /// Returns true if this GPU preprocessing support level isn't `None`.
    #[inline]
    pub fn is_available(&self) -> bool {
        self.max_supported_mode != GpuPreprocessingMode::None
    }

    /// Returns the given GPU preprocessing mode, capped to the current
    /// preprocessing mode.
    pub fn min(&self, mode: GpuPreprocessingMode) -> GpuPreprocessingMode {
        match (self.max_supported_mode, mode) {
            (GpuPreprocessingMode::None, _) | (_, GpuPreprocessingMode::None) => {
                GpuPreprocessingMode::None
            }
            (mode, GpuPreprocessingMode::Culling) | (GpuPreprocessingMode::Culling, mode) => mode,
            (GpuPreprocessingMode::PreprocessingOnly, GpuPreprocessingMode::PreprocessingOnly) => {
                GpuPreprocessingMode::PreprocessingOnly
            }
        }
    }

    /// Returns true if GPU culling is supported on this platform.
    pub fn is_culling_supported(&self) -> bool {
        self.max_supported_mode == GpuPreprocessingMode::Culling
    }
}

/// The amount of GPU preprocessing (compute and indirect draw) that we do.
#[derive(Clone, Copy, PartialEq)]
pub enum GpuPreprocessingMode {
    /// No GPU preprocessing is in use at all.
    ///
    /// This is used when GPU compute isn't available.
    None,

    /// GPU preprocessing is in use, but GPU culling isn't.
    ///
    /// This is used when the [`NoIndirectDrawing`] component is present on the
    /// camera.
    PreprocessingOnly,

    /// Both GPU preprocessing and GPU culling are in use.
    ///
    /// This is used by default.
    Culling,
}

/// The GPU buffers holding the data needed to render batches.
///
/// For example, in the 3D PBR pipeline this holds `MeshUniform`s, which are the
/// `BD` type parameter in that mode.
///
/// We have a separate *buffer data input* type (`BDI`) here, which a compute
/// shader is expected to expand to the full buffer data (`BD`) type. GPU
/// uniform building is generally faster and uses less system RAM to VRAM bus
/// bandwidth, but only implemented for some pipelines (for example, not in the
/// 2D pipeline at present) and only when compute shader is available.
#[derive(Resource)]
pub struct BatchedInstanceBuffers<BD, BDI>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
    BDI: Pod + Default,
{
    /// The uniform data inputs for the current frame.
    ///
    /// These are uploaded during the extraction phase.
    pub current_input_buffer: InstanceInputUniformBuffer<BDI>,

    /// The uniform data inputs for the previous frame.
    ///
    /// The indices don't generally line up between `current_input_buffer`
    /// and `previous_input_buffer`, because, among other reasons, entities
    /// can spawn or despawn between frames.
    /// Instead, each current buffer
    /// data input uniform is expected to contain the index of the
    /// corresponding buffer data input uniform in this list.
    pub previous_input_buffer: InstanceInputUniformBuffer<BDI>,

    /// The data needed to render buffers for each phase.
    ///
    /// The keys of this map are the type IDs of each phase: e.g. `Opaque3d`,
    /// `AlphaMask3d`, etc.
    pub phase_instance_buffers: TypeIdMap<UntypedPhaseBatchedInstanceBuffers<BD>>,
}

impl<BD, BDI> Default for BatchedInstanceBuffers<BD, BDI>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
    BDI: Pod + Sync + Send + Default + 'static,
{
    fn default() -> Self {
        BatchedInstanceBuffers {
            current_input_buffer: InstanceInputUniformBuffer::new(),
            previous_input_buffer: InstanceInputUniformBuffer::new(),
            phase_instance_buffers: HashMap::default(),
        }
    }
}

/// The GPU buffers holding the data needed to render batches for a single
/// phase.
///
/// These are split out per phase so that we can run the phases in parallel.
/// This is the version of the structure that has a type parameter, which
/// enables Bevy's scheduler to run the batching operations for the different
/// phases in parallel.
///
/// See the documentation for [`BatchedInstanceBuffers`] for more information.
#[derive(Resource)]
pub struct PhaseBatchedInstanceBuffers<PI, BD>
where
    PI: PhaseItem,
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    /// The buffers for this phase.
    pub buffers: UntypedPhaseBatchedInstanceBuffers<BD>,
    phantom: PhantomData<PI>,
}

impl<PI, BD> Default for PhaseBatchedInstanceBuffers<PI, BD>
where
    PI: PhaseItem,
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    fn default() -> Self {
        PhaseBatchedInstanceBuffers {
            buffers: UntypedPhaseBatchedInstanceBuffers::default(),
            phantom: PhantomData,
        }
    }
}

/// The GPU buffers holding the data needed to render batches for a single
/// phase, without a type parameter for that phase.
///
/// Since this structure doesn't have a type parameter, it can be placed in
/// [`BatchedInstanceBuffers::phase_instance_buffers`].
pub struct UntypedPhaseBatchedInstanceBuffers<BD>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    /// A storage area for the buffer data that the GPU compute shader is
    /// expected to write to.
    ///
    /// There will be one entry for each index.
    pub data_buffer: UninitBufferVec<BD>,

    /// The index of the buffer data in the current input buffer that
    /// corresponds to each instance.
    ///
    /// This is keyed off each view. Each view has a separate buffer.
    pub work_item_buffers: HashMap<RetainedViewEntity, PreprocessWorkItemBuffers>,

    /// A buffer that holds the number of indexed meshes that weren't visible in
    /// the previous frame, when GPU occlusion culling is in use.
    ///
    /// There's one set of [`LatePreprocessWorkItemIndirectParameters`] per
    /// view. Bevy uses this value to determine how many threads to dispatch to
    /// check meshes that weren't visible last frame to see if they became newly
    /// visible this frame.
    pub late_indexed_indirect_parameters_buffer:
        RawBufferVec<LatePreprocessWorkItemIndirectParameters>,

    /// A buffer that holds the number of non-indexed meshes that weren't
    /// visible in the previous frame, when GPU occlusion culling is in use.
    ///
    /// There's one set of [`LatePreprocessWorkItemIndirectParameters`] per
    /// view.
    /// Bevy uses this value to determine how many threads to dispatch to
    /// check meshes that weren't visible last frame to see if they became
    /// newly visible this frame.
    pub late_non_indexed_indirect_parameters_buffer:
        RawBufferVec<LatePreprocessWorkItemIndirectParameters>,
}

/// Holds the GPU buffer of instance input data, which is the data about each
/// mesh instance that the CPU provides.
///
/// `BDI` is the *buffer data input* type, which the GPU mesh preprocessing
/// shader is expected to expand to the full *buffer data* type.
pub struct InstanceInputUniformBuffer<BDI>
where
    BDI: Pod + Default,
{
    /// The buffer containing the data that will be uploaded to the GPU.
    buffer: RawBufferVec<BDI>,

    /// Indices of slots that are free within the buffer.
    ///
    /// When adding data, we preferentially overwrite these slots first before
    /// growing the buffer itself.
    free_uniform_indices: Vec<u32>,
}

impl<BDI> InstanceInputUniformBuffer<BDI>
where
    BDI: Pod + Default,
{
    /// Creates a new, empty buffer.
    pub fn new() -> InstanceInputUniformBuffer<BDI> {
        InstanceInputUniformBuffer {
            buffer: RawBufferVec::new(BufferUsages::STORAGE),
            free_uniform_indices: vec![],
        }
    }

    /// Clears the buffer and entity list out.
    pub fn clear(&mut self) {
        self.buffer.clear();
        self.free_uniform_indices.clear();
    }

    /// Returns the [`RawBufferVec`] corresponding to this input uniform buffer.
    #[inline]
    pub fn buffer(&self) -> &RawBufferVec<BDI> {
        &self.buffer
    }

    /// Adds a new piece of buffered data to the uniform buffer and returns its
    /// index.
    pub fn add(&mut self, element: BDI) -> u32 {
        // Reuse a previously-freed slot when one is available; only grow the
        // buffer when there are no free slots.
        match self.free_uniform_indices.pop() {
            Some(uniform_index) => {
                self.buffer.values_mut()[uniform_index as usize] = element;
                uniform_index
            }
            None => self.buffer.push(element) as u32,
        }
    }

    /// Removes a piece of buffered data from the uniform buffer.
    ///
    /// This simply marks the data as free.
    pub fn remove(&mut self, uniform_index: u32) {
        self.free_uniform_indices.push(uniform_index);
    }

    /// Returns the piece of buffered data at the given index.
    ///
    /// Returns [`None`] if the index is out of bounds or the data is removed.
    pub fn get(&self, uniform_index: u32) -> Option<BDI> {
        if (uniform_index as usize) >= self.buffer.len()
            || self.free_uniform_indices.contains(&uniform_index)
        {
            None
        } else {
            Some(self.get_unchecked(uniform_index))
        }
    }

    /// Returns the piece of buffered data at the given index.
    /// Can return data that has previously been removed.
    ///
    /// # Panics
    /// if `uniform_index` is not in bounds of [`Self::buffer`].
    pub fn get_unchecked(&self, uniform_index: u32) -> BDI {
        self.buffer.values()[uniform_index as usize]
    }

    /// Stores a piece of buffered data at the given index.
    ///
    /// # Panics
    /// if `uniform_index` is not in bounds of [`Self::buffer`].
    pub fn set(&mut self, uniform_index: u32, element: BDI) {
        self.buffer.values_mut()[uniform_index as usize] = element;
    }

    /// Ensures that the buffers are nonempty, which the GPU requires before an
    /// upload can take place.
    pub fn ensure_nonempty(&mut self) {
        if self.buffer.is_empty() {
            self.buffer.push(default());
        }
    }

    /// Returns the number of instances in this buffer.
    pub fn len(&self) -> usize {
        self.buffer.len()
    }

    /// Returns true if this buffer has no instances or false if it contains any
    /// instances.
    pub fn is_empty(&self) -> bool {
        self.buffer.is_empty()
    }

    /// Consumes this [`InstanceInputUniformBuffer`] and returns the raw buffer
    /// ready to be uploaded to the GPU.
    pub fn into_buffer(self) -> RawBufferVec<BDI> {
        self.buffer
    }
}

impl<BDI> Default for InstanceInputUniformBuffer<BDI>
where
    BDI: Pod + Default,
{
    fn default() -> Self {
        Self::new()
    }
}

/// The buffer of GPU preprocessing work items for a single
/// view.
#[cfg_attr(
    not(target_arch = "wasm32"),
    expect(
        clippy::large_enum_variant,
        reason = "See https://github.com/bevyengine/bevy/issues/19220"
    )
)]
pub enum PreprocessWorkItemBuffers {
    /// The work items we use if we aren't using indirect drawing.
    ///
    /// Because we don't have to separate indexed from non-indexed meshes in
    /// direct mode, we only have a single buffer here.
    Direct(RawBufferVec<PreprocessWorkItem>),

    /// The buffer of work items we use if we are using indirect drawing.
    ///
    /// We need to separate out indexed meshes from non-indexed meshes in this
    /// case because the indirect parameters for these two types of meshes have
    /// different sizes.
    Indirect {
        /// The buffer of work items corresponding to indexed meshes.
        indexed: RawBufferVec<PreprocessWorkItem>,
        /// The buffer of work items corresponding to non-indexed meshes.
        non_indexed: RawBufferVec<PreprocessWorkItem>,
        /// The work item buffers we use when GPU occlusion culling is in use.
        gpu_occlusion_culling: Option<GpuOcclusionCullingWorkItemBuffers>,
    },
}

/// The work item buffers we use when GPU occlusion culling is in use.
pub struct GpuOcclusionCullingWorkItemBuffers {
    /// The buffer of work items corresponding to indexed meshes.
    pub late_indexed: UninitBufferVec<PreprocessWorkItem>,
    /// The buffer of work items corresponding to non-indexed meshes.
    pub late_non_indexed: UninitBufferVec<PreprocessWorkItem>,
    /// The offset into the
    /// [`UntypedPhaseBatchedInstanceBuffers::late_indexed_indirect_parameters_buffer`]
    /// where this view's indirect dispatch counts for indexed meshes live.
    pub late_indirect_parameters_indexed_offset: u32,
    /// The offset into the
    /// [`UntypedPhaseBatchedInstanceBuffers::late_non_indexed_indirect_parameters_buffer`]
    /// where this view's indirect dispatch counts for non-indexed meshes live.
    pub late_indirect_parameters_non_indexed_offset: u32,
}

/// A GPU-side data structure that stores the number of workgroups to dispatch
/// for the second phase of GPU occlusion culling.
///
/// The late mesh preprocessing phase checks meshes that weren't visible last
/// frame to determine if they're potentially visible this frame.
#[derive(Clone, Copy, ShaderType, Pod, Zeroable)]
#[repr(C)]
pub struct LatePreprocessWorkItemIndirectParameters {
    /// The number of workgroups to dispatch.
    ///
    /// This will be equal to `work_item_count / 64`, rounded *up*.
    dispatch_x: u32,
    /// The number of workgroups along the abstract Y axis to dispatch: always
    /// 1.
    dispatch_y: u32,
    /// The number of workgroups along the abstract Z axis to dispatch: always
    /// 1.
    dispatch_z: u32,
    /// The actual number of work items.
    ///
    /// The GPU indirect dispatch doesn't read this, but it's used internally to
    /// determine the actual number of work items that exist in the late
    /// preprocessing work item buffer.
    work_item_count: u32,
    /// Padding to 64-byte boundaries for some hardware.
    pad: UVec4,
}

impl Default for LatePreprocessWorkItemIndirectParameters {
    fn default() -> LatePreprocessWorkItemIndirectParameters {
        LatePreprocessWorkItemIndirectParameters {
            dispatch_x: 0,
            // Y and Z are always 1: the dispatch is one-dimensional.
            dispatch_y: 1,
            dispatch_z: 1,
            work_item_count: 0,
            pad: default(),
        }
    }
}

/// Returns the set of work item buffers for the given view, first creating it
/// if necessary.
///
/// Bevy uses work item buffers to tell the mesh preprocessing compute shader
/// which meshes are to be drawn.
///
/// You may need to call this function if you're implementing your own custom
/// render phases.
/// See the `specialized_mesh_pipeline` example.
pub fn get_or_create_work_item_buffer<'a, I>(
    work_item_buffers: &'a mut HashMap<RetainedViewEntity, PreprocessWorkItemBuffers>,
    view: RetainedViewEntity,
    no_indirect_drawing: bool,
    enable_gpu_occlusion_culling: bool,
) -> &'a mut PreprocessWorkItemBuffers
where
    I: 'static,
{
    let preprocess_work_item_buffers = match work_item_buffers.entry(view) {
        Entry::Occupied(occupied_entry) => occupied_entry.into_mut(),
        Entry::Vacant(vacant_entry) => {
            if no_indirect_drawing {
                vacant_entry.insert(PreprocessWorkItemBuffers::Direct(RawBufferVec::new(
                    BufferUsages::STORAGE,
                )))
            } else {
                vacant_entry.insert(PreprocessWorkItemBuffers::Indirect {
                    indexed: RawBufferVec::new(BufferUsages::STORAGE),
                    non_indexed: RawBufferVec::new(BufferUsages::STORAGE),
                    // We fill this in below if `enable_gpu_occlusion_culling`
                    // is set.
                    gpu_occlusion_culling: None,
                })
            }
        }
    };

    // Initialize the GPU occlusion culling buffers if necessary.
    if let PreprocessWorkItemBuffers::Indirect {
        ref mut gpu_occlusion_culling,
        ..
    } = *preprocess_work_item_buffers
    {
        match (
            enable_gpu_occlusion_culling,
            gpu_occlusion_culling.is_some(),
        ) {
            // Desired and actual state agree: nothing to do.
            (false, false) | (true, true) => {}
            // Occlusion culling was turned off: drop the buffers.
            (false, true) => {
                *gpu_occlusion_culling = None;
            }
            // Occlusion culling was turned on: create the buffers.
            (true, false) => {
                *gpu_occlusion_culling = Some(GpuOcclusionCullingWorkItemBuffers {
                    late_indexed: UninitBufferVec::new(BufferUsages::STORAGE),
                    late_non_indexed: UninitBufferVec::new(BufferUsages::STORAGE),
                    late_indirect_parameters_indexed_offset: 0,
                    late_indirect_parameters_non_indexed_offset: 0,
                });
            }
        }
    }

    preprocess_work_item_buffers
}

/// Initializes work item buffers for a phase in preparation for a new frame.
pub fn init_work_item_buffers(
    work_item_buffers: &mut PreprocessWorkItemBuffers,
    late_indexed_indirect_parameters_buffer: &'_ mut RawBufferVec<
        LatePreprocessWorkItemIndirectParameters,
    >,
    late_non_indexed_indirect_parameters_buffer: &'_ mut RawBufferVec<
        LatePreprocessWorkItemIndirectParameters,
    >,
) {
    // Add the offsets for indirect parameters that the late phase of mesh
    // preprocessing writes to.
    if let PreprocessWorkItemBuffers::Indirect {
        gpu_occlusion_culling:
            Some(GpuOcclusionCullingWorkItemBuffers {
                ref mut late_indirect_parameters_indexed_offset,
                ref mut late_indirect_parameters_non_indexed_offset,
                ..
            }),
        ..
    } = *work_item_buffers
    {
        *late_indirect_parameters_indexed_offset = late_indexed_indirect_parameters_buffer
            .push(LatePreprocessWorkItemIndirectParameters::default())
            as u32;
        *late_indirect_parameters_non_indexed_offset = late_non_indexed_indirect_parameters_buffer
            .push(LatePreprocessWorkItemIndirectParameters::default())
            as u32;
    }
}

impl PreprocessWorkItemBuffers {
    /// Adds a new work item to the appropriate buffer.
    ///
    /// `indexed` specifies whether the work item corresponds to an indexed
    /// mesh.
    pub fn push(&mut self, indexed: bool, preprocess_work_item: PreprocessWorkItem) {
        match *self {
            PreprocessWorkItemBuffers::Direct(ref mut buffer) => {
                buffer.push(preprocess_work_item);
            }
            PreprocessWorkItemBuffers::Indirect {
                indexed: ref mut indexed_buffer,
                non_indexed: ref mut non_indexed_buffer,
                ref mut gpu_occlusion_culling,
            } => {
                if indexed {
                    indexed_buffer.push(preprocess_work_item);
                } else {
                    non_indexed_buffer.push(preprocess_work_item);
                }

                // Mirror the work item into the late (occlusion-culling)
                // buffers when those are active.
                if let Some(ref mut gpu_occlusion_culling) = *gpu_occlusion_culling {
                    if indexed {
                        gpu_occlusion_culling.late_indexed.add();
                    } else {
                        gpu_occlusion_culling.late_non_indexed.add();
                    }
                }
            }
        }
    }

    /// Clears out the GPU work item buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        match *self {
            PreprocessWorkItemBuffers::Direct(ref mut buffer) => {
                buffer.clear();
            }
            PreprocessWorkItemBuffers::Indirect {
                indexed: ref mut indexed_buffer,
                non_indexed: ref mut non_indexed_buffer,
                ref mut gpu_occlusion_culling,
            } => {
                indexed_buffer.clear();
                non_indexed_buffer.clear();

                if let Some(ref mut gpu_occlusion_culling) = *gpu_occlusion_culling {
                    gpu_occlusion_culling.late_indexed.clear();
                    gpu_occlusion_culling.late_non_indexed.clear();
                    gpu_occlusion_culling.late_indirect_parameters_indexed_offset = 0;
                    gpu_occlusion_culling.late_indirect_parameters_non_indexed_offset = 0;
                }
            }
        }
    }
}

/// One invocation of the preprocessing shader: i.e. one mesh instance in a
/// view.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct PreprocessWorkItem {
    /// The index of the batch input data in the input buffer that the shader
    /// reads from.
    pub input_index: u32,

    /// In direct mode, the index of the mesh uniform; in indirect mode, the
    /// index of the [`IndirectParametersGpuMetadata`].
    ///
    /// In indirect mode, this is the index of the
    /// [`IndirectParametersGpuMetadata`] in the
    /// `IndirectParametersBuffers::indexed_metadata` or
    /// `IndirectParametersBuffers::non_indexed_metadata`.
    pub output_or_indirect_parameters_index: u32,
}

/// The `wgpu` indirect parameters structure that specifies a GPU draw command.
///
/// This is the variant for indexed meshes.
/// We generate the instances of this
/// structure in the `build_indirect_params.wgsl` compute shader.
#[derive(Clone, Copy, Debug, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersIndexed {
    /// The number of indices that this mesh has.
    pub index_count: u32,
    /// The number of instances we are to draw.
    pub instance_count: u32,
    /// The offset of the first index for this mesh in the index buffer slab.
    pub first_index: u32,
    /// The offset of the first vertex for this mesh in the vertex buffer slab.
    pub base_vertex: u32,
    /// The index of the first mesh instance in the `MeshUniform` buffer.
    pub first_instance: u32,
}

/// The `wgpu` indirect parameters structure that specifies a GPU draw command.
///
/// This is the variant for non-indexed meshes. We generate the instances of
/// this structure in the `build_indirect_params.wgsl` compute shader.
#[derive(Clone, Copy, Debug, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersNonIndexed {
    /// The number of vertices that this mesh has.
    pub vertex_count: u32,
    /// The number of instances we are to draw.
    pub instance_count: u32,
    /// The offset of the first vertex for this mesh in the vertex buffer slab.
    pub base_vertex: u32,
    /// The index of the first mesh instance in the `MeshUniform` buffer.
    pub first_instance: u32,
}

/// A structure, initialized on CPU and read on GPU, that contains metadata
/// about each batch.
///
/// Each batch will have one instance of this structure.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersCpuMetadata {
    /// The index of the first instance of this mesh in the array of
    /// `MeshUniform`s.
    ///
    /// Note that this is the *first* output index in this batch. Since each
    /// instance of this structure refers to arbitrarily many instances, the
    /// `MeshUniform`s corresponding to this batch span the indices
    /// `base_output_index..(base_output_index + instance_count)`.
    pub base_output_index: u32,

    /// The index of the batch set that this batch belongs to in the
    /// [`IndirectBatchSet`] buffer.
    ///
    /// A *batch set* is a set of meshes that may be multi-drawn together.
    /// Multiple batches (and therefore multiple instances of
    /// [`IndirectParametersGpuMetadata`] structures) can be part of the same
    /// batch set.
    pub batch_set_index: u32,
}

/// A structure, written and read on GPU, that records how many instances of
/// each mesh are actually to be drawn.
///
/// The GPU mesh preprocessing shader increments the
/// [`Self::early_instance_count`] and [`Self::late_instance_count`] as it
/// determines that meshes are visible. The indirect parameter building shader
/// reads this metadata in order to construct the indirect draw parameters.
///
/// Each batch will have one instance of this structure.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersGpuMetadata {
    /// The index of the first mesh in this batch in the array of
    /// `MeshInputUniform`s.
    pub mesh_index: u32,

    /// The number of instances that were judged visible last frame.
    ///
    /// The CPU sets this value to 0, and the GPU mesh preprocessing shader
    /// increments it as it culls mesh instances.
    pub early_instance_count: u32,

    /// The number of instances that have been judged potentially visible this
    /// frame that weren't in the last frame's potentially visible set.
    ///
    /// The CPU sets this value to 0, and the GPU mesh preprocessing shader
    /// increments it as it culls mesh instances.
    pub late_instance_count: u32,
}

/// A structure, shared between CPU and GPU, that holds the number of on-GPU
/// indirect draw commands for each *batch set*.
///
/// A *batch set* is a set of meshes that may be multi-drawn together.
///
/// If the current hardware and driver support `multi_draw_indirect_count`, the
/// indirect parameters building shader increments
/// [`Self::indirect_parameters_count`] as it generates indirect parameters. The
/// `multi_draw_indirect_count` command reads
/// [`Self::indirect_parameters_count`] in order to determine how many commands
/// belong to each batch set.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectBatchSet {
    /// The number of indirect parameter commands (i.e. batches) in this batch
    /// set.
    ///
    /// The CPU sets this value to 0 before uploading this structure to GPU. The
    /// indirect parameters building shader increments this value as it creates
    /// indirect parameters. Then the `multi_draw_indirect_count` command reads
    /// this value in order to determine how many indirect draw commands to
    /// process.
    pub indirect_parameters_count: u32,

    /// The offset within the `IndirectParametersBuffers::indexed_data` or
    /// `IndirectParametersBuffers::non_indexed_data` of the first indirect draw
    /// command for this batch set.
    ///
    /// The CPU fills out this value.
    pub indirect_parameters_base: u32,
}

/// The buffers containing all the information that indirect draw commands
/// (`multi_draw_indirect`, `multi_draw_indirect_count`) use to draw the scene.
///
/// In addition to the indirect draw buffers themselves, this structure contains
/// the buffers that store [`IndirectParametersGpuMetadata`], which are the
/// structures that culling writes to so that the indirect parameter building
/// pass can determine how many meshes are actually to be drawn.
///
/// These buffers will remain empty if indirect drawing isn't in use.
#[derive(Resource, Deref, DerefMut)]
pub struct IndirectParametersBuffers {
    /// A mapping from a phase type ID to the indirect
    /// parameters buffers for
    /// that phase.
    ///
    /// Examples of phase type IDs are `Opaque3d` and `AlphaMask3d`.
    #[deref]
    pub buffers: TypeIdMap<UntypedPhaseIndirectParametersBuffers>,
    /// If true, this sets the `COPY_SRC` flag on indirect draw parameters so
    /// that they can be read back to CPU.
    ///
    /// This is a debugging feature that may reduce performance. It primarily
    /// exists for the `occlusion_culling` example.
    pub allow_copies_from_indirect_parameter_buffers: bool,
}

impl IndirectParametersBuffers {
    /// Initializes a new [`IndirectParametersBuffers`] resource.
    pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> IndirectParametersBuffers {
        IndirectParametersBuffers {
            buffers: TypeIdMap::default(),
            allow_copies_from_indirect_parameter_buffers,
        }
    }
}

/// The buffers containing all the information that indirect draw commands use
/// to draw the scene, for a single phase.
///
/// This is the version of the structure that has a type parameter, so that the
/// batching for different phases can run in parallel.
///
/// See the [`IndirectParametersBuffers`] documentation for more information.
#[derive(Resource)]
pub struct PhaseIndirectParametersBuffers<PI>
where
    PI: PhaseItem,
{
    /// The indirect draw buffers for the phase.
    pub buffers: UntypedPhaseIndirectParametersBuffers,
    phantom: PhantomData<PI>,
}

impl<PI> PhaseIndirectParametersBuffers<PI>
where
    PI: PhaseItem,
{
    /// Creates the indirect parameters buffers for a single phase.
    pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> Self {
        PhaseIndirectParametersBuffers {
            buffers: UntypedPhaseIndirectParametersBuffers::new(
                allow_copies_from_indirect_parameter_buffers,
            ),
            phantom: PhantomData,
        }
    }
}

/// The buffers containing all the information that indirect draw commands use
/// to draw the scene, for a single phase.
///
/// This is the version of the structure that doesn't have a type parameter, so
/// that it can be inserted into
/// [`IndirectParametersBuffers::buffers`]
///
/// See the [`IndirectParametersBuffers`] documentation for more information.
pub struct UntypedPhaseIndirectParametersBuffers {
    /// Information that indirect draw commands use to draw indexed meshes in
    /// the scene.
    pub indexed: MeshClassIndirectParametersBuffers<IndirectParametersIndexed>,
    /// Information that indirect draw commands use to draw non-indexed meshes
    /// in the scene.
    pub non_indexed: MeshClassIndirectParametersBuffers<IndirectParametersNonIndexed>,
}

impl UntypedPhaseIndirectParametersBuffers {
    /// Creates the indirect parameters buffers.
    pub fn new(
        allow_copies_from_indirect_parameter_buffers: bool,
    ) -> UntypedPhaseIndirectParametersBuffers {
        UntypedPhaseIndirectParametersBuffers {
            non_indexed: MeshClassIndirectParametersBuffers::new(
                allow_copies_from_indirect_parameter_buffers,
            ),
            indexed: MeshClassIndirectParametersBuffers::new(
                allow_copies_from_indirect_parameter_buffers,
            ),
        }
    }

    /// Reserves space for `count` new batches.
    ///
    /// The `indexed` parameter specifies whether the meshes that these batches
    /// correspond to are indexed or not.
    pub fn allocate(&mut self, indexed: bool, count: u32) -> u32 {
        if indexed {
            self.indexed.allocate(count)
        } else {
            self.non_indexed.allocate(count)
        }
    }

    /// Returns the number of batches currently allocated.
    ///
    /// The `indexed` parameter specifies whether the meshes that these batches
    /// correspond to are indexed or not.
    fn batch_count(&self, indexed: bool) -> usize {
        if indexed {
            self.indexed.batch_count()
        } else {
            self.non_indexed.batch_count()
        }
    }

    /// Returns the number of batch sets currently allocated.
    ///
    /// The `indexed` parameter specifies whether the meshes that these batch
    /// sets correspond to are indexed or not.
    pub fn batch_set_count(&self, indexed: bool) -> usize {
        if indexed {
            self.indexed.batch_sets.len()
        } else {
            self.non_indexed.batch_sets.len()
        }
    }

    /// Adds a new batch set to `Self::indexed_batch_sets` or
    /// `Self::non_indexed_batch_sets` as appropriate.
    ///
    /// `indexed` specifies whether the meshes that these batch sets correspond
    /// to are indexed or not. `indirect_parameters_base` specifies the offset
    /// within `Self::indexed_data` or `Self::non_indexed_data` of the first
    /// batch in this batch set.
    #[inline]
    pub fn add_batch_set(&mut self, indexed: bool, indirect_parameters_base: u32) {
        if indexed {
            self.indexed.batch_sets.push(IndirectBatchSet {
                indirect_parameters_base,
                indirect_parameters_count: 0,
            });
        } else {
            self.non_indexed.batch_sets.push(IndirectBatchSet {
                indirect_parameters_base,
                indirect_parameters_count: 0,
            });
        }
    }

    /// Returns the index that a newly-added batch set will have.
    ///
    /// The `indexed` parameter specifies whether the meshes in such a batch set
    /// are indexed or not.
    pub fn get_next_batch_set_index(&self, indexed: bool) -> Option<NonMaxU32> {
        NonMaxU32::new(self.batch_set_count(indexed) as u32)
    }

    /// Clears out the buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        self.indexed.clear();
        self.non_indexed.clear();
    }
}

/// The buffers containing all the information that indirect draw commands use
/// to draw the scene, for a single mesh class (indexed or non-indexed), for a
/// single phase.
pub struct MeshClassIndirectParametersBuffers<IP>
where
    IP: Clone + ShaderSize + WriteInto,
{
    /// The GPU buffer that stores the indirect draw parameters for the meshes.
    ///
    /// The indirect parameters building shader writes to this buffer, while the
    /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
    /// it to perform the draws.
    data: UninitBufferVec<IP>,

    /// The GPU buffer that holds the data used to construct indirect draw
    /// parameters for meshes.
    ///
    /// The CPU
writes to this buffer, and the963/// indirect parameters building shader reads this buffer to construct the964/// indirect draw parameters.965cpu_metadata: RawBufferVec<IndirectParametersCpuMetadata>,966967/// The GPU buffer that holds data built by the GPU used to construct968/// indirect draw parameters for meshes.969///970/// The GPU mesh preprocessing shader writes to this buffer, and the971/// indirect parameters building shader reads this buffer to construct the972/// indirect draw parameters.973gpu_metadata: UninitBufferVec<IndirectParametersGpuMetadata>,974975/// The GPU buffer that holds the number of indirect draw commands for each976/// phase of each view, for meshes.977///978/// The indirect parameters building shader writes to this buffer, and the979/// `multi_draw_indirect_count` command reads from it in order to know how980/// many indirect draw commands to process.981batch_sets: RawBufferVec<IndirectBatchSet>,982}983984impl<IP> MeshClassIndirectParametersBuffers<IP>985where986IP: Clone + ShaderSize + WriteInto,987{988fn new(989allow_copies_from_indirect_parameter_buffers: bool,990) -> MeshClassIndirectParametersBuffers<IP> {991let mut indirect_parameter_buffer_usages = BufferUsages::STORAGE | BufferUsages::INDIRECT;992if allow_copies_from_indirect_parameter_buffers {993indirect_parameter_buffer_usages |= BufferUsages::COPY_SRC;994}995996MeshClassIndirectParametersBuffers {997data: UninitBufferVec::new(indirect_parameter_buffer_usages),998cpu_metadata: RawBufferVec::new(BufferUsages::STORAGE),999gpu_metadata: UninitBufferVec::new(BufferUsages::STORAGE),1000batch_sets: RawBufferVec::new(indirect_parameter_buffer_usages),1001}1002}10031004/// Returns the GPU buffer that stores the indirect draw parameters for1005/// indexed meshes.1006///1007/// The indirect parameters building shader writes to this buffer, while the1008/// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from1009/// it to perform the draws.1010#[inline]1011pub fn 
data_buffer(&self) -> Option<&Buffer> {1012self.data.buffer()1013}10141015/// Returns the GPU buffer that holds the CPU-constructed data used to1016/// construct indirect draw parameters for meshes.1017///1018/// The CPU writes to this buffer, and the indirect parameters building1019/// shader reads this buffer to construct the indirect draw parameters.1020#[inline]1021pub fn cpu_metadata_buffer(&self) -> Option<&Buffer> {1022self.cpu_metadata.buffer()1023}10241025/// Returns the GPU buffer that holds the GPU-constructed data used to1026/// construct indirect draw parameters for meshes.1027///1028/// The GPU mesh preprocessing shader writes to this buffer, and the1029/// indirect parameters building shader reads this buffer to construct the1030/// indirect draw parameters.1031#[inline]1032pub fn gpu_metadata_buffer(&self) -> Option<&Buffer> {1033self.gpu_metadata.buffer()1034}10351036/// Returns the GPU buffer that holds the number of indirect draw commands1037/// for each phase of each view.1038///1039/// The indirect parameters building shader writes to this buffer, and the1040/// `multi_draw_indirect_count` command reads from it in order to know how1041/// many indirect draw commands to process.1042#[inline]1043pub fn batch_sets_buffer(&self) -> Option<&Buffer> {1044self.batch_sets.buffer()1045}10461047/// Reserves space for `count` new batches.1048///1049/// This allocates in the [`Self::cpu_metadata`], [`Self::gpu_metadata`],1050/// and [`Self::data`] buffers.1051fn allocate(&mut self, count: u32) -> u32 {1052let length = self.data.len();1053self.cpu_metadata.reserve_internal(count as usize);1054self.gpu_metadata.add_multiple(count as usize);1055for _ in 0..count {1056self.data.add();1057self.cpu_metadata1058.push(IndirectParametersCpuMetadata::default());1059}1060length as u321061}10621063/// Sets the [`IndirectParametersCpuMetadata`] for the mesh at the given1064/// index.1065pub fn set(&mut self, index: u32, value: IndirectParametersCpuMetadata) 
{
        self.cpu_metadata.set(index, value);
    }

    /// Returns the number of batches corresponding to meshes that are currently
    /// allocated.
    #[inline]
    pub fn batch_count(&self) -> usize {
        self.data.len()
    }

    /// Clears out all the buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        self.data.clear();
        self.cpu_metadata.clear();
        self.gpu_metadata.clear();
        self.batch_sets.clear();
    }
}

impl Default for IndirectParametersBuffers {
    fn default() -> Self {
        // By default, we don't allow GPU indirect parameter mapping, since
        // that's a debugging option.
        Self::new(false)
    }
}

impl FromWorld for GpuPreprocessingSupport {
    fn from_world(world: &mut World) -> Self {
        let adapter = world.resource::<RenderAdapter>();
        let device = world.resource::<RenderDevice>();

        // Filter Android drivers that are incompatible with GPU preprocessing:
        // - We filter out Adreno 730 and earlier GPUs (except 720, as it's newer
        //   than 730).
        // - We filter out Mali GPUs with driver versions lower than 48.
        fn is_non_supported_android_device(adapter_info: &RenderAdapterInfo) -> bool {
            crate::get_adreno_model(adapter_info).is_some_and(|model| model != 720 && model <= 730)
                || crate::get_mali_driver_version(adapter_info).is_some_and(|version| version < 48)
        }

        let culling_feature_support = device
            .features()
            .contains(Features::INDIRECT_FIRST_INSTANCE | Features::IMMEDIATES);
        // Depth downsampling for occlusion culling requires 12 textures
        let limit_support = device.limits().max_storage_textures_per_shader_stage >= 12 &&
            // Even if the adapter supports compute, we might be simulating a lack of
            // compute via device limits (see `WgpuSettingsPriority::WebGL2` and
            // `wgpu::Limits::downlevel_webgl2_defaults()`). This will have set all the
            // `max_compute_*` limits to zero, so we arbitrarily pick one as a canary.
            device.limits().max_compute_workgroup_storage_size != 0;

        let downlevel_support = adapter
            .get_downlevel_capabilities()
            .flags
            .contains(DownlevelFlags::COMPUTE_SHADERS);

        let adapter_info = RenderAdapterInfo(WgpuWrapper::new(adapter.get_info()));

        let max_supported_mode = if device.limits().max_compute_workgroup_size_x == 0
            || is_non_supported_android_device(&adapter_info)
            || adapter_info.backend == wgpu::Backend::Gl
        {
            info!(
                "GPU preprocessing is not supported on this device. \
                Falling back to CPU preprocessing.",
            );
            GpuPreprocessingMode::None
        } else if !(culling_feature_support && limit_support && downlevel_support) {
            info!("Some GPU preprocessing features are limited on this device.");
            GpuPreprocessingMode::PreprocessingOnly
        } else {
            info!("GPU preprocessing is fully supported on this device.");
            GpuPreprocessingMode::Culling
        };

        GpuPreprocessingSupport { max_supported_mode }
    }
}

impl<BD, BDI> BatchedInstanceBuffers<BD, BDI>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
    BDI: Pod + Sync + Send + Default + 'static,
{
    /// Creates new buffers.
    pub fn new() -> Self {
        Self::default()
    }

    /// Clears out the buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        for phase_instance_buffer in self.phase_instance_buffers.values_mut() {
            phase_instance_buffer.clear();
        }
    }
}

impl<BD> UntypedPhaseBatchedInstanceBuffers<BD>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    pub fn new() -> Self {
        UntypedPhaseBatchedInstanceBuffers {
            data_buffer: UninitBufferVec::new(BufferUsages::STORAGE),
            work_item_buffers: HashMap::default(),
            late_indexed_indirect_parameters_buffer: RawBufferVec::new(
                BufferUsages::STORAGE | BufferUsages::INDIRECT,
            ),
            late_non_indexed_indirect_parameters_buffer: RawBufferVec::new(
                BufferUsages::STORAGE | BufferUsages::INDIRECT,
            ),
        }
    }

    /// Returns the binding of the buffer that contains the per-instance data.
    ///
    /// This buffer needs to be filled in via a compute shader.
    pub fn instance_data_binding(&self) -> Option<BindingResource<'_>> {
        self.data_buffer
            .buffer()
            .map(|buffer| buffer.as_entire_binding())
    }

    /// Clears out the buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        self.data_buffer.clear();
        self.late_indexed_indirect_parameters_buffer.clear();
        self.late_non_indexed_indirect_parameters_buffer.clear();

        // Clear each individual set of buffers, but don't depopulate the hash
        // table. We want to avoid reallocating these vectors every frame.
        for view_work_item_buffers in self.work_item_buffers.values_mut() {
            view_work_item_buffers.clear();
        }
    }
}

impl<BD> Default for UntypedPhaseBatchedInstanceBuffers<BD>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    fn default() -> Self {
        Self::new()
    }
}

/// Information about a render batch that we're building up during a sorted
/// render phase.
struct SortedRenderBatch<F>
where
    F: GetBatchData,
{
    /// The index of the first phase item in this batch in the list of phase
    /// items.
    phase_item_start_index: u32,

    /// The index of the first instance in this batch in the instance buffer.
    instance_start_index: u32,

    /// True if the mesh in question has an index buffer; false otherwise.
    indexed: bool,

    /// The index of the indirect parameters for this batch in the
    /// [`IndirectParametersBuffers`].
    ///
    /// If CPU culling is being used, then this will be `None`.
    indirect_parameters_index: Option<NonMaxU32>,

    /// Metadata that can be used to determine whether an instance can be placed
    /// into this batch.
    ///
    /// If `None`, the item inside is unbatchable.
    meta:
Option<BatchMeta<F::CompareData>>,
}

impl<F> SortedRenderBatch<F>
where
    F: GetBatchData,
{
    /// Finalizes this batch and updates the [`SortedRenderPhase`] with the
    /// appropriate indices.
    ///
    /// `instance_end_index` is the index of the last instance in this batch
    /// plus one.
    fn flush<I>(
        self,
        instance_end_index: u32,
        phase: &mut SortedRenderPhase<I>,
        phase_indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers,
    ) where
        I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
    {
        let (batch_range, batch_extra_index) =
            phase.items[self.phase_item_start_index as usize].batch_range_and_extra_index_mut();
        *batch_range = self.instance_start_index..instance_end_index;
        *batch_extra_index = match self.indirect_parameters_index {
            Some(indirect_parameters_index) => PhaseItemExtraIndex::IndirectParametersIndex {
                range: u32::from(indirect_parameters_index)
                    ..(u32::from(indirect_parameters_index) + 1),
                batch_set_index: None,
            },
            None => PhaseItemExtraIndex::None,
        };
        // In indirect mode, each sorted batch gets a batch set of its own.
        if let Some(indirect_parameters_index) = self.indirect_parameters_index {
            phase_indirect_parameters_buffers
                .add_batch_set(self.indexed, indirect_parameters_index.into());
        }
    }
}

/// A system that runs early in extraction and clears out all the
/// [`BatchedInstanceBuffers`] for the frame.
///
/// We have to run this during extraction because, if GPU preprocessing is in
/// use, the extraction phase will write to the mesh input uniform buffers
/// directly, so the buffers need to be cleared before then.
pub fn clear_batched_gpu_instance_buffers<GFBD>(
    gpu_batched_instance_buffers: Option<
        ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
    >,
) where
    GFBD: GetFullBatchData,
{
    // Don't clear the entire table, because that would delete the buffers, and
    // we want to reuse those allocations.
    if let Some(mut gpu_batched_instance_buffers) = gpu_batched_instance_buffers {
        gpu_batched_instance_buffers.clear();
    }
}

/// A system that removes GPU preprocessing work item buffers that correspond to
/// deleted [`ExtractedView`]s.
///
/// This is a separate system from [`clear_batched_gpu_instance_buffers`]
/// because [`ExtractedView`]s aren't created until after the extraction phase
/// is completed.
pub fn delete_old_work_item_buffers<GFBD>(
    mut gpu_batched_instance_buffers: ResMut<
        BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>,
    >,
    extracted_views: Query<&ExtractedView>,
) where
    GFBD: GetFullBatchData,
{
    // Gather the set of views that still exist this frame…
    let retained_view_entities: HashSet<_> = extracted_views
        .iter()
        .map(|extracted_view| extracted_view.retained_view_entity)
        .collect();
    // …and drop the work item buffers for any view no longer in that set.
    for phase_instance_buffers in gpu_batched_instance_buffers
        .phase_instance_buffers
        .values_mut()
    {
        phase_instance_buffers
            .work_item_buffers
            .retain(|retained_view_entity, _| {
                retained_view_entities.contains(retained_view_entity)
            });
    }
}

/// Batch the items in a sorted render phase, when GPU instance buffer building
/// is in use.
This means comparing metadata needed to draw each phase item and1329/// trying to combine the draws into a batch.1330pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(1331mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<I, GFBD::BufferData>>,1332mut phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<I>>,1333mut sorted_render_phases: ResMut<ViewSortedRenderPhases<I>>,1334mut views: Query<(1335&ExtractedView,1336Has<NoIndirectDrawing>,1337Has<OcclusionCulling>,1338)>,1339system_param_item: StaticSystemParam<GFBD::Param>,1340) where1341I: CachedRenderPipelinePhaseItem + SortedPhaseItem,1342GFBD: GetFullBatchData,1343{1344// We only process GPU-built batch data in this function.1345let UntypedPhaseBatchedInstanceBuffers {1346ref mut data_buffer,1347ref mut work_item_buffers,1348ref mut late_indexed_indirect_parameters_buffer,1349ref mut late_non_indexed_indirect_parameters_buffer,1350} = phase_batched_instance_buffers.buffers;13511352for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views {1353let Some(phase) = sorted_render_phases.get_mut(&extracted_view.retained_view_entity) else {1354continue;1355};13561357// Create the work item buffer if necessary.1358let work_item_buffer = get_or_create_work_item_buffer::<I>(1359work_item_buffers,1360extracted_view.retained_view_entity,1361no_indirect_drawing,1362gpu_occlusion_culling,1363);13641365// Initialize those work item buffers in preparation for this new frame.1366init_work_item_buffers(1367work_item_buffer,1368late_indexed_indirect_parameters_buffer,1369late_non_indexed_indirect_parameters_buffer,1370);13711372// Walk through the list of phase items, building up batches as we go.1373let mut batch: Option<SortedRenderBatch<GFBD>> = None;13741375for current_index in 0..phase.items.len() {1376// Get the index of the input data, and comparison metadata, for1377// this entity.1378let item = &phase.items[current_index];1379let entity = 
item.main_entity();1380let item_is_indexed = item.indexed();1381let current_batch_input_index =1382GFBD::get_index_and_compare_data(&system_param_item, entity);13831384// Unpack that index and metadata. Note that it's possible for index1385// and/or metadata to not be present, which signifies that this1386// entity is unbatchable. In that case, we break the batch here.1387// If the index isn't present the item is not part of this pipeline and so will be skipped.1388let Some((current_input_index, current_meta)) = current_batch_input_index else {1389// Break a batch if we need to.1390if let Some(batch) = batch.take() {1391batch.flush(1392data_buffer.len() as u32,1393phase,1394&mut phase_indirect_parameters_buffers.buffers,1395);1396}13971398continue;1399};1400let current_meta =1401current_meta.map(|meta| BatchMeta::new(&phase.items[current_index], meta));14021403// Determine if this entity can be included in the batch we're1404// building up.1405let can_batch = batch.as_ref().is_some_and(|batch| {1406// `None` for metadata indicates that the items are unbatchable.1407match (¤t_meta, &batch.meta) {1408(Some(current_meta), Some(batch_meta)) => current_meta == batch_meta,1409(_, _) => false,1410}1411});14121413// Make space in the data buffer for this instance.1414let output_index = data_buffer.add() as u32;14151416// If we can't batch, break the existing batch and make a new one.1417if !can_batch {1418// Break a batch if we need to.1419if let Some(batch) = batch.take() {1420batch.flush(1421output_index,1422phase,1423&mut phase_indirect_parameters_buffers.buffers,1424);1425}14261427let indirect_parameters_index = if no_indirect_drawing {1428None1429} else if item_is_indexed {1430Some(1431phase_indirect_parameters_buffers1432.buffers1433.indexed1434.allocate(1),1435)1436} else {1437Some(1438phase_indirect_parameters_buffers1439.buffers1440.non_indexed1441.allocate(1),1442)1443};14441445// Start a new batch.1446if let Some(indirect_parameters_index) = 
indirect_parameters_index {1447GFBD::write_batch_indirect_parameters_metadata(1448item_is_indexed,1449output_index,1450None,1451&mut phase_indirect_parameters_buffers.buffers,1452indirect_parameters_index,1453);1454};14551456batch = Some(SortedRenderBatch {1457phase_item_start_index: current_index as u32,1458instance_start_index: output_index,1459indexed: item_is_indexed,1460indirect_parameters_index: indirect_parameters_index.and_then(NonMaxU32::new),1461meta: current_meta,1462});1463}14641465// Add a new preprocessing work item so that the preprocessing1466// shader will copy the per-instance data over.1467if let Some(batch) = batch.as_ref() {1468work_item_buffer.push(1469item_is_indexed,1470PreprocessWorkItem {1471input_index: current_input_index.into(),1472output_or_indirect_parameters_index: match (1473no_indirect_drawing,1474batch.indirect_parameters_index,1475) {1476(true, _) => output_index,1477(false, Some(indirect_parameters_index)) => {1478indirect_parameters_index.into()1479}1480(false, None) => 0,1481},1482},1483);1484}1485}14861487// Flush the final batch if necessary.1488if let Some(batch) = batch.take() {1489batch.flush(1490data_buffer.len() as u32,1491phase,1492&mut phase_indirect_parameters_buffers.buffers,1493);1494}1495}1496}14971498/// Creates batches for a render phase that uses bins.1499pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(1500mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<BPI, GFBD::BufferData>>,1501phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<BPI>>,1502mut binned_render_phases: ResMut<ViewBinnedRenderPhases<BPI>>,1503mut views: Query<1504(1505&ExtractedView,1506Has<NoIndirectDrawing>,1507Has<OcclusionCulling>,1508),1509With<ExtractedView>,1510>,1511param: StaticSystemParam<GFBD::Param>,1512) where1513BPI: BinnedPhaseItem,1514GFBD: GetFullBatchData,1515{1516let system_param_item = param.into_inner();15171518let phase_indirect_parameters_buffers = 
phase_indirect_parameters_buffers.into_inner();15191520let UntypedPhaseBatchedInstanceBuffers {1521ref mut data_buffer,1522ref mut work_item_buffers,1523ref mut late_indexed_indirect_parameters_buffer,1524ref mut late_non_indexed_indirect_parameters_buffer,1525} = phase_batched_instance_buffers.buffers;15261527for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views {1528let Some(phase) = binned_render_phases.get_mut(&extracted_view.retained_view_entity) else {1529continue;1530};15311532// Create the work item buffer if necessary; otherwise, just mark it as1533// used this frame.1534let work_item_buffer = get_or_create_work_item_buffer::<BPI>(1535work_item_buffers,1536extracted_view.retained_view_entity,1537no_indirect_drawing,1538gpu_occlusion_culling,1539);15401541// Initialize those work item buffers in preparation for this new frame.1542init_work_item_buffers(1543work_item_buffer,1544late_indexed_indirect_parameters_buffer,1545late_non_indexed_indirect_parameters_buffer,1546);15471548// Prepare multidrawables.15491550if let (1551&mut BinnedRenderPhaseBatchSets::MultidrawIndirect(ref mut batch_sets),1552&mut PreprocessWorkItemBuffers::Indirect {1553indexed: ref mut indexed_work_item_buffer,1554non_indexed: ref mut non_indexed_work_item_buffer,1555gpu_occlusion_culling: ref mut gpu_occlusion_culling_buffers,1556},1557) = (&mut phase.batch_sets, &mut *work_item_buffer)1558{1559let mut output_index = data_buffer.len() as u32;15601561// Initialize the state for both indexed and non-indexed meshes.1562let mut indexed_preparer: MultidrawableBatchSetPreparer<BPI, GFBD> =1563MultidrawableBatchSetPreparer::new(1564phase_indirect_parameters_buffers.buffers.batch_count(true) as u32,1565phase_indirect_parameters_buffers1566.buffers1567.indexed1568.batch_sets1569.len() as u32,1570);1571let mut non_indexed_preparer: MultidrawableBatchSetPreparer<BPI, GFBD> 
=1572MultidrawableBatchSetPreparer::new(1573phase_indirect_parameters_buffers.buffers.batch_count(false) as u32,1574phase_indirect_parameters_buffers1575.buffers1576.non_indexed1577.batch_sets1578.len() as u32,1579);15801581// Prepare each batch set.1582for (batch_set_key, bins) in &phase.multidrawable_meshes {1583if batch_set_key.indexed() {1584indexed_preparer.prepare_multidrawable_binned_batch_set(1585bins,1586&mut output_index,1587data_buffer,1588indexed_work_item_buffer,1589&mut phase_indirect_parameters_buffers.buffers.indexed,1590batch_sets,1591);1592} else {1593non_indexed_preparer.prepare_multidrawable_binned_batch_set(1594bins,1595&mut output_index,1596data_buffer,1597non_indexed_work_item_buffer,1598&mut phase_indirect_parameters_buffers.buffers.non_indexed,1599batch_sets,1600);1601}1602}16031604// Reserve space in the occlusion culling buffers, if necessary.1605if let Some(gpu_occlusion_culling_buffers) = gpu_occlusion_culling_buffers {1606gpu_occlusion_culling_buffers1607.late_indexed1608.add_multiple(indexed_preparer.work_item_count);1609gpu_occlusion_culling_buffers1610.late_non_indexed1611.add_multiple(non_indexed_preparer.work_item_count);1612}1613}16141615// Prepare batchables.16161617for (key, bin) in &phase.batchable_meshes {1618let mut batch: Option<BinnedRenderPhaseBatch> = None;1619for (&main_entity, &input_index) in bin.entities() {1620let output_index = data_buffer.add() as u32;16211622match batch {1623Some(ref mut batch) => {1624batch.instance_range.end = output_index + 1;16251626// Append to the current batch.1627//1628// If we're in indirect mode, then we write the first1629// output index of this batch, so that we have a1630// tightly-packed buffer if GPU culling discards some of1631// the instances. 
Otherwise, we can just write the1632// output index directly.1633work_item_buffer.push(1634key.0.indexed(),1635PreprocessWorkItem {1636input_index: *input_index,1637output_or_indirect_parameters_index: match (1638no_indirect_drawing,1639&batch.extra_index,1640) {1641(true, _) => output_index,1642(1643false,1644PhaseItemExtraIndex::IndirectParametersIndex {1645range: indirect_parameters_range,1646..1647},1648) => indirect_parameters_range.start,1649(false, &PhaseItemExtraIndex::DynamicOffset(_))1650| (false, &PhaseItemExtraIndex::None) => 0,1651},1652},1653);1654}16551656None if !no_indirect_drawing => {1657// Start a new batch, in indirect mode.1658let indirect_parameters_index = phase_indirect_parameters_buffers1659.buffers1660.allocate(key.0.indexed(), 1);1661let batch_set_index = phase_indirect_parameters_buffers1662.buffers1663.get_next_batch_set_index(key.0.indexed());16641665GFBD::write_batch_indirect_parameters_metadata(1666key.0.indexed(),1667output_index,1668batch_set_index,1669&mut phase_indirect_parameters_buffers.buffers,1670indirect_parameters_index,1671);1672work_item_buffer.push(1673key.0.indexed(),1674PreprocessWorkItem {1675input_index: *input_index,1676output_or_indirect_parameters_index: indirect_parameters_index,1677},1678);1679batch = Some(BinnedRenderPhaseBatch {1680representative_entity: (Entity::PLACEHOLDER, main_entity),1681instance_range: output_index..output_index + 1,1682extra_index: PhaseItemExtraIndex::IndirectParametersIndex {1683range: indirect_parameters_index..(indirect_parameters_index + 1),1684batch_set_index: None,1685},1686});1687}16881689None => {1690// Start a new batch, in direct mode.1691work_item_buffer.push(1692key.0.indexed(),1693PreprocessWorkItem {1694input_index: *input_index,1695output_or_indirect_parameters_index: output_index,1696},1697);1698batch = Some(BinnedRenderPhaseBatch {1699representative_entity: (Entity::PLACEHOLDER, main_entity),1700instance_range: output_index..output_index + 1,1701extra_index: 
PhaseItemExtraIndex::None,1702});1703}1704}1705}17061707if let Some(batch) = batch {1708match phase.batch_sets {1709BinnedRenderPhaseBatchSets::DynamicUniforms(_) => {1710error!("Dynamic uniform batch sets shouldn't be used here");1711}1712BinnedRenderPhaseBatchSets::Direct(ref mut vec) => {1713vec.push(batch);1714}1715BinnedRenderPhaseBatchSets::MultidrawIndirect(ref mut vec) => {1716// The Bevy renderer will never mark a mesh as batchable1717// but not multidrawable if multidraw is in use.1718// However, custom render pipelines might do so, such as1719// the `specialized_mesh_pipeline` example.1720vec.push(BinnedRenderPhaseBatchSet {1721first_batch: batch,1722batch_count: 1,1723bin_key: key.1.clone(),1724index: phase_indirect_parameters_buffers1725.buffers1726.batch_set_count(key.0.indexed())1727as u32,1728});1729}1730}1731}1732}17331734// Prepare unbatchables.1735for (key, unbatchables) in &mut phase.unbatchable_meshes {1736// Allocate the indirect parameters if necessary.1737let mut indirect_parameters_offset = if no_indirect_drawing {1738None1739} else if key.0.indexed() {1740Some(1741phase_indirect_parameters_buffers1742.buffers1743.indexed1744.allocate(unbatchables.entities.len() as u32),1745)1746} else {1747Some(1748phase_indirect_parameters_buffers1749.buffers1750.non_indexed1751.allocate(unbatchables.entities.len() as u32),1752)1753};17541755for main_entity in unbatchables.entities.keys() {1756let Some(input_index) = GFBD::get_binned_index(&system_param_item, *main_entity)1757else {1758continue;1759};1760let output_index = data_buffer.add() as u32;17611762if let Some(ref mut indirect_parameters_index) = indirect_parameters_offset {1763// We're in indirect mode, so add an indirect parameters1764// index.1765GFBD::write_batch_indirect_parameters_metadata(1766key.0.indexed(),1767output_index,1768None,1769&mut phase_indirect_parameters_buffers.buffers,1770*indirect_parameters_index,1771);1772work_item_buffer.push(1773key.0.indexed(),1774PreprocessWorkItem 
{1775input_index: input_index.into(),1776output_or_indirect_parameters_index: *indirect_parameters_index,1777},1778);1779unbatchables1780.buffer_indices1781.add(UnbatchableBinnedEntityIndices {1782instance_index: *indirect_parameters_index,1783extra_index: PhaseItemExtraIndex::IndirectParametersIndex {1784range: *indirect_parameters_index..(*indirect_parameters_index + 1),1785batch_set_index: None,1786},1787});1788phase_indirect_parameters_buffers1789.buffers1790.add_batch_set(key.0.indexed(), *indirect_parameters_index);1791*indirect_parameters_index += 1;1792} else {1793work_item_buffer.push(1794key.0.indexed(),1795PreprocessWorkItem {1796input_index: input_index.into(),1797output_or_indirect_parameters_index: output_index,1798},1799);1800unbatchables1801.buffer_indices1802.add(UnbatchableBinnedEntityIndices {1803instance_index: output_index,1804extra_index: PhaseItemExtraIndex::None,1805});1806}1807}1808}1809}1810}18111812/// The state that [`batch_and_prepare_binned_render_phase`] uses to construct1813/// multidrawable batch sets.1814///1815/// The [`batch_and_prepare_binned_render_phase`] system maintains two of these:1816/// one for indexed meshes and one for non-indexed meshes.1817struct MultidrawableBatchSetPreparer<BPI, GFBD>1818where1819BPI: BinnedPhaseItem,1820GFBD: GetFullBatchData,1821{1822/// The offset in the indirect parameters buffer at which the next indirect1823/// parameters will be written.1824indirect_parameters_index: u32,1825/// The number of batch sets we've built so far for this mesh class.1826batch_set_index: u32,1827/// The number of work items we've emitted so far for this mesh class.1828work_item_count: usize,1829phantom: PhantomData<(BPI, GFBD)>,1830}18311832impl<BPI, GFBD> MultidrawableBatchSetPreparer<BPI, GFBD>1833where1834BPI: BinnedPhaseItem,1835GFBD: GetFullBatchData,1836{1837/// Creates a new [`MultidrawableBatchSetPreparer`] that will start writing1838/// indirect parameters and batch sets at the given 
indices.1839#[inline]1840fn new(initial_indirect_parameters_index: u32, initial_batch_set_index: u32) -> Self {1841MultidrawableBatchSetPreparer {1842indirect_parameters_index: initial_indirect_parameters_index,1843batch_set_index: initial_batch_set_index,1844work_item_count: 0,1845phantom: PhantomData,1846}1847}18481849/// Creates batch sets and writes the GPU data needed to draw all visible1850/// entities of one mesh class in the given batch set.1851///1852/// The *mesh class* represents whether the mesh has indices or not.1853#[inline]1854fn prepare_multidrawable_binned_batch_set<IP>(1855&mut self,1856bins: &IndexMap<BPI::BinKey, RenderBin>,1857output_index: &mut u32,1858data_buffer: &mut UninitBufferVec<GFBD::BufferData>,1859indexed_work_item_buffer: &mut RawBufferVec<PreprocessWorkItem>,1860mesh_class_buffers: &mut MeshClassIndirectParametersBuffers<IP>,1861batch_sets: &mut Vec<BinnedRenderPhaseBatchSet<BPI::BinKey>>,1862) where1863IP: Clone + ShaderSize + WriteInto,1864{1865let current_indexed_batch_set_index = self.batch_set_index;1866let current_output_index = *output_index;18671868let indirect_parameters_base = self.indirect_parameters_index;18691870// We're going to write the first entity into the batch set. 
Do this1871// here so that we can preload the bin into cache as a side effect.1872let Some((first_bin_key, first_bin)) = bins.iter().next() else {1873return;1874};1875let first_bin_len = first_bin.entities().len();1876let first_bin_entity = first_bin1877.entities()1878.keys()1879.next()1880.copied()1881.unwrap_or(MainEntity::from(Entity::PLACEHOLDER));18821883// Traverse the batch set, processing each bin.1884for bin in bins.values() {1885// Record the first output index for this batch, as well as its own1886// index.1887mesh_class_buffers1888.cpu_metadata1889.push(IndirectParametersCpuMetadata {1890base_output_index: *output_index,1891batch_set_index: self.batch_set_index,1892});18931894// Traverse the bin, pushing `PreprocessWorkItem`s for each entity1895// within it. This is a hot loop, so make it as fast as possible.1896for &input_index in bin.entities().values() {1897indexed_work_item_buffer.push(PreprocessWorkItem {1898input_index: *input_index,1899output_or_indirect_parameters_index: self.indirect_parameters_index,1900});1901}19021903// Reserve space for the appropriate number of entities in the data1904// buffer. Also, advance the output index and work item count.1905let bin_entity_count = bin.entities().len();1906data_buffer.add_multiple(bin_entity_count);1907*output_index += bin_entity_count as u32;1908self.work_item_count += bin_entity_count;19091910self.indirect_parameters_index += 1;1911}19121913// Reserve space for the bins in this batch set in the GPU buffers.1914let bin_count = bins.len();1915mesh_class_buffers.gpu_metadata.add_multiple(bin_count);1916mesh_class_buffers.data.add_multiple(bin_count);19171918// Write the information the GPU will need about this batch set.1919mesh_class_buffers.batch_sets.push(IndirectBatchSet {1920indirect_parameters_base,1921indirect_parameters_count: 0,1922});19231924self.batch_set_index += 1;19251926// Record the batch set. 
The render node later processes this record to1927// render the batches.1928batch_sets.push(BinnedRenderPhaseBatchSet {1929first_batch: BinnedRenderPhaseBatch {1930representative_entity: (Entity::PLACEHOLDER, first_bin_entity),1931instance_range: current_output_index..(current_output_index + first_bin_len as u32),1932extra_index: PhaseItemExtraIndex::maybe_indirect_parameters_index(NonMaxU32::new(1933indirect_parameters_base,1934)),1935},1936bin_key: (*first_bin_key).clone(),1937batch_count: self.indirect_parameters_index - indirect_parameters_base,1938index: current_indexed_batch_set_index,1939});1940}1941}19421943/// A system that gathers up the per-phase GPU buffers and inserts them into the1944/// [`BatchedInstanceBuffers`] and [`IndirectParametersBuffers`] tables.1945///1946/// This runs after the [`batch_and_prepare_binned_render_phase`] or1947/// [`batch_and_prepare_sorted_render_phase`] systems. It takes the per-phase1948/// [`PhaseBatchedInstanceBuffers`] and [`PhaseIndirectParametersBuffers`]1949/// resources and inserts them into the global [`BatchedInstanceBuffers`] and1950/// [`IndirectParametersBuffers`] tables.1951///1952/// This system exists so that the [`batch_and_prepare_binned_render_phase`] and1953/// [`batch_and_prepare_sorted_render_phase`] can run in parallel with one1954/// another. 
/// If those two systems manipulated [`BatchedInstanceBuffers`] and
/// [`IndirectParametersBuffers`] directly, then they wouldn't be able to run in
/// parallel.
pub fn collect_buffers_for_phase<PI, GFBD>(
    mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<PI, GFBD::BufferData>>,
    mut phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<PI>>,
    mut batched_instance_buffers: ResMut<
        BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>,
    >,
    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
) where
    PI: PhaseItem,
    GFBD: GetFullBatchData + Send + Sync + 'static,
{
    // Insert the `PhaseBatchedInstanceBuffers` into the global table. Replace
    // the contents of the per-phase resource with the old batched instance
    // buffers in order to reuse allocations.
    //
    // `mem::take` leaves a default (empty) value in the per-phase resource so
    // that the taken buffers can be moved into the global table by value.
    let untyped_phase_batched_instance_buffers =
        mem::take(&mut phase_batched_instance_buffers.buffers);
    if let Some(mut old_untyped_phase_batched_instance_buffers) = batched_instance_buffers
        .phase_instance_buffers
        .insert(TypeId::of::<PI>(), untyped_phase_batched_instance_buffers)
    {
        // The previous frame's buffers for this phase come back out of the
        // table; clear them and hand them to the per-phase resource so their
        // allocations are reused next frame.
        old_untyped_phase_batched_instance_buffers.clear();
        phase_batched_instance_buffers.buffers = old_untyped_phase_batched_instance_buffers;
    }

    // Insert the `PhaseIndirectParametersBuffers` into the global table.
    // Replace the contents of the per-phase resource with the old indirect
    // parameters buffers in order to reuse allocations.
    //
    // Unlike above, this uses `mem::replace` because the fresh value needs the
    // `allow_copies_from_indirect_parameter_buffers` debug flag threaded
    // through, so `Default` can't be used.
    let untyped_phase_indirect_parameters_buffers = mem::replace(
        &mut phase_indirect_parameters_buffers.buffers,
        UntypedPhaseIndirectParametersBuffers::new(
            indirect_parameters_buffers.allow_copies_from_indirect_parameter_buffers,
        ),
    );
    if let Some(mut old_untyped_phase_indirect_parameters_buffers) = indirect_parameters_buffers
        .insert(
            TypeId::of::<PI>(),
            untyped_phase_indirect_parameters_buffers,
        )
    {
        // As above: recycle the displaced buffers' allocations.
        old_untyped_phase_indirect_parameters_buffers.clear();
        phase_indirect_parameters_buffers.buffers = old_untyped_phase_indirect_parameters_buffers;
    }
}

/// A system that writes all instance buffers to the GPU.
///
/// Each buffer upload is spawned as its own task on the [`ComputeTaskPool`]
/// scope so the writes proceed in parallel; the scope blocks until all spawned
/// tasks complete.
pub fn write_batched_instance_buffers<GFBD>(
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
) where
    GFBD: GetFullBatchData,
{
    // Destructure once so each field can be borrowed independently by the
    // spawned tasks below.
    let BatchedInstanceBuffers {
        current_input_buffer,
        previous_input_buffer,
        phase_instance_buffers,
    } = gpu_array_buffer.into_inner();

    // Reborrow as plain `&` references that the `async` blocks can capture.
    let render_device = &*render_device;
    let render_queue = &*render_queue;

    ComputeTaskPool::get().scope(|scope| {
        scope.spawn(async {
            let _span = bevy_log::info_span!("write_current_input_buffers").entered();
            current_input_buffer
                .buffer
                .write_buffer(render_device, render_queue);
        });
        scope.spawn(async {
            let _span = bevy_log::info_span!("write_previous_input_buffers").entered();
            previous_input_buffer
                .buffer
                .write_buffer(render_device, render_queue);
        });

        for phase_instance_buffers in phase_instance_buffers.values_mut() {
            // Split the struct borrow so different tasks can mutably borrow
            // different fields at the same time.
            let UntypedPhaseBatchedInstanceBuffers {
                ref mut data_buffer,
                ref mut work_item_buffers,
                ref mut late_indexed_indirect_parameters_buffer,
                ref mut late_non_indexed_indirect_parameters_buffer,
            } = *phase_instance_buffers;

            scope.spawn(async {
                let _span = bevy_log::info_span!("write_phase_instance_buffers").entered();
                // NOTE(review): `data_buffer` is written with only the device
                // (no queue) — presumably it's allocated here and populated on
                // the GPU; confirm against the buffer type's `write_buffer`
                // signature.
                data_buffer.write_buffer(render_device);
                late_indexed_indirect_parameters_buffer.write_buffer(render_device, render_queue);
                late_non_indexed_indirect_parameters_buffer
                    .write_buffer(render_device, render_queue);
            });

            // One task per view's work item buffers.
            for phase_work_item_buffers in work_item_buffers.values_mut() {
                scope.spawn(async {
                    let _span = bevy_log::info_span!("write_work_item_buffers").entered();
                    match *phase_work_item_buffers {
                        PreprocessWorkItemBuffers::Direct(ref mut buffer_vec) => {
                            buffer_vec.write_buffer(render_device, render_queue);
                        }
                        PreprocessWorkItemBuffers::Indirect {
                            ref mut indexed,
                            ref mut non_indexed,
                            ref mut gpu_occlusion_culling,
                        } => {
                            indexed.write_buffer(render_device, render_queue);
                            non_indexed.write_buffer(render_device, render_queue);

                            // Late (second-phase occlusion culling) buffers are
                            // only present when GPU occlusion culling is in
                            // use, and are skipped entirely when empty.
                            if let Some(GpuOcclusionCullingWorkItemBuffers {
                                ref mut late_indexed,
                                ref mut late_non_indexed,
                                late_indirect_parameters_indexed_offset: _,
                                late_indirect_parameters_non_indexed_offset: _,
                            }) = *gpu_occlusion_culling
                            {
                                if !late_indexed.is_empty() {
                                    late_indexed.write_buffer(render_device);
                                }
                                if !late_non_indexed.is_empty() {
                                    late_non_indexed.write_buffer(render_device);
                                }
                            }
                        }
                    }
                });
            }
        }
    });
}

/// A system that clears out every phase's indirect parameters buffers.
///
/// This runs in [`RenderSystems::ManageViews`] (see [`BatchingPlugin::build`])
/// so that the buffers start empty before batching refills them each frame.
pub fn clear_indirect_parameters_buffers(
    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
) {
    for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() {
        phase_indirect_parameters_buffers.clear();
    }
}

/// A system that writes every phase's indirect parameters buffers to the GPU.
///
/// This runs in [`RenderSystems::PrepareResourcesFlush`] (see
/// [`BatchingPlugin::build`]). Each buffer is written in its own task on the
/// [`ComputeTaskPool`] scope so the uploads proceed in parallel.
pub fn write_indirect_parameters_buffers(
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
) {
    // Reborrow as plain `&` references that the `async` blocks can capture.
    let render_device = &*render_device;
    let render_queue = &*render_queue;
    ComputeTaskPool::get().scope(|scope| {
        for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() {
            // NOTE(review): `data` and `gpu_metadata` are written with only
            // the device (allocation only; presumably populated on the GPU),
            // while `cpu_metadata` and `batch_sets` are uploaded through the
            // queue — confirm against the underlying buffer types.
            scope.spawn(async {
                let _span = bevy_log::info_span!("indexed_data").entered();
                phase_indirect_parameters_buffers
                    .indexed
                    .data
                    .write_buffer(render_device);
            });
            scope.spawn(async {
                let _span = bevy_log::info_span!("non_indexed_data").entered();
                phase_indirect_parameters_buffers
                    .non_indexed
                    .data
                    .write_buffer(render_device);
            });

            scope.spawn(async {
                let _span = bevy_log::info_span!("indexed_cpu_metadata").entered();
                phase_indirect_parameters_buffers
                    .indexed
                    .cpu_metadata
                    .write_buffer(render_device, render_queue);
            });
            scope.spawn(async {
                let _span = bevy_log::info_span!("non_indexed_cpu_metadata").entered();
                phase_indirect_parameters_buffers
                    .non_indexed
                    .cpu_metadata
                    .write_buffer(render_device, render_queue);
            });

            scope.spawn(async {
                let _span = bevy_log::info_span!("non_indexed_gpu_metadata").entered();
                phase_indirect_parameters_buffers
                    .non_indexed
                    .gpu_metadata
                    .write_buffer(render_device);
            });
            scope.spawn(async {
                let _span = bevy_log::info_span!("indexed_gpu_metadata").entered();
                phase_indirect_parameters_buffers
                    .indexed
                    .gpu_metadata
                    .write_buffer(render_device);
            });

            scope.spawn(async {
                let _span = bevy_log::info_span!("indexed_batch_sets").entered();
                phase_indirect_parameters_buffers
                    .indexed
                    .batch_sets
                    .write_buffer(render_device, render_queue);
            });
            scope.spawn(async {
                let _span = bevy_log::info_span!("non_indexed_batch_sets").entered();
                phase_indirect_parameters_buffers
                    .non_indexed
                    .batch_sets
                    .write_buffer(render_device, render_queue);
            });
        }
    });
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn instance_buffer_correct_behavior() {
        let mut instance_buffer = InstanceInputUniformBuffer::new();

        let index = instance_buffer.add(2);
        instance_buffer.remove(index);
        // After removal, the slot's raw data is still readable via
        // `get_unchecked` (removal doesn't zero the storage), but the checked
        // `get` reports the slot as vacant.
        assert_eq!(instance_buffer.get_unchecked(index), 2);
        assert_eq!(instance_buffer.get(index), None);

        // Adding after a removal reuses the freed slot, so the buffer length
        // stays at 1.
        instance_buffer.add(5);
        assert_eq!(instance_buffer.buffer().len(), 1);
    }
}