Path: blob/main/crates/bevy_render/src/render_resource/sparse_buffer_vec.rs
30636 views
//! GPU buffers that support sparse updates if only a small number of elements1//! have changed.23use alloc::sync::{Arc, Weak};4use core::{5iter, slice,6sync::atomic::{AtomicU64, Ordering},7};89use bevy_app::{App, Plugin};10use bevy_asset::{embedded_asset, load_embedded_asset, Handle};11use bevy_derive::{Deref, DerefMut};12use bevy_ecs::{13resource::Resource,14schedule::IntoScheduleConfigs as _,15system::{Res, ResMut},16world::{FromWorld, World},17};18use bevy_log::{error, info};19use bevy_material::{20bind_group_layout_entries::{21binding_types::{storage_buffer, storage_buffer_read_only, uniform_buffer},22BindGroupLayoutEntries,23},24descriptor::{BindGroupLayoutDescriptor, CachedComputePipelineId, ComputePipelineDescriptor},25};26use bevy_shader::Shader;27use bytemuck::{Pod, Zeroable};28use encase::ShaderType;29use weak_table::WeakKeyHashMap;30use wgpu::{BufferDescriptor, BufferUsages, ComputePassDescriptor, ShaderStages};3132use crate::{33diagnostic::{DiagnosticsRecorder, RecordDiagnostics as _},34render_resource::{35AtomicPod, BindGroup, BindGroupEntries, Buffer, PipelineCache, RawBufferVec,36SpecializedComputePipeline, SpecializedComputePipelines, UniformBuffer,37},38renderer::{RenderDevice, RenderGraph, RenderGraphSystems, RenderQueue},39ExtractSchedule, RenderApp,40};4142/// A plugin that allows sparse updates of GPU buffers if only a small number of43/// elements have changed.44pub struct SparseBufferPlugin;4546impl Plugin for SparseBufferPlugin {47fn build(&self, app: &mut App) {48embedded_asset!(app, "sparse_buffer_update.wgsl");49}5051fn finish(&self, app: &mut App) {52let Some(render_app) = app.get_sub_app_mut(RenderApp) else {53return;54};5556render_app57.init_resource::<SparseBufferUpdateJobs>()58.init_resource::<SparseBufferUpdatePipelines>()59.init_resource::<SpecializedComputePipelines<SparseBufferUpdatePipelines>>()60.init_resource::<SparseBufferUpdateBindGroups>()61.add_systems(ExtractSchedule, clear_sparse_buffer_jobs)62.add_systems(63RenderGraph,64// We perform sparse buffer updates very early so that sparse65// buffers can be used in any render pass.66update_sparse_buffers.in_set(RenderGraphSystems::Begin),67);68}69}7071/// A globally-unique ID that identifies this sparse buffer.72#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Deref, DerefMut)]73pub struct SparseBufferId(pub u64);7475/// An object that allows the sparse buffer ID to be query and holds the bind76/// group for that sparse buffer alive.77///78/// Each sparse buffer holds a strong reference to this handle, and the79/// [`SparseBufferUpdateBindGroups`] resource contains a weak map from this80/// handle to the bind group. This setup ensures that, when the sparse buffer is81/// freed, the bind groups for that sparse buffer are freed as well.82pub type SparseBufferHandle = Arc<SparseBufferId>;8384/// The next sparse buffer ID to be assigned.85static NEXT_SPARSE_BUFFER_ID: AtomicU64 = AtomicU64::new(0);8687/// The size of a single workgroup in the sparse buffer shader.88const SPARSE_BUFFER_UPDATE_WORKGROUP_SIZE: u32 = 256;8990/// The fraction of the buffer that may be changed before we fall back to full91/// reupload.92///93/// This is set to 15% by default. This was obtained experimentally by testing94/// very large scenes and roughly matches the values used by other engines that95/// perform sparse buffer updates.96const SPARSE_UPLOAD_THRESHOLD: f64 = 0.15;9798/// The WebGPU limit on the number of workgroups that can be dispatched.99const MAX_WORKGROUPS: u32 = 65535;100101/// We round all allocations up to the nearest power of this.102const REALLOCATION_FACTOR: f64 = 1.5;103/// We round all allocations up to the nearest multiple of this.104const REALLOCATION_SIZE_MULTIPLE: usize = 256;105106/// The number of dirty-page bits packed into each [`AtomicU64`] word.107const PAGES_PER_DIRTY_WORD: u32 = 64;108109/// Pipelines for the sparse buffer update shader.110///111/// This shader is shared among all sparse buffer vectors.112#[derive(Resource)]113pub struct SparseBufferUpdatePipelines {114/// The bind group layout.115///116/// We only have one bind group layout shared among all sparse buffer117/// vectors.118bind_group_layout: Option<BindGroupLayoutDescriptor>,119/// The shader that performs the scatter operation.120shader: Option<Handle<Shader>>,121}122123/// A resource, part of the render world, that stores the bind groups for each124/// sparse buffer.125#[derive(Resource)]126pub struct SparseBufferUpdateBindGroups {127/// The bind groups for each sparse buffer.128///129/// These are stored in a weak map so that when the sparse buffer goes away,130/// the bind group for that buffer goes away as well.131bind_groups: WeakKeyHashMap<Weak<SparseBufferId>, SparseBufferUpdateBindGroup>,132/// The ID of the update shader pipeline shared among all sparse buffers.133pipeline_id: CachedComputePipelineId,134}135136/// A single bind group for the sparse buffer update shader.137pub struct SparseBufferUpdateBindGroup {138/// The actual bind group.139bind_group: BindGroup,140}141142/// A resource, part of the render world, that stores all pending sparse updates143/// to buffers.144#[derive(Resource, Default, Deref, DerefMut)]145pub struct SparseBufferUpdateJobs(pub Vec<SparseBufferUpdateJob>);146147/// Describes a sparse update operation for a buffer.148pub struct SparseBufferUpdateJob {149/// A handle to the buffer to be updated.150sparse_buffer_handle: SparseBufferHandle,151/// The number of pages to update.152updated_page_count: u32,153/// The base-2 logarithm of the size of a page for the buffer.154///155/// The actual page size can be computed as `1 << page_size_log2`.156page_size_log2: u32,157/// The size of each element in 32-bit words.158element_word_size: u32,159/// A debugging label for the buffer.160label: Arc<str>,161}162163impl SparseBufferUpdateJob {164/// The number of elements per page.165fn page_size(&self) -> u32 {1661 << self.page_size_log2167}168169/// Calculates the number of words that need to be updated.170fn words_to_update(&self) -> u32 {171self.updated_page_count * self.page_size() * self.element_word_size172}173174/// Calculates the number of workgroups that need to be dispatched.175fn workgroup_count(&self) -> u32 {176self.words_to_update()177.div_ceil(SPARSE_BUFFER_UPDATE_WORKGROUP_SIZE)178}179}180181/// A GPU type that describes a sparse update that is to be performed.182#[derive(Clone, Copy, Default, ShaderType, Pod, Zeroable)]183#[repr(C)]184struct GpuSparseBufferUpdateMetadata {185/// The size of a single element in 32-bit words.186element_size: u32,187/// The number of pages that need to be updated.188updated_page_count: u32,189/// The base-2 logarithm of the page size.190///191/// That is, the page size is `1 << page_size_log2`.192page_size_log2: u32,193}194195/// A system, part of the render graph, that performs sparse buffer updates to196/// buffers for which only a small number of elements have changed.197///198/// This runs as early in the pipeline as possible so that sparse buffers can be199/// used for any subsequent pass.200fn update_sparse_buffers(201sparse_buffer_update_jobs: Res<SparseBufferUpdateJobs>,202sparse_buffer_update_bind_groups: Res<SparseBufferUpdateBindGroups>,203pipeline_cache: Res<PipelineCache>,204mut diagnostics: Option<ResMut<DiagnosticsRecorder>>,205render_device: Res<RenderDevice>,206render_queue: Res<RenderQueue>,207) {208// Bail if we have nothing to do.209if sparse_buffer_update_jobs.is_empty() {210return;211}212213// We need to create a command encoder since this pass isn't associated with214// a view.215let mut command_encoder =216render_device.create_command_encoder(&wgpu::CommandEncoderDescriptor {217label: Some("sparse buffer update"),218});219220let time_span = diagnostics221.as_mut()222.map(|diagnostics| diagnostics.time_span(&mut command_encoder, "sparse buffer update"));223224command_encoder.push_debug_group("sparse buffer update");225226let Some(compute_pipeline) =227pipeline_cache.get_compute_pipeline(sparse_buffer_update_bind_groups.pipeline_id)228else {229return;230};231232// Process each sparse buffer update job.233for sparse_buffer_update_job in sparse_buffer_update_jobs.iter() {234let Some(sparse_buffer_update_bind_group) = sparse_buffer_update_bind_groups235.bind_groups236.get(&sparse_buffer_update_job.sparse_buffer_handle)237else {238continue;239};240241let mut sparse_buffer_update_pass =242command_encoder.begin_compute_pass(&ComputePassDescriptor {243label: Some(&*format!(244"sparse buffer update ({})",245&sparse_buffer_update_job.label246)),247timestamp_writes: None,248});249sparse_buffer_update_pass.set_pipeline(compute_pipeline);250sparse_buffer_update_pass.set_bind_group(2510,252&sparse_buffer_update_bind_group.bind_group,253&[],254);255sparse_buffer_update_pass.dispatch_workgroups(256sparse_buffer_update_job.workgroup_count(),2571,2581,259);260}261262command_encoder.pop_debug_group();263if let Some(time_span) = time_span {264time_span.end(&mut command_encoder);265}266267render_queue.submit([command_encoder.finish()]);268}269270/// A system that clears out the sparse buffer update jobs in preparation for a271/// new frame.272fn clear_sparse_buffer_jobs(mut sparse_buffer_update_jobs: ResMut<SparseBufferUpdateJobs>) {273sparse_buffer_update_jobs.clear();274}275276impl FromWorld for SparseBufferUpdatePipelines {277fn from_world(world: &mut World) -> Self {278let render_device = world.resource::<RenderDevice>();279let limit = render_device.limits().max_storage_buffers_per_shader_stage;280281if limit < 3 {282info!(283"Sparse buffer updates disabled. RenderDevice lacks support: max_storage_buffers_per_shader_stage ({}) < 3.",284limit285);286287return SparseBufferUpdatePipelines {288bind_group_layout: None,289shader: None,290};291}292293let bind_group_layout = BindGroupLayoutDescriptor::new(294"sparse buffer update bind group layout",295&BindGroupLayoutEntries::sequential(296ShaderStages::COMPUTE,297(298// @group(0) @binding(0) var<storage, read_write> dest_buffer: array<u32>;299storage_buffer::<u32>(false),300// @group(0) @binding(1) var<storage> src_buffer: array<u32>;301storage_buffer_read_only::<u32>(false),302// @group(0) @binding(2) var<storage> indices: array<u32>;303storage_buffer_read_only::<u32>(false),304// @group(0) @binding(3) var<uniform> metadata:305// SparseBufferUpdateMetadata;306uniform_buffer::<GpuSparseBufferUpdateMetadata>(false),307),308),309);310311SparseBufferUpdatePipelines {312bind_group_layout: Some(bind_group_layout),313shader: Some(load_embedded_asset!(world, "sparse_buffer_update.wgsl")),314}315}316}317318impl SpecializedComputePipeline for SparseBufferUpdatePipelines {319type Key = ();320321fn specialize(&self, _: Self::Key) -> ComputePipelineDescriptor {322ComputePipelineDescriptor {323label: Some("sparse buffer update pipeline".into()),324layout: self.bind_group_layout.clone().into_iter().collect(),325shader: self.shader.clone().unwrap_or_default(),326shader_defs: vec![],327..ComputePipelineDescriptor::default()328}329}330}331332/// The buffers that we use to sparsely scatter new data to the GPU.333///334/// There's one such set of buffers per sparse buffer vector.335struct SparseBufferStagingBuffers {336/// All pages that have changed and need to be updated.337source_data: RawBufferVec<u32>,338339/// The index at which we write each page in [`Self::source_data`].340///341/// The length of this buffer is equal to [`Self::source_data`] divided by342/// 2^[`Self::page_size_log2`].343indices: RawBufferVec<u32>,344345/// The size of each element in 32-bit words.346element_word_size: u32,347348/// The base-2 logarithm of the page size in elements.349///350/// That is, the page size in elements is `1 << page_size_log2`.351page_size_log2: u32,352}353354impl SparseBufferStagingBuffers {355/// The number of elements per page.356fn page_size(&self) -> usize {3571 << self.page_size_log2358}359360/// Creates a new set of staging buffers for a sparse buffer vector.361fn new(label: &str, element_word_size: u32, page_size_log2: u32) -> SparseBufferStagingBuffers {362let mut source_data_buffer =363RawBufferVec::new(BufferUsages::COPY_DST | BufferUsages::STORAGE);364source_data_buffer.set_label(Some(&*format!("{} staging buffer", label)));365366let mut indices_buffer = RawBufferVec::new(BufferUsages::COPY_DST | BufferUsages::STORAGE);367indices_buffer.set_label(Some(&*format!("{} index buffer", label)));368369SparseBufferStagingBuffers {370source_data: source_data_buffer,371indices: indices_buffer,372element_word_size,373page_size_log2,374}375}376377/// Returns the number of updated pages.378fn updated_page_count(&self) -> u32 {379// Note that we don't have to round up here because data is always380// uploaded in increments of a whole page.381let element_count = self.source_data.len() / self.element_word_size as usize;382(element_count / self.page_size()) as u32383}384385/// Writes the buffers that contain all the data necessary to perform a386/// sparse upload to the GPU.387///388/// This includes the buffer associated with the supplied389/// `metadata_uniform`.390fn write_buffers(391&mut self,392metadata_uniform: &mut UniformBuffer<GpuSparseBufferUpdateMetadata>,393render_device: &RenderDevice,394render_queue: &RenderQueue,395) {396metadata_uniform.get_mut().updated_page_count = self.updated_page_count();397metadata_uniform.write_buffer(render_device, render_queue);398399self.source_data.write_buffer(render_device, render_queue);400self.indices.write_buffer(render_device, render_queue);401}402403/// Returns true if a sparse buffer update should *not* be performed because404/// too many words changed.405fn should_perform_full_reupload(&self, changed_page_count: u32, buffer_length: usize) -> bool {406// Calculate the number of changed words. If it's greater than the407// maximum number of workgroups as defined by `wgpu`, we must perform a408// full reupload.409let total_changed_word_count =410changed_page_count * self.page_size() as u32 * self.element_word_size;411if total_changed_word_count > MAX_WORKGROUPS * SPARSE_BUFFER_UPDATE_WORKGROUP_SIZE {412return true;413}414415// Don't perform a sparse upload if too many words changed, as it'll end416// up being slower than just uploading the whole buffer afresh.417let sparse_upload_fraction =418changed_page_count as f64 / buffer_length.div_ceil(self.page_size()) as f64;419sparse_upload_fraction > SPARSE_UPLOAD_THRESHOLD420}421}422423/// A GPU buffer that can grow, can be updated atomically from multiple threads424/// on the CPU, and is sparsely updated on the GPU if only a small number of425/// elements have changed.426///427/// This type is similar to428/// [`crate::render_resource::buffer_vec::AtomicRawBufferVec`], but instead of429/// reuploading the entire buffer to the GPU when it's changed, it tracks430/// changes on a per-page level and uploads only the pages that changed if the431/// number of such pages is small. It uses a compute shader to scatter the432/// changed pages.433///434/// As the stored data is [`AtomicPod`], multiple threads may update the buffer435/// simultaneously. Note that, like436/// [`crate::render_resource::buffer_vec::AtomicRawBufferVec`], only existing437/// elements may be updated from multiple threads; new data still requires438/// exclusive access.439///440/// `T` must have a size that's a multiple of 4.441pub struct AtomicSparseBufferVec<T>442where443T: AtomicPod,444{445/// An ID that uniquely identifies this [`AtomicSparseBufferVec`].446handle: SparseBufferHandle,447/// The underlying values.448///449/// These are stored as their blob representation to allow for thread-safe450/// update.451values: Vec<T::Blob>,452/// The GPU buffer, if allocated.453data_buffer: Option<Buffer>,454/// The GPU buffers that data is copied into in preparation to be scattered455/// to the [`Self::data_buffer`].456staging_buffers: SparseBufferStagingBuffers,457/// A GPU buffer that stores information such as the element size and stride458/// that's needed to perform sparse updates.459metadata_uniform: UniformBuffer<GpuSparseBufferUpdateMetadata>,460/// The capacity of the GPU buffer in elements.461capacity: usize,462/// The allowed `wgpu` buffer usages for the GPU buffer.463buffer_usages: BufferUsages,464/// An optional debug label to identify this buffer.465label: Arc<str>,466/// A bit set of dirty pages.467///468/// The size of this vector in bits is the number of elements divided by the469/// page size, rounded up. A 1 in a bit indicates that the page has changed470/// since the last upload, while a 0 indicates that the page hasn't changed.471dirty_pages: Vec<AtomicU64>,472/// True if the entire buffer needs to be reuploaded because it resized.473needs_full_reupload: bool,474/// True if a sparse update is to be performed.475sparse_update_scheduled: bool,476}477478impl<T> AtomicSparseBufferVec<T>479where480T: AtomicPod,481{482/// The number of elements per page.483fn page_size(&self) -> u32 {4841 << self.staging_buffers.page_size_log2485}486487/// Creates a new [`AtomicSparseBufferVec`] with the given set of buffer488/// usages, page size, and label.489///490/// `buffer_usages` specifies the set of allowed `wgpu` buffer usages for491/// the buffer that [`AtomicSparseBufferVec`] manages.492/// `BufferUsages::COPY_DST` is automatically added to this set.493///494/// The `page_size_log2` parameter is the base-2 logarithm of the page size.495/// That is, the page size is `1 << page_size_log2`.496pub fn new(buffer_usages: BufferUsages, page_size_log2: u32, label: Arc<str>) -> Self {497// Make sure the value is word-aligned.498debug_assert_eq!(size_of::<T>() % 4, 0);499let element_word_size = size_of::<T>() / 4;500501// Create a unique ID.502let id = Arc::new(SparseBufferId(503NEXT_SPARSE_BUFFER_ID.fetch_add(1, Ordering::Relaxed),504));505506Self {507handle: id,508values: vec![],509data_buffer: None,510staging_buffers: SparseBufferStagingBuffers::new(511&label,512element_word_size as u32,513page_size_log2,514),515metadata_uniform: UniformBuffer::from(GpuSparseBufferUpdateMetadata::new::<T>(516page_size_log2,517)),518capacity: 0,519buffer_usages: buffer_usages | BufferUsages::COPY_DST,520label,521dirty_pages: vec![],522needs_full_reupload: false,523sparse_update_scheduled: false,524}525}526527/// Returns the number of elements in the CPU side copy of the buffer.528pub fn len(&self) -> u32 {529self.values.len() as u32530}531532/// Returns true if there are no elements in the CPU side copy of the buffer.533pub fn is_empty(&self) -> bool {534self.values.is_empty()535}536537/// Returns a handle to the buffer, if the data has been uploaded.538pub fn buffer(&self) -> Option<&Buffer> {539self.data_buffer.as_ref()540}541542/// Removes all elements from the buffer.543pub fn clear(&mut self) {544self.truncate(0);545}546547/// Copies a value out of the buffer.548pub fn get(&self, index: u32) -> T {549T::read_from_blob(&self.values[index as usize])550}551552/// Sets the value at the given index.553///554/// If the index isn't in range of the buffer, this method panics.555///556/// Internally, the value is converted to its blob representation.557///558/// Note that this method is thread-safe and doesn't require `&mut self`.559/// It's your responsibility, however, to ensure synchronization; though560/// this method is memory-safe, it's possible for other threads to observe561/// partially-overwritten values if [`Self::get`] or similar methods are562/// called while the write operation is occurring.563pub fn set(&self, index: u32, value: T) {564value.write_to_blob(&self.values[index as usize]);565self.note_changed_index(index);566}567568/// Adds a new value and returns its index.569pub fn push(&mut self, value: T) -> u32 {570let index = self.values.len() as u32;571self.values.push(T::Blob::default());572value.write_to_blob(&self.values[index as usize]);573574let page_word = (self.index_to_page(index) / PAGES_PER_DIRTY_WORD) as usize;575while self.dirty_pages.len() < page_word + 1 {576self.dirty_pages.push(AtomicU64::default());577}578self.note_changed_index(index);579580index581}582583/// Marks the page corresponding to the given element index as dirty so that584/// we know that we need to upload it.585fn note_changed_index(&self, index: u32) {586let page = self.index_to_page(index);587let (page_word, page_in_word) = (page / PAGES_PER_DIRTY_WORD, page % PAGES_PER_DIRTY_WORD);588self.dirty_pages[page_word as usize].fetch_or(1 << page_in_word, Ordering::Relaxed);589}590591/// Returns the page corresponding to the given element index.592fn index_to_page(&self, index: u32) -> u32 {593index / self.page_size()594}595596/// Ensures that the backing buffer for this buffer vector is present and597/// appropriately sized on the GPU.598pub fn reserve(&mut self, new_capacity: usize, render_device: &RenderDevice) {599reserve(600new_capacity,601&mut self.capacity,602&self.label,603&mut self.data_buffer,604self.buffer_usages,605&mut self.needs_full_reupload,606size_of::<T::Blob>(),607render_device,608);609}610611/// Grows the buffer by adding default values so that it's at least the612/// given size.613///614/// If the buffer is already large enough, this method does nothing.615pub fn grow(&mut self, new_len: u32) {616let old_len = self.values.len() as u32;617if old_len >= new_len {618return;619}620621self.values.reserve(new_len as usize - old_len as usize);622self.values.resize_with(new_len as usize, T::Blob::default);623624// This is a bit tricky. We want to set the dirty bits corresponding to625// all pages that we added, if any. First, we compute the index of the626// last page word before the append operation.627let old_final_page = self.index_to_page(old_len);628let old_final_page_word_index = old_final_page / PAGES_PER_DIRTY_WORD;629let old_final_page_in_word = old_final_page % PAGES_PER_DIRTY_WORD;630631// Next, we set the bits corresponding to every page that we added to632// that final page word. Note that this might set bits corresponding to633// pages past the end of our buffer; that's OK as we ignore them.634if old_final_page_in_word != 0635&& let Some(ref mut old_final_atomic_page_word) =636self.dirty_pages.get_mut(old_final_page_word_index as usize)637{638*old_final_atomic_page_word.get_mut() |= !((1u64 << old_final_page_in_word) - 1);639}640641// Finally, we add any new page words, with all bits set.642let new_page_count = self.index_to_page(new_len);643self.dirty_pages.resize_with(644(new_page_count as usize).div_ceil(PAGES_PER_DIRTY_WORD as usize),645|| AtomicU64::new(u64::MAX),646);647}648649/// Truncates the buffer to the given length.650///651/// If the buffer is already that length or shorter, this method does652/// nothing.653pub fn truncate(&mut self, len: u32) {654self.values.truncate(len as usize);655656let page = self.index_to_page(len);657self.dirty_pages658.truncate(page.div_ceil(PAGES_PER_DIRTY_WORD) as usize);659}660661/// Writes the data to the GPU, either via a sparse upload or a bulk data662/// upload.663pub fn write_buffers(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) {664if self.values.is_empty() {665return;666}667668// Round up the size to a good value to balance reallocation frequency669// against memory waste.670let good_size = calculate_allocation_size(self.values.len());671self.reserve(good_size, render_device);672673if self.should_perform_full_reupload(render_device) {674self.write_entire_buffer(render_queue);675} else {676self.prepare_sparse_upload(render_device, render_queue);677}678}679680/// Returns true if the sparse buffer should perform a full reupload, either681/// because it was resized or because too much data changed for a sparse682/// update to be worthwhile.683fn should_perform_full_reupload(&self, render_device: &RenderDevice) -> bool {684if self.needs_full_reupload {685return true;686}687688if render_device.limits().max_storage_buffers_per_shader_stage < 3 {689return true;690}691692// Calculate the number of changed pages via population count.693let changed_page_count: u32 = self694.dirty_pages695.iter()696.map(|atomic_page_word| atomic_page_word.load(Ordering::Relaxed).count_ones())697.sum();698699self.staging_buffers700.should_perform_full_reupload(changed_page_count, self.values.len())701}702703/// Writes the entire buffer in bulk.704///705/// This is the method used when a sparse update is not used, either because706/// the buffer resized or because too much data changed for a sparse update707/// to be worthwhile.708fn write_entire_buffer(&mut self, render_queue: &RenderQueue) {709let Some(ref mut data_buffer) = self.data_buffer else {710error!("Dirty sparse buffer should have created a data buffer by now");711return;712};713714// SAFETY: We're just writing atomic data to the GPU. The worst that715// can happen is that we race with somebody, which is unfortunate716// but not memory-unsafe.717unsafe {718render_queue.write_buffer(719data_buffer,7200,721slice::from_raw_parts(722self.values.as_ptr().cast::<u8>(),723self.values.len() * size_of::<T::Blob>(),724),725);726}727728// Mark all pages as clean.729for atomic_page_word in self.dirty_pages.iter() {730atomic_page_word.store(0, Ordering::Relaxed);731}732self.sparse_update_scheduled = false;733}734735/// Schedules a sparse upload of only the pages that changed.736fn prepare_sparse_upload(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) {737// Iterate over all dirty pages.738for (page_word_index, atomic_page_word) in self.dirty_pages.iter().enumerate() {739let page_word = atomic_page_word.load(Ordering::Relaxed);740for page_index_in_word in BitIter::new(page_word) {741let page = page_word_index as u32 * PAGES_PER_DIRTY_WORD + page_index_in_word;742743// Write the index of the page so the shader will know where to744// scatter the data to.745self.staging_buffers.indices.push(page);746747// Copy the page to the GPU staging buffer.748let page_size = self.staging_buffers.page_size();749let page_start = page as usize * page_size;750let page_end = page_start + page_size;751for value_index in page_start..page_end {752match self.values.get(value_index) {753Some(blob) => {754let value = T::read_from_blob(blob);755self.staging_buffers756.source_data757.extend(bytemuck::cast_slice(&[value]).iter().copied());758}759None => {760self.staging_buffers.source_data.extend(iter::repeat_n(7610,762self.staging_buffers.element_word_size as usize,763));764}765}766}767768// Make sure we're aligned up to a full page.769debug_assert_eq!(770self.staging_buffers.source_data.len()771% (self.staging_buffers.element_word_size as usize772* self.staging_buffers.page_size()),7730774);775}776777// Mark the page as clean.778atomic_page_word.store(0, Ordering::Relaxed);779}780781// Schedule a sparse update if there was something to do.782self.sparse_update_scheduled = !self.staging_buffers.source_data.is_empty();783if self.sparse_update_scheduled {784self.staging_buffers.write_buffers(785&mut self.metadata_uniform,786render_device,787render_queue,788);789}790}791792/// If a sparse update has been scheduled, prepares all GPU resources793/// necessary to perform a sparse buffer update, other than updating the794/// metadata uniform.795pub fn prepare_to_populate_buffers(796&mut self,797render_device: &RenderDevice,798pipeline_cache: &PipelineCache,799sparse_buffer_update_jobs: &mut SparseBufferUpdateJobs,800sparse_buffer_update_bind_groups: &mut SparseBufferUpdateBindGroups,801sparse_buffer_update_pipelines: &SparseBufferUpdatePipelines,802) {803if self.sparse_update_scheduled {804match (&self.data_buffer, self.metadata_uniform.buffer()) {805(Some(data_buffer), Some(metadata_buffer)) => {806prepare_to_populate_buffers(807self.handle.clone(),808&self.label,809data_buffer,810&mut self.staging_buffers,811metadata_buffer,812render_device,813pipeline_cache,814sparse_buffer_update_jobs,815sparse_buffer_update_bind_groups,816sparse_buffer_update_pipelines,817);818}819_ => {820error!("Buffers should have been created by now");821}822}823}824825// Clear out the staging buffers, now that we know the data is already826// on the GPU.827self.staging_buffers.source_data.clear();828self.staging_buffers.indices.clear();829830// Reset the `needs_full_reupload` and `needs_sparse_update` flags.831self.needs_full_reupload = false;832self.sparse_update_scheduled = false;833}834}835836impl FromWorld for SparseBufferUpdateBindGroups {837fn from_world(world: &mut World) -> Self {838world.resource_scope::<SpecializedComputePipelines<SparseBufferUpdatePipelines>, _>(839|world, mut specialized_sparse_buffer_update_pipelines| {840let pipeline_cache = world.resource::<PipelineCache>();841let sparse_buffer_update_pipelines =842world.resource::<SparseBufferUpdatePipelines>();843let pipeline_id = specialized_sparse_buffer_update_pipelines.specialize(844pipeline_cache,845sparse_buffer_update_pipelines,846(),847);848849SparseBufferUpdateBindGroups {850bind_groups: WeakKeyHashMap::default(),851pipeline_id,852}853},854)855}856}857858/// Prepares all GPU resources necessary to perform a sparse buffer update,859/// other than updating the metadata uniform.860///861/// This function creates the [`SparseBufferUpdateJob`] and ensures the bind862/// group and pipeline are up to date.863fn prepare_to_populate_buffers(864sparse_buffer_handle: SparseBufferHandle,865label: &Arc<str>,866data_buffer: &Buffer,867staging_buffers: &mut SparseBufferStagingBuffers,868metadata_buffer: &Buffer,869render_device: &RenderDevice,870pipeline_cache: &PipelineCache,871sparse_buffer_update_jobs: &mut SparseBufferUpdateJobs,872sparse_buffer_update_bind_groups: &mut SparseBufferUpdateBindGroups,873sparse_buffer_update_pipelines: &SparseBufferUpdatePipelines,874) {875let (Some(source_data_staging_buffer), Some(indices_staging_buffer)) = (876staging_buffers.source_data.buffer(),877staging_buffers.indices.buffer(),878) else {879error!("Staging buffers should have been created by now");880return;881};882883let Some(bind_group_layout) = &sparse_buffer_update_pipelines.bind_group_layout else {884return;885};886887// Record the update job.888sparse_buffer_update_jobs.push(SparseBufferUpdateJob {889sparse_buffer_handle: sparse_buffer_handle.clone(),890page_size_log2: staging_buffers.page_size_log2,891updated_page_count: staging_buffers.updated_page_count(),892element_word_size: staging_buffers.element_word_size,893label: (*label).clone(),894});895896// Create the bind group.897let bind_group = render_device.create_bind_group(898Some(&*format!("{} bind group", label)),899&pipeline_cache.get_bind_group_layout(bind_group_layout),900&BindGroupEntries::sequential((901// @group(0) @binding(0) var<storage, read_write> dest_buffer: array<u32>;902data_buffer.as_entire_binding(),903// @group(0) @binding(1) var<storage> src_buffer: array<u32>;904source_data_staging_buffer.as_entire_binding(),905// @group(0) @binding(2) var<storage> indices: array<u32>;906indices_staging_buffer.as_entire_binding(),907// @group(0) @binding(3) var<uniform> metadata:908// SparseBufferUpdateMetadata;909metadata_buffer.as_entire_binding(),910)),911);912sparse_buffer_update_bind_groups.bind_groups.insert(913sparse_buffer_handle,914SparseBufferUpdateBindGroup { bind_group },915);916}917918/// Ensures that the backing buffer for an [`AtomicSparseBufferVec`] is present919/// on the GPU.920///921/// The `capacity`, `data_buffer`, and `needs_full_reupload` fields are updated922/// to reflect the new buffer.923fn reserve(924new_capacity: usize,925capacity: &mut usize,926label: &str,927data_buffer: &mut Option<Buffer>,928buffer_usages: BufferUsages,929needs_full_reupload: &mut bool,930element_size: usize,931render_device: &RenderDevice,932) {933// If the buffer is already big enough, do nothing.934if new_capacity == 0 || new_capacity <= *capacity {935return;936}937938*capacity = new_capacity;939*data_buffer = Some(render_device.create_buffer(&BufferDescriptor {940label: Some(label),941size: element_size as u64 * new_capacity as u64,942usage: buffer_usages,943mapped_at_creation: false,944}));945946// Since we resized the buffer, we need to reupload it.947*needs_full_reupload = true;948}949950impl GpuSparseBufferUpdateMetadata {951/// Returns a new [`GpuSparseBufferUpdateMetadata`] for the given type and952/// page size.953fn new<T>(page_size_log2: u32) -> GpuSparseBufferUpdateMetadata {954assert_eq!(size_of::<T>() % 4, 0);955GpuSparseBufferUpdateMetadata {956element_size: (size_of::<T>() / 4) as u32,957updated_page_count: 0,958page_size_log2,959}960}961}962963/// Iterates over the bits in a single `u64`, from the least significant bit to964/// the most significant bit.965struct BitIter(u64);966967impl BitIter {968fn new(bits: u64) -> BitIter {969BitIter(bits)970}971}972973impl Iterator for BitIter {974type Item = u32;975976fn next(&mut self) -> Option<Self::Item> {977let trailing_zeros = self.0.trailing_zeros();978if trailing_zeros == 64 {979return None;980}981self.0 &= !(1 << trailing_zeros);982Some(trailing_zeros)983}984}985986/// Calculates the size that a buffer should be in order to balance reallocation987/// frequency against memory waste.988fn calculate_allocation_size(length: usize) -> usize {989let exponent = (length as f64).log(REALLOCATION_FACTOR).ceil();990let size = REALLOCATION_FACTOR.powf(exponent) as usize;991size.next_multiple_of(REALLOCATION_SIZE_MULTIPLE)992}993994995