Path: blob/main/crates/bevy_render/src/diagnostic/internal.rs
6596 views
use alloc::{borrow::Cow, sync::Arc};1use core::{2ops::{DerefMut, Range},3sync::atomic::{AtomicBool, Ordering},4};5use std::thread::{self, ThreadId};67use bevy_diagnostic::{Diagnostic, DiagnosticMeasurement, DiagnosticPath, DiagnosticsStore};8use bevy_ecs::resource::Resource;9use bevy_ecs::system::{Res, ResMut};10use bevy_platform::time::Instant;11use std::sync::Mutex;12use wgpu::{13Buffer, BufferDescriptor, BufferUsages, CommandEncoder, ComputePass, Features, MapMode,14PipelineStatisticsTypes, QuerySet, QuerySetDescriptor, QueryType, RenderPass,15};1617use crate::renderer::{RenderAdapterInfo, RenderDevice, RenderQueue, WgpuWrapper};1819use super::RecordDiagnostics;2021// buffer offset must be divisible by 256, so this constant must be divisible by 32 (=256/8)22const MAX_TIMESTAMP_QUERIES: u32 = 256;23const MAX_PIPELINE_STATISTICS: u32 = 128;2425const TIMESTAMP_SIZE: u64 = 8;26const PIPELINE_STATISTICS_SIZE: u64 = 40;2728struct DiagnosticsRecorderInternal {29timestamp_period_ns: f32,30features: Features,31current_frame: Mutex<FrameData>,32submitted_frames: Vec<FrameData>,33finished_frames: Vec<FrameData>,34#[cfg(feature = "tracing-tracy")]35tracy_gpu_context: tracy_client::GpuContext,36}3738/// Records diagnostics into [`QuerySet`]'s keeping track of the mapping between39/// spans and indices to the corresponding entries in the [`QuerySet`].40#[derive(Resource)]41pub struct DiagnosticsRecorder(WgpuWrapper<DiagnosticsRecorderInternal>);4243impl DiagnosticsRecorder {44/// Creates the new `DiagnosticsRecorder`.45pub fn new(46adapter_info: &RenderAdapterInfo,47device: &RenderDevice,48queue: &RenderQueue,49) -> DiagnosticsRecorder {50let features = device.features();5152#[cfg(feature = "tracing-tracy")]53let tracy_gpu_context =54super::tracy_gpu::new_tracy_gpu_context(adapter_info, device, queue);55let _ = adapter_info; // Prevent unused variable warnings when tracing-tracy is not enabled5657DiagnosticsRecorder(WgpuWrapper::new(DiagnosticsRecorderInternal {58timestamp_period_ns: queue.get_timestamp_period(),59features,60current_frame: Mutex::new(FrameData::new(61device,62features,63#[cfg(feature = "tracing-tracy")]64tracy_gpu_context.clone(),65)),66submitted_frames: Vec::new(),67finished_frames: Vec::new(),68#[cfg(feature = "tracing-tracy")]69tracy_gpu_context,70}))71}7273fn current_frame_mut(&mut self) -> &mut FrameData {74self.0.current_frame.get_mut().expect("lock poisoned")75}7677fn current_frame_lock(&self) -> impl DerefMut<Target = FrameData> + '_ {78self.0.current_frame.lock().expect("lock poisoned")79}8081/// Begins recording diagnostics for a new frame.82pub fn begin_frame(&mut self) {83let internal = &mut self.0;84let mut idx = 0;85while idx < internal.submitted_frames.len() {86let timestamp = internal.timestamp_period_ns;87if internal.submitted_frames[idx].run_mapped_callback(timestamp) {88let removed = internal.submitted_frames.swap_remove(idx);89internal.finished_frames.push(removed);90} else {91idx += 1;92}93}9495self.current_frame_mut().begin();96}9798/// Copies data from [`QuerySet`]'s to a [`Buffer`], after which it can be downloaded to CPU.99///100/// Should be called before [`DiagnosticsRecorder::finish_frame`].101pub fn resolve(&mut self, encoder: &mut CommandEncoder) {102self.current_frame_mut().resolve(encoder);103}104105/// Finishes recording diagnostics for the current frame.106///107/// The specified `callback` will be invoked when diagnostics become available.108///109/// Should be called after [`DiagnosticsRecorder::resolve`],110/// and **after** all commands buffers have been queued.111pub fn finish_frame(112&mut self,113device: &RenderDevice,114callback: impl FnOnce(RenderDiagnostics) + Send + Sync + 'static,115) {116#[cfg(feature = "tracing-tracy")]117let tracy_gpu_context = self.0.tracy_gpu_context.clone();118119let internal = &mut self.0;120internal121.current_frame122.get_mut()123.expect("lock poisoned")124.finish(callback);125126// reuse one of the finished frames, if we can127let new_frame = match internal.finished_frames.pop() {128Some(frame) => frame,129None => FrameData::new(130device,131internal.features,132#[cfg(feature = "tracing-tracy")]133tracy_gpu_context,134),135};136137let old_frame = core::mem::replace(138internal.current_frame.get_mut().expect("lock poisoned"),139new_frame,140);141internal.submitted_frames.push(old_frame);142}143}144145impl RecordDiagnostics for DiagnosticsRecorder {146fn begin_time_span<E: WriteTimestamp>(&self, encoder: &mut E, span_name: Cow<'static, str>) {147self.current_frame_lock()148.begin_time_span(encoder, span_name);149}150151fn end_time_span<E: WriteTimestamp>(&self, encoder: &mut E) {152self.current_frame_lock().end_time_span(encoder);153}154155fn begin_pass_span<P: Pass>(&self, pass: &mut P, span_name: Cow<'static, str>) {156self.current_frame_lock().begin_pass(pass, span_name);157}158159fn end_pass_span<P: Pass>(&self, pass: &mut P) {160self.current_frame_lock().end_pass(pass);161}162}163164struct SpanRecord {165thread_id: ThreadId,166path_range: Range<usize>,167pass_kind: Option<PassKind>,168begin_timestamp_index: Option<u32>,169end_timestamp_index: Option<u32>,170begin_instant: Option<Instant>,171end_instant: Option<Instant>,172pipeline_statistics_index: Option<u32>,173}174175struct FrameData {176timestamps_query_set: Option<QuerySet>,177num_timestamps: u32,178supports_timestamps_inside_passes: bool,179supports_timestamps_inside_encoders: bool,180pipeline_statistics_query_set: Option<QuerySet>,181num_pipeline_statistics: u32,182buffer_size: u64,183pipeline_statistics_buffer_offset: u64,184resolve_buffer: Option<Buffer>,185read_buffer: Option<Buffer>,186path_components: Vec<Cow<'static, str>>,187open_spans: Vec<SpanRecord>,188closed_spans: Vec<SpanRecord>,189is_mapped: Arc<AtomicBool>,190callback: Option<Box<dyn FnOnce(RenderDiagnostics) + Send + Sync + 'static>>,191#[cfg(feature = "tracing-tracy")]192tracy_gpu_context: tracy_client::GpuContext,193}194195impl FrameData {196fn new(197device: &RenderDevice,198features: Features,199#[cfg(feature = "tracing-tracy")] tracy_gpu_context: tracy_client::GpuContext,200) -> FrameData {201let wgpu_device = device.wgpu_device();202let mut buffer_size = 0;203204let timestamps_query_set = if features.contains(Features::TIMESTAMP_QUERY) {205buffer_size += u64::from(MAX_TIMESTAMP_QUERIES) * TIMESTAMP_SIZE;206Some(wgpu_device.create_query_set(&QuerySetDescriptor {207label: Some("timestamps_query_set"),208ty: QueryType::Timestamp,209count: MAX_TIMESTAMP_QUERIES,210}))211} else {212None213};214215let pipeline_statistics_buffer_offset = buffer_size;216217let pipeline_statistics_query_set =218if features.contains(Features::PIPELINE_STATISTICS_QUERY) {219buffer_size += u64::from(MAX_PIPELINE_STATISTICS) * PIPELINE_STATISTICS_SIZE;220Some(wgpu_device.create_query_set(&QuerySetDescriptor {221label: Some("pipeline_statistics_query_set"),222ty: QueryType::PipelineStatistics(PipelineStatisticsTypes::all()),223count: MAX_PIPELINE_STATISTICS,224}))225} else {226None227};228229let (resolve_buffer, read_buffer) = if buffer_size > 0 {230let resolve_buffer = wgpu_device.create_buffer(&BufferDescriptor {231label: Some("render_statistics_resolve_buffer"),232size: buffer_size,233usage: BufferUsages::QUERY_RESOLVE | BufferUsages::COPY_SRC,234mapped_at_creation: false,235});236let read_buffer = wgpu_device.create_buffer(&BufferDescriptor {237label: Some("render_statistics_read_buffer"),238size: buffer_size,239usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,240mapped_at_creation: false,241});242(Some(resolve_buffer), Some(read_buffer))243} else {244(None, None)245};246247FrameData {248timestamps_query_set,249num_timestamps: 0,250supports_timestamps_inside_passes: features251.contains(Features::TIMESTAMP_QUERY_INSIDE_PASSES),252supports_timestamps_inside_encoders: features253.contains(Features::TIMESTAMP_QUERY_INSIDE_ENCODERS),254pipeline_statistics_query_set,255num_pipeline_statistics: 0,256buffer_size,257pipeline_statistics_buffer_offset,258resolve_buffer,259read_buffer,260path_components: Vec::new(),261open_spans: Vec::new(),262closed_spans: Vec::new(),263is_mapped: Arc::new(AtomicBool::new(false)),264callback: None,265#[cfg(feature = "tracing-tracy")]266tracy_gpu_context,267}268}269270fn begin(&mut self) {271self.num_timestamps = 0;272self.num_pipeline_statistics = 0;273self.path_components.clear();274self.open_spans.clear();275self.closed_spans.clear();276}277278fn write_timestamp(279&mut self,280encoder: &mut impl WriteTimestamp,281is_inside_pass: bool,282) -> Option<u32> {283// `encoder.write_timestamp` is unsupported on WebGPU.284if !self.supports_timestamps_inside_encoders {285return None;286}287288if is_inside_pass && !self.supports_timestamps_inside_passes {289return None;290}291292if self.num_timestamps >= MAX_TIMESTAMP_QUERIES {293return None;294}295296let set = self.timestamps_query_set.as_ref()?;297let index = self.num_timestamps;298encoder.write_timestamp(set, index);299self.num_timestamps += 1;300Some(index)301}302303fn write_pipeline_statistics(304&mut self,305encoder: &mut impl WritePipelineStatistics,306) -> Option<u32> {307if self.num_pipeline_statistics >= MAX_PIPELINE_STATISTICS {308return None;309}310311let set = self.pipeline_statistics_query_set.as_ref()?;312let index = self.num_pipeline_statistics;313encoder.begin_pipeline_statistics_query(set, index);314self.num_pipeline_statistics += 1;315Some(index)316}317318fn open_span(319&mut self,320pass_kind: Option<PassKind>,321name: Cow<'static, str>,322) -> &mut SpanRecord {323let thread_id = thread::current().id();324325let parent = self326.open_spans327.iter()328.filter(|v| v.thread_id == thread_id)329.next_back();330331let path_range = match &parent {332Some(parent) if parent.path_range.end == self.path_components.len() => {333parent.path_range.start..parent.path_range.end + 1334}335Some(parent) => {336self.path_components337.extend_from_within(parent.path_range.clone());338self.path_components.len() - parent.path_range.len()..self.path_components.len() + 1339}340None => self.path_components.len()..self.path_components.len() + 1,341};342343self.path_components.push(name);344345self.open_spans.push(SpanRecord {346thread_id,347path_range,348pass_kind,349begin_timestamp_index: None,350end_timestamp_index: None,351begin_instant: None,352end_instant: None,353pipeline_statistics_index: None,354});355356self.open_spans.last_mut().unwrap()357}358359fn close_span(&mut self) -> &mut SpanRecord {360let thread_id = thread::current().id();361362let iter = self.open_spans.iter();363let (index, _) = iter364.enumerate()365.filter(|(_, v)| v.thread_id == thread_id)366.next_back()367.unwrap();368369let span = self.open_spans.swap_remove(index);370self.closed_spans.push(span);371self.closed_spans.last_mut().unwrap()372}373374fn begin_time_span(&mut self, encoder: &mut impl WriteTimestamp, name: Cow<'static, str>) {375let begin_instant = Instant::now();376let begin_timestamp_index = self.write_timestamp(encoder, false);377378let span = self.open_span(None, name);379span.begin_instant = Some(begin_instant);380span.begin_timestamp_index = begin_timestamp_index;381}382383fn end_time_span(&mut self, encoder: &mut impl WriteTimestamp) {384let end_timestamp_index = self.write_timestamp(encoder, false);385386let span = self.close_span();387span.end_timestamp_index = end_timestamp_index;388span.end_instant = Some(Instant::now());389}390391fn begin_pass<P: Pass>(&mut self, pass: &mut P, name: Cow<'static, str>) {392let begin_instant = Instant::now();393394let begin_timestamp_index = self.write_timestamp(pass, true);395let pipeline_statistics_index = self.write_pipeline_statistics(pass);396397let span = self.open_span(Some(P::KIND), name);398span.begin_instant = Some(begin_instant);399span.begin_timestamp_index = begin_timestamp_index;400span.pipeline_statistics_index = pipeline_statistics_index;401}402403fn end_pass(&mut self, pass: &mut impl Pass) {404let end_timestamp_index = self.write_timestamp(pass, true);405406let span = self.close_span();407span.end_timestamp_index = end_timestamp_index;408409if span.pipeline_statistics_index.is_some() {410pass.end_pipeline_statistics_query();411}412413span.end_instant = Some(Instant::now());414}415416fn resolve(&mut self, encoder: &mut CommandEncoder) {417let Some(resolve_buffer) = &self.resolve_buffer else {418return;419};420421match &self.timestamps_query_set {422Some(set) if self.num_timestamps > 0 => {423encoder.resolve_query_set(set, 0..self.num_timestamps, resolve_buffer, 0);424}425_ => {}426}427428match &self.pipeline_statistics_query_set {429Some(set) if self.num_pipeline_statistics > 0 => {430encoder.resolve_query_set(431set,4320..self.num_pipeline_statistics,433resolve_buffer,434self.pipeline_statistics_buffer_offset,435);436}437_ => {}438}439440let Some(read_buffer) = &self.read_buffer else {441return;442};443444encoder.copy_buffer_to_buffer(resolve_buffer, 0, read_buffer, 0, self.buffer_size);445}446447fn diagnostic_path(&self, range: &Range<usize>, field: &str) -> DiagnosticPath {448DiagnosticPath::from_components(449core::iter::once("render")450.chain(self.path_components[range.clone()].iter().map(|v| &**v))451.chain(core::iter::once(field)),452)453}454455fn finish(&mut self, callback: impl FnOnce(RenderDiagnostics) + Send + Sync + 'static) {456let Some(read_buffer) = &self.read_buffer else {457// we still have cpu timings, so let's use them458459let mut diagnostics = Vec::new();460461for span in &self.closed_spans {462if let (Some(begin), Some(end)) = (span.begin_instant, span.end_instant) {463diagnostics.push(RenderDiagnostic {464path: self.diagnostic_path(&span.path_range, "elapsed_cpu"),465suffix: "ms",466value: (end - begin).as_secs_f64() * 1000.0,467});468}469}470471callback(RenderDiagnostics(diagnostics));472return;473};474475self.callback = Some(Box::new(callback));476477let is_mapped = self.is_mapped.clone();478read_buffer.slice(..).map_async(MapMode::Read, move |res| {479if let Err(e) = res {480tracing::warn!("Failed to download render statistics buffer: {e}");481return;482}483484is_mapped.store(true, Ordering::Release);485});486}487488// returns true if the frame is considered finished, false otherwise489fn run_mapped_callback(&mut self, timestamp_period_ns: f32) -> bool {490let Some(read_buffer) = &self.read_buffer else {491return true;492};493if !self.is_mapped.load(Ordering::Acquire) {494// need to wait more495return false;496}497let Some(callback) = self.callback.take() else {498return true;499};500501let data = read_buffer.slice(..).get_mapped_range();502503let timestamps = data[..(self.num_timestamps * 8) as usize]504.chunks(8)505.map(|v| u64::from_le_bytes(v.try_into().unwrap()))506.collect::<Vec<u64>>();507508let start = self.pipeline_statistics_buffer_offset as usize;509let len = (self.num_pipeline_statistics as usize) * 40;510let pipeline_statistics = data[start..start + len]511.chunks(8)512.map(|v| u64::from_le_bytes(v.try_into().unwrap()))513.collect::<Vec<u64>>();514515let mut diagnostics = Vec::new();516517for span in &self.closed_spans {518if let (Some(begin), Some(end)) = (span.begin_instant, span.end_instant) {519diagnostics.push(RenderDiagnostic {520path: self.diagnostic_path(&span.path_range, "elapsed_cpu"),521suffix: "ms",522value: (end - begin).as_secs_f64() * 1000.0,523});524}525526if let (Some(begin), Some(end)) = (span.begin_timestamp_index, span.end_timestamp_index)527{528let begin = timestamps[begin as usize] as f64;529let end = timestamps[end as usize] as f64;530let value = (end - begin) * (timestamp_period_ns as f64) / 1e6;531532#[cfg(feature = "tracing-tracy")]533{534// Calling span_alloc() and end_zone() here instead of in open_span() and close_span() means that tracy does not know where each GPU command was recorded on the CPU timeline.535// Unfortunately we must do it this way, because tracy does not play nicely with multithreaded command recording. The start/end pairs would get all mixed up.536// The GPU spans themselves are still accurate though, and it's probably safe to assume that each GPU span in frame N belongs to the corresponding CPU render node span from frame N-1.537let name = &self.path_components[span.path_range.clone()].join("/");538let mut tracy_gpu_span =539self.tracy_gpu_context.span_alloc(name, "", "", 0).unwrap();540tracy_gpu_span.end_zone();541tracy_gpu_span.upload_timestamp_start(begin as i64);542tracy_gpu_span.upload_timestamp_end(end as i64);543}544545diagnostics.push(RenderDiagnostic {546path: self.diagnostic_path(&span.path_range, "elapsed_gpu"),547suffix: "ms",548value,549});550}551552if let Some(index) = span.pipeline_statistics_index {553let index = (index as usize) * 5;554555if span.pass_kind == Some(PassKind::Render) {556diagnostics.push(RenderDiagnostic {557path: self.diagnostic_path(&span.path_range, "vertex_shader_invocations"),558suffix: "",559value: pipeline_statistics[index] as f64,560});561562diagnostics.push(RenderDiagnostic {563path: self.diagnostic_path(&span.path_range, "clipper_invocations"),564suffix: "",565value: pipeline_statistics[index + 1] as f64,566});567568diagnostics.push(RenderDiagnostic {569path: self.diagnostic_path(&span.path_range, "clipper_primitives_out"),570suffix: "",571value: pipeline_statistics[index + 2] as f64,572});573574diagnostics.push(RenderDiagnostic {575path: self.diagnostic_path(&span.path_range, "fragment_shader_invocations"),576suffix: "",577value: pipeline_statistics[index + 3] as f64,578});579}580581if span.pass_kind == Some(PassKind::Compute) {582diagnostics.push(RenderDiagnostic {583path: self.diagnostic_path(&span.path_range, "compute_shader_invocations"),584suffix: "",585value: pipeline_statistics[index + 4] as f64,586});587}588}589}590591callback(RenderDiagnostics(diagnostics));592593drop(data);594read_buffer.unmap();595self.is_mapped.store(false, Ordering::Release);596597true598}599}600601/// Resource which stores render diagnostics of the most recent frame.602#[derive(Debug, Default, Clone, Resource)]603pub struct RenderDiagnostics(Vec<RenderDiagnostic>);604605/// A render diagnostic which has been recorded, but not yet stored in [`DiagnosticsStore`].606#[derive(Debug, Clone, Resource)]607pub struct RenderDiagnostic {608pub path: DiagnosticPath,609pub suffix: &'static str,610pub value: f64,611}612613/// Stores render diagnostics before they can be synced with the main app.614///615/// This mutex is locked twice per frame:616/// 1. in `PreUpdate`, during [`sync_diagnostics`],617/// 2. after rendering has finished and statistics have been downloaded from GPU.618#[derive(Debug, Default, Clone, Resource)]619pub struct RenderDiagnosticsMutex(pub(crate) Arc<Mutex<Option<RenderDiagnostics>>>);620621/// Updates render diagnostics measurements.622pub fn sync_diagnostics(mutex: Res<RenderDiagnosticsMutex>, mut store: ResMut<DiagnosticsStore>) {623let Some(diagnostics) = mutex.0.lock().ok().and_then(|mut v| v.take()) else {624return;625};626627let time = Instant::now();628629for diagnostic in &diagnostics.0 {630if store.get(&diagnostic.path).is_none() {631store.add(Diagnostic::new(diagnostic.path.clone()).with_suffix(diagnostic.suffix));632}633634store635.get_mut(&diagnostic.path)636.unwrap()637.add_measurement(DiagnosticMeasurement {638time,639value: diagnostic.value,640});641}642}643644pub trait WriteTimestamp {645fn write_timestamp(&mut self, query_set: &QuerySet, index: u32);646}647648impl WriteTimestamp for CommandEncoder {649fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) {650CommandEncoder::write_timestamp(self, query_set, index);651}652}653654impl WriteTimestamp for RenderPass<'_> {655fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) {656RenderPass::write_timestamp(self, query_set, index);657}658}659660impl WriteTimestamp for ComputePass<'_> {661fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) {662ComputePass::write_timestamp(self, query_set, index);663}664}665666pub trait WritePipelineStatistics {667fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32);668669fn end_pipeline_statistics_query(&mut self);670}671672impl WritePipelineStatistics for RenderPass<'_> {673fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32) {674RenderPass::begin_pipeline_statistics_query(self, query_set, index);675}676677fn end_pipeline_statistics_query(&mut self) {678RenderPass::end_pipeline_statistics_query(self);679}680}681682impl WritePipelineStatistics for ComputePass<'_> {683fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32) {684ComputePass::begin_pipeline_statistics_query(self, query_set, index);685}686687fn end_pipeline_statistics_query(&mut self) {688ComputePass::end_pipeline_statistics_query(self);689}690}691692pub trait Pass: WritePipelineStatistics + WriteTimestamp {693const KIND: PassKind;694}695696impl Pass for RenderPass<'_> {697const KIND: PassKind = PassKind::Render;698}699700impl Pass for ComputePass<'_> {701const KIND: PassKind = PassKind::Compute;702}703704#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]705pub enum PassKind {706Render,707Compute,708}709710711