Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
bevyengine
GitHub Repository: bevyengine/bevy
Path: blob/main/crates/bevy_render/src/diagnostic/internal.rs
9396 views
1
use alloc::{borrow::Cow, sync::Arc};
2
use core::{
3
ops::{DerefMut, Range},
4
sync::atomic::{AtomicBool, Ordering},
5
};
6
use std::thread::{self, ThreadId};
7
8
use bevy_diagnostic::{Diagnostic, DiagnosticMeasurement, DiagnosticPath, DiagnosticsStore};
9
use bevy_ecs::resource::Resource;
10
use bevy_ecs::system::{Res, ResMut};
11
use bevy_platform::time::Instant;
12
use std::sync::Mutex;
13
use wgpu::{
14
Buffer, BufferDescriptor, BufferSize, BufferSlice, BufferUsages, CommandEncoder, ComputePass,
15
Device, Features, MapMode, PipelineStatisticsTypes, QuerySet, QuerySetDescriptor, QueryType,
16
RenderPass,
17
};
18
19
use crate::renderer::{RenderAdapterInfo, RenderDevice, RenderQueue, WgpuWrapper};
20
21
use super::RecordDiagnostics;
22
23
/// Capacity of the timestamp query set allocated for every frame.
///
/// The pipeline statistics region of the resolve buffer starts right behind the timestamps and
/// buffer offsets must be divisible by 256, so this value must stay a multiple of 32 (= 256 / 8).
const MAX_TIMESTAMP_QUERIES: u32 = 256;
/// Capacity of the pipeline statistics query set allocated for every frame.
const MAX_PIPELINE_STATISTICS: u32 = 128;

/// Size in bytes of one resolved timestamp (a single `u64`).
const TIMESTAMP_SIZE: u64 = core::mem::size_of::<u64>() as u64;
/// Size in bytes of one resolved pipeline statistics entry (five `u64` counters).
const PIPELINE_STATISTICS_SIZE: u64 = 5 * TIMESTAMP_SIZE;
29
30
struct DiagnosticsRecorderInternal {
31
timestamp_period_ns: f32,
32
features: Features,
33
current_frame: Mutex<FrameData>,
34
submitted_frames: Vec<FrameData>,
35
finished_frames: Vec<FrameData>,
36
#[cfg(feature = "tracing-tracy")]
37
tracy_gpu_context: tracy_client::GpuContext,
38
}
39
40
/// Records diagnostics into [`QuerySet`]'s keeping track of the mapping between
41
/// spans and indices to the corresponding entries in the [`QuerySet`].
42
#[derive(Resource)]
43
pub struct DiagnosticsRecorder(WgpuWrapper<DiagnosticsRecorderInternal>);
44
45
impl DiagnosticsRecorder {
46
/// Creates the new `DiagnosticsRecorder`.
47
pub fn new(
48
adapter_info: &RenderAdapterInfo,
49
device: &RenderDevice,
50
queue: &RenderQueue,
51
) -> DiagnosticsRecorder {
52
let features = device.features();
53
54
#[cfg(feature = "tracing-tracy")]
55
let tracy_gpu_context =
56
super::tracy_gpu::new_tracy_gpu_context(adapter_info, device, queue);
57
let _ = adapter_info; // Prevent unused variable warnings when tracing-tracy is not enabled
58
59
DiagnosticsRecorder(WgpuWrapper::new(DiagnosticsRecorderInternal {
60
timestamp_period_ns: queue.get_timestamp_period(),
61
features,
62
current_frame: Mutex::new(FrameData::new(
63
device,
64
features,
65
#[cfg(feature = "tracing-tracy")]
66
tracy_gpu_context.clone(),
67
)),
68
submitted_frames: Vec::new(),
69
finished_frames: Vec::new(),
70
#[cfg(feature = "tracing-tracy")]
71
tracy_gpu_context,
72
}))
73
}
74
75
fn current_frame_mut(&mut self) -> &mut FrameData {
76
self.0.current_frame.get_mut().expect("lock poisoned")
77
}
78
79
fn current_frame_lock(&self) -> impl DerefMut<Target = FrameData> + '_ {
80
self.0.current_frame.lock().expect("lock poisoned")
81
}
82
83
/// Begins recording diagnostics for a new frame.
84
pub fn begin_frame(&mut self) {
85
let internal = &mut self.0;
86
let mut idx = 0;
87
while idx < internal.submitted_frames.len() {
88
let timestamp = internal.timestamp_period_ns;
89
if internal.submitted_frames[idx].run_mapped_callback(timestamp) {
90
let removed = internal.submitted_frames.swap_remove(idx);
91
internal.finished_frames.push(removed);
92
} else {
93
idx += 1;
94
}
95
}
96
97
self.current_frame_mut().begin();
98
}
99
100
/// Copies data from [`QuerySet`]'s to a [`Buffer`], after which it can be downloaded to CPU.
101
///
102
/// Should be called before [`DiagnosticsRecorder::finish_frame`].
103
pub fn resolve(&mut self, encoder: &mut CommandEncoder) {
104
self.current_frame_mut().resolve(encoder);
105
}
106
107
/// Finishes recording diagnostics for the current frame.
108
///
109
/// The specified `callback` will be invoked when diagnostics become available.
110
///
111
/// Should be called after [`DiagnosticsRecorder::resolve`],
112
/// and **after** all commands buffers have been queued.
113
pub fn finish_frame(
114
&mut self,
115
device: &RenderDevice,
116
callback: impl FnOnce(RenderDiagnostics) + Send + Sync + 'static,
117
) {
118
#[cfg(feature = "tracing-tracy")]
119
let tracy_gpu_context = self.0.tracy_gpu_context.clone();
120
121
let internal = &mut self.0;
122
internal
123
.current_frame
124
.get_mut()
125
.expect("lock poisoned")
126
.finish(callback);
127
128
// reuse one of the finished frames, if we can
129
let new_frame = match internal.finished_frames.pop() {
130
Some(frame) => frame,
131
None => FrameData::new(
132
device,
133
internal.features,
134
#[cfg(feature = "tracing-tracy")]
135
tracy_gpu_context,
136
),
137
};
138
139
let old_frame = core::mem::replace(
140
internal.current_frame.get_mut().expect("lock poisoned"),
141
new_frame,
142
);
143
internal.submitted_frames.push(old_frame);
144
}
145
}
146
147
impl RecordDiagnostics for DiagnosticsRecorder {
148
fn record_f32<N>(&self, command_encoder: &mut CommandEncoder, buffer: &BufferSlice, name: N)
149
where
150
N: Into<Cow<'static, str>>,
151
{
152
assert_eq!(
153
buffer.size(),
154
BufferSize::new(4).unwrap(),
155
"DiagnosticsRecorder::record_f32 buffer slice must be 4 bytes long"
156
);
157
assert!(
158
buffer.buffer().usage().contains(BufferUsages::COPY_SRC),
159
"DiagnosticsRecorder::record_f32 buffer must have BufferUsages::COPY_SRC"
160
);
161
162
self.current_frame_lock()
163
.record_value(command_encoder, buffer, name.into(), true);
164
}
165
166
fn record_u32<N>(&self, command_encoder: &mut CommandEncoder, buffer: &BufferSlice, name: N)
167
where
168
N: Into<Cow<'static, str>>,
169
{
170
assert_eq!(
171
buffer.size(),
172
BufferSize::new(4).unwrap(),
173
"DiagnosticsRecorder::record_u32 buffer slice must be 4 bytes long"
174
);
175
assert!(
176
buffer.buffer().usage().contains(BufferUsages::COPY_SRC),
177
"DiagnosticsRecorder::record_u32 buffer must have BufferUsages::COPY_SRC"
178
);
179
180
self.current_frame_lock()
181
.record_value(command_encoder, buffer, name.into(), false);
182
}
183
184
fn begin_time_span<E: WriteTimestamp>(&self, encoder: &mut E, span_name: Cow<'static, str>) {
185
self.current_frame_lock()
186
.begin_time_span(encoder, span_name);
187
}
188
189
fn end_time_span<E: WriteTimestamp>(&self, encoder: &mut E) {
190
self.current_frame_lock().end_time_span(encoder);
191
}
192
193
fn begin_pass_span<P: Pass>(&self, pass: &mut P, span_name: Cow<'static, str>) {
194
self.current_frame_lock().begin_pass(pass, span_name);
195
}
196
197
fn end_pass_span<P: Pass>(&self, pass: &mut P) {
198
self.current_frame_lock().end_pass(pass);
199
}
200
}
201
202
struct SpanRecord {
203
thread_id: ThreadId,
204
path_range: Range<usize>,
205
pass_kind: Option<PassKind>,
206
begin_timestamp_index: Option<u32>,
207
end_timestamp_index: Option<u32>,
208
begin_instant: Option<Instant>,
209
end_instant: Option<Instant>,
210
pipeline_statistics_index: Option<u32>,
211
}
212
213
struct FrameData {
214
device: Device,
215
timestamps_query_set: Option<QuerySet>,
216
num_timestamps: u32,
217
supports_timestamps_inside_passes: bool,
218
supports_timestamps_inside_encoders: bool,
219
pipeline_statistics_query_set: Option<QuerySet>,
220
num_pipeline_statistics: u32,
221
buffer_size: u64,
222
pipeline_statistics_buffer_offset: u64,
223
resolve_buffer: Option<Buffer>,
224
read_buffer: Option<Buffer>,
225
path_components: Vec<Cow<'static, str>>,
226
open_spans: Vec<SpanRecord>,
227
closed_spans: Vec<SpanRecord>,
228
value_buffers: Vec<(Buffer, Cow<'static, str>, bool)>,
229
is_mapped: Arc<AtomicBool>,
230
callback: Option<Box<dyn FnOnce(RenderDiagnostics) + Send + Sync + 'static>>,
231
#[cfg(feature = "tracing-tracy")]
232
tracy_gpu_context: tracy_client::GpuContext,
233
}
234
235
impl FrameData {
236
fn new(
237
device: &RenderDevice,
238
features: Features,
239
#[cfg(feature = "tracing-tracy")] tracy_gpu_context: tracy_client::GpuContext,
240
) -> FrameData {
241
let wgpu_device = device.wgpu_device();
242
let mut buffer_size = 0;
243
244
let timestamps_query_set = if features.contains(Features::TIMESTAMP_QUERY) {
245
buffer_size += u64::from(MAX_TIMESTAMP_QUERIES) * TIMESTAMP_SIZE;
246
Some(wgpu_device.create_query_set(&QuerySetDescriptor {
247
label: Some("timestamps_query_set"),
248
ty: QueryType::Timestamp,
249
count: MAX_TIMESTAMP_QUERIES,
250
}))
251
} else {
252
None
253
};
254
255
let pipeline_statistics_buffer_offset = buffer_size;
256
257
let pipeline_statistics_query_set =
258
if features.contains(Features::PIPELINE_STATISTICS_QUERY) {
259
buffer_size += u64::from(MAX_PIPELINE_STATISTICS) * PIPELINE_STATISTICS_SIZE;
260
Some(wgpu_device.create_query_set(&QuerySetDescriptor {
261
label: Some("pipeline_statistics_query_set"),
262
ty: QueryType::PipelineStatistics(PipelineStatisticsTypes::all()),
263
count: MAX_PIPELINE_STATISTICS,
264
}))
265
} else {
266
None
267
};
268
269
let (resolve_buffer, read_buffer) = if buffer_size > 0 {
270
let resolve_buffer = wgpu_device.create_buffer(&BufferDescriptor {
271
label: Some("render_statistics_resolve_buffer"),
272
size: buffer_size,
273
usage: BufferUsages::QUERY_RESOLVE | BufferUsages::COPY_SRC,
274
mapped_at_creation: false,
275
});
276
let read_buffer = wgpu_device.create_buffer(&BufferDescriptor {
277
label: Some("render_statistics_read_buffer"),
278
size: buffer_size,
279
usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
280
mapped_at_creation: false,
281
});
282
(Some(resolve_buffer), Some(read_buffer))
283
} else {
284
(None, None)
285
};
286
287
FrameData {
288
device: wgpu_device.clone(),
289
timestamps_query_set,
290
num_timestamps: 0,
291
supports_timestamps_inside_passes: features
292
.contains(Features::TIMESTAMP_QUERY_INSIDE_PASSES),
293
supports_timestamps_inside_encoders: features
294
.contains(Features::TIMESTAMP_QUERY_INSIDE_ENCODERS),
295
pipeline_statistics_query_set,
296
num_pipeline_statistics: 0,
297
buffer_size,
298
pipeline_statistics_buffer_offset,
299
resolve_buffer,
300
read_buffer,
301
path_components: Vec::new(),
302
open_spans: Vec::new(),
303
closed_spans: Vec::new(),
304
value_buffers: Vec::new(),
305
is_mapped: Arc::new(AtomicBool::new(false)),
306
callback: None,
307
#[cfg(feature = "tracing-tracy")]
308
tracy_gpu_context,
309
}
310
}
311
312
fn begin(&mut self) {
313
self.num_timestamps = 0;
314
self.num_pipeline_statistics = 0;
315
self.path_components.clear();
316
self.open_spans.clear();
317
self.closed_spans.clear();
318
}
319
320
fn write_timestamp(
321
&mut self,
322
encoder: &mut impl WriteTimestamp,
323
is_inside_pass: bool,
324
) -> Option<u32> {
325
// `encoder.write_timestamp` is unsupported on WebGPU.
326
if !self.supports_timestamps_inside_encoders {
327
return None;
328
}
329
330
if is_inside_pass && !self.supports_timestamps_inside_passes {
331
return None;
332
}
333
334
if self.num_timestamps >= MAX_TIMESTAMP_QUERIES {
335
return None;
336
}
337
338
let set = self.timestamps_query_set.as_ref()?;
339
let index = self.num_timestamps;
340
encoder.write_timestamp(set, index);
341
self.num_timestamps += 1;
342
Some(index)
343
}
344
345
fn write_pipeline_statistics(
346
&mut self,
347
encoder: &mut impl WritePipelineStatistics,
348
) -> Option<u32> {
349
if self.num_pipeline_statistics >= MAX_PIPELINE_STATISTICS {
350
return None;
351
}
352
353
let set = self.pipeline_statistics_query_set.as_ref()?;
354
let index = self.num_pipeline_statistics;
355
encoder.begin_pipeline_statistics_query(set, index);
356
self.num_pipeline_statistics += 1;
357
Some(index)
358
}
359
360
fn open_span(
361
&mut self,
362
pass_kind: Option<PassKind>,
363
name: Cow<'static, str>,
364
) -> &mut SpanRecord {
365
let thread_id = thread::current().id();
366
367
let parent = self.open_spans.iter().rfind(|v| v.thread_id == thread_id);
368
369
let path_range = match &parent {
370
Some(parent) if parent.path_range.end == self.path_components.len() => {
371
parent.path_range.start..parent.path_range.end + 1
372
}
373
Some(parent) => {
374
self.path_components
375
.extend_from_within(parent.path_range.clone());
376
self.path_components.len() - parent.path_range.len()..self.path_components.len() + 1
377
}
378
None => self.path_components.len()..self.path_components.len() + 1,
379
};
380
381
self.path_components.push(name);
382
383
self.open_spans.push(SpanRecord {
384
thread_id,
385
path_range,
386
pass_kind,
387
begin_timestamp_index: None,
388
end_timestamp_index: None,
389
begin_instant: None,
390
end_instant: None,
391
pipeline_statistics_index: None,
392
});
393
394
self.open_spans.last_mut().unwrap()
395
}
396
397
fn close_span(&mut self) -> &mut SpanRecord {
398
let thread_id = thread::current().id();
399
400
let iter = self.open_spans.iter();
401
let (index, _) = iter
402
.enumerate()
403
.rfind(|(_, v)| v.thread_id == thread_id)
404
.unwrap();
405
406
let span = self.open_spans.swap_remove(index);
407
self.closed_spans.push(span);
408
self.closed_spans.last_mut().unwrap()
409
}
410
411
fn record_value(
412
&mut self,
413
command_encoder: &mut CommandEncoder,
414
buffer: &BufferSlice,
415
name: Cow<'static, str>,
416
is_f32: bool,
417
) {
418
let dest_buffer = self.device.create_buffer(&BufferDescriptor {
419
label: Some(&format!("render_diagnostic_{name}")),
420
size: 4,
421
usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
422
mapped_at_creation: false,
423
});
424
425
command_encoder.copy_buffer_to_buffer(
426
buffer.buffer(),
427
buffer.offset(),
428
&dest_buffer,
429
0,
430
Some(buffer.size().into()),
431
);
432
433
command_encoder.map_buffer_on_submit(&dest_buffer, MapMode::Read, .., |_| {});
434
435
self.value_buffers.push((dest_buffer, name, is_f32));
436
}
437
438
/// Opens a time span named `name`, recording the CPU start time and, when possible,
/// a GPU timestamp.
fn begin_time_span(&mut self, encoder: &mut impl WriteTimestamp, name: Cow<'static, str>) {
    let started_at = Instant::now();
    let timestamp_slot = self.write_timestamp(encoder, false);

    let record = self.open_span(None, name);
    record.begin_instant = Some(started_at);
    record.begin_timestamp_index = timestamp_slot;
}
446
447
/// Closes the calling thread's innermost span, recording the CPU end time and, when
/// possible, a GPU timestamp.
fn end_time_span(&mut self, encoder: &mut impl WriteTimestamp) {
    let timestamp_slot = self.write_timestamp(encoder, false);

    let record = self.close_span();
    record.end_timestamp_index = timestamp_slot;
    record.end_instant = Some(Instant::now());
}
454
455
/// Opens a pass span named `name`: records the CPU start time and, when possible, writes a
/// GPU timestamp and starts a pipeline statistics query on `pass`.
fn begin_pass<P: Pass>(&mut self, pass: &mut P, name: Cow<'static, str>) {
    let started_at = Instant::now();

    let timestamp_slot = self.write_timestamp(pass, true);
    let statistics_slot = self.write_pipeline_statistics(pass);

    let record = self.open_span(Some(P::KIND), name);
    record.begin_instant = Some(started_at);
    record.begin_timestamp_index = timestamp_slot;
    record.pipeline_statistics_index = statistics_slot;
}
466
467
/// Closes the calling thread's innermost span: writes the end timestamp when possible, ends
/// the pipeline statistics query if the span started one, and records the CPU end time.
fn end_pass(&mut self, pass: &mut impl Pass) {
    let timestamp_slot = self.write_timestamp(pass, true);

    let record = self.close_span();
    record.end_timestamp_index = timestamp_slot;

    // Only end a query that `begin_pass` actually started.
    let query_started = record.pipeline_statistics_index.is_some();
    if query_started {
        pass.end_pipeline_statistics_query();
    }

    record.end_instant = Some(Instant::now());
}
479
480
fn resolve(&mut self, encoder: &mut CommandEncoder) {
481
let Some(resolve_buffer) = &self.resolve_buffer else {
482
return;
483
};
484
485
match &self.timestamps_query_set {
486
Some(set) if self.num_timestamps > 0 => {
487
encoder.resolve_query_set(set, 0..self.num_timestamps, resolve_buffer, 0);
488
}
489
_ => {}
490
}
491
492
match &self.pipeline_statistics_query_set {
493
Some(set) if self.num_pipeline_statistics > 0 => {
494
encoder.resolve_query_set(
495
set,
496
0..self.num_pipeline_statistics,
497
resolve_buffer,
498
self.pipeline_statistics_buffer_offset,
499
);
500
}
501
_ => {}
502
}
503
504
let Some(read_buffer) = &self.read_buffer else {
505
return;
506
};
507
508
encoder.copy_buffer_to_buffer(resolve_buffer, 0, read_buffer, 0, self.buffer_size);
509
}
510
511
fn diagnostic_path(&self, range: &Range<usize>, field: &str) -> DiagnosticPath {
512
DiagnosticPath::from_components(
513
core::iter::once("render")
514
.chain(self.path_components[range.clone()].iter().map(|v| &**v))
515
.chain(core::iter::once(field)),
516
)
517
}
518
519
fn finish(&mut self, callback: impl FnOnce(RenderDiagnostics) + Send + Sync + 'static) {
520
let Some(read_buffer) = &self.read_buffer else {
521
// we still have cpu timings, so let's use them
522
523
let mut diagnostics = Vec::new();
524
525
for span in &self.closed_spans {
526
if let (Some(begin), Some(end)) = (span.begin_instant, span.end_instant) {
527
diagnostics.push(RenderDiagnostic {
528
path: self.diagnostic_path(&span.path_range, "elapsed_cpu"),
529
suffix: "ms",
530
value: (end - begin).as_secs_f64() * 1000.0,
531
});
532
}
533
}
534
535
for (buffer, diagnostic_path, is_f32) in self.value_buffers.drain(..) {
536
let buffer = buffer.get_mapped_range(..);
537
diagnostics.push(RenderDiagnostic {
538
path: DiagnosticPath::from_components(
539
core::iter::once("render")
540
.chain(core::iter::once(diagnostic_path.as_ref())),
541
),
542
suffix: "",
543
value: if is_f32 {
544
f32::from_le_bytes((*buffer).try_into().unwrap()) as f64
545
} else {
546
u32::from_le_bytes((*buffer).try_into().unwrap()) as f64
547
},
548
});
549
}
550
551
callback(RenderDiagnostics(diagnostics));
552
return;
553
};
554
555
self.callback = Some(Box::new(callback));
556
557
let is_mapped = self.is_mapped.clone();
558
read_buffer.slice(..).map_async(MapMode::Read, move |res| {
559
if let Err(e) = res {
560
bevy_log::warn!("Failed to download render statistics buffer: {e}");
561
return;
562
}
563
564
is_mapped.store(true, Ordering::Release);
565
});
566
}
567
568
// returns true if the frame is considered finished, false otherwise
569
fn run_mapped_callback(&mut self, timestamp_period_ns: f32) -> bool {
570
let Some(read_buffer) = &self.read_buffer else {
571
return true;
572
};
573
if !self.is_mapped.load(Ordering::Acquire) {
574
// need to wait more
575
return false;
576
}
577
let Some(callback) = self.callback.take() else {
578
return true;
579
};
580
581
let data = read_buffer.slice(..).get_mapped_range();
582
583
let timestamps = data[..(self.num_timestamps * 8) as usize]
584
.chunks(8)
585
.map(|v| u64::from_le_bytes(v.try_into().unwrap()))
586
.collect::<Vec<u64>>();
587
588
let start = self.pipeline_statistics_buffer_offset as usize;
589
let len = (self.num_pipeline_statistics as usize) * 40;
590
let pipeline_statistics = data[start..start + len]
591
.chunks(8)
592
.map(|v| u64::from_le_bytes(v.try_into().unwrap()))
593
.collect::<Vec<u64>>();
594
595
let mut diagnostics = Vec::new();
596
597
for span in &self.closed_spans {
598
if let (Some(begin), Some(end)) = (span.begin_instant, span.end_instant) {
599
diagnostics.push(RenderDiagnostic {
600
path: self.diagnostic_path(&span.path_range, "elapsed_cpu"),
601
suffix: "ms",
602
value: (end - begin).as_secs_f64() * 1000.0,
603
});
604
}
605
606
if let (Some(begin), Some(end)) = (span.begin_timestamp_index, span.end_timestamp_index)
607
{
608
let begin = timestamps[begin as usize] as f64;
609
let end = timestamps[end as usize] as f64;
610
let value = (end - begin) * (timestamp_period_ns as f64) / 1e6;
611
612
#[cfg(feature = "tracing-tracy")]
613
{
614
// Calling span_alloc() and end_zone() here instead of in open_span() and close_span() means that tracy does not know where each GPU command was recorded on the CPU timeline.
615
// Unfortunately we must do it this way, because tracy does not play nicely with multithreaded command recording. The start/end pairs would get all mixed up.
616
// The GPU spans themselves are still accurate though, and it's probably safe to assume that each GPU span in frame N belongs to the corresponding CPU render node span from frame N-1.
617
let name = &self.path_components[span.path_range.clone()].join("/");
618
let mut tracy_gpu_span =
619
self.tracy_gpu_context.span_alloc(name, "", "", 0).unwrap();
620
tracy_gpu_span.end_zone();
621
tracy_gpu_span.upload_timestamp_start(begin as i64);
622
tracy_gpu_span.upload_timestamp_end(end as i64);
623
}
624
625
diagnostics.push(RenderDiagnostic {
626
path: self.diagnostic_path(&span.path_range, "elapsed_gpu"),
627
suffix: "ms",
628
value,
629
});
630
}
631
632
if let Some(index) = span.pipeline_statistics_index {
633
let index = (index as usize) * 5;
634
635
if span.pass_kind == Some(PassKind::Render) {
636
diagnostics.push(RenderDiagnostic {
637
path: self.diagnostic_path(&span.path_range, "vertex_shader_invocations"),
638
suffix: "",
639
value: pipeline_statistics[index] as f64,
640
});
641
642
diagnostics.push(RenderDiagnostic {
643
path: self.diagnostic_path(&span.path_range, "clipper_invocations"),
644
suffix: "",
645
value: pipeline_statistics[index + 1] as f64,
646
});
647
648
diagnostics.push(RenderDiagnostic {
649
path: self.diagnostic_path(&span.path_range, "clipper_primitives_out"),
650
suffix: "",
651
value: pipeline_statistics[index + 2] as f64,
652
});
653
654
diagnostics.push(RenderDiagnostic {
655
path: self.diagnostic_path(&span.path_range, "fragment_shader_invocations"),
656
suffix: "",
657
value: pipeline_statistics[index + 3] as f64,
658
});
659
}
660
661
if span.pass_kind == Some(PassKind::Compute) {
662
diagnostics.push(RenderDiagnostic {
663
path: self.diagnostic_path(&span.path_range, "compute_shader_invocations"),
664
suffix: "",
665
value: pipeline_statistics[index + 4] as f64,
666
});
667
}
668
}
669
}
670
671
for (buffer, diagnostic_path, is_f32) in self.value_buffers.drain(..) {
672
let buffer = buffer.get_mapped_range(..);
673
diagnostics.push(RenderDiagnostic {
674
path: DiagnosticPath::from_components(
675
core::iter::once("render").chain(core::iter::once(diagnostic_path.as_ref())),
676
),
677
suffix: "",
678
value: if is_f32 {
679
f32::from_le_bytes((*buffer).try_into().unwrap()) as f64
680
} else {
681
u32::from_le_bytes((*buffer).try_into().unwrap()) as f64
682
},
683
});
684
}
685
686
callback(RenderDiagnostics(diagnostics));
687
688
drop(data);
689
read_buffer.unmap();
690
self.is_mapped.store(false, Ordering::Release);
691
692
true
693
}
694
}
695
696
/// Resource which stores render diagnostics of the most recent frame.
697
#[derive(Debug, Default, Clone, Resource)]
698
pub struct RenderDiagnostics(Vec<RenderDiagnostic>);
699
700
/// A render diagnostic which has been recorded, but not yet stored in [`DiagnosticsStore`].
701
#[derive(Debug, Clone, Resource)]
702
pub struct RenderDiagnostic {
703
pub path: DiagnosticPath,
704
pub suffix: &'static str,
705
pub value: f64,
706
}
707
708
/// Stores render diagnostics before they can be synced with the main app.
709
///
710
/// This mutex is locked twice per frame:
711
/// 1. in `PreUpdate`, during [`sync_diagnostics`],
712
/// 2. after rendering has finished and statistics have been downloaded from GPU.
713
#[derive(Debug, Default, Clone, Resource)]
714
pub struct RenderDiagnosticsMutex(pub(crate) Arc<Mutex<Option<RenderDiagnostics>>>);
715
716
/// Updates render diagnostics measurements.
717
pub fn sync_diagnostics(mutex: Res<RenderDiagnosticsMutex>, mut store: ResMut<DiagnosticsStore>) {
718
let Some(diagnostics) = mutex.0.lock().ok().and_then(|mut v| v.take()) else {
719
return;
720
};
721
722
let time = Instant::now();
723
724
for diagnostic in &diagnostics.0 {
725
if store.get(&diagnostic.path).is_none() {
726
store.add(Diagnostic::new(diagnostic.path.clone()).with_suffix(diagnostic.suffix));
727
}
728
729
store
730
.get_mut(&diagnostic.path)
731
.unwrap()
732
.add_measurement(DiagnosticMeasurement {
733
time,
734
value: diagnostic.value,
735
});
736
}
737
}
738
739
pub trait WriteTimestamp {
740
fn write_timestamp(&mut self, query_set: &QuerySet, index: u32);
741
}
742
743
impl WriteTimestamp for CommandEncoder {
744
fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) {
745
if cfg!(target_os = "macos") {
746
// When using tracy (and thus this function), rendering was flickering on macOS Tahoe.
747
// See: https://github.com/bevyengine/bevy/issues/22257
748
// The issue seems to be triggered when `write_timestamp` is called very close to frame
749
// presentation.
750
return;
751
}
752
CommandEncoder::write_timestamp(self, query_set, index);
753
}
754
}
755
756
impl WriteTimestamp for RenderPass<'_> {
757
fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) {
758
RenderPass::write_timestamp(self, query_set, index);
759
}
760
}
761
762
impl WriteTimestamp for ComputePass<'_> {
763
fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) {
764
ComputePass::write_timestamp(self, query_set, index);
765
}
766
}
767
768
pub trait WritePipelineStatistics {
769
fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32);
770
771
fn end_pipeline_statistics_query(&mut self);
772
}
773
774
impl WritePipelineStatistics for RenderPass<'_> {
775
fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32) {
776
RenderPass::begin_pipeline_statistics_query(self, query_set, index);
777
}
778
779
fn end_pipeline_statistics_query(&mut self) {
780
RenderPass::end_pipeline_statistics_query(self);
781
}
782
}
783
784
impl WritePipelineStatistics for ComputePass<'_> {
785
fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32) {
786
ComputePass::begin_pipeline_statistics_query(self, query_set, index);
787
}
788
789
fn end_pipeline_statistics_query(&mut self) {
790
ComputePass::end_pipeline_statistics_query(self);
791
}
792
}
793
794
pub trait Pass: WritePipelineStatistics + WriteTimestamp {
795
const KIND: PassKind;
796
}
797
798
impl Pass for RenderPass<'_> {
799
const KIND: PassKind = PassKind::Render;
800
}
801
802
impl Pass for ComputePass<'_> {
803
const KIND: PassKind = PassKind::Compute;
804
}
805
806
/// Distinguishes the two kinds of passes a span can be recorded for.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum PassKind {
    /// A render pass.
    Render,
    /// A compute pass.
    Compute,
}
811
812