Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/freedreno/freedreno_perfetto.cc
4570 views
1
/*
 * Copyright © 2021 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23
24
#include <perfetto.h>
25
26
#include "util/u_perfetto.h"
27
28
#include "freedreno_tracepoints.h"
29
30
/* Perfetto clock id attached to GPU-domain timestamps in this file; it is
 * assigned in FdRenderpassDataSource::OnStart().
 */
static uint32_t gpu_clock_id;
31
static uint64_t next_clock_sync_ns; /* cpu time of next clk sync */
32
33
/**
 * The timestamp at the point where we first emitted the clock_sync..
 * this will be a *later* timestamp than the first GPU traces (since
 * we capture the first clock_sync from the CPU *after* the first GPU
 * tracepoints happen).  To avoid confusing perfetto we need to drop
 * the GPU traces with timestamps before this.
 */
40
static uint64_t sync_gpu_ts;
41
42
/**
 * Per-writer incremental state for the renderpass data source.
 *
 * was_cleared starts out true; stage_end() checks it, sends the
 * queue/stage descriptors when it is set, and then resets it to false.
 */
struct FdRenderpassIncrementalState {
   bool was_cleared{true};
};
45
46
struct FdRenderpassTraits : public perfetto::DefaultDataSourceTraits {
47
using IncrementalStateType = FdRenderpassIncrementalState;
48
};
49
50
/**
 * Perfetto data source for freedreno render stage events.
 *
 * OnStart()/OnStop() toggle u_trace's perfetto output; OnStart() also
 * picks the clock id used for GPU timestamps.
 */
class FdRenderpassDataSource
   : public perfetto::DataSource<FdRenderpassDataSource, FdRenderpassTraits> {
public:
   void OnSetup(const SetupArgs &) override
   {
      /* Hook for applying custom configuration from the TraceConfig
       * carried in SetupArgs.  Nothing to configure at the moment.
       */
   }

   void OnStart(const StartArgs &) override
   {
      /* This is the place to initialize the GPU driver, enable counters,
       * etc.  StartArgs contains the DataSourceDescriptor, which can be
       * extended.
       */
      u_trace_perfetto_start();
      PERFETTO_LOG("Tracing started");

      /* Note: clock_id's below 128 are reserved.. for custom clock sources,
       * using the hash of a namespaced string is the recommended approach.
       * See: https://perfetto.dev/docs/concepts/clock-sync
       */
      const auto name_hash =
         _mesa_hash_string("org.freedesktop.mesa.freedreno");
      gpu_clock_id = name_hash | 0x80000000;
   }

   void OnStop(const StopArgs &) override
   {
      PERFETTO_LOG("Tracing stopped");

      /* Undo the initialization done in OnStart(). */
      u_trace_perfetto_stop();
      /* TODO we should perhaps block until queued traces are flushed? */

      /* Write one last (empty) packet and flush the writer. */
      Trace([](FdRenderpassDataSource::TraceContext tctx) {
         auto last_packet = tctx.NewTracePacket();
         last_packet->Finalize();
         tctx.Flush();
      });
   }
};
89
90
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);
91
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);
92
93
static void
94
send_descriptors(FdRenderpassDataSource::TraceContext &ctx, uint64_t ts_ns)
95
{
96
PERFETTO_LOG("Sending renderstage descriptors");
97
98
auto packet = ctx.NewTracePacket();
99
100
packet->set_timestamp(0);
101
// packet->set_timestamp(ts_ns);
102
// packet->set_timestamp_clock_id(gpu_clock_id);
103
104
auto event = packet->set_gpu_render_stage_event();
105
event->set_gpu_id(0);
106
107
auto spec = event->set_specifications();
108
109
for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) {
110
auto desc = spec->add_hw_queue();
111
112
desc->set_name(queues[i].name);
113
desc->set_description(queues[i].desc);
114
}
115
116
for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) {
117
auto desc = spec->add_stage();
118
119
desc->set_name(stages[i].name);
120
if (stages[i].desc)
121
desc->set_description(stages[i].desc);
122
}
123
}
124
125
static void
126
stage_start(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)
127
{
128
struct fd_context *ctx = fd_context(pctx);
129
struct fd_perfetto_state *p = &ctx->perfetto;
130
131
p->start_ts[stage] = ts_ns;
132
}
133
134
static void
135
stage_end(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)
136
{
137
struct fd_context *ctx = fd_context(pctx);
138
struct fd_perfetto_state *p = &ctx->perfetto;
139
140
/* If we haven't managed to calibrate the alignment between GPU and CPU
141
* timestamps yet, then skip this trace, otherwise perfetto won't know
142
* what to do with it.
143
*/
144
if (!sync_gpu_ts)
145
return;
146
147
FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
148
if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
149
send_descriptors(tctx, p->start_ts[stage]);
150
state->was_cleared = false;
151
}
152
153
auto packet = tctx.NewTracePacket();
154
155
packet->set_timestamp(p->start_ts[stage]);
156
packet->set_timestamp_clock_id(gpu_clock_id);
157
158
auto event = packet->set_gpu_render_stage_event();
159
event->set_event_id(0); // ???
160
event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);
161
event->set_duration(ts_ns - p->start_ts[stage]);
162
event->set_stage_id(stage);
163
event->set_context((uintptr_t)pctx);
164
165
/* The "surface" meta-stage has extra info about render target: */
166
if (stage == SURFACE_STAGE_ID) {
167
168
event->set_submission_id(p->submit_id);
169
170
if (p->cbuf0_format) {
171
auto data = event->add_extra_data();
172
173
data->set_name("color0 format");
174
data->set_value(util_format_short_name(p->cbuf0_format));
175
}
176
177
if (p->zs_format) {
178
auto data = event->add_extra_data();
179
180
data->set_name("zs format");
181
data->set_value(util_format_short_name(p->zs_format));
182
}
183
184
{
185
auto data = event->add_extra_data();
186
187
data->set_name("width");
188
data->set_value(std::to_string(p->width));
189
}
190
191
{
192
auto data = event->add_extra_data();
193
194
data->set_name("height");
195
data->set_value(std::to_string(p->height));
196
}
197
198
{
199
auto data = event->add_extra_data();
200
201
data->set_name("MSAA");
202
data->set_value(std::to_string(p->samples));
203
}
204
205
{
206
auto data = event->add_extra_data();
207
208
data->set_name("MRTs");
209
data->set_value(std::to_string(p->mrts));
210
}
211
212
// "renderMode"
213
// "surfaceID"
214
215
if (p->nbins) {
216
auto data = event->add_extra_data();
217
218
data->set_name("numberOfBins");
219
data->set_value(std::to_string(p->nbins));
220
}
221
222
if (p->binw) {
223
auto data = event->add_extra_data();
224
225
data->set_name("binWidth");
226
data->set_value(std::to_string(p->binw));
227
}
228
229
if (p->binh) {
230
auto data = event->add_extra_data();
231
232
data->set_name("binHeight");
233
data->set_value(std::to_string(p->binh));
234
}
235
}
236
});
237
}
238
239
#ifdef __cplusplus
240
extern "C" {
241
#endif
242
243
void
244
fd_perfetto_init(void)
245
{
246
util_perfetto_init();
247
248
perfetto::DataSourceDescriptor dsd;
249
dsd.set_name("gpu.renderstages.msm");
250
FdRenderpassDataSource::Register(dsd);
251
}
252
253
static void
254
sync_timestamp(struct fd_context *ctx)
255
{
256
uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
257
uint64_t gpu_ts;
258
259
if (cpu_ts < next_clock_sync_ns)
260
return;
261
262
if (fd_pipe_get_param(ctx->pipe, FD_TIMESTAMP, &gpu_ts)) {
263
PERFETTO_ELOG("Could not sync CPU and GPU clocks");
264
return;
265
}
266
267
/* convert GPU ts into ns: */
268
gpu_ts = ctx->ts_to_ns(gpu_ts);
269
270
FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
271
auto packet = tctx.NewTracePacket();
272
273
packet->set_timestamp(cpu_ts);
274
275
auto event = packet->set_clock_snapshot();
276
277
{
278
auto clock = event->add_clocks();
279
280
clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
281
clock->set_timestamp(cpu_ts);
282
}
283
284
{
285
auto clock = event->add_clocks();
286
287
clock->set_clock_id(gpu_clock_id);
288
clock->set_timestamp(gpu_ts);
289
}
290
291
sync_gpu_ts = gpu_ts;
292
next_clock_sync_ns = cpu_ts + 30000000;
293
});
294
}
295
296
static void
297
emit_submit_id(struct fd_context *ctx)
298
{
299
FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
300
auto packet = tctx.NewTracePacket();
301
302
packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
303
304
auto event = packet->set_vulkan_api_event();
305
auto submit = event->set_vk_queue_submit();
306
307
submit->set_submission_id(ctx->submit_count);
308
});
309
}
310
311
void
312
fd_perfetto_submit(struct fd_context *ctx)
313
{
314
sync_timestamp(ctx);
315
emit_submit_id(ctx);
316
}
317
318
/*
319
* Trace callbacks, called from u_trace once the timestamps from GPU have been
320
* collected.
321
*/
322
323
void
324
fd_start_render_pass(struct pipe_context *pctx, uint64_t ts_ns,
325
const struct trace_start_render_pass *payload)
326
{
327
stage_start(pctx, ts_ns, SURFACE_STAGE_ID);
328
329
struct fd_perfetto_state *p = &fd_context(pctx)->perfetto;
330
331
p->submit_id = payload->submit_id;
332
p->cbuf0_format = payload->cbuf0_format;
333
p->zs_format = payload->zs_format;
334
p->width = payload->width;
335
p->height = payload->height;
336
p->mrts = payload->mrts;
337
p->samples = payload->samples;
338
p->nbins = payload->nbins;
339
p->binw = payload->binw;
340
p->binh = payload->binh;
341
}
342
343
void
344
fd_end_render_pass(struct pipe_context *pctx, uint64_t ts_ns,
345
const struct trace_end_render_pass *payload)
346
{
347
stage_end(pctx, ts_ns, SURFACE_STAGE_ID);
348
}
349
350
void
351
fd_start_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,
352
const struct trace_start_binning_ib *payload)
353
{
354
stage_start(pctx, ts_ns, BINNING_STAGE_ID);
355
}
356
357
void
358
fd_end_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,
359
const struct trace_end_binning_ib *payload)
360
{
361
stage_end(pctx, ts_ns, BINNING_STAGE_ID);
362
}
363
364
void
365
fd_start_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,
366
const struct trace_start_draw_ib *payload)
367
{
368
stage_start(
369
pctx, ts_ns,
370
fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);
371
}
372
373
void
374
fd_end_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,
375
const struct trace_end_draw_ib *payload)
376
{
377
stage_end(
378
pctx, ts_ns,
379
fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);
380
}
381
382
void
383
fd_start_blit(struct pipe_context *pctx, uint64_t ts_ns,
384
const struct trace_start_blit *payload)
385
{
386
stage_start(pctx, ts_ns, BLIT_STAGE_ID);
387
}
388
389
void
390
fd_end_blit(struct pipe_context *pctx, uint64_t ts_ns,
391
const struct trace_end_blit *payload)
392
{
393
stage_end(pctx, ts_ns, BLIT_STAGE_ID);
394
}
395
396
void
397
fd_start_compute(struct pipe_context *pctx, uint64_t ts_ns,
398
const struct trace_start_compute *payload)
399
{
400
stage_start(pctx, ts_ns, COMPUTE_STAGE_ID);
401
}
402
403
void
404
fd_end_compute(struct pipe_context *pctx, uint64_t ts_ns,
405
const struct trace_end_compute *payload)
406
{
407
stage_end(pctx, ts_ns, COMPUTE_STAGE_ID);
408
}
409
410
void
411
fd_start_clear_restore(struct pipe_context *pctx, uint64_t ts_ns,
412
const struct trace_start_clear_restore *payload)
413
{
414
stage_start(pctx, ts_ns, CLEAR_RESTORE_STAGE_ID);
415
}
416
417
void
418
fd_end_clear_restore(struct pipe_context *pctx, uint64_t ts_ns,
419
const struct trace_end_clear_restore *payload)
420
{
421
stage_end(pctx, ts_ns, CLEAR_RESTORE_STAGE_ID);
422
}
423
424
void
425
fd_start_resolve(struct pipe_context *pctx, uint64_t ts_ns,
426
const struct trace_start_resolve *payload)
427
{
428
stage_start(pctx, ts_ns, RESOLVE_STAGE_ID);
429
}
430
431
void
432
fd_end_resolve(struct pipe_context *pctx, uint64_t ts_ns,
433
const struct trace_end_resolve *payload)
434
{
435
stage_end(pctx, ts_ns, RESOLVE_STAGE_ID);
436
}
437
438
#ifdef __cplusplus
439
}
440
#endif
441
442