Path: blob/21.2-virgl/src/gallium/drivers/freedreno/freedreno_perfetto.cc
4570 views
/*1* Copyright © 2021 Google, Inc.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*/2223#include <perfetto.h>2425#include "util/u_perfetto.h"2627#include "freedreno_tracepoints.h"2829static uint32_t gpu_clock_id;30static uint64_t next_clock_sync_ns; /* cpu time of next clk sync */3132/**33* The timestamp at the point where we first emitted the clock_sync..34* this will be a *later* timestamp that the first GPU traces (since35* we capture the first clock_sync from the CPU *after* the first GPU36* tracepoints happen). To avoid confusing perfetto we need to drop37* the GPU traces with timestamps before this.38*/39static uint64_t sync_gpu_ts;4041struct FdRenderpassIncrementalState {42bool was_cleared = true;43};4445struct FdRenderpassTraits : public perfetto::DefaultDataSourceTraits {46using IncrementalStateType = FdRenderpassIncrementalState;47};4849class FdRenderpassDataSource : public perfetto::DataSource<FdRenderpassDataSource, FdRenderpassTraits> {50public:51void OnSetup(const SetupArgs &) override52{53// Use this callback to apply any custom configuration to your data source54// based on the TraceConfig in SetupArgs.55}5657void OnStart(const StartArgs &) override58{59// This notification can be used to initialize the GPU driver, enable60// counters, etc. StartArgs will contains the DataSourceDescriptor,61// which can be extended.62u_trace_perfetto_start();63PERFETTO_LOG("Tracing started");6465/* Note: clock_id's below 128 are reserved.. for custom clock sources,66* using the hash of a namespaced string is the recommended approach.67* See: https://perfetto.dev/docs/concepts/clock-sync68*/69gpu_clock_id =70_mesa_hash_string("org.freedesktop.mesa.freedreno") | 0x80000000;71}7273void OnStop(const StopArgs &) override74{75PERFETTO_LOG("Tracing stopped");7677// Undo any initialization done in OnStart.78u_trace_perfetto_stop();79// TODO we should perhaps block until queued traces are flushed?8081Trace([](FdRenderpassDataSource::TraceContext ctx) {82auto packet = ctx.NewTracePacket();83packet->Finalize();84ctx.Flush();85});86}87};8889PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);90PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);9192static void93send_descriptors(FdRenderpassDataSource::TraceContext &ctx, uint64_t ts_ns)94{95PERFETTO_LOG("Sending renderstage descriptors");9697auto packet = ctx.NewTracePacket();9899packet->set_timestamp(0);100// packet->set_timestamp(ts_ns);101// packet->set_timestamp_clock_id(gpu_clock_id);102103auto event = packet->set_gpu_render_stage_event();104event->set_gpu_id(0);105106auto spec = event->set_specifications();107108for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) {109auto desc = spec->add_hw_queue();110111desc->set_name(queues[i].name);112desc->set_description(queues[i].desc);113}114115for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) {116auto desc = spec->add_stage();117118desc->set_name(stages[i].name);119if (stages[i].desc)120desc->set_description(stages[i].desc);121}122}123124static void125stage_start(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)126{127struct fd_context *ctx = fd_context(pctx);128struct fd_perfetto_state *p = &ctx->perfetto;129130p->start_ts[stage] = ts_ns;131}132133static void134stage_end(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)135{136struct fd_context *ctx = fd_context(pctx);137struct fd_perfetto_state *p = &ctx->perfetto;138139/* If we haven't managed to calibrate the alignment between GPU and CPU140* timestamps yet, then skip this trace, otherwise perfetto won't know141* what to do with it.142*/143if (!sync_gpu_ts)144return;145146FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {147if (auto state = tctx.GetIncrementalState(); state->was_cleared) {148send_descriptors(tctx, p->start_ts[stage]);149state->was_cleared = false;150}151152auto packet = tctx.NewTracePacket();153154packet->set_timestamp(p->start_ts[stage]);155packet->set_timestamp_clock_id(gpu_clock_id);156157auto event = packet->set_gpu_render_stage_event();158event->set_event_id(0); // ???159event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);160event->set_duration(ts_ns - p->start_ts[stage]);161event->set_stage_id(stage);162event->set_context((uintptr_t)pctx);163164/* The "surface" meta-stage has extra info about render target: */165if (stage == SURFACE_STAGE_ID) {166167event->set_submission_id(p->submit_id);168169if (p->cbuf0_format) {170auto data = event->add_extra_data();171172data->set_name("color0 format");173data->set_value(util_format_short_name(p->cbuf0_format));174}175176if (p->zs_format) {177auto data = event->add_extra_data();178179data->set_name("zs format");180data->set_value(util_format_short_name(p->zs_format));181}182183{184auto data = event->add_extra_data();185186data->set_name("width");187data->set_value(std::to_string(p->width));188}189190{191auto data = event->add_extra_data();192193data->set_name("height");194data->set_value(std::to_string(p->height));195}196197{198auto data = event->add_extra_data();199200data->set_name("MSAA");201data->set_value(std::to_string(p->samples));202}203204{205auto data = event->add_extra_data();206207data->set_name("MRTs");208data->set_value(std::to_string(p->mrts));209}210211// "renderMode"212// "surfaceID"213214if (p->nbins) {215auto data = event->add_extra_data();216217data->set_name("numberOfBins");218data->set_value(std::to_string(p->nbins));219}220221if (p->binw) {222auto data = event->add_extra_data();223224data->set_name("binWidth");225data->set_value(std::to_string(p->binw));226}227228if (p->binh) {229auto data = event->add_extra_data();230231data->set_name("binHeight");232data->set_value(std::to_string(p->binh));233}234}235});236}237238#ifdef __cplusplus239extern "C" {240#endif241242void243fd_perfetto_init(void)244{245util_perfetto_init();246247perfetto::DataSourceDescriptor dsd;248dsd.set_name("gpu.renderstages.msm");249FdRenderpassDataSource::Register(dsd);250}251252static void253sync_timestamp(struct fd_context *ctx)254{255uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();256uint64_t gpu_ts;257258if (cpu_ts < next_clock_sync_ns)259return;260261if (fd_pipe_get_param(ctx->pipe, FD_TIMESTAMP, &gpu_ts)) {262PERFETTO_ELOG("Could not sync CPU and GPU clocks");263return;264}265266/* convert GPU ts into ns: */267gpu_ts = ctx->ts_to_ns(gpu_ts);268269FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {270auto packet = tctx.NewTracePacket();271272packet->set_timestamp(cpu_ts);273274auto event = packet->set_clock_snapshot();275276{277auto clock = event->add_clocks();278279clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);280clock->set_timestamp(cpu_ts);281}282283{284auto clock = event->add_clocks();285286clock->set_clock_id(gpu_clock_id);287clock->set_timestamp(gpu_ts);288}289290sync_gpu_ts = gpu_ts;291next_clock_sync_ns = cpu_ts + 30000000;292});293}294295static void296emit_submit_id(struct fd_context *ctx)297{298FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {299auto packet = tctx.NewTracePacket();300301packet->set_timestamp(perfetto::base::GetBootTimeNs().count());302303auto event = packet->set_vulkan_api_event();304auto submit = event->set_vk_queue_submit();305306submit->set_submission_id(ctx->submit_count);307});308}309310void311fd_perfetto_submit(struct fd_context *ctx)312{313sync_timestamp(ctx);314emit_submit_id(ctx);315}316317/*318* Trace callbacks, called from u_trace once the timestamps from GPU have been319* collected.320*/321322void323fd_start_render_pass(struct pipe_context *pctx, uint64_t ts_ns,324const struct trace_start_render_pass *payload)325{326stage_start(pctx, ts_ns, SURFACE_STAGE_ID);327328struct fd_perfetto_state *p = &fd_context(pctx)->perfetto;329330p->submit_id = payload->submit_id;331p->cbuf0_format = payload->cbuf0_format;332p->zs_format = payload->zs_format;333p->width = payload->width;334p->height = payload->height;335p->mrts = payload->mrts;336p->samples = payload->samples;337p->nbins = payload->nbins;338p->binw = payload->binw;339p->binh = payload->binh;340}341342void343fd_end_render_pass(struct pipe_context *pctx, uint64_t ts_ns,344const struct trace_end_render_pass *payload)345{346stage_end(pctx, ts_ns, SURFACE_STAGE_ID);347}348349void350fd_start_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,351const struct trace_start_binning_ib *payload)352{353stage_start(pctx, ts_ns, BINNING_STAGE_ID);354}355356void357fd_end_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,358const struct trace_end_binning_ib *payload)359{360stage_end(pctx, ts_ns, BINNING_STAGE_ID);361}362363void364fd_start_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,365const struct trace_start_draw_ib *payload)366{367stage_start(368pctx, ts_ns,369fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);370}371372void373fd_end_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,374const struct trace_end_draw_ib *payload)375{376stage_end(377pctx, ts_ns,378fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);379}380381void382fd_start_blit(struct pipe_context *pctx, uint64_t ts_ns,383const struct trace_start_blit *payload)384{385stage_start(pctx, ts_ns, BLIT_STAGE_ID);386}387388void389fd_end_blit(struct pipe_context *pctx, uint64_t ts_ns,390const struct trace_end_blit *payload)391{392stage_end(pctx, ts_ns, BLIT_STAGE_ID);393}394395void396fd_start_compute(struct pipe_context *pctx, uint64_t ts_ns,397const struct trace_start_compute *payload)398{399stage_start(pctx, ts_ns, COMPUTE_STAGE_ID);400}401402void403fd_end_compute(struct pipe_context *pctx, uint64_t ts_ns,404const struct trace_end_compute *payload)405{406stage_end(pctx, ts_ns, COMPUTE_STAGE_ID);407}408409void410fd_start_clear_restore(struct pipe_context *pctx, uint64_t ts_ns,411const struct trace_start_clear_restore *payload)412{413stage_start(pctx, ts_ns, CLEAR_RESTORE_STAGE_ID);414}415416void417fd_end_clear_restore(struct pipe_context *pctx, uint64_t ts_ns,418const struct trace_end_clear_restore *payload)419{420stage_end(pctx, ts_ns, CLEAR_RESTORE_STAGE_ID);421}422423void424fd_start_resolve(struct pipe_context *pctx, uint64_t ts_ns,425const struct trace_start_resolve *payload)426{427stage_start(pctx, ts_ns, RESOLVE_STAGE_ID);428}429430void431fd_end_resolve(struct pipe_context *pctx, uint64_t ts_ns,432const struct trace_end_resolve *payload)433{434stage_end(pctx, ts_ns, RESOLVE_STAGE_ID);435}436437#ifdef __cplusplus438}439#endif440441442