Path: blob/21.2-virgl/src/intel/common/intel_measure.c
4547 views
/*1* Copyright © 2020 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included11* in all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS14* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER17* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING18* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER19* DEALINGS IN THE SOFTWARE.20*/2122/**23* @file intel_measure.c24*/2526#include "intel_measure.h"2728#include <errno.h>29#include <fcntl.h>30#include <stdlib.h>31#include <string.h>32#include <sys/stat.h>33#include <sys/types.h>34#include <unistd.h>3536#define __STDC_FORMAT_MACROS 137#include <inttypes.h>3839#include "dev/intel_device_info.h"40#include "util/debug.h"41#include "util/macros.h"42#include "util/u_debug.h"434445static const struct debug_control debug_control[] = {46{ "draw", INTEL_MEASURE_DRAW },47{ "rt", INTEL_MEASURE_RENDERPASS },48{ "shader", INTEL_MEASURE_SHADER },49{ "batch", INTEL_MEASURE_BATCH },50{ "frame", INTEL_MEASURE_FRAME },51{ NULL, 0 }52};53static struct intel_measure_config config;5455void56intel_measure_init(struct intel_measure_device *device)57{58static bool once = false;59const char *env = getenv("INTEL_MEASURE");60if (unlikely(!once)) {61once = true;62memset(&config, 0, sizeof(struct intel_measure_config));63if (!env)64return;6566config.file = stderr;67config.flags = parse_debug_string(env, debug_control);68if (!config.flags)69config.flags = INTEL_MEASURE_DRAW;70config.enabled = true;71config.event_interval = 1;72config.control_fh = -1;7374/* Overflows of the following defaults will drop data and generate a75* warning on the output filehandle.76*/7778/* default batch_size allows for 8k renders in a single batch */79const int DEFAULT_BATCH_SIZE = 16 * 1024;80config.batch_size = DEFAULT_BATCH_SIZE;8182/* Default buffer_size allows for 16k batches per line of output in the83* csv. Overflow may occur for offscreen workloads or large 'interval'84* settings.85*/86const int DEFAULT_BUFFER_SIZE = 16 * 1024;87config.buffer_size = DEFAULT_BUFFER_SIZE;8889const char *filename = strstr(env, "file=");90const char *start_frame_s = strstr(env, "start=");91const char *count_frame_s = strstr(env, "count=");92const char *control_path = strstr(env, "control=");93const char *interval_s = strstr(env, "interval=");94const char *batch_size_s = strstr(env, "batch_size=");95const char *buffer_size_s = strstr(env, "buffer_size=");96while (true) {97char *sep = strrchr(env, ',');98if (sep == NULL)99break;100*sep = '\0';101}102103if (filename && !__check_suid()) {104filename += 5;105config.file = fopen(filename, "w");106if (!config.file) {107fprintf(stderr, "INTEL_MEASURE failed to open output file %s: %s\n",108filename, strerror (errno));109abort();110}111}112113if (start_frame_s) {114start_frame_s += 6;115const int start_frame = atoi(start_frame_s);116if (start_frame < 0) {117fprintf(stderr, "INTEL_MEASURE start frame may "118"not be negative: %d\n", start_frame);119abort();120}121122config.start_frame = start_frame;123config.enabled = false;124}125126if (count_frame_s) {127count_frame_s += 6;128const int count_frame = atoi(count_frame_s);129if (count_frame <= 0) {130fprintf(stderr, "INTEL_MEASURE count frame must be positive: %d\n",131count_frame);132abort();133}134135config.end_frame = config.start_frame + count_frame;136}137138if (control_path) {139control_path += 8;140if (mkfifoat(AT_FDCWD, control_path, O_CREAT | S_IRUSR | S_IWUSR)) {141if (errno != EEXIST) {142fprintf(stderr, "INTEL_MEASURE failed to create control "143"fifo %s: %s\n", control_path, strerror (errno));144abort();145}146}147148config.control_fh = openat(AT_FDCWD, control_path,149O_RDONLY | O_NONBLOCK);150if (config.control_fh == -1) {151fprintf(stderr, "INTEL_MEASURE failed to open control fifo "152"%s: %s\n", control_path, strerror (errno));153abort();154}155156/* when using a control fifo, do not start until the user triggers157* capture158*/159config.enabled = false;160}161162if (interval_s) {163interval_s += 9;164const int event_interval = atoi(interval_s);165if (event_interval < 1) {166fprintf(stderr, "INTEL_MEASURE event_interval must be positive: "167"%d\n", event_interval);168abort();169}170config.event_interval = event_interval;171}172173if (batch_size_s) {174batch_size_s += 11;175const int batch_size = atoi(batch_size_s);176if (batch_size < DEFAULT_BATCH_SIZE) {177fprintf(stderr, "INTEL_MEASURE minimum batch_size is 4k: "178"%d\n", batch_size);179abort();180}181if (batch_size > DEFAULT_BATCH_SIZE * 1024) {182fprintf(stderr, "INTEL_MEASURE batch_size limited to 4M: "183"%d\n", batch_size);184abort();185}186187config.batch_size = batch_size;188}189190if (buffer_size_s) {191buffer_size_s += 12;192const int buffer_size = atoi(buffer_size_s);193if (buffer_size < DEFAULT_BUFFER_SIZE) {194fprintf(stderr, "INTEL_MEASURE minimum buffer_size is 1k: "195"%d\n", DEFAULT_BUFFER_SIZE);196}197if (buffer_size > DEFAULT_BUFFER_SIZE * 1024) {198fprintf(stderr, "INTEL_MEASURE buffer_size limited to 1M: "199"%d\n", buffer_size);200}201202config.buffer_size = buffer_size;203}204205fputs("draw_start,draw_end,frame,batch,"206"event_index,event_count,type,count,vs,tcs,tes,"207"gs,fs,cs,framebuffer,idle_ns,time_ns\n",208config.file);209}210211device->config = NULL;212device->frame = 0;213pthread_mutex_init(&device->mutex, NULL);214list_inithead(&device->queued_snapshots);215216if (env)217device->config = &config;218}219220const char *221intel_measure_snapshot_string(enum intel_measure_snapshot_type type)222{223const char *names[] = {224[INTEL_SNAPSHOT_UNDEFINED] = "undefined",225[INTEL_SNAPSHOT_BLIT] = "blit",226[INTEL_SNAPSHOT_CCS_AMBIGUATE] = "ccs ambiguate",227[INTEL_SNAPSHOT_CCS_COLOR_CLEAR] = "ccs color clear",228[INTEL_SNAPSHOT_CCS_PARTIAL_RESOLVE] = "ccs partial resolve",229[INTEL_SNAPSHOT_CCS_RESOLVE] = "ccs resolve",230[INTEL_SNAPSHOT_COMPUTE] = "compute",231[INTEL_SNAPSHOT_COPY] = "copy",232[INTEL_SNAPSHOT_DRAW] = "draw",233[INTEL_SNAPSHOT_HIZ_AMBIGUATE] = "hiz ambiguate",234[INTEL_SNAPSHOT_HIZ_CLEAR] = "hiz clear",235[INTEL_SNAPSHOT_HIZ_RESOLVE] = "hiz resolve",236[INTEL_SNAPSHOT_MCS_COLOR_CLEAR] = "mcs color clear",237[INTEL_SNAPSHOT_MCS_PARTIAL_RESOLVE] = "mcs partial resolve",238[INTEL_SNAPSHOT_SLOW_COLOR_CLEAR] = "slow color clear",239[INTEL_SNAPSHOT_SLOW_DEPTH_CLEAR] = "slow depth clear",240[INTEL_SNAPSHOT_SECONDARY_BATCH] = "secondary command buffer",241[INTEL_SNAPSHOT_END] = "end",242};243assert(type < ARRAY_SIZE(names));244assert(names[type] != NULL);245assert(type != INTEL_SNAPSHOT_UNDEFINED);246return names[type];247}248249/**250* Indicate to the caller whether a new snapshot should be started.251*252* Callers provide rendering state to this method to determine whether the253* current start event should be skipped. Depending on the configuration254* flags, a new snapshot may start:255* - at every event256* - when the program changes257* - after a batch is submitted258* - at frame boundaries259*260* Returns true if a snapshot should be started.261*/262bool263intel_measure_state_changed(const struct intel_measure_batch *batch,264uintptr_t vs, uintptr_t tcs, uintptr_t tes,265uintptr_t gs, uintptr_t fs, uintptr_t cs)266{267if (batch->index == 0) {268/* always record the first event */269return true;270}271272const struct intel_measure_snapshot *last_snap =273&batch->snapshots[batch->index - 1];274275if (config.flags & INTEL_MEASURE_DRAW)276return true;277278if (batch->index % 2 == 0) {279/* no snapshot is running, but we have a start event */280return true;281}282283if (config.flags & (INTEL_MEASURE_FRAME | INTEL_MEASURE_BATCH)) {284/* only start collection when index == 0, at the beginning of a batch */285return false;286}287288if (config.flags & INTEL_MEASURE_RENDERPASS) {289return ((last_snap->framebuffer != batch->framebuffer) ||290/* compute workloads are always in their own renderpass */291(cs != 0));292}293294/* remaining comparisons check the state of the render pipeline for295* INTEL_MEASURE_PROGRAM296*/297assert(config.flags & INTEL_MEASURE_SHADER);298299if (!vs && !tcs && !tes && !gs && !fs && !cs) {300/* blorp always changes program */301return true;302}303304return (last_snap->vs != (uintptr_t) vs ||305last_snap->tcs != (uintptr_t) tcs ||306last_snap->tes != (uintptr_t) tes ||307last_snap->gs != (uintptr_t) gs ||308last_snap->fs != (uintptr_t) fs ||309last_snap->cs != (uintptr_t) cs);310}311312/**313* Notify intel_measure that a frame is about to begin.314*315* Configuration values and the control fifo may commence measurement at frame316* boundaries.317*/318void319intel_measure_frame_transition(unsigned frame)320{321if (frame == config.start_frame)322config.enabled = true;323else if (frame == config.end_frame)324config.enabled = false;325326/* user commands to the control fifo will override any start/count327* environment settings328*/329if (config.control_fh != -1) {330while (true) {331const unsigned BUF_SIZE = 128;332char buf[BUF_SIZE];333ssize_t bytes = read(config.control_fh, buf, BUF_SIZE - 1);334if (bytes == 0)335break;336if (bytes == -1) {337fprintf(stderr, "INTEL_MEASURE failed to read control fifo: %s\n",338strerror(errno));339abort();340}341342buf[bytes] = '\0';343char *nptr = buf, *endptr = buf;344while (*nptr != '\0' && *endptr != '\0') {345long fcount = strtol(nptr, &endptr, 10);346if (nptr == endptr) {347config.enabled = false;348fprintf(stderr, "INTEL_MEASURE invalid frame count on "349"control fifo.\n");350lseek(config.control_fh, 0, SEEK_END);351break;352} else if (fcount == 0) {353config.enabled = false;354} else {355config.enabled = true;356config.end_frame = frame + fcount;357}358359nptr = endptr + 1;360}361}362}363}364365#define TIMESTAMP_BITS 36366static uint64_t367raw_timestamp_delta(uint64_t time0, uint64_t time1)368{369if (time0 > time1) {370return (1ULL << TIMESTAMP_BITS) + time1 - time0;371} else {372return time1 - time0;373}374}375376/**377* Verify that rendering has completed for the batch378*379* Rendering is complete when the last timestamp has been written.380*/381bool382intel_measure_ready(struct intel_measure_batch *batch)383{384assert(batch->timestamps);385assert(batch->index > 1);386return (batch->timestamps[batch->index - 1] != 0);387}388389/**390* Submit completed snapshots for buffering.391*392* Snapshot data becomes available when asynchronous rendering completes.393* Depending on configuration, snapshot data may need to be collated before394* writing to the output file.395*/396static void397intel_measure_push_result(struct intel_measure_device *device,398struct intel_measure_batch *batch)399{400struct intel_measure_ringbuffer *rb = device->ringbuffer;401402uint64_t *timestamps = batch->timestamps;403assert(timestamps != NULL);404assert(timestamps[0] != 0);405406for (int i = 0; i < batch->index; i += 2) {407const struct intel_measure_snapshot *begin = &batch->snapshots[i];408const struct intel_measure_snapshot *end = &batch->snapshots[i+1];409410assert (end->type == INTEL_SNAPSHOT_END);411412if (begin->type == INTEL_SNAPSHOT_SECONDARY_BATCH) {413assert(begin->secondary != NULL);414begin->secondary->batch_count = batch->batch_count;415intel_measure_push_result(device, begin->secondary);416continue;417}418419const uint64_t prev_end_ts = rb->results[rb->head].end_ts;420421/* advance ring buffer */422if (++rb->head == config.buffer_size)423rb->head = 0;424if (rb->head == rb->tail) {425static bool warned = false;426if (unlikely(!warned)) {427fprintf(config.file,428"WARNING: Buffered data exceeds INTEL_MEASURE limit: %d. "429"Data has been dropped. "430"Increase setting with INTEL_MEASURE=buffer_size={count}\n",431config.buffer_size);432warned = true;433}434break;435}436437struct intel_measure_buffered_result *buffered_result =438&rb->results[rb->head];439440memset(buffered_result, 0, sizeof(*buffered_result));441memcpy(&buffered_result->snapshot, begin,442sizeof(struct intel_measure_snapshot));443buffered_result->start_ts = timestamps[i];444buffered_result->end_ts = timestamps[i+1];445buffered_result->idle_duration =446raw_timestamp_delta(prev_end_ts, buffered_result->start_ts);447buffered_result->frame = batch->frame;448buffered_result->batch_count = batch->batch_count;449buffered_result->event_index = i / 2;450buffered_result->snapshot.event_count = end->event_count;451}452}453454static unsigned455ringbuffer_size(const struct intel_measure_ringbuffer *rb)456{457unsigned head = rb->head;458if (head < rb->tail)459head += config.buffer_size;460return head - rb->tail;461}462463static const struct intel_measure_buffered_result *464ringbuffer_pop(struct intel_measure_ringbuffer *rb)465{466if (rb->tail == rb->head) {467/* encountered ringbuffer overflow while processing events */468return NULL;469}470471if (++rb->tail == config.buffer_size)472rb->tail = 0;473return &rb->results[rb->tail];474}475476static const struct intel_measure_buffered_result *477ringbuffer_peek(const struct intel_measure_ringbuffer *rb, unsigned index)478{479int result_offset = rb->tail + index + 1;480if (result_offset >= config.buffer_size)481result_offset -= config.buffer_size;482return &rb->results[result_offset];483}484485486/**487* Determine the number of buffered events that must be combined for the next488* line of csv output. Returns 0 if more events are needed.489*/490static unsigned491buffered_event_count(struct intel_measure_device *device)492{493const struct intel_measure_ringbuffer *rb = device->ringbuffer;494const unsigned buffered_event_count = ringbuffer_size(rb);495if (buffered_event_count == 0) {496/* no events to collect */497return 0;498}499500/* count the number of buffered events required to meet the configuration */501if (config.flags & (INTEL_MEASURE_DRAW |502INTEL_MEASURE_RENDERPASS |503INTEL_MEASURE_SHADER)) {504/* For these flags, every buffered event represents a line in the505* output. None of these events span batches. If the event interval506* crosses a batch boundary, then the next interval starts with the new507* batch.508*/509return 1;510}511512const unsigned start_frame = ringbuffer_peek(rb, 0)->frame;513if (config.flags & INTEL_MEASURE_BATCH) {514/* each buffered event is a command buffer. The number of events to515* process is the same as the interval, unless the interval crosses a516* frame boundary517*/518if (buffered_event_count < config.event_interval) {519/* not enough events */520return 0;521}522523/* Imperfect frame tracking requires us to allow for *older* frames */524if (ringbuffer_peek(rb, config.event_interval - 1)->frame <= start_frame) {525/* No frame transition. The next {interval} events should be combined. */526return config.event_interval;527}528529/* Else a frame transition occurs within the interval. Find the530* transition, so the following line of output begins with the batch531* that starts the new frame.532*/533for (int event_index = 1;534event_index <= config.event_interval;535++event_index) {536if (ringbuffer_peek(rb, event_index)->frame > start_frame)537return event_index;538}539540assert(false);541}542543/* Else we need to search buffered events to find the matching frame544* transition for our interval.545*/546assert(config.flags & INTEL_MEASURE_FRAME);547for (int event_index = 1;548event_index < buffered_event_count;549++event_index) {550const int latest_frame = ringbuffer_peek(rb, event_index)->frame;551if (latest_frame - start_frame >= config.event_interval)552return event_index;553}554555return 0;556}557558/**559* Take result_count events from the ringbuffer and output them as a single560* line.561*/562static void563print_combined_results(struct intel_measure_device *measure_device,564int result_count,565struct intel_device_info *info)566{567if (result_count == 0)568return;569570struct intel_measure_ringbuffer *result_rb = measure_device->ringbuffer;571assert(ringbuffer_size(result_rb) >= result_count);572const struct intel_measure_buffered_result* start_result =573ringbuffer_pop(result_rb);574const struct intel_measure_buffered_result* current_result = start_result;575576if (start_result == NULL)577return;578--result_count;579580uint64_t duration_ts = raw_timestamp_delta(start_result->start_ts,581current_result->end_ts);582unsigned event_count = start_result->snapshot.event_count;583while (result_count-- > 0) {584assert(ringbuffer_size(result_rb) > 0);585current_result = ringbuffer_pop(result_rb);586if (current_result == NULL)587return;588duration_ts += raw_timestamp_delta(current_result->start_ts,589current_result->end_ts);590event_count += current_result->snapshot.event_count;591}592593const struct intel_measure_snapshot *begin = &start_result->snapshot;594fprintf(config.file, "%"PRIu64",%"PRIu64",%u,%u,%u,%u,%s,%u,"595"0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR","596"0x%"PRIxPTR",0x%"PRIxPTR",%"PRIu64",%"PRIu64"\n",597start_result->start_ts, current_result->end_ts,598start_result->frame, start_result->batch_count,599start_result->event_index, event_count,600begin->event_name, begin->count,601begin->vs, begin->tcs, begin->tes, begin->gs, begin->fs, begin->cs,602begin->framebuffer,603intel_device_info_timebase_scale(info, start_result->idle_duration),604intel_device_info_timebase_scale(info, duration_ts));605}606607/**608* Empty the ringbuffer of events that can be printed.609*/610static void611intel_measure_print(struct intel_measure_device *device,612struct intel_device_info *info)613{614while (true) {615const int events_to_combine = buffered_event_count(device);616if (events_to_combine == 0)617break;618print_combined_results(device, events_to_combine, info);619}620}621622/**623* Collect snapshots from completed command buffers and submit them to624* intel_measure for printing.625*/626void627intel_measure_gather(struct intel_measure_device *measure_device,628struct intel_device_info *info)629{630pthread_mutex_lock(&measure_device->mutex);631632/* Iterate snapshots and collect if ready. Each snapshot queue will be633* in-order, but we must determine which queue has the oldest batch.634*/635/* iterate snapshots and collect if ready */636while (!list_is_empty(&measure_device->queued_snapshots)) {637struct intel_measure_batch *batch =638list_first_entry(&measure_device->queued_snapshots,639struct intel_measure_batch, link);640641if (!intel_measure_ready(batch)) {642/* command buffer has begun execution on the gpu, but has not643* completed.644*/645break;646}647648list_del(&batch->link);649assert(batch->index % 2 == 0);650651intel_measure_push_result(measure_device, batch);652653batch->index = 0;654batch->frame = 0;655}656657intel_measure_print(measure_device, info);658pthread_mutex_unlock(&measure_device->mutex);659}660661662663