Path: blob/21.2-virgl/src/gallium/drivers/freedreno/a5xx/fd5_query.c
4574 views
/*1* Copyright (C) 2017 Rob Clark <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors:23* Rob Clark <[email protected]>24*/2526/* NOTE: see https://github.com/freedreno/freedreno/wiki/A5xx-Queries */2728#include "freedreno_query_acc.h"29#include "freedreno_resource.h"3031#include "fd5_context.h"32#include "fd5_emit.h"33#include "fd5_format.h"34#include "fd5_query.h"3536struct PACKED fd5_query_sample {37uint64_t start;38uint64_t result;39uint64_t stop;40};4142/* offset of a single field of an array of fd5_query_sample: */43#define query_sample_idx(aq, idx, field) \44fd_resource((aq)->prsc)->bo, \45(idx * sizeof(struct fd5_query_sample)) + \46offsetof(struct fd5_query_sample, field), \470, 04849/* offset of a single field of fd5_query_sample: */50#define query_sample(aq, field) query_sample_idx(aq, 0, field)5152/*53* Occlusion Query:54*55* OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they56* interpret results57*/5859static void60occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)61{62struct fd_ringbuffer *ring = batch->draw;6364OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);65OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);6667OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);68OUT_RELOC(ring, query_sample(aq, start));6970fd5_event_write(batch, ring, ZPASS_DONE, false);71fd_reset_wfi(batch);7273fd5_context(batch->ctx)->samples_passed_queries++;74}7576static void77occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)78{79struct fd_ringbuffer *ring = batch->draw;8081OUT_PKT7(ring, CP_MEM_WRITE, 4);82OUT_RELOC(ring, query_sample(aq, stop));83OUT_RING(ring, 0xffffffff);84OUT_RING(ring, 0xffffffff);8586OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);8788OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);89OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);9091OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);92OUT_RELOC(ring, query_sample(aq, stop));9394fd5_event_write(batch, ring, ZPASS_DONE, false);95fd_reset_wfi(batch);9697OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);98OUT_RING(ring, 0x00000014); // XXX99OUT_RELOC(ring, query_sample(aq, stop));100OUT_RING(ring, 0xffffffff);101OUT_RING(ring, 0xffffffff);102OUT_RING(ring, 0x00000010); // XXX103104/* result += stop - start: */105OUT_PKT7(ring, CP_MEM_TO_MEM, 9);106OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);107OUT_RELOC(ring, query_sample(aq, result)); /* dst */108OUT_RELOC(ring, query_sample(aq, result)); /* srcA */109OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */110OUT_RELOC(ring, query_sample(aq, start)); /* srcC */111112fd5_context(batch->ctx)->samples_passed_queries--;113}114115static void116occlusion_counter_result(struct fd_acc_query *aq, void *buf,117union pipe_query_result *result)118{119struct fd5_query_sample *sp = buf;120result->u64 = sp->result;121}122123static void124occlusion_predicate_result(struct fd_acc_query *aq, void *buf,125union pipe_query_result *result)126{127struct fd5_query_sample *sp = buf;128result->b = !!sp->result;129}130131static const struct fd_acc_sample_provider occlusion_counter = {132.query_type = PIPE_QUERY_OCCLUSION_COUNTER,133.size = sizeof(struct fd5_query_sample),134.resume = occlusion_resume,135.pause = occlusion_pause,136.result = occlusion_counter_result,137};138139static const struct fd_acc_sample_provider occlusion_predicate = {140.query_type = PIPE_QUERY_OCCLUSION_PREDICATE,141.size = sizeof(struct fd5_query_sample),142.resume = occlusion_resume,143.pause = occlusion_pause,144.result = occlusion_predicate_result,145};146147static const struct fd_acc_sample_provider occlusion_predicate_conservative = {148.query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,149.size = sizeof(struct fd5_query_sample),150.resume = occlusion_resume,151.pause = occlusion_pause,152.result = occlusion_predicate_result,153};154155/*156* Timestamp Queries:157*/158159static void160timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt161{162struct fd_ringbuffer *ring = batch->draw;163164OUT_PKT7(ring, CP_EVENT_WRITE, 4);165OUT_RING(ring,166CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);167OUT_RELOC(ring, query_sample(aq, start));168OUT_RING(ring, 0x00000000);169170fd_reset_wfi(batch);171}172173static void174timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt175{176struct fd_ringbuffer *ring = batch->draw;177178OUT_PKT7(ring, CP_EVENT_WRITE, 4);179OUT_RING(ring,180CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);181OUT_RELOC(ring, query_sample(aq, stop));182OUT_RING(ring, 0x00000000);183184fd_reset_wfi(batch);185fd_wfi(batch, ring);186187/* result += stop - start: */188OUT_PKT7(ring, CP_MEM_TO_MEM, 9);189OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);190OUT_RELOC(ring, query_sample(aq, result)); /* dst */191OUT_RELOC(ring, query_sample(aq, result)); /* srcA */192OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */193OUT_RELOC(ring, query_sample(aq, start)); /* srcC */194}195196static uint64_t197ticks_to_ns(uint32_t ts)198{199/* This is based on the 19.2MHz always-on rbbm timer.200*201* TODO we should probably query this value from kernel..202*/203return ts * (1000000000 / 19200000);204}205206static void207time_elapsed_accumulate_result(struct fd_acc_query *aq, void *buf,208union pipe_query_result *result)209{210struct fd5_query_sample *sp = buf;211result->u64 = ticks_to_ns(sp->result);212}213214static void215timestamp_accumulate_result(struct fd_acc_query *aq, void *buf,216union pipe_query_result *result)217{218struct fd5_query_sample *sp = buf;219result->u64 = ticks_to_ns(sp->result);220}221222static const struct fd_acc_sample_provider time_elapsed = {223.query_type = PIPE_QUERY_TIME_ELAPSED,224.always = true,225.size = sizeof(struct fd5_query_sample),226.resume = timestamp_resume,227.pause = timestamp_pause,228.result = time_elapsed_accumulate_result,229};230231/* NOTE: timestamp query isn't going to give terribly sensible results232* on a tiler. But it is needed by qapitrace profile heatmap. If you233* add in a binning pass, the results get even more non-sensical. So234* we just return the timestamp on the first tile and hope that is235* kind of good enough.236*/237238static const struct fd_acc_sample_provider timestamp = {239.query_type = PIPE_QUERY_TIMESTAMP,240.always = true,241.size = sizeof(struct fd5_query_sample),242.resume = timestamp_resume,243.pause = timestamp_pause,244.result = timestamp_accumulate_result,245};246247/*248* Performance Counter (batch) queries:249*250* Only one of these is active at a time, per design of the gallium251* batch_query API design. On perfcntr query tracks N query_types,252* each of which has a 'fd_batch_query_entry' that maps it back to253* the associated group and counter.254*/255256struct fd_batch_query_entry {257uint8_t gid; /* group-id */258uint8_t cid; /* countable-id within the group */259};260261struct fd_batch_query_data {262struct fd_screen *screen;263unsigned num_query_entries;264struct fd_batch_query_entry query_entries[];265};266267static void268perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt269{270struct fd_batch_query_data *data = aq->query_data;271struct fd_screen *screen = data->screen;272struct fd_ringbuffer *ring = batch->draw;273274unsigned counters_per_group[screen->num_perfcntr_groups];275memset(counters_per_group, 0, sizeof(counters_per_group));276277fd_wfi(batch, ring);278279/* configure performance counters for the requested queries: */280for (unsigned i = 0; i < data->num_query_entries; i++) {281struct fd_batch_query_entry *entry = &data->query_entries[i];282const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];283unsigned counter_idx = counters_per_group[entry->gid]++;284285debug_assert(counter_idx < g->num_counters);286287OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);288OUT_RING(ring, g->countables[entry->cid].selector);289}290291memset(counters_per_group, 0, sizeof(counters_per_group));292293/* and snapshot the start values */294for (unsigned i = 0; i < data->num_query_entries; i++) {295struct fd_batch_query_entry *entry = &data->query_entries[i];296const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];297unsigned counter_idx = counters_per_group[entry->gid]++;298const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];299300OUT_PKT7(ring, CP_REG_TO_MEM, 3);301OUT_RING(ring, CP_REG_TO_MEM_0_64B |302CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));303OUT_RELOC(ring, query_sample_idx(aq, i, start));304}305}306307static void308perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt309{310struct fd_batch_query_data *data = aq->query_data;311struct fd_screen *screen = data->screen;312struct fd_ringbuffer *ring = batch->draw;313314unsigned counters_per_group[screen->num_perfcntr_groups];315memset(counters_per_group, 0, sizeof(counters_per_group));316317fd_wfi(batch, ring);318319/* TODO do we need to bother to turn anything off? */320321/* snapshot the end values: */322for (unsigned i = 0; i < data->num_query_entries; i++) {323struct fd_batch_query_entry *entry = &data->query_entries[i];324const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];325unsigned counter_idx = counters_per_group[entry->gid]++;326const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];327328OUT_PKT7(ring, CP_REG_TO_MEM, 3);329OUT_RING(ring, CP_REG_TO_MEM_0_64B |330CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));331OUT_RELOC(ring, query_sample_idx(aq, i, stop));332}333334/* and compute the result: */335for (unsigned i = 0; i < data->num_query_entries; i++) {336/* result += stop - start: */337OUT_PKT7(ring, CP_MEM_TO_MEM, 9);338OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);339OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* dst */340OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* srcA */341OUT_RELOC(ring, query_sample_idx(aq, i, stop)); /* srcB */342OUT_RELOC(ring, query_sample_idx(aq, i, start)); /* srcC */343}344}345346static void347perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,348union pipe_query_result *result)349{350struct fd_batch_query_data *data = aq->query_data;351struct fd5_query_sample *sp = buf;352353for (unsigned i = 0; i < data->num_query_entries; i++) {354result->batch[i].u64 = sp[i].result;355}356}357358static const struct fd_acc_sample_provider perfcntr = {359.query_type = FD_QUERY_FIRST_PERFCNTR,360.always = true,361.resume = perfcntr_resume,362.pause = perfcntr_pause,363.result = perfcntr_accumulate_result,364};365366static struct pipe_query *367fd5_create_batch_query(struct pipe_context *pctx, unsigned num_queries,368unsigned *query_types)369{370struct fd_context *ctx = fd_context(pctx);371struct fd_screen *screen = ctx->screen;372struct fd_query *q;373struct fd_acc_query *aq;374struct fd_batch_query_data *data;375376data = CALLOC_VARIANT_LENGTH_STRUCT(377fd_batch_query_data, num_queries * sizeof(data->query_entries[0]));378379data->screen = screen;380data->num_query_entries = num_queries;381382/* validate the requested query_types and ensure we don't try383* to request more query_types of a given group than we have384* counters:385*/386unsigned counters_per_group[screen->num_perfcntr_groups];387memset(counters_per_group, 0, sizeof(counters_per_group));388389for (unsigned i = 0; i < num_queries; i++) {390unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;391392/* verify valid query_type, ie. is it actually a perfcntr? */393if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||394(idx >= screen->num_perfcntr_queries)) {395mesa_loge("invalid batch query query_type: %u", query_types[i]);396goto error;397}398399struct fd_batch_query_entry *entry = &data->query_entries[i];400struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];401402entry->gid = pq->group_id;403404/* the perfcntr_queries[] table flattens all the countables405* for each group in series, ie:406*407* (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...408*409* So to find the countable index just step back through the410* table to find the first entry with the same group-id.411*/412while (pq > screen->perfcntr_queries) {413pq--;414if (pq->group_id == entry->gid)415entry->cid++;416}417418if (counters_per_group[entry->gid] >=419screen->perfcntr_groups[entry->gid].num_counters) {420mesa_loge("too many counters for group %u\n", entry->gid);421goto error;422}423424counters_per_group[entry->gid]++;425}426427q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);428aq = fd_acc_query(q);429430/* sample buffer size is based on # of queries: */431aq->size = num_queries * sizeof(struct fd5_query_sample);432aq->query_data = data;433434return (struct pipe_query *)q;435436error:437free(data);438return NULL;439}440441void442fd5_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis443{444struct fd_context *ctx = fd_context(pctx);445446ctx->create_query = fd_acc_create_query;447ctx->query_update_batch = fd_acc_query_update_batch;448449pctx->create_batch_query = fd5_create_batch_query;450451fd_acc_query_register_provider(pctx, &occlusion_counter);452fd_acc_query_register_provider(pctx, &occlusion_predicate);453fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);454455fd_acc_query_register_provider(pctx, &time_elapsed);456fd_acc_query_register_provider(pctx, ×tamp);457}458459460