Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
4574 views
/*1* Copyright 2011 Christoph Bumiller2* Copyright 2015 Samuel Pitoiset3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice shall be included in12* all copies or substantial portions of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR18* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,19* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR20* OTHER DEALINGS IN THE SOFTWARE.21*/2223#define NV50_PUSH_EXPLICIT_SPACE_CHECKING2425#include "nv50/nv50_context.h"26#include "nv50/nv50_query_hw.h"27#include "nv50/nv50_query_hw_metric.h"28#include "nv50/nv50_query_hw_sm.h"29#include "nv_object.xml.h"3031/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts32* (since we use only a single GPU channel per screen) will not work properly.33*34* The first is not that big of an issue because OpenGL does not allow nested35* queries anyway.36*/3738#define NV50_HW_QUERY_ALLOC_SPACE 2563940bool41nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q,42int size)43{44struct nv50_screen *screen = nv50->screen;45struct nv50_hw_query *hq = nv50_hw_query(q);46int ret;4748if (hq->bo) {49nouveau_bo_ref(NULL, &hq->bo);50if (hq->mm) {51if (hq->state == NV50_HW_QUERY_STATE_READY)52nouveau_mm_free(hq->mm);53else54nouveau_fence_work(screen->base.fence.current,55nouveau_mm_free_work, hq->mm);56}57}58if (size) {59hq->mm = nouveau_mm_allocate(screen->base.mm_GART, size,60&hq->bo, &hq->base_offset);61if (!hq->bo)62return false;63hq->offset = hq->base_offset;6465ret = nouveau_bo_map(hq->bo, 0, screen->base.client);66if (ret) {67nv50_hw_query_allocate(nv50, q, 0);68return false;69}70hq->data = (uint32_t *)((uint8_t *)hq->bo->map + hq->base_offset);71}72return true;73}7475static void76nv50_hw_query_get(struct nouveau_pushbuf *push, struct nv50_query *q,77unsigned offset, uint32_t get)78{79struct nv50_hw_query *hq = nv50_hw_query(q);8081offset += hq->offset;8283PUSH_SPACE(push, 5);84PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);85BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4);86PUSH_DATAh(push, hq->bo->offset + offset);87PUSH_DATA (push, hq->bo->offset + offset);88PUSH_DATA (push, hq->sequence);89PUSH_DATA (push, get);90}9192static inline void93nv50_hw_query_update(struct nv50_query *q)94{95struct nv50_hw_query *hq = nv50_hw_query(q);9697if (hq->is64bit) {98if (nouveau_fence_signalled(hq->fence))99hq->state = NV50_HW_QUERY_STATE_READY;100} else {101if (hq->data[0] == hq->sequence)102hq->state = NV50_HW_QUERY_STATE_READY;103}104}105106static void107nv50_hw_destroy_query(struct nv50_context *nv50, struct nv50_query *q)108{109struct nv50_hw_query *hq = nv50_hw_query(q);110111if (hq->funcs && hq->funcs->destroy_query) {112hq->funcs->destroy_query(nv50, hq);113return;114}115116nv50_hw_query_allocate(nv50, q, 0);117nouveau_fence_ref(NULL, &hq->fence);118FREE(hq);119}120121static bool122nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)123{124struct nouveau_pushbuf *push = nv50->base.pushbuf;125struct nv50_hw_query *hq = nv50_hw_query(q);126127if (hq->funcs && hq->funcs->begin_query)128return hq->funcs->begin_query(nv50, hq);129130/* For occlusion queries we have to change the storage, because a previous131* query might set the initial render condition to false even *after* we re-132* initialized it to true.133*/134if (hq->rotate) {135hq->offset += hq->rotate;136hq->data += hq->rotate / sizeof(*hq->data);137if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE)138nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE);139140/* XXX: can we do this with the GPU, and sync with respect to a previous141* query ?142*/143hq->data[0] = hq->sequence; /* initialize sequence */144hq->data[1] = 1; /* initial render condition = true */145hq->data[4] = hq->sequence + 1; /* for comparison COND_MODE */146hq->data[5] = 0;147}148hq->sequence++;149150switch (q->type) {151case PIPE_QUERY_OCCLUSION_COUNTER:152case PIPE_QUERY_OCCLUSION_PREDICATE:153case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:154if (nv50->screen->num_occlusion_queries_active++) {155nv50_hw_query_get(push, q, 0x10, 0x0100f002);156} else {157PUSH_SPACE(push, 4);158BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);159PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);160BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);161PUSH_DATA (push, 1);162}163break;164case PIPE_QUERY_PRIMITIVES_GENERATED:165nv50_hw_query_get(push, q, 0x20, 0x06805002);166break;167case PIPE_QUERY_PRIMITIVES_EMITTED:168nv50_hw_query_get(push, q, 0x20, 0x05805002);169break;170case PIPE_QUERY_SO_STATISTICS:171nv50_hw_query_get(push, q, 0x30, 0x05805002);172nv50_hw_query_get(push, q, 0x40, 0x06805002);173break;174case PIPE_QUERY_PIPELINE_STATISTICS:175nv50_hw_query_get(push, q, 0x90, 0x00801002); /* VFETCH, VERTICES */176nv50_hw_query_get(push, q, 0xa0, 0x01801002); /* VFETCH, PRIMS */177nv50_hw_query_get(push, q, 0xb0, 0x02802002); /* VP, LAUNCHES */178nv50_hw_query_get(push, q, 0xc0, 0x03806002); /* GP, LAUNCHES */179nv50_hw_query_get(push, q, 0xd0, 0x04806002); /* GP, PRIMS_OUT */180nv50_hw_query_get(push, q, 0xe0, 0x07804002); /* RAST, PRIMS_IN */181nv50_hw_query_get(push, q, 0xf0, 0x08804002); /* RAST, PRIMS_OUT */182nv50_hw_query_get(push, q, 0x100, 0x0980a002); /* ROP, PIXELS */183((uint64_t *)hq->data)[2 * 0x11] = nv50->compute_invocations;184break;185case PIPE_QUERY_TIME_ELAPSED:186nv50_hw_query_get(push, q, 0x10, 0x00005002);187break;188default:189assert(0);190return false;191}192hq->state = NV50_HW_QUERY_STATE_ACTIVE;193return true;194}195196static void197nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)198{199struct nouveau_pushbuf *push = nv50->base.pushbuf;200struct nv50_hw_query *hq = nv50_hw_query(q);201202if (hq->funcs && hq->funcs->end_query) {203hq->funcs->end_query(nv50, hq);204return;205}206207hq->state = NV50_HW_QUERY_STATE_ENDED;208209switch (q->type) {210case PIPE_QUERY_OCCLUSION_COUNTER:211case PIPE_QUERY_OCCLUSION_PREDICATE:212case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:213nv50_hw_query_get(push, q, 0, 0x0100f002);214if (--nv50->screen->num_occlusion_queries_active == 0) {215PUSH_SPACE(push, 2);216BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);217PUSH_DATA (push, 0);218}219break;220case PIPE_QUERY_PRIMITIVES_GENERATED:221nv50_hw_query_get(push, q, 0x10, 0x06805002);222nv50_hw_query_get(push, q, 0x00, 0x00005010);223break;224case PIPE_QUERY_PRIMITIVES_EMITTED:225nv50_hw_query_get(push, q, 0x10, 0x05805002);226nv50_hw_query_get(push, q, 0x00, 0x00005010);227break;228case PIPE_QUERY_SO_STATISTICS:229nv50_hw_query_get(push, q, 0x10, 0x05805002);230nv50_hw_query_get(push, q, 0x20, 0x06805002);231nv50_hw_query_get(push, q, 0x00, 0x00005010);232break;233case PIPE_QUERY_PIPELINE_STATISTICS:234nv50_hw_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */235nv50_hw_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */236nv50_hw_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */237nv50_hw_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */238nv50_hw_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */239nv50_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */240nv50_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */241nv50_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */242((uint64_t *)hq->data)[2 * 0x8] = nv50->compute_invocations;243break;244case PIPE_QUERY_TIMESTAMP:245hq->sequence++;246FALLTHROUGH;247case PIPE_QUERY_TIME_ELAPSED:248nv50_hw_query_get(push, q, 0, 0x00005002);249break;250case PIPE_QUERY_GPU_FINISHED:251hq->sequence++;252nv50_hw_query_get(push, q, 0, 0x1000f010);253break;254case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:255hq->sequence++;256nv50_hw_query_get(push, q, 0, 0x0d005002 | (q->index << 5));257break;258case PIPE_QUERY_TIMESTAMP_DISJOINT:259/* This query is not issued on GPU because disjoint is forced to false */260hq->state = NV50_HW_QUERY_STATE_READY;261break;262default:263assert(0);264break;265}266if (hq->is64bit)267nouveau_fence_ref(nv50->screen->base.fence.current, &hq->fence);268}269270static bool271nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,272bool wait, union pipe_query_result *result)273{274struct nv50_hw_query *hq = nv50_hw_query(q);275uint64_t *res64 = (uint64_t *)result;276uint32_t *res32 = (uint32_t *)result;277uint8_t *res8 = (uint8_t *)result;278uint64_t *data64 = (uint64_t *)hq->data;279int i;280281if (hq->funcs && hq->funcs->get_query_result)282return hq->funcs->get_query_result(nv50, hq, wait, result);283284if (hq->state != NV50_HW_QUERY_STATE_READY)285nv50_hw_query_update(q);286287if (hq->state != NV50_HW_QUERY_STATE_READY) {288if (!wait) {289/* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */290if (hq->state != NV50_HW_QUERY_STATE_FLUSHED) {291hq->state = NV50_HW_QUERY_STATE_FLUSHED;292PUSH_KICK(nv50->base.pushbuf);293}294return false;295}296if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client))297return false;298}299hq->state = NV50_HW_QUERY_STATE_READY;300301switch (q->type) {302case PIPE_QUERY_GPU_FINISHED:303res8[0] = true;304break;305case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */306res64[0] = hq->data[1] - hq->data[5];307break;308case PIPE_QUERY_OCCLUSION_PREDICATE:309case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:310res8[0] = hq->data[1] != hq->data[5];311break;312case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */313case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */314res64[0] = data64[2] - data64[4];315break;316case PIPE_QUERY_SO_STATISTICS:317res64[0] = data64[2] - data64[6];318res64[1] = data64[4] - data64[8];319break;320case PIPE_QUERY_PIPELINE_STATISTICS:321for (i = 0; i < 8; ++i)322res64[i] = data64[i * 2] - data64[18 + i * 2];323result->pipeline_statistics.cs_invocations = data64[i * 2] - data64[18 + i * 2];324break;325case PIPE_QUERY_TIMESTAMP:326res64[0] = data64[1];327break;328case PIPE_QUERY_TIMESTAMP_DISJOINT:329res64[0] = 1000000000;330res8[8] = false;331break;332case PIPE_QUERY_TIME_ELAPSED:333res64[0] = data64[1] - data64[3];334break;335case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:336res32[0] = hq->data[1];337break;338default:339assert(0);340return false;341}342343return true;344}345346static const struct nv50_query_funcs hw_query_funcs = {347.destroy_query = nv50_hw_destroy_query,348.begin_query = nv50_hw_begin_query,349.end_query = nv50_hw_end_query,350.get_query_result = nv50_hw_get_query_result,351};352353struct nv50_query *354nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)355{356struct nv50_hw_query *hq;357struct nv50_query *q;358unsigned space = NV50_HW_QUERY_ALLOC_SPACE;359360hq = nv50_hw_sm_create_query(nv50, type);361if (hq) {362hq->base.funcs = &hw_query_funcs;363return (struct nv50_query *)hq;364}365366hq = nv50_hw_metric_create_query(nv50, type);367if (hq) {368hq->base.funcs = &hw_query_funcs;369return (struct nv50_query *)hq;370}371372hq = CALLOC_STRUCT(nv50_hw_query);373if (!hq)374return NULL;375376q = &hq->base;377q->funcs = &hw_query_funcs;378q->type = type;379380switch (q->type) {381case PIPE_QUERY_OCCLUSION_COUNTER:382case PIPE_QUERY_OCCLUSION_PREDICATE:383case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:384hq->rotate = 32;385break;386case PIPE_QUERY_PRIMITIVES_GENERATED:387case PIPE_QUERY_PRIMITIVES_EMITTED:388space = 32 + 16; /* separate fence value written here */389break;390case PIPE_QUERY_SO_STATISTICS:391space = 64 + 16; /* separate fence value written here */392break;393case PIPE_QUERY_PIPELINE_STATISTICS:394hq->is64bit = true;395space = 9 * 2 * 16; /* 9 values, start/end, 16-bytes each */396break;397case PIPE_QUERY_TIME_ELAPSED:398case PIPE_QUERY_TIMESTAMP:399case PIPE_QUERY_TIMESTAMP_DISJOINT:400case PIPE_QUERY_GPU_FINISHED:401space = 32;402break;403case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:404space = 16;405break;406default:407debug_printf("invalid query type: %u\n", type);408FREE(q);409return NULL;410}411412if (!nv50_hw_query_allocate(nv50, q, space)) {413FREE(hq);414return NULL;415}416417if (hq->rotate) {418/* we advance before query_begin ! */419hq->offset -= hq->rotate;420hq->data -= hq->rotate / sizeof(*hq->data);421} else422if (!hq->is64bit)423hq->data[0] = 0; /* initialize sequence */424425return q;426}427428int429nv50_hw_get_driver_query_info(struct nv50_screen *screen, unsigned id,430struct pipe_driver_query_info *info)431{432int num_hw_sm_queries = 0, num_hw_metric_queries = 0;433434num_hw_sm_queries = nv50_hw_sm_get_driver_query_info(screen, 0, NULL);435num_hw_metric_queries =436nv50_hw_metric_get_driver_query_info(screen, 0, NULL);437438if (!info)439return num_hw_sm_queries + num_hw_metric_queries;440441if (id < num_hw_sm_queries)442return nv50_hw_sm_get_driver_query_info(screen, id, info);443444return nv50_hw_metric_get_driver_query_info(screen,445id - num_hw_sm_queries, info);446}447448void449nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,450struct nv50_query *q, unsigned result_offset)451{452struct nv50_hw_query *hq = nv50_hw_query(q);453454nv50_hw_query_update(q);455if (hq->state != NV50_HW_QUERY_STATE_READY)456nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, push->client);457hq->state = NV50_HW_QUERY_STATE_READY;458459BEGIN_NV04(push, SUBC_3D(method), 1);460PUSH_DATA (push, hq->data[result_offset / 4]);461}462463void464nv84_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q)465{466struct nv50_hw_query *hq = nv50_hw_query(q);467unsigned offset = hq->offset;468469assert(!hq->is64bit);470471PUSH_SPACE(push, 5);472PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);473BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);474PUSH_DATAh(push, hq->bo->offset + offset);475PUSH_DATA (push, hq->bo->offset + offset);476PUSH_DATA (push, hq->sequence);477PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);478}479480481