Path: blob/21.2-virgl/src/gallium/drivers/radeonsi/gfx10_query.c
4570 views
/*1* Copyright 2018 Advanced Micro Devices, Inc.2* All Rights Reserved.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* on the rights to use, copy, modify, merge, publish, distribute, sub8* license, and/or sell copies of the Software, and to permit persons to whom9* the Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL18* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,19* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR20* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE21* USE OR OTHER DEALINGS IN THE SOFTWARE.22*/2324#include "si_pipe.h"25#include "si_query.h"26#include "sid.h"27#include "util/u_memory.h"28#include "util/u_suballoc.h"2930#include <stddef.h>3132static void emit_shader_query(struct si_context *sctx)33{34assert(!list_is_empty(&sctx->shader_query_buffers));3536struct gfx10_sh_query_buffer *qbuf =37list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);38qbuf->head += sizeof(struct gfx10_sh_query_buffer_mem);39}4041static void gfx10_release_query_buffers(struct si_context *sctx,42struct gfx10_sh_query_buffer *first,43struct gfx10_sh_query_buffer *last)44{45while (first) {46struct gfx10_sh_query_buffer *qbuf = first;47if (first != last)48first = LIST_ENTRY(struct gfx10_sh_query_buffer, qbuf->list.next, list);49else50first = NULL;5152qbuf->refcount--;53if (qbuf->refcount)54continue;5556if (qbuf->list.next == &sctx->shader_query_buffers)57continue; /* keep the most recent buffer; it may not be full yet */58if (qbuf->list.prev == &sctx->shader_query_buffers)59continue; /* keep the oldest buffer for recycling */6061list_del(&qbuf->list);62si_resource_reference(&qbuf->buf, NULL);63FREE(qbuf);64}65}6667static bool gfx10_alloc_query_buffer(struct si_context *sctx)68{69if (si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query))70return true;7172struct gfx10_sh_query_buffer *qbuf = NULL;7374if (!list_is_empty(&sctx->shader_query_buffers)) {75qbuf = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);76if (qbuf->head + sizeof(struct gfx10_sh_query_buffer_mem) <= qbuf->buf->b.b.width0)77goto success;7879qbuf = list_first_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);80if (!qbuf->refcount &&81!si_cs_is_buffer_referenced(sctx, qbuf->buf->buf, RADEON_USAGE_READWRITE) &&82sctx->ws->buffer_wait(sctx->ws, qbuf->buf->buf, 0, RADEON_USAGE_READWRITE)) {83/* Can immediately re-use the oldest buffer */84list_del(&qbuf->list);85} else {86qbuf = NULL;87}88}8990if (!qbuf) {91qbuf = CALLOC_STRUCT(gfx10_sh_query_buffer);92if (unlikely(!qbuf))93return false;9495struct si_screen *screen = sctx->screen;96unsigned buf_size =97MAX2(sizeof(struct gfx10_sh_query_buffer_mem), screen->info.min_alloc_size);98qbuf->buf = si_resource(pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size));99if (unlikely(!qbuf->buf)) {100FREE(qbuf);101return false;102}103}104105/* The buffer is currently unused by the GPU. Initialize it.106*107* We need to set the high bit of all the primitive counters for108* compatibility with the SET_PREDICATION packet.109*/110uint64_t *results = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL,111PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED);112assert(results);113114for (unsigned i = 0, e = qbuf->buf->b.b.width0 / sizeof(struct gfx10_sh_query_buffer_mem); i < e;115++i) {116for (unsigned j = 0; j < 16; ++j)117results[32 * i + j] = (uint64_t)1 << 63;118results[32 * i + 16] = 0;119}120121list_addtail(&qbuf->list, &sctx->shader_query_buffers);122qbuf->head = 0;123qbuf->refcount = sctx->num_active_shader_queries;124125success:;126struct pipe_shader_buffer sbuf;127sbuf.buffer = &qbuf->buf->b.b;128sbuf.buffer_offset = qbuf->head;129sbuf.buffer_size = sizeof(struct gfx10_sh_query_buffer_mem);130si_set_internal_shader_buffer(sctx, GFX10_GS_QUERY_BUF, &sbuf);131sctx->current_vs_state |= S_VS_STATE_STREAMOUT_QUERY_ENABLED(1);132133si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_query);134return true;135}136137static void gfx10_sh_query_destroy(struct si_context *sctx, struct si_query *rquery)138{139struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;140gfx10_release_query_buffers(sctx, query->first, query->last);141FREE(query);142}143144static bool gfx10_sh_query_begin(struct si_context *sctx, struct si_query *rquery)145{146struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;147148gfx10_release_query_buffers(sctx, query->first, query->last);149query->first = query->last = NULL;150151if (unlikely(!gfx10_alloc_query_buffer(sctx)))152return false;153154query->first = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);155query->first_begin = query->first->head;156157sctx->num_active_shader_queries++;158query->first->refcount++;159160return true;161}162163static bool gfx10_sh_query_end(struct si_context *sctx, struct si_query *rquery)164{165struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;166167if (unlikely(!query->first))168return false; /* earlier out of memory error */169170query->last = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);171query->last_end = query->last->head;172173/* Signal the fence of the previous chunk */174if (query->last_end != 0) {175uint64_t fence_va = query->last->buf->gpu_address;176fence_va += query->last_end - sizeof(struct gfx10_sh_query_buffer_mem);177fence_va += offsetof(struct gfx10_sh_query_buffer_mem, fence);178si_cp_release_mem(sctx, &sctx->gfx_cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,179EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, query->last->buf, fence_va,1800xffffffff, PIPE_QUERY_GPU_FINISHED);181}182183sctx->num_active_shader_queries--;184185if (sctx->num_active_shader_queries <= 0 || !si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query)) {186si_set_internal_shader_buffer(sctx, GFX10_GS_QUERY_BUF, NULL);187sctx->current_vs_state &= C_VS_STATE_STREAMOUT_QUERY_ENABLED;188189/* If a query_begin is followed by a query_end without a draw190* in-between, we need to clear the atom to ensure that the191* next query_begin will re-initialize the shader buffer. */192si_set_atom_dirty(sctx, &sctx->atoms.s.shader_query, false);193}194195return true;196}197198static void gfx10_sh_query_add_result(struct gfx10_sh_query *query,199struct gfx10_sh_query_buffer_mem *qmem,200union pipe_query_result *result)201{202static const uint64_t mask = ((uint64_t)1 << 63) - 1;203204switch (query->b.type) {205case PIPE_QUERY_PRIMITIVES_EMITTED:206result->u64 += qmem->stream[query->stream].emitted_primitives & mask;207break;208case PIPE_QUERY_PRIMITIVES_GENERATED:209result->u64 += qmem->stream[query->stream].generated_primitives & mask;210break;211case PIPE_QUERY_SO_STATISTICS:212result->so_statistics.num_primitives_written +=213qmem->stream[query->stream].emitted_primitives & mask;214result->so_statistics.primitives_storage_needed +=215qmem->stream[query->stream].generated_primitives & mask;216break;217case PIPE_QUERY_SO_OVERFLOW_PREDICATE:218result->b |= qmem->stream[query->stream].emitted_primitives !=219qmem->stream[query->stream].generated_primitives;220break;221case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:222for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream) {223result->b |= qmem->stream[stream].emitted_primitives !=224qmem->stream[stream].generated_primitives;225}226break;227default:228assert(0);229}230}231232static bool gfx10_sh_query_get_result(struct si_context *sctx, struct si_query *rquery, bool wait,233union pipe_query_result *result)234{235struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;236237util_query_clear_result(result, query->b.type);238239if (unlikely(!query->first))240return false; /* earlier out of memory error */241assert(query->last);242243for (struct gfx10_sh_query_buffer *qbuf = query->last;;244qbuf = LIST_ENTRY(struct gfx10_sh_query_buffer, qbuf->list.prev, list)) {245unsigned usage = PIPE_MAP_READ | (wait ? 0 : PIPE_MAP_DONTBLOCK);246void *map;247248if (rquery->b.flushed)249map = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL, usage);250else251map = si_buffer_map(sctx, qbuf->buf, usage);252253if (!map)254return false;255256unsigned results_begin = 0;257unsigned results_end = qbuf->head;258if (qbuf == query->first)259results_begin = query->first_begin;260if (qbuf == query->last)261results_end = query->last_end;262263while (results_begin != results_end) {264struct gfx10_sh_query_buffer_mem *qmem = map + results_begin;265results_begin += sizeof(*qmem);266267gfx10_sh_query_add_result(query, qmem, result);268}269270if (qbuf == query->first)271break;272}273274return true;275}276277static void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct si_query *rquery,278bool wait, enum pipe_query_value_type result_type,279int index, struct pipe_resource *resource,280unsigned offset)281{282struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;283struct si_qbo_state saved_state = {};284struct pipe_resource *tmp_buffer = NULL;285unsigned tmp_buffer_offset = 0;286287if (!sctx->sh_query_result_shader) {288sctx->sh_query_result_shader = gfx10_create_sh_query_result_cs(sctx);289if (!sctx->sh_query_result_shader)290return;291}292293if (query->first != query->last) {294u_suballocator_alloc(&sctx->allocator_zeroed_memory, 16, 16, &tmp_buffer_offset, &tmp_buffer);295if (!tmp_buffer)296return;297}298299si_save_qbo_state(sctx, &saved_state);300301/* Pre-fill the constants configuring the shader behavior. */302struct {303uint32_t config;304uint32_t offset;305uint32_t chain;306uint32_t result_count;307} consts;308struct pipe_constant_buffer constant_buffer = {};309310if (index >= 0) {311switch (query->b.type) {312case PIPE_QUERY_PRIMITIVES_GENERATED:313consts.offset = 4 * sizeof(uint64_t) * query->stream + 2 * sizeof(uint64_t);314consts.config = 0;315break;316case PIPE_QUERY_PRIMITIVES_EMITTED:317consts.offset = 4 * sizeof(uint64_t) * query->stream + 3 * sizeof(uint64_t);318consts.config = 0;319break;320case PIPE_QUERY_SO_STATISTICS:321consts.offset = sizeof(uint32_t) * (4 * index + query->stream);322consts.config = 0;323break;324case PIPE_QUERY_SO_OVERFLOW_PREDICATE:325consts.offset = 4 * sizeof(uint64_t) * query->stream;326consts.config = 2;327break;328case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:329consts.offset = 0;330consts.config = 3;331break;332default:333unreachable("bad query type");334}335} else {336/* Check result availability. */337consts.offset = 0;338consts.config = 1;339}340341if (result_type == PIPE_QUERY_TYPE_I64 || result_type == PIPE_QUERY_TYPE_U64)342consts.config |= 8;343344constant_buffer.buffer_size = sizeof(consts);345constant_buffer.user_buffer = &consts;346347/* Pre-fill the SSBOs and grid. */348struct pipe_shader_buffer ssbo[3];349struct pipe_grid_info grid = {};350351ssbo[1].buffer = tmp_buffer;352ssbo[1].buffer_offset = tmp_buffer_offset;353ssbo[1].buffer_size = 16;354355ssbo[2] = ssbo[1];356357grid.block[0] = 1;358grid.block[1] = 1;359grid.block[2] = 1;360grid.grid[0] = 1;361grid.grid[1] = 1;362grid.grid[2] = 1;363364struct gfx10_sh_query_buffer *qbuf = query->first;365for (;;) {366unsigned begin = qbuf == query->first ? query->first_begin : 0;367unsigned end = qbuf == query->last ? query->last_end : qbuf->buf->b.b.width0;368if (!end)369continue;370371ssbo[0].buffer = &qbuf->buf->b.b;372ssbo[0].buffer_offset = begin;373ssbo[0].buffer_size = end - begin;374375consts.result_count = (end - begin) / sizeof(struct gfx10_sh_query_buffer_mem);376consts.chain = 0;377if (qbuf != query->first)378consts.chain |= 1;379if (qbuf != query->last)380consts.chain |= 2;381382if (qbuf == query->last) {383ssbo[2].buffer = resource;384ssbo[2].buffer_offset = offset;385ssbo[2].buffer_size = 8;386}387388sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, false, &constant_buffer);389390if (wait) {391uint64_t va;392393/* Wait for result availability. Wait only for readiness394* of the last entry, since the fence writes should be395* serialized in the CP.396*/397va = qbuf->buf->gpu_address;398va += end - sizeof(struct gfx10_sh_query_buffer_mem);399va += offsetof(struct gfx10_sh_query_buffer_mem, fence);400401si_cp_wait_mem(sctx, &sctx->gfx_cs, va, 0x00000001, 0x00000001, 0);402}403404si_launch_grid_internal_ssbos(sctx, &grid, sctx->sh_query_result_shader,405SI_OP_SYNC_PS_BEFORE | SI_OP_SYNC_AFTER, SI_COHERENCY_SHADER,4063, ssbo, 0x6);407408if (qbuf == query->last)409break;410qbuf = LIST_ENTRY(struct gfx10_sh_query_buffer, qbuf->list.next, list);411}412413si_restore_qbo_state(sctx, &saved_state);414pipe_resource_reference(&tmp_buffer, NULL);415}416417static const struct si_query_ops gfx10_sh_query_ops = {418.destroy = gfx10_sh_query_destroy,419.begin = gfx10_sh_query_begin,420.end = gfx10_sh_query_end,421.get_result = gfx10_sh_query_get_result,422.get_result_resource = gfx10_sh_query_get_result_resource,423};424425struct pipe_query *gfx10_sh_query_create(struct si_screen *screen, enum pipe_query_type query_type,426unsigned index)427{428struct gfx10_sh_query *query = CALLOC_STRUCT(gfx10_sh_query);429if (unlikely(!query))430return NULL;431432query->b.ops = &gfx10_sh_query_ops;433query->b.type = query_type;434query->stream = index;435436return (struct pipe_query *)query;437}438439void gfx10_init_query(struct si_context *sctx)440{441list_inithead(&sctx->shader_query_buffers);442sctx->atoms.s.shader_query.emit = emit_shader_query;443}444445void gfx10_destroy_query(struct si_context *sctx)446{447while (!list_is_empty(&sctx->shader_query_buffers)) {448struct gfx10_sh_query_buffer *qbuf =449list_first_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);450list_del(&qbuf->list);451452assert(!qbuf->refcount);453si_resource_reference(&qbuf->buf, NULL);454FREE(qbuf);455}456}457458459