Path: blob/21.2-virgl/src/gallium/drivers/svga/svga_pipe_query.c
4570 views
/**********************************************************1* Copyright 2008-2015 VMware, Inc. All rights reserved.2*3* Permission is hereby granted, free of charge, to any person4* obtaining a copy of this software and associated documentation5* files (the "Software"), to deal in the Software without6* restriction, including without limitation the rights to use, copy,7* modify, merge, publish, distribute, sublicense, and/or sell copies8* of the Software, and to permit persons to whom the Software is9* furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice shall be12* included in all copies or substantial portions of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,15* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF16* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND17* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS18* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN19* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN20* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE21* SOFTWARE.22*23**********************************************************/2425#include "pipe/p_state.h"26#include "pipe/p_context.h"2728#include "util/u_bitmask.h"29#include "util/u_memory.h"3031#include "svga_cmd.h"32#include "svga_context.h"33#include "svga_screen.h"34#include "svga_resource_buffer.h"35#include "svga_winsys.h"36#include "svga_debug.h"373839/* Fixme: want a public base class for all pipe structs, even if there40* isn't much in them.41*/42struct pipe_query {43int dummy;44};4546struct svga_query {47struct pipe_query base;48unsigned type; /**< PIPE_QUERY_x or SVGA_QUERY_x */49SVGA3dQueryType svga_type; /**< SVGA3D_QUERYTYPE_x or unused */5051unsigned id; /** Per-context query identifier */52boolean active; /** TRUE if query is active */5354struct pipe_fence_handle *fence;5556/** For PIPE_QUERY_OCCLUSION_COUNTER / SVGA3D_QUERYTYPE_OCCLUSION */5758/* For VGPU9 */59struct svga_winsys_buffer *hwbuf;60volatile SVGA3dQueryResult *queryResult;6162/** For VGPU10 */63struct svga_winsys_gb_query *gb_query;64SVGA3dDXQueryFlags flags;65unsigned offset; /**< offset to the gb_query memory */66struct pipe_query *predicate; /** The associated query that can be used for predicate */6768/** For non-GPU SVGA_QUERY_x queries */69uint64_t begin_count, end_count;70};717273/** cast wrapper */74static inline struct svga_query *75svga_query(struct pipe_query *q)76{77return (struct svga_query *)q;78}7980/**81* VGPU982*/8384static bool85svga_get_query_result(struct pipe_context *pipe,86struct pipe_query *q,87bool wait,88union pipe_query_result *result);8990static enum pipe_error91define_query_vgpu9(struct svga_context *svga,92struct svga_query *sq)93{94struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;9596sq->hwbuf = svga_winsys_buffer_create(svga, 1,97SVGA_BUFFER_USAGE_PINNED,98sizeof *sq->queryResult);99if (!sq->hwbuf)100return PIPE_ERROR_OUT_OF_MEMORY;101102sq->queryResult = (SVGA3dQueryResult *)103sws->buffer_map(sws, sq->hwbuf, PIPE_MAP_WRITE);104if (!sq->queryResult) {105sws->buffer_destroy(sws, sq->hwbuf);106return PIPE_ERROR_OUT_OF_MEMORY;107}108109sq->queryResult->totalSize = sizeof *sq->queryResult;110sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;111112/* We request the buffer to be pinned and assume it is always mapped.113* The reason is that we don't want to wait for fences when checking the114* query status.115*/116sws->buffer_unmap(sws, sq->hwbuf);117118return PIPE_OK;119}120121static void122begin_query_vgpu9(struct svga_context *svga, struct svga_query *sq)123{124struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;125126if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) {127/* The application doesn't care for the pending query result.128* We cannot let go of the existing buffer and just get a new one129* because its storage may be reused for other purposes and clobbered130* by the host when it determines the query result. So the only131* option here is to wait for the existing query's result -- not a132* big deal, given that no sane application would do this.133*/134uint64_t result;135svga_get_query_result(&svga->pipe, &sq->base, TRUE, (void*)&result);136assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING);137}138139sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;140sws->fence_reference(sws, &sq->fence, NULL);141142SVGA_RETRY(svga, SVGA3D_BeginQuery(svga->swc, sq->svga_type));143}144145static void146end_query_vgpu9(struct svga_context *svga, struct svga_query *sq)147{148/* Set to PENDING before sending EndQuery. */149sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING;150151SVGA_RETRY(svga, SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf));152}153154static bool155get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq,156bool wait, uint64_t *result)157{158struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;159SVGA3dQueryState state;160161if (!sq->fence) {162/* The query status won't be updated by the host unless163* SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause164* a synchronous wait on the host.165*/166SVGA_RETRY(svga, SVGA3D_WaitForQuery(svga->swc, sq->svga_type,167sq->hwbuf));168svga_context_flush(svga, &sq->fence);169assert(sq->fence);170}171172state = sq->queryResult->state;173if (state == SVGA3D_QUERYSTATE_PENDING) {174if (!wait)175return false;176sws->fence_finish(sws, sq->fence, PIPE_TIMEOUT_INFINITE,177SVGA_FENCE_FLAG_QUERY);178state = sq->queryResult->state;179}180181assert(state == SVGA3D_QUERYSTATE_SUCCEEDED ||182state == SVGA3D_QUERYSTATE_FAILED);183184*result = (uint64_t)sq->queryResult->result32;185return true;186}187188189/**190* VGPU10191*192* There is one query mob allocated for each context to be shared by all193* query types. The mob is used to hold queries's state and result. Since194* each query result type is of different length, to ease the query allocation195* management, the mob is divided into memory blocks. Each memory block196* will hold queries of the same type. Multiple memory blocks can be allocated197* for a particular query type.198*199* Currently each memory block is of 184 bytes. We support up to 512200* memory blocks. The query memory size is arbitrary right now.201* Each occlusion query takes about 8 bytes. One memory block can accomodate202* 23 occlusion queries. 512 of those blocks can support up to 11K occlusion203* queries. That seems reasonable for now. If we think this limit is204* not enough, we can increase the limit or try to grow the mob in runtime.205* Note, SVGA device does not impose one mob per context for queries,206* we could allocate multiple mobs for queries; however, wddm KMD does not207* currently support that.208*209* Also note that the GL guest driver does not issue any of the210* following commands: DXMoveQuery, DXBindAllQuery & DXReadbackAllQuery.211*/212#define SVGA_QUERY_MEM_BLOCK_SIZE (sizeof(SVGADXQueryResultUnion) * 2)213#define SVGA_QUERY_MEM_SIZE (512 * SVGA_QUERY_MEM_BLOCK_SIZE)214215struct svga_qmem_alloc_entry216{217unsigned start_offset; /* start offset of the memory block */218unsigned block_index; /* block index of the memory block */219unsigned query_size; /* query size in this memory block */220unsigned nquery; /* number of queries allocated */221struct util_bitmask *alloc_mask; /* allocation mask */222struct svga_qmem_alloc_entry *next; /* next memory block */223};224225226/**227* Allocate a memory block from the query object memory228* \return NULL if out of memory, else pointer to the query memory block229*/230static struct svga_qmem_alloc_entry *231allocate_query_block(struct svga_context *svga)232{233int index;234unsigned offset;235struct svga_qmem_alloc_entry *alloc_entry = NULL;236237/* Find the next available query block */238index = util_bitmask_add(svga->gb_query_alloc_mask);239240if (index == UTIL_BITMASK_INVALID_INDEX)241return NULL;242243offset = index * SVGA_QUERY_MEM_BLOCK_SIZE;244if (offset >= svga->gb_query_len) {245unsigned i;246247/* Deallocate the out-of-range index */248util_bitmask_clear(svga->gb_query_alloc_mask, index);249index = -1;250251/**252* All the memory blocks are allocated, lets see if there is253* any empty memory block around that can be freed up.254*/255for (i = 0; i < SVGA3D_QUERYTYPE_MAX && index == -1; i++) {256struct svga_qmem_alloc_entry *prev_alloc_entry = NULL;257258alloc_entry = svga->gb_query_map[i];259while (alloc_entry && index == -1) {260if (alloc_entry->nquery == 0) {261/* This memory block is empty, it can be recycled. */262if (prev_alloc_entry) {263prev_alloc_entry->next = alloc_entry->next;264} else {265svga->gb_query_map[i] = alloc_entry->next;266}267index = alloc_entry->block_index;268} else {269prev_alloc_entry = alloc_entry;270alloc_entry = alloc_entry->next;271}272}273}274275if (index == -1) {276debug_printf("Query memory object is full\n");277return NULL;278}279}280281if (!alloc_entry) {282assert(index != -1);283alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry);284alloc_entry->block_index = index;285}286287return alloc_entry;288}289290/**291* Allocate a slot in the specified memory block.292* All slots in this memory block are of the same size.293*294* \return -1 if out of memory, else index of the query slot295*/296static int297allocate_query_slot(struct svga_context *svga,298struct svga_qmem_alloc_entry *alloc)299{300int index;301unsigned offset;302303/* Find the next available slot */304index = util_bitmask_add(alloc->alloc_mask);305306if (index == UTIL_BITMASK_INVALID_INDEX)307return -1;308309offset = index * alloc->query_size;310if (offset >= SVGA_QUERY_MEM_BLOCK_SIZE)311return -1;312313alloc->nquery++;314315return index;316}317318/**319* Deallocate the specified slot in the memory block.320* If all slots are freed up, then deallocate the memory block321* as well, so it can be allocated for other query type322*/323static void324deallocate_query_slot(struct svga_context *svga,325struct svga_qmem_alloc_entry *alloc,326unsigned index)327{328assert(index != UTIL_BITMASK_INVALID_INDEX);329330util_bitmask_clear(alloc->alloc_mask, index);331alloc->nquery--;332333/**334* Don't worry about deallocating the empty memory block here.335* The empty memory block will be recycled when no more memory block336* can be allocated.337*/338}339340static struct svga_qmem_alloc_entry *341allocate_query_block_entry(struct svga_context *svga,342unsigned len)343{344struct svga_qmem_alloc_entry *alloc_entry;345346alloc_entry = allocate_query_block(svga);347if (!alloc_entry)348return NULL;349350assert(alloc_entry->block_index != -1);351alloc_entry->start_offset =352alloc_entry->block_index * SVGA_QUERY_MEM_BLOCK_SIZE;353alloc_entry->nquery = 0;354alloc_entry->alloc_mask = util_bitmask_create();355alloc_entry->next = NULL;356alloc_entry->query_size = len;357358return alloc_entry;359}360361/**362* Allocate a memory slot for a query of the specified type.363* It will first search through the memory blocks that are allocated364* for the query type. If no memory slot is available, it will try365* to allocate another memory block within the query object memory for366* this query type.367*/368static int369allocate_query(struct svga_context *svga,370SVGA3dQueryType type,371unsigned len)372{373struct svga_qmem_alloc_entry *alloc_entry;374int slot_index = -1;375unsigned offset;376377assert(type < SVGA3D_QUERYTYPE_MAX);378379alloc_entry = svga->gb_query_map[type];380381if (!alloc_entry) {382/**383* No query memory block has been allocated for this query type,384* allocate one now385*/386alloc_entry = allocate_query_block_entry(svga, len);387if (!alloc_entry)388return -1;389svga->gb_query_map[type] = alloc_entry;390}391392/* Allocate a slot within the memory block allocated for this query type */393slot_index = allocate_query_slot(svga, alloc_entry);394395if (slot_index == -1) {396/* This query memory block is full, allocate another one */397alloc_entry = allocate_query_block_entry(svga, len);398if (!alloc_entry)399return -1;400alloc_entry->next = svga->gb_query_map[type];401svga->gb_query_map[type] = alloc_entry;402slot_index = allocate_query_slot(svga, alloc_entry);403}404405assert(slot_index != -1);406offset = slot_index * len + alloc_entry->start_offset;407408return offset;409}410411412/**413* Deallocate memory slot allocated for the specified query414*/415static void416deallocate_query(struct svga_context *svga,417struct svga_query *sq)418{419struct svga_qmem_alloc_entry *alloc_entry;420unsigned slot_index;421unsigned offset = sq->offset;422423alloc_entry = svga->gb_query_map[sq->svga_type];424425while (alloc_entry) {426if (offset >= alloc_entry->start_offset &&427offset < alloc_entry->start_offset + SVGA_QUERY_MEM_BLOCK_SIZE) {428429/* The slot belongs to this memory block, deallocate it */430slot_index = (offset - alloc_entry->start_offset) /431alloc_entry->query_size;432deallocate_query_slot(svga, alloc_entry, slot_index);433alloc_entry = NULL;434} else {435alloc_entry = alloc_entry->next;436}437}438}439440441/**442* Destroy the gb query object and all the related query structures443*/444static void445destroy_gb_query_obj(struct svga_context *svga)446{447struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;448unsigned i;449450for (i = 0; i < SVGA3D_QUERYTYPE_MAX; i++) {451struct svga_qmem_alloc_entry *alloc_entry, *next;452alloc_entry = svga->gb_query_map[i];453while (alloc_entry) {454next = alloc_entry->next;455util_bitmask_destroy(alloc_entry->alloc_mask);456FREE(alloc_entry);457alloc_entry = next;458}459svga->gb_query_map[i] = NULL;460}461462if (svga->gb_query)463sws->query_destroy(sws, svga->gb_query);464svga->gb_query = NULL;465466util_bitmask_destroy(svga->gb_query_alloc_mask);467}468469/**470* Define query and create the gb query object if it is not already created.471* There is only one gb query object per context which will be shared by472* queries of all types.473*/474static enum pipe_error475define_query_vgpu10(struct svga_context *svga,476struct svga_query *sq, int resultLen)477{478struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;479int qlen;480enum pipe_error ret = PIPE_OK;481482SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);483484if (svga->gb_query == NULL) {485/* Create a gb query object */486svga->gb_query = sws->query_create(sws, SVGA_QUERY_MEM_SIZE);487if (!svga->gb_query)488return PIPE_ERROR_OUT_OF_MEMORY;489svga->gb_query_len = SVGA_QUERY_MEM_SIZE;490memset (svga->gb_query_map, 0, sizeof(svga->gb_query_map));491svga->gb_query_alloc_mask = util_bitmask_create();492493/* Bind the query object to the context */494SVGA_RETRY(svga, svga->swc->query_bind(svga->swc, svga->gb_query,495SVGA_QUERY_FLAG_SET));496}497498sq->gb_query = svga->gb_query;499500/* Make sure query length is in multiples of 8 bytes */501qlen = align(resultLen + sizeof(SVGA3dQueryState), 8);502503/* Find a slot for this query in the gb object */504sq->offset = allocate_query(svga, sq->svga_type, qlen);505if (sq->offset == -1)506return PIPE_ERROR_OUT_OF_MEMORY;507508assert((sq->offset & 7) == 0);509510SVGA_DBG(DEBUG_QUERY, " query type=%d qid=0x%x offset=%d\n",511sq->svga_type, sq->id, sq->offset);512513/**514* Send SVGA3D commands to define the query515*/516SVGA_RETRY_OOM(svga, ret, SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id,517sq->svga_type,518sq->flags));519if (ret != PIPE_OK)520return PIPE_ERROR_OUT_OF_MEMORY;521522SVGA_RETRY(svga, SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id));523SVGA_RETRY(svga, SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id,524sq->offset));525526return PIPE_OK;527}528529static void530destroy_query_vgpu10(struct svga_context *svga, struct svga_query *sq)531{532SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyQuery(svga->swc, sq->id));533534/* Deallocate the memory slot allocated for this query */535deallocate_query(svga, sq);536}537538539/**540* Rebind queryies to the context.541*/542static void543rebind_vgpu10_query(struct svga_context *svga)544{545SVGA_RETRY(svga, svga->swc->query_bind(svga->swc, svga->gb_query,546SVGA_QUERY_FLAG_REF));547svga->rebind.flags.query = FALSE;548}549550551static enum pipe_error552begin_query_vgpu10(struct svga_context *svga, struct svga_query *sq)553{554struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;555int status = 0;556557sws->fence_reference(sws, &sq->fence, NULL);558559/* Initialize the query state to NEW */560status = sws->query_init(sws, sq->gb_query, sq->offset, SVGA3D_QUERYSTATE_NEW);561if (status)562return PIPE_ERROR;563564if (svga->rebind.flags.query) {565rebind_vgpu10_query(svga);566}567568/* Send the BeginQuery command to the device */569SVGA_RETRY(svga, SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id));570return PIPE_OK;571}572573static void574end_query_vgpu10(struct svga_context *svga, struct svga_query *sq)575{576if (svga->rebind.flags.query) {577rebind_vgpu10_query(svga);578}579580SVGA_RETRY(svga, SVGA3D_vgpu10_EndQuery(svga->swc, sq->id));581}582583static bool584get_query_result_vgpu10(struct svga_context *svga, struct svga_query *sq,585bool wait, void *result, int resultLen)586{587struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;588SVGA3dQueryState queryState;589590if (svga->rebind.flags.query) {591rebind_vgpu10_query(svga);592}593594sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen);595596if (queryState != SVGA3D_QUERYSTATE_SUCCEEDED && !sq->fence) {597/* We don't have the query result yet, and the query hasn't been598* submitted. We need to submit it now since the GL spec says599* "Querying the state for a given occlusion query forces that600* occlusion query to complete within a finite amount of time."601*/602svga_context_flush(svga, &sq->fence);603}604605if (queryState == SVGA3D_QUERYSTATE_PENDING ||606queryState == SVGA3D_QUERYSTATE_NEW) {607if (!wait)608return false;609sws->fence_finish(sws, sq->fence, PIPE_TIMEOUT_INFINITE,610SVGA_FENCE_FLAG_QUERY);611sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen);612}613614assert(queryState == SVGA3D_QUERYSTATE_SUCCEEDED ||615queryState == SVGA3D_QUERYSTATE_FAILED);616617return true;618}619620static struct pipe_query *621svga_create_query(struct pipe_context *pipe,622unsigned query_type,623unsigned index)624{625struct svga_context *svga = svga_context(pipe);626struct svga_query *sq;627enum pipe_error ret;628629assert(query_type < SVGA_QUERY_MAX);630631sq = CALLOC_STRUCT(svga_query);632if (!sq)633goto fail;634635/* Allocate an integer ID for the query */636sq->id = util_bitmask_add(svga->query_id_bm);637if (sq->id == UTIL_BITMASK_INVALID_INDEX)638goto fail;639640SVGA_DBG(DEBUG_QUERY, "%s type=%d sq=0x%x id=%d\n", __FUNCTION__,641query_type, sq, sq->id);642643switch (query_type) {644case PIPE_QUERY_OCCLUSION_COUNTER:645sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSION;646if (svga_have_vgpu10(svga)) {647ret = define_query_vgpu10(svga, sq,648sizeof(SVGADXOcclusionQueryResult));649if (ret != PIPE_OK)650goto fail;651652/**653* In OpenGL, occlusion counter query can be used in conditional654* rendering; however, in DX10, only OCCLUSION_PREDICATE query can655* be used for predication. Hence, we need to create an occlusion656* predicate query along with the occlusion counter query. So when657* the occlusion counter query is used for predication, the associated658* query of occlusion predicate type will be used659* in the SetPredication command.660*/661sq->predicate = svga_create_query(pipe, PIPE_QUERY_OCCLUSION_PREDICATE, index);662663} else {664ret = define_query_vgpu9(svga, sq);665if (ret != PIPE_OK)666goto fail;667}668break;669case PIPE_QUERY_OCCLUSION_PREDICATE:670case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:671if (svga_have_vgpu10(svga)) {672sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE;673ret = define_query_vgpu10(svga, sq,674sizeof(SVGADXOcclusionPredicateQueryResult));675if (ret != PIPE_OK)676goto fail;677} else {678sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSION;679ret = define_query_vgpu9(svga, sq);680if (ret != PIPE_OK)681goto fail;682}683break;684case PIPE_QUERY_PRIMITIVES_GENERATED:685case PIPE_QUERY_PRIMITIVES_EMITTED:686case PIPE_QUERY_SO_STATISTICS:687assert(svga_have_vgpu10(svga));688689/* Until the device supports the new query type for multiple streams,690* we will use the single stream query type for stream 0.691*/692if (svga_have_sm5(svga) && index > 0) {693assert(index < 4);694695sq->svga_type = SVGA3D_QUERYTYPE_SOSTATS_STREAM0 + index;696}697else {698assert(index == 0);699sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS;700}701ret = define_query_vgpu10(svga, sq,702sizeof(SVGADXStreamOutStatisticsQueryResult));703if (ret != PIPE_OK)704goto fail;705break;706case PIPE_QUERY_TIMESTAMP:707assert(svga_have_vgpu10(svga));708sq->svga_type = SVGA3D_QUERYTYPE_TIMESTAMP;709ret = define_query_vgpu10(svga, sq,710sizeof(SVGADXTimestampQueryResult));711if (ret != PIPE_OK)712goto fail;713break;714case SVGA_QUERY_NUM_DRAW_CALLS:715case SVGA_QUERY_NUM_FALLBACKS:716case SVGA_QUERY_NUM_FLUSHES:717case SVGA_QUERY_NUM_VALIDATIONS:718case SVGA_QUERY_NUM_BUFFERS_MAPPED:719case SVGA_QUERY_NUM_TEXTURES_MAPPED:720case SVGA_QUERY_NUM_BYTES_UPLOADED:721case SVGA_QUERY_NUM_COMMAND_BUFFERS:722case SVGA_QUERY_COMMAND_BUFFER_SIZE:723case SVGA_QUERY_SURFACE_WRITE_FLUSHES:724case SVGA_QUERY_MEMORY_USED:725case SVGA_QUERY_NUM_SHADERS:726case SVGA_QUERY_NUM_RESOURCES:727case SVGA_QUERY_NUM_STATE_OBJECTS:728case SVGA_QUERY_NUM_SURFACE_VIEWS:729case SVGA_QUERY_NUM_GENERATE_MIPMAP:730case SVGA_QUERY_NUM_READBACKS:731case SVGA_QUERY_NUM_RESOURCE_UPDATES:732case SVGA_QUERY_NUM_BUFFER_UPLOADS:733case SVGA_QUERY_NUM_CONST_BUF_UPDATES:734case SVGA_QUERY_NUM_CONST_UPDATES:735case SVGA_QUERY_NUM_FAILED_ALLOCATIONS:736case SVGA_QUERY_NUM_COMMANDS_PER_DRAW:737case SVGA_QUERY_NUM_SHADER_RELOCATIONS:738case SVGA_QUERY_NUM_SURFACE_RELOCATIONS:739case SVGA_QUERY_SHADER_MEM_USED:740break;741case SVGA_QUERY_FLUSH_TIME:742case SVGA_QUERY_MAP_BUFFER_TIME:743/* These queries need os_time_get() */744svga->hud.uses_time = TRUE;745break;746747default:748assert(!"unexpected query type in svga_create_query()");749}750751sq->type = query_type;752753return &sq->base;754755fail:756FREE(sq);757return NULL;758}759760static void761svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)762{763struct svga_context *svga = svga_context(pipe);764struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;765struct svga_query *sq;766767if (!q) {768destroy_gb_query_obj(svga);769return;770}771772sq = svga_query(q);773774SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,775sq, sq->id);776777switch (sq->type) {778case PIPE_QUERY_OCCLUSION_COUNTER:779case PIPE_QUERY_OCCLUSION_PREDICATE:780case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:781if (svga_have_vgpu10(svga)) {782/* make sure to also destroy any associated predicate query */783if (sq->predicate)784svga_destroy_query(pipe, sq->predicate);785destroy_query_vgpu10(svga, sq);786} else {787sws->buffer_destroy(sws, sq->hwbuf);788}789sws->fence_reference(sws, &sq->fence, NULL);790break;791case PIPE_QUERY_PRIMITIVES_GENERATED:792case PIPE_QUERY_PRIMITIVES_EMITTED:793case PIPE_QUERY_SO_STATISTICS:794case PIPE_QUERY_TIMESTAMP:795assert(svga_have_vgpu10(svga));796destroy_query_vgpu10(svga, sq);797sws->fence_reference(sws, &sq->fence, NULL);798break;799case SVGA_QUERY_NUM_DRAW_CALLS:800case SVGA_QUERY_NUM_FALLBACKS:801case SVGA_QUERY_NUM_FLUSHES:802case SVGA_QUERY_NUM_VALIDATIONS:803case SVGA_QUERY_MAP_BUFFER_TIME:804case SVGA_QUERY_NUM_BUFFERS_MAPPED:805case SVGA_QUERY_NUM_TEXTURES_MAPPED:806case SVGA_QUERY_NUM_BYTES_UPLOADED:807case SVGA_QUERY_NUM_COMMAND_BUFFERS:808case SVGA_QUERY_COMMAND_BUFFER_SIZE:809case SVGA_QUERY_FLUSH_TIME:810case SVGA_QUERY_SURFACE_WRITE_FLUSHES:811case SVGA_QUERY_MEMORY_USED:812case SVGA_QUERY_NUM_SHADERS:813case SVGA_QUERY_NUM_RESOURCES:814case SVGA_QUERY_NUM_STATE_OBJECTS:815case SVGA_QUERY_NUM_SURFACE_VIEWS:816case SVGA_QUERY_NUM_GENERATE_MIPMAP:817case SVGA_QUERY_NUM_READBACKS:818case SVGA_QUERY_NUM_RESOURCE_UPDATES:819case SVGA_QUERY_NUM_BUFFER_UPLOADS:820case SVGA_QUERY_NUM_CONST_BUF_UPDATES:821case SVGA_QUERY_NUM_CONST_UPDATES:822case SVGA_QUERY_NUM_FAILED_ALLOCATIONS:823case SVGA_QUERY_NUM_COMMANDS_PER_DRAW:824case SVGA_QUERY_NUM_SHADER_RELOCATIONS:825case SVGA_QUERY_NUM_SURFACE_RELOCATIONS:826case SVGA_QUERY_SHADER_MEM_USED:827/* nothing */828break;829default:830assert(!"svga: unexpected query type in svga_destroy_query()");831}832833/* Free the query id */834util_bitmask_clear(svga->query_id_bm, sq->id);835836FREE(sq);837}838839840static bool841svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)842{843struct svga_context *svga = svga_context(pipe);844struct svga_query *sq = svga_query(q);845enum pipe_error ret = PIPE_OK;846847assert(sq);848assert(sq->type < SVGA_QUERY_MAX);849850/* Need to flush out buffered drawing commands so that they don't851* get counted in the query results.852*/853svga_hwtnl_flush_retry(svga);854855switch (sq->type) {856case PIPE_QUERY_OCCLUSION_COUNTER:857case PIPE_QUERY_OCCLUSION_PREDICATE:858case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:859if (svga_have_vgpu10(svga)) {860ret = begin_query_vgpu10(svga, sq);861/* also need to start the associated occlusion predicate query */862if (sq->predicate) {863enum pipe_error status;864status = begin_query_vgpu10(svga, svga_query(sq->predicate));865assert(status == PIPE_OK);866(void) status;867}868} else {869begin_query_vgpu9(svga, sq);870}871assert(ret == PIPE_OK);872(void) ret;873break;874case PIPE_QUERY_PRIMITIVES_GENERATED:875case PIPE_QUERY_PRIMITIVES_EMITTED:876case PIPE_QUERY_SO_STATISTICS:877case PIPE_QUERY_TIMESTAMP:878assert(svga_have_vgpu10(svga));879ret = begin_query_vgpu10(svga, sq);880assert(ret == PIPE_OK);881break;882case SVGA_QUERY_NUM_DRAW_CALLS:883sq->begin_count = svga->hud.num_draw_calls;884break;885case SVGA_QUERY_NUM_FALLBACKS:886sq->begin_count = svga->hud.num_fallbacks;887break;888case SVGA_QUERY_NUM_FLUSHES:889sq->begin_count = svga->hud.num_flushes;890break;891case SVGA_QUERY_NUM_VALIDATIONS:892sq->begin_count = svga->hud.num_validations;893break;894case SVGA_QUERY_MAP_BUFFER_TIME:895sq->begin_count = svga->hud.map_buffer_time;896break;897case SVGA_QUERY_NUM_BUFFERS_MAPPED:898sq->begin_count = svga->hud.num_buffers_mapped;899break;900case SVGA_QUERY_NUM_TEXTURES_MAPPED:901sq->begin_count = svga->hud.num_textures_mapped;902break;903case SVGA_QUERY_NUM_BYTES_UPLOADED:904sq->begin_count = svga->hud.num_bytes_uploaded;905break;906case SVGA_QUERY_NUM_COMMAND_BUFFERS:907sq->begin_count = svga->swc->num_command_buffers;908break;909case SVGA_QUERY_COMMAND_BUFFER_SIZE:910sq->begin_count = svga->hud.command_buffer_size;911break;912case SVGA_QUERY_FLUSH_TIME:913sq->begin_count = svga->hud.flush_time;914break;915case SVGA_QUERY_SURFACE_WRITE_FLUSHES:916sq->begin_count = svga->hud.surface_write_flushes;917break;918case SVGA_QUERY_NUM_READBACKS:919sq->begin_count = svga->hud.num_readbacks;920break;921case SVGA_QUERY_NUM_RESOURCE_UPDATES:922sq->begin_count = svga->hud.num_resource_updates;923break;924case SVGA_QUERY_NUM_BUFFER_UPLOADS:925sq->begin_count = svga->hud.num_buffer_uploads;926break;927case SVGA_QUERY_NUM_CONST_BUF_UPDATES:928sq->begin_count = svga->hud.num_const_buf_updates;929break;930case SVGA_QUERY_NUM_CONST_UPDATES:931sq->begin_count = svga->hud.num_const_updates;932break;933case SVGA_QUERY_NUM_SHADER_RELOCATIONS:934sq->begin_count = svga->swc->num_shader_reloc;935break;936case SVGA_QUERY_NUM_SURFACE_RELOCATIONS:937sq->begin_count = svga->swc->num_surf_reloc;938break;939case SVGA_QUERY_MEMORY_USED:940case SVGA_QUERY_NUM_SHADERS:941case SVGA_QUERY_NUM_RESOURCES:942case SVGA_QUERY_NUM_STATE_OBJECTS:943case SVGA_QUERY_NUM_SURFACE_VIEWS:944case SVGA_QUERY_NUM_GENERATE_MIPMAP:945case SVGA_QUERY_NUM_FAILED_ALLOCATIONS:946case SVGA_QUERY_NUM_COMMANDS_PER_DRAW:947case SVGA_QUERY_SHADER_MEM_USED:948/* nothing */949break;950default:951assert(!"unexpected query type in svga_begin_query()");952}953954SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d type=%d svga_type=%d\n",955__FUNCTION__, sq, sq->id, sq->type, sq->svga_type);956957sq->active = TRUE;958959return true;960}961962963static bool964svga_end_query(struct pipe_context *pipe, struct pipe_query *q)965{966struct svga_context *svga = svga_context(pipe);967struct svga_query *sq = svga_query(q);968969assert(sq);970assert(sq->type < SVGA_QUERY_MAX);971972SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x type=%d\n",973__FUNCTION__, sq, sq->type);974975if (sq->type == PIPE_QUERY_TIMESTAMP && !sq->active)976svga_begin_query(pipe, q);977978SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d type=%d svga_type=%d\n",979__FUNCTION__, sq, sq->id, sq->type, sq->svga_type);980981svga_hwtnl_flush_retry(svga);982983assert(sq->active);984985switch (sq->type) {986case PIPE_QUERY_OCCLUSION_COUNTER:987case PIPE_QUERY_OCCLUSION_PREDICATE:988case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:989if (svga_have_vgpu10(svga)) {990end_query_vgpu10(svga, sq);991/* also need to end the associated occlusion predicate query */992if (sq->predicate) {993end_query_vgpu10(svga, svga_query(sq->predicate));994}995} else {996end_query_vgpu9(svga, sq);997}998break;999case PIPE_QUERY_PRIMITIVES_GENERATED:1000case PIPE_QUERY_PRIMITIVES_EMITTED:1001case PIPE_QUERY_SO_STATISTICS:1002case PIPE_QUERY_TIMESTAMP:1003assert(svga_have_vgpu10(svga));1004end_query_vgpu10(svga, sq);1005break;1006case SVGA_QUERY_NUM_DRAW_CALLS:1007sq->end_count = svga->hud.num_draw_calls;1008break;1009case SVGA_QUERY_NUM_FALLBACKS:1010sq->end_count = svga->hud.num_fallbacks;1011break;1012case SVGA_QUERY_NUM_FLUSHES:1013sq->end_count = svga->hud.num_flushes;1014break;1015case SVGA_QUERY_NUM_VALIDATIONS:1016sq->end_count = svga->hud.num_validations;1017break;1018case SVGA_QUERY_MAP_BUFFER_TIME:1019sq->end_count = svga->hud.map_buffer_time;1020break;1021case SVGA_QUERY_NUM_BUFFERS_MAPPED:1022sq->end_count = svga->hud.num_buffers_mapped;1023break;1024case SVGA_QUERY_NUM_TEXTURES_MAPPED:1025sq->end_count = svga->hud.num_textures_mapped;1026break;1027case SVGA_QUERY_NUM_BYTES_UPLOADED:1028sq->end_count = svga->hud.num_bytes_uploaded;1029break;1030case SVGA_QUERY_NUM_COMMAND_BUFFERS:1031sq->end_count = svga->swc->num_command_buffers;1032break;1033case SVGA_QUERY_COMMAND_BUFFER_SIZE:1034sq->end_count = svga->hud.command_buffer_size;1035break;1036case SVGA_QUERY_FLUSH_TIME:1037sq->end_count = svga->hud.flush_time;1038break;1039case SVGA_QUERY_SURFACE_WRITE_FLUSHES:1040sq->end_count = svga->hud.surface_write_flushes;1041break;1042case SVGA_QUERY_NUM_READBACKS:1043sq->end_count = svga->hud.num_readbacks;1044break;1045case SVGA_QUERY_NUM_RESOURCE_UPDATES:1046sq->end_count = svga->hud.num_resource_updates;1047break;1048case SVGA_QUERY_NUM_BUFFER_UPLOADS:1049sq->end_count = svga->hud.num_buffer_uploads;1050break;1051case SVGA_QUERY_NUM_CONST_BUF_UPDATES:1052sq->end_count = svga->hud.num_const_buf_updates;1053break;1054case SVGA_QUERY_NUM_CONST_UPDATES:1055sq->end_count = svga->hud.num_const_updates;1056break;1057case SVGA_QUERY_NUM_SHADER_RELOCATIONS:1058sq->end_count = svga->swc->num_shader_reloc;1059break;1060case SVGA_QUERY_NUM_SURFACE_RELOCATIONS:1061sq->end_count = svga->swc->num_surf_reloc;1062break;1063case SVGA_QUERY_MEMORY_USED:1064case SVGA_QUERY_NUM_SHADERS:1065case SVGA_QUERY_NUM_RESOURCES:1066case SVGA_QUERY_NUM_STATE_OBJECTS:1067case SVGA_QUERY_NUM_SURFACE_VIEWS:1068case SVGA_QUERY_NUM_GENERATE_MIPMAP:1069case SVGA_QUERY_NUM_FAILED_ALLOCATIONS:1070case SVGA_QUERY_NUM_COMMANDS_PER_DRAW:1071case SVGA_QUERY_SHADER_MEM_USED:1072/* nothing */1073break;1074default:1075assert(!"unexpected query type in svga_end_query()");1076}1077sq->active = FALSE;1078return true;1079}108010811082static bool1083svga_get_query_result(struct pipe_context *pipe,1084struct pipe_query *q,1085bool wait,1086union pipe_query_result *vresult)1087{1088struct svga_screen *svgascreen = svga_screen(pipe->screen);1089struct svga_context *svga = svga_context(pipe);1090struct svga_query *sq = svga_query(q);1091uint64_t *result = (uint64_t *)vresult;1092bool ret = true;10931094assert(sq);10951096SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d wait: %d\n",1097__FUNCTION__, sq, sq->id, wait);10981099switch (sq->type) {1100case PIPE_QUERY_OCCLUSION_COUNTER:1101if (svga_have_vgpu10(svga)) {1102SVGADXOcclusionQueryResult occResult;1103ret = get_query_result_vgpu10(svga, sq, wait,1104(void *)&occResult, sizeof(occResult));1105*result = (uint64_t)occResult.samplesRendered;1106} else {1107ret = get_query_result_vgpu9(svga, sq, wait, result);1108}1109break;1110case PIPE_QUERY_OCCLUSION_PREDICATE:1111case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: {1112if (svga_have_vgpu10(svga)) {1113SVGADXOcclusionPredicateQueryResult occResult;1114ret = get_query_result_vgpu10(svga, sq, wait,1115(void *)&occResult, sizeof(occResult));1116vresult->b = occResult.anySamplesRendered != 0;1117} else {1118uint64_t count = 0;1119ret = get_query_result_vgpu9(svga, sq, wait, &count);1120vresult->b = count != 0;1121}1122break;1123}1124case PIPE_QUERY_SO_STATISTICS: {1125SVGADXStreamOutStatisticsQueryResult sResult;1126struct pipe_query_data_so_statistics *pResult =1127(struct pipe_query_data_so_statistics *)vresult;11281129assert(svga_have_vgpu10(svga));1130ret = get_query_result_vgpu10(svga, sq, wait,1131(void *)&sResult, sizeof(sResult));1132pResult->num_primitives_written = sResult.numPrimitivesWritten;1133pResult->primitives_storage_needed = sResult.numPrimitivesRequired;1134break;1135}1136case PIPE_QUERY_TIMESTAMP: {1137SVGADXTimestampQueryResult sResult;11381139assert(svga_have_vgpu10(svga));1140ret = get_query_result_vgpu10(svga, sq, wait,1141(void *)&sResult, sizeof(sResult));1142*result = (uint64_t)sResult.timestamp;1143break;1144}1145case PIPE_QUERY_PRIMITIVES_GENERATED: {1146SVGADXStreamOutStatisticsQueryResult sResult;11471148assert(svga_have_vgpu10(svga));1149ret = get_query_result_vgpu10(svga, sq, wait,1150(void *)&sResult, sizeof sResult);1151*result = (uint64_t)sResult.numPrimitivesRequired;1152break;1153}1154case PIPE_QUERY_PRIMITIVES_EMITTED: {1155SVGADXStreamOutStatisticsQueryResult sResult;11561157assert(svga_have_vgpu10(svga));1158ret = get_query_result_vgpu10(svga, sq, wait,1159(void *)&sResult, sizeof sResult);1160*result = (uint64_t)sResult.numPrimitivesWritten;1161break;1162}1163/* These are per-frame counters */1164case SVGA_QUERY_NUM_DRAW_CALLS:1165case SVGA_QUERY_NUM_FALLBACKS:1166case SVGA_QUERY_NUM_FLUSHES:1167case SVGA_QUERY_NUM_VALIDATIONS:1168case SVGA_QUERY_MAP_BUFFER_TIME:1169case SVGA_QUERY_NUM_BUFFERS_MAPPED:1170case SVGA_QUERY_NUM_TEXTURES_MAPPED:1171case SVGA_QUERY_NUM_BYTES_UPLOADED:1172case SVGA_QUERY_NUM_COMMAND_BUFFERS:1173case SVGA_QUERY_COMMAND_BUFFER_SIZE:1174case SVGA_QUERY_FLUSH_TIME:1175case SVGA_QUERY_SURFACE_WRITE_FLUSHES:1176case SVGA_QUERY_NUM_READBACKS:1177case SVGA_QUERY_NUM_RESOURCE_UPDATES:1178case SVGA_QUERY_NUM_BUFFER_UPLOADS:1179case SVGA_QUERY_NUM_CONST_BUF_UPDATES:1180case SVGA_QUERY_NUM_CONST_UPDATES:1181case SVGA_QUERY_NUM_SHADER_RELOCATIONS:1182case SVGA_QUERY_NUM_SURFACE_RELOCATIONS:1183vresult->u64 = sq->end_count - sq->begin_count;1184break;1185/* These are running total counters */1186case SVGA_QUERY_MEMORY_USED:1187vresult->u64 = svgascreen->hud.total_resource_bytes;1188break;1189case SVGA_QUERY_NUM_SHADERS:1190vresult->u64 = svga->hud.num_shaders;1191break;1192case SVGA_QUERY_NUM_RESOURCES:1193vresult->u64 = svgascreen->hud.num_resources;1194break;1195case SVGA_QUERY_NUM_STATE_OBJECTS:1196vresult->u64 = (svga->hud.num_blend_objects +1197svga->hud.num_depthstencil_objects +1198svga->hud.num_rasterizer_objects +1199svga->hud.num_sampler_objects +1200svga->hud.num_samplerview_objects +1201svga->hud.num_vertexelement_objects);1202break;1203case SVGA_QUERY_NUM_SURFACE_VIEWS:1204vresult->u64 = svga->hud.num_surface_views;1205break;1206case SVGA_QUERY_NUM_GENERATE_MIPMAP:1207vresult->u64 = svga->hud.num_generate_mipmap;1208break;1209case SVGA_QUERY_NUM_FAILED_ALLOCATIONS:1210vresult->u64 = svgascreen->hud.num_failed_allocations;1211break;1212case SVGA_QUERY_NUM_COMMANDS_PER_DRAW:1213vresult->f = (float) svga->swc->num_commands1214/ (float) svga->swc->num_draw_commands;1215break;1216case SVGA_QUERY_SHADER_MEM_USED:1217vresult->u64 = svga->hud.shader_mem_used;1218break;1219default:1220assert(!"unexpected query type in svga_get_query_result");1221}12221223SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, *((uint64_t *)vresult));12241225return ret;1226}12271228static void1229svga_render_condition(struct pipe_context *pipe, struct pipe_query *q,1230bool condition, enum pipe_render_cond_flag mode)1231{1232struct svga_context *svga = svga_context(pipe);1233struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;1234struct svga_query *sq = svga_query(q);1235SVGA3dQueryId queryId;12361237SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);12381239assert(svga_have_vgpu10(svga));1240if (sq == NULL) {1241queryId = SVGA3D_INVALID_ID;1242}1243else {1244assert(sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION ||1245sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE);12461247if (sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION) {1248assert(sq->predicate);1249/**1250* For conditional rendering, make sure to use the associated1251* predicate query.1252*/1253sq = svga_query(sq->predicate);1254}1255queryId = sq->id;12561257if ((mode == PIPE_RENDER_COND_WAIT ||1258mode == PIPE_RENDER_COND_BY_REGION_WAIT) && sq->fence) {1259sws->fence_finish(sws, sq->fence, PIPE_TIMEOUT_INFINITE,1260SVGA_FENCE_FLAG_QUERY);1261}1262}1263/*1264* if the kernel module doesn't support the predication command,1265* we'll just render unconditionally.1266* This is probably acceptable for the typical case of occlusion culling.1267*/1268if (sws->have_set_predication_cmd) {1269SVGA_RETRY(svga, SVGA3D_vgpu10_SetPredication(svga->swc, queryId,1270(uint32) condition));1271svga->pred.query_id = queryId;1272svga->pred.cond = condition;1273}12741275svga->render_condition = (sq != NULL);1276}127712781279/*1280* This function is a workaround because we lack the ability to query1281* renderer's time synchronously.1282*/1283static uint64_t1284svga_get_timestamp(struct pipe_context *pipe)1285{1286struct pipe_query *q = svga_create_query(pipe, PIPE_QUERY_TIMESTAMP, 0);1287union pipe_query_result result;12881289svga_begin_query(pipe, q);1290svga_end_query(pipe,q);1291svga_get_query_result(pipe, q, TRUE, &result);1292svga_destroy_query(pipe, q);12931294return result.u64;1295}129612971298static void1299svga_set_active_query_state(struct pipe_context *pipe, bool enable)1300{1301}130213031304/**1305* \brief Toggle conditional rendering if already enabled1306*1307* \param svga[in] The svga context1308* \param render_condition_enabled[in] Whether to ignore requests to turn1309* conditional rendering off1310* \param on[in] Whether to turn conditional rendering on or off1311*/1312void1313svga_toggle_render_condition(struct svga_context *svga,1314boolean render_condition_enabled,1315boolean on)1316{1317SVGA3dQueryId query_id;13181319if (render_condition_enabled ||1320svga->pred.query_id == SVGA3D_INVALID_ID) {1321return;1322}13231324/*1325* If we get here, it means that the system supports1326* conditional rendering since svga->pred.query_id has already been1327* modified for this context and thus support has already been1328* verified.1329*/1330query_id = on ? svga->pred.query_id : SVGA3D_INVALID_ID;13311332SVGA_RETRY(svga, SVGA3D_vgpu10_SetPredication(svga->swc, query_id,1333(uint32) svga->pred.cond));1334}133513361337void1338svga_init_query_functions(struct svga_context *svga)1339{1340svga->pipe.create_query = svga_create_query;1341svga->pipe.destroy_query = svga_destroy_query;1342svga->pipe.begin_query = svga_begin_query;1343svga->pipe.end_query = svga_end_query;1344svga->pipe.get_query_result = svga_get_query_result;1345svga->pipe.set_active_query_state = svga_set_active_query_state;1346svga->pipe.render_condition = svga_render_condition;1347svga->pipe.get_timestamp = svga_get_timestamp;1348}134913501351