/* Path: blob/21.2-virgl/src/gallium/drivers/zink/zink_query.c
 * 4570 views
 */
#include "zink_query.h"

#include "zink_context.h"
#include "zink_fence.h"
#include "zink_resource.h"
#include "zink_screen.h"

#include "util/hash_table.h"
#include "util/set.h"
#include "util/u_dump.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"

/* number of query slots in every VkQueryPool created by this file */
#define NUM_QUERIES 5000

/* A "qbo": buffer(s) that pool results are copied into by update_qbo(). */
struct zink_query_buffer {
   struct list_head list; /* link in zink_query::buffers */
   unsigned num_results; /* number of results copied into this qbo so far */
   struct pipe_resource *buffer; /* results copied from zink_query::query_pool */
   struct pipe_resource *xfb_buffers[PIPE_MAX_VERTEX_STREAMS - 1]; /* results copied from zink_query::xfb_query_pool[] */
};

struct zink_query {
   struct threaded_query base;
   enum pipe_query_type type;

   VkQueryPool query_pool;
   VkQueryPool xfb_query_pool[PIPE_MAX_VERTEX_STREAMS - 1]; //stream 0 is in the base pool
   unsigned curr_query, last_start; /* next pool slot to write / first slot of the current result range */

   VkQueryType vkqtype;
   unsigned index;
   bool precise;
   bool xfb_running;
   bool xfb_overflow;

   bool active; /* query is considered active by vk */
   bool needs_reset; /* reset_pool() must run before the pool is written again */
   bool dead; /* query should be destroyed when its fence finishes */
   bool needs_update; /* query needs to update its qbos */

   unsigned fences;
   struct list_head active_list;

   struct list_head stats_list; /* when active, statistics queries are added to ctx->primitives_generated_queries */
   bool have_gs[NUM_QUERIES]; /* geometry shaders use GEOMETRY_SHADER_PRIMITIVES_BIT */
   bool have_xfb[NUM_QUERIES]; /* xfb was active during this query */

   struct zink_batch_usage *batch_id; //batch that the query was started in

   struct list_head buffers;
   struct zink_query_buffer *curr_qbo;

   struct zink_resource *predicate;
   bool predicate_dirty;
};

static void
update_qbo(struct zink_context *ctx, struct zink_query *q);
static void
reset_pool(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q);

/* Returns how many 64-bit values one result of the given query type occupies
 * in a qbo (1 or 2). Unknown types hit unreachable().
 */
static inline unsigned
get_num_results(enum pipe_query_type query_type)
{
   switch (query_type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      return 1;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      return 2;
   default:
      debug_printf("unknown query: %s\n",
                   util_str_query_type(query_type, true));
      unreachable("zink: unknown query type");
   }
}

/* Maps a gallium pipeline-statistics index to the matching Vulkan
 * pipeline-statistic flag bit.
 */
static VkQueryPipelineStatisticFlags
pipeline_statistic_convert(enum pipe_statistics_query_index idx)
{
   /* constant lookup table: no need to rebuild it on the stack per call */
   static const unsigned map[] = {
      [PIPE_STAT_QUERY_IA_VERTICES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT,
      [PIPE_STAT_QUERY_IA_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT,
      [PIPE_STAT_QUERY_VS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_GS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_GS_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT,
      [PIPE_STAT_QUERY_C_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_C_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT,
      [PIPE_STAT_QUERY_PS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_HS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT,
      [PIPE_STAT_QUERY_DS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_CS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT
   };
   assert(idx < ARRAY_SIZE(map));
   return map[idx];
}

/* Converts a raw device timestamp to nanoseconds in place: masks off the
 * invalid high bits, then scales by the device's timestampPeriod.
 */
static void
timestamp_to_nanoseconds(struct zink_screen *screen, uint64_t *timestamp)
{
   /* The number of valid bits in a timestamp value is determined by
    * the VkQueueFamilyProperties::timestampValidBits property of the queue on which the timestamp is written.
    * - 17.5. Timestamp Queries
    */
   if (screen->timestamp_valid_bits < 64)
      *timestamp &= (1ull << screen->timestamp_valid_bits) - 1;

   /* The number of nanoseconds it takes for a timestamp value to be incremented by 1
    * can be obtained from VkPhysicalDeviceLimits::timestampPeriod
    * - 17.5. Timestamp Queries
    *
    * timestampPeriod is a float: multiply in double so that large tick counts
    * are not rounded to float precision before scaling.
    */
   *timestamp *= (double)screen->info.props.limits.timestampPeriod;
}

/* Maps a gallium query type to the Vulkan query type used for its base pool.
 * *precise is set to true only for PIPE_QUERY_OCCLUSION_COUNTER.
 * Unknown types hit unreachable().
 */
static VkQueryType
convert_query_type(unsigned query_type, bool *precise)
{
   *precise = false;
   switch (query_type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      *precise = true;
      FALLTHROUGH;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      return VK_QUERY_TYPE_OCCLUSION;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
      return VK_QUERY_TYPE_TIMESTAMP;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      return VK_QUERY_TYPE_PIPELINE_STATISTICS;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      return VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
   default:
      debug_printf("unknown query: %s\n",
                   util_str_query_type(query_type, true));
      unreachable("zink: unknown query type");
   }
}

/* True for query types that are linked into ctx->primitives_generated_queries
 * while they are running (see begin_query()/end_query()).
 */
static bool
needs_stats_list(struct zink_query *query)
{
   return query->type == PIPE_QUERY_PRIMITIVES_GENERATED ||
          query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
          query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
}

/* True for TIMESTAMP and TIME_ELAPSED queries. */
static bool
is_time_query(struct zink_query *query)
{
   return query->type == PIPE_QUERY_TIMESTAMP || query->type == PIPE_QUERY_TIME_ELAPSED;
}

/* True for both stream-output overflow predicate query types. */
static bool
is_so_overflow_query(struct zink_query *query)
{
   return query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
}

/* True for query types whose result is read from pipe_query_result::b. */
static bool
is_bool_query(struct zink_query *query)
{
   return is_so_overflow_query(query) ||
          query->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
          query->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE ||
          query->type == PIPE_QUERY_GPU_FINISHED;
}

/* Copies id_offset results, starting at slot last_start of the last qbo on
 * the query's buffer list, to the start of the current qbo, and rebases
 * curr_query / num_results to id_offset. Called by reset_pool() so that
 * results recorded before a pool reset are carried over.
 */
static void
qbo_sync_from_prev(struct zink_context *ctx, struct zink_query *query, unsigned id_offset, unsigned last_start)
{
   assert(id_offset);

   struct zink_query_buffer *prev = list_last_entry(&query->buffers, struct zink_query_buffer, list);
   unsigned result_size = get_num_results(query->type) * sizeof(uint64_t);
   /* this is get_buffer_offset() but without the zink_query object */
   unsigned qbo_offset = last_start * get_num_results(query->type) * sizeof(uint64_t);
   query->curr_query = id_offset;
   query->curr_qbo->num_results = id_offset;
   zink_copy_buffer(ctx, NULL, zink_resource(query->curr_qbo->buffer), zink_resource(prev->buffer), 0,
                    qbo_offset,
                    id_offset * result_size);
}

/* Allocates a new qbo (plus the xfb buffers the query type needs) and adds it
 * to the tail of query->buffers. Returns true without allocating when the
 * current qbo already has a next link. Returns false on allocation failure,
 * in which case nothing is added to the list.
 */
static bool
qbo_append(struct pipe_screen *screen, struct zink_query *query)
{
   if (query->curr_qbo && query->curr_qbo->list.next)
      return true;
   struct zink_query_buffer *qbo = CALLOC_STRUCT(zink_query_buffer);
   if (!qbo)
      return false;
   qbo->buffer = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
                                  PIPE_USAGE_STREAM,
                                  /* this is the maximum possible size of the results in a given buffer */
                                  NUM_QUERIES * get_num_results(query->type) * sizeof(uint64_t));
   if (!qbo->buffer)
      goto fail;
   if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED) {
      /* need separate xfb buffer */
      qbo->xfb_buffers[0] = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
                                     PIPE_USAGE_STREAM,
                                     /* this is the maximum possible size of the results in a given buffer */
                                     NUM_QUERIES * get_num_results(query->type) * sizeof(uint64_t));
      if (!qbo->xfb_buffers[0])
         goto fail;
   } else if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      /* need to monitor all xfb streams */
      for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers); i++) {
         /* need separate xfb buffer */
         qbo->xfb_buffers[i] = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
                                        PIPE_USAGE_STREAM,
                                        /* this is the maximum possible size of the results in a given buffer */
                                        NUM_QUERIES * get_num_results(query->type) * sizeof(uint64_t));
         if (!qbo->xfb_buffers[i])
            goto fail;
      }
   }
   list_addtail(&qbo->list, &query->buffers);

   return true;
fail:
   /* drop whatever was created; unset entries are NULL from CALLOC_STRUCT */
   pipe_resource_reference(&qbo->buffer, NULL);
   for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers); i++)
      pipe_resource_reference(&qbo->xfb_buffers[i], NULL);
   FREE(qbo);
   return false;
}

/* Destroys the query's pools, releases every qbo and the predicate buffer,
 * then frees the query. The query must have no outstanding fences.
 */
static void
destroy_query(struct zink_screen *screen, struct zink_query *query)
{
   assert(!p_atomic_read(&query->fences));
   if (query->query_pool)
      vkDestroyQueryPool(screen->dev, query->query_pool, NULL);
   struct zink_query_buffer *qbo, *next;
   LIST_FOR_EACH_ENTRY_SAFE(qbo, next, &query->buffers, list) {
      pipe_resource_reference(&qbo->buffer, NULL);
      for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers); i++)
         pipe_resource_reference(&qbo->xfb_buffers[i], NULL);
      FREE(qbo);
   }
   for (unsigned i = 0; i < ARRAY_SIZE(query->xfb_query_pool); i++) {
      if (query->xfb_query_pool[i])
         vkDestroyQueryPool(screen->dev, query->xfb_query_pool[i], NULL);
   }
   pipe_resource_reference((struct pipe_resource**)&query->predicate, NULL);
   FREE(query);
}

/* Rewinds the query to the first qbo on its list and marks it empty. */
static void
reset_qbo(struct zink_query *q)
{
   q->curr_qbo = list_first_entry(&q->buffers, struct zink_query_buffer, list);
   q->curr_qbo->num_results = 0;
}

/* pipe_context::create_query hook: allocates the query object, its Vulkan
 * pool(s) and its first qbo. Returns NULL on any failure.
 */
static struct pipe_query *
zink_create_query(struct pipe_context *pctx,
                  unsigned query_type, unsigned index)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = CALLOC_STRUCT(zink_query);
   VkQueryPoolCreateInfo pool_create = {0};

   if (!query)
      return NULL;
   list_inithead(&query->buffers);

   query->index = index;
   query->type = query_type;
   query->vkqtype = convert_query_type(query_type, &query->precise);
   if (query->vkqtype == -1) {
      /* nothing but the struct itself has been created yet: don't leak it */
      FREE(query);
      return NULL;
   }

   query->curr_query = 0;

   pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
   pool_create.queryType = query->vkqtype;
   pool_create.queryCount = NUM_QUERIES;
   if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED)
     pool_create.pipelineStatistics = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
                                      VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT;
   else if (query_type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE)
      pool_create.pipelineStatistics = pipeline_statistic_convert(index);

   VkResult status = vkCreateQueryPool(screen->dev, &pool_create, NULL, &query->query_pool);
   if (status != VK_SUCCESS)
      goto fail;
   if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED) {
      /* if xfb is active, we need to use an xfb query, otherwise we need pipeline statistics */
      pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
      pool_create.queryType = VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
      pool_create.queryCount = NUM_QUERIES;

      status = vkCreateQueryPool(screen->dev, &pool_create, NULL, &query->xfb_query_pool[0]);
      if (status != VK_SUCCESS)
         goto fail;
   } else if (query_type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      /* need to monitor all xfb streams */
      for (unsigned i = 0; i < ARRAY_SIZE(query->xfb_query_pool); i++) {
         status = vkCreateQueryPool(screen->dev, &pool_create, NULL, &query->xfb_query_pool[i]);
         if (status != VK_SUCCESS)
            goto fail;
      }
   }
   if (!qbo_append(pctx->screen, query))
      goto fail;
   struct zink_batch *batch = &zink_context(pctx)->batch;
   batch->has_work = true;
   query->needs_reset = true;
   if (query->type == PIPE_QUERY_TIMESTAMP) {
      query->active = true;
      /* defer pool reset until end_query since we're guaranteed to be threadsafe then */
      reset_qbo(query);
   }
   return (struct pipe_query *)query;
fail:
   /* destroy_query() skips pools/buffers that were never created */
   destroy_query(screen, query);
   return NULL;
}

/* pipe_context::destroy_query hook: marks the query dead and destroys it
 * right away when it has no outstanding fences. Otherwise destruction is left
 * to zink_prune_query(); if xfb is flagged as running, the fence is waited on
 * first.
 */
static void
zink_destroy_query(struct pipe_context *pctx,
                   struct pipe_query *q)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query *)q;

   p_atomic_set(&query->dead, true);
   if (p_atomic_read(&query->fences)) {
      if (query->xfb_running)
        zink_fence_wait(pctx);
      return;
   }

   destroy_query(screen, query);
}

/* Drops one fence reference from the query; when that was the last one and
 * the query has been marked dead, the query is destroyed.
 */
void
zink_prune_query(struct zink_screen *screen, struct zink_query *query)
{
   if (!p_atomic_dec_return(&query->fences)) {
      if (p_atomic_read(&query->dead))
         destroy_query(screen, query);
   }
}

/* Accumulates num_results raw results into *result according to the query
 * type. results points at the mapped qbo data (get_num_results() values per
 * result); xfb_results is only read for PIPE_QUERY_PRIMITIVES_GENERATED.
 */
static void
check_query_results(struct zink_query *query, union pipe_query_result *result,
                    int num_results, uint64_t *results, uint64_t *xfb_results)
{
   uint64_t last_val = 0;
   int result_size = get_num_results(query->type);
   for (int i = 0; i < num_results * result_size; i += result_size) {
      switch (query->type) {
      case PIPE_QUERY_OCCLUSION_PREDICATE:
      case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      case PIPE_QUERY_GPU_FINISHED:
         result->b |= results[i] != 0;
         break;

      case PIPE_QUERY_TIME_ELAPSED:
      case PIPE_QUERY_TIMESTAMP:
         /* the application can sum the differences between all N queries to determine the total execution time.
          * - 17.5. Timestamp Queries
          */
         if (query->type != PIPE_QUERY_TIME_ELAPSED || i)
            result->u64 += results[i] - last_val;
         last_val = results[i];
         break;
      case PIPE_QUERY_OCCLUSION_COUNTER:
         result->u64 += results[i];
         break;
      case PIPE_QUERY_PRIMITIVES_GENERATED:
         if (query->have_xfb[query->last_start + i / 2] || query->index)
            result->u64 += xfb_results[i + 1];
         else
            /* if a given draw had a geometry shader, we need to use the second result */
            result->u64 += results[i + query->have_gs[query->last_start + i / 2]];
         break;
      case PIPE_QUERY_PRIMITIVES_EMITTED:
         /* A query pool created with this type will capture 2 integers -
          * numPrimitivesWritten and numPrimitivesNeeded -
          * for the specified vertex stream output from the last vertex processing stage.
          * - from VK_EXT_transform_feedback spec
          */
         result->u64 += results[i];
         break;
      case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         /* A query pool created with this type will capture 2 integers -
          * numPrimitivesWritten and numPrimitivesNeeded -
          * for the specified vertex stream output from the last vertex processing stage.
          * - from VK_EXT_transform_feedback spec
          */
         if (query->have_xfb[query->last_start + i / 2])
            result->b |= results[i] != results[i + 1];
         break;
      case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
         result->u64 += results[i];
         break;

      default:
         debug_printf("unhandled query type: %s\n",
                      util_str_query_type(query->type, true));
         unreachable("unexpected query type");
      }
   }
}

/* Maps every qbo of the query, folds the stored values into *result and, for
 * time queries, converts the sum to nanoseconds. With wait == false the maps
 * use PIPE_MAP_DONTBLOCK. Returns false when a buffer could not be mapped.
 */
static bool
get_query_result(struct pipe_context *pctx,
                      struct pipe_query *q,
                      bool wait,
                      union pipe_query_result *result)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query *)q;
   unsigned flags = PIPE_MAP_READ;

   if (!wait)
      flags |= PIPE_MAP_DONTBLOCK;

   util_query_clear_result(result, query->type);

   int num_results = query->curr_query - query->last_start;
   int result_size = get_num_results(query->type) * sizeof(uint64_t);

   struct zink_query_buffer *qbo;
   struct pipe_transfer *xfer;
   LIST_FOR_EACH_ENTRY(qbo, &query->buffers, list) {
      uint64_t *xfb_results = NULL;
      uint64_t *results;
      bool is_timestamp = query->type == PIPE_QUERY_TIMESTAMP || query->type == PIPE_QUERY_TIMESTAMP_DISJOINT;
      results = pipe_buffer_map_range(pctx, qbo->buffer, 0,
                                      (is_timestamp ? 1 : qbo->num_results) * result_size, flags, &xfer);
      if (!results) {
         if (wait)
            debug_printf("zink: qbo read failed!");
         return false;
      }
      struct pipe_transfer *xfb_xfer = NULL;
      if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED) {
         xfb_results = pipe_buffer_map_range(pctx, qbo->xfb_buffers[0], 0,
                                         qbo->num_results * result_size, flags, &xfb_xfer);
         if (!xfb_results) {
            if (wait)
               debug_printf("zink: xfb qbo read failed!");
            /* check_query_results() may read xfb_results for this query
             * type, so bail out instead of handing it a NULL pointer, and
             * release the mapping that did succeed
             */
            pipe_buffer_unmap(pctx, xfer);
            return false;
         }
      }
      check_query_results(query, result, is_timestamp ? 1 : qbo->num_results, results, xfb_results);
      pipe_buffer_unmap(pctx, xfer);
      if (xfb_xfer)
         pipe_buffer_unmap(pctx, xfb_xfer);
      if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
         for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers) && !result->b; i++) {
            uint64_t *stream_results = pipe_buffer_map_range(pctx, qbo->xfb_buffers[i],
                                              0,
                                              qbo->num_results * result_size, flags, &xfer);
            if (!stream_results) {
               if (wait)
                  debug_printf("zink: qbo read failed!");
               return false;
            }
            /* NOTE(review): this passes the query-wide count (curr_query - last_start)
             * while the mapping above covers qbo->num_results entries - confirm
             * the two always agree
             */
            check_query_results(query, result, num_results, stream_results, xfb_results);
            pipe_buffer_unmap(pctx, xfer);
         }
         /* if overflow is detected we can stop */
         if (result->b)
            break;
      }
   }

   if (is_time_query(query))
      timestamp_to_nanoseconds(screen, &result->u64);

   return true;
}

/* Reads the query result on the CPU (blocking) and writes it into pres at
 * offset as a 32- or 64-bit value depending on result_type. 32-bit results
 * are clamped to INT_MAX / UINT_MAX; bool queries write result.b.
 */
static void
force_cpu_read(struct zink_context *ctx, struct pipe_query *pquery, enum pipe_query_value_type result_type, struct pipe_resource *pres, unsigned offset)
{
   struct pipe_context *pctx = &ctx->base;
   unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
   struct zink_query *query = (struct zink_query*)pquery;
   union pipe_query_result result;

   if (query->needs_update)
      update_qbo(ctx, query);

   bool success = get_query_result(pctx, pquery, true, &result);
   if (!success) {
      debug_printf("zink: getting query result failed\n");
      return;
   }

   if (result_type <= PIPE_QUERY_TYPE_U32) {
      uint32_t u32;
      uint32_t limit;
      if (result_type == PIPE_QUERY_TYPE_I32)
         limit = INT_MAX;
      else
         limit = UINT_MAX;
      if (is_bool_query(query))
         u32 = result.b;
      else
         u32 = MIN2(limit, result.u64);
      pipe_buffer_write(pctx, pres, offset, result_size, &u32);
   } else {
      uint64_t u64;
      if (is_bool_query(query))
         u64 = result.b;
      else
         u64 = result.u64;
      pipe_buffer_write(pctx, pres, offset, result_size, &u64);
   }
}

/* Records a vkCmdCopyQueryPoolResults of num_results results, starting at
 * slot query_id of pool, into res at offset. Ends any active render pass
 * first and marks the written range of res as valid.
 */
static void
copy_pool_results_to_buffer(struct zink_context *ctx, struct zink_query *query, VkQueryPool pool,
                            unsigned query_id, struct zink_resource *res, unsigned offset,
                            int num_results, VkQueryResultFlags flags)
{
   struct zink_batch *batch = &ctx->batch;
   unsigned type_size = (flags & VK_QUERY_RESULT_64_BIT) ? sizeof(uint64_t) : sizeof(uint32_t);
   unsigned base_result_size = get_num_results(query->type) * type_size;
   unsigned result_size = base_result_size * num_results;
   if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
      result_size += type_size;
   zink_batch_no_rp(ctx);
   /* if it's a single query that doesn't need special handling, we can copy it and be done */
   zink_batch_reference_resource_rw(batch, res, true);
   zink_resource_buffer_barrier(ctx, batch, res, VK_ACCESS_TRANSFER_WRITE_BIT, 0);
   util_range_add(&res->base.b, &res->valid_buffer_range, offset, offset + result_size);
   assert(query_id < NUM_QUERIES);
   vkCmdCopyQueryPoolResults(batch->state->cmdbuf, pool, query_id, num_results, res->obj->buffer,
                             offset, 0, flags);
}

/* copy_pool_results_to_buffer() for the query's base pool, starting at
 * query->last_start.
 */
static void
copy_results_to_buffer(struct zink_context *ctx, struct zink_query *query, struct zink_resource *res, unsigned offset, int num_results, VkQueryResultFlags flags)
{
   copy_pool_results_to_buffer(ctx, query, query->query_pool, query->last_start, res, offset, num_results, flags);
}

/* Resets every pool of the query (outside of a render pass), clears the
 * per-slot gs/xfb flags and rewinds the slot counters. Results recorded since
 * last_start are carried over into the current qbo.
 */
static void
reset_pool(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q)
{
   /* sample these before the counters are rewound below */
   unsigned last_start = q->last_start;
   unsigned id_offset = q->curr_query - q->last_start;
   /* This command must only be called outside of a render pass instance
    *
    * - vkCmdResetQueryPool spec
    */
   zink_batch_no_rp(ctx);
   if (q->needs_update)
      update_qbo(ctx, q);

   vkCmdResetQueryPool(batch->state->cmdbuf, q->query_pool, 0, NUM_QUERIES);
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED)
      vkCmdResetQueryPool(batch->state->cmdbuf, q->xfb_query_pool[0], 0, NUM_QUERIES);
   else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++)
         vkCmdResetQueryPool(batch->state->cmdbuf, q->xfb_query_pool[i], 0, NUM_QUERIES);
   }
   memset(q->have_gs, 0, sizeof(q->have_gs));
   memset(q->have_xfb, 0, sizeof(q->have_xfb));
   q->last_start = q->curr_query = 0;
   q->needs_reset = false;
   /* create new qbo for non-timestamp queries */
   if (q->type != PIPE_QUERY_TIMESTAMP) {
      if (qbo_append(ctx->base.screen, q))
         reset_qbo(q);
      else
         debug_printf("zink: qbo alloc failed on reset!");
   }
   if (id_offset)
      qbo_sync_from_prev(ctx, q, id_offset, last_start);
}

/* Byte offset inside a qbo of the result for pool slot query_id, relative to
 * q->last_start. pres is unused.
 */
static inline unsigned
get_buffer_offset(struct zink_query *q, struct pipe_resource *pres, unsigned query_id)
{
   return (query_id - q->last_start) * get_num_results(q->type) * sizeof(uint64_t);
}

/* Copies the most recently written pool slot (curr_query - 1) into the
 * current qbo, and into the xfb buffers for the xfb-related query types, then
 * clears needs_update.
 */
static void
update_qbo(struct zink_context *ctx, struct zink_query *q)
{
   struct zink_query_buffer *qbo = q->curr_qbo;
   unsigned offset = 0;
   uint32_t query_id = q->curr_query - 1;
   bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP || q->type == PIPE_QUERY_TIMESTAMP_DISJOINT;
   /* timestamp queries just write to offset 0 always */
   if (!is_timestamp)
      offset = get_buffer_offset(q, qbo->buffer, query_id);
   copy_pool_results_to_buffer(ctx, q, q->query_pool, query_id, zink_resource(qbo->buffer),
                          offset,
                          1, VK_QUERY_RESULT_64_BIT);

   if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
       q->type == PIPE_QUERY_PRIMITIVES_GENERATED ||
       q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      /* fall back to the base pool/buffer when no dedicated xfb pool/buffer exists */
      copy_pool_results_to_buffer(ctx, q,
                                  q->xfb_query_pool[0] ? q->xfb_query_pool[0] : q->query_pool,
                                  query_id,
                                  zink_resource(qbo->xfb_buffers[0] ? qbo->xfb_buffers[0] : qbo->buffer),
                             get_buffer_offset(q, qbo->xfb_buffers[0] ? qbo->xfb_buffers[0] : qbo->buffer, query_id),
                             1, VK_QUERY_RESULT_64_BIT);
   }

   else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++) {
         copy_pool_results_to_buffer(ctx, q, q->xfb_query_pool[i], query_id, zink_resource(qbo->xfb_buffers[i]),
                                get_buffer_offset(q, qbo->xfb_buffers[i], query_id),
                                1, VK_QUERY_RESULT_64_BIT);
      }
   }

   if (!is_timestamp)
      q->curr_qbo->num_results++;
   q->needs_update = false;
}

/* Starts (or resumes) the query on the given batch: resets the pool when
 * required, records the begin command(s) for the query type, takes a fence
 * reference and registers the query in the batch's active_queries set.
 * Time queries return early; TIME_ELAPSED only records its start timestamp.
 */
static void
begin_query(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q)
{
   VkQueryControlFlags flags = 0;

   q->predicate_dirty = true;
   if (q->needs_reset)
      reset_pool(ctx, batch, q);
   assert(q->curr_query < NUM_QUERIES);
   q->active = true;
   batch->has_work = true;
   if (q->type == PIPE_QUERY_TIME_ELAPSED) {
      vkCmdWriteTimestamp(batch->state->cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, q->query_pool, q->curr_query);
      q->curr_query++;
      update_qbo(ctx, q);
   }
   /* ignore the rest of begin_query for timestamps */
   if (is_time_query(q))
      return;
   if (q->precise)
      flags |= VK_QUERY_CONTROL_PRECISE_BIT;
   if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
       q->type == PIPE_QUERY_PRIMITIVES_GENERATED ||
       q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      zink_screen(ctx->base.screen)->vk.CmdBeginQueryIndexedEXT(batch->state->cmdbuf,
                                                                q->xfb_query_pool[0] ? q->xfb_query_pool[0] : q->query_pool,
                                                                q->curr_query,
                                                                flags,
                                                                q->index);
      q->xfb_running = true;
   } else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      /* stream 0 lives in the base pool, streams 1..N in xfb_query_pool[] */
      zink_screen(ctx->base.screen)->vk.CmdBeginQueryIndexedEXT(batch->state->cmdbuf,
                                                                q->query_pool,
                                                                q->curr_query,
                                                                flags,
                                                                0);
      for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++)
         zink_screen(ctx->base.screen)->vk.CmdBeginQueryIndexedEXT(batch->state->cmdbuf,
                                                                   q->xfb_query_pool[i],
                                                                   q->curr_query,
                                                                   flags,
                                                                   i + 1);
      q->xfb_running = true;
   }
   if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT)
      vkCmdBeginQuery(batch->state->cmdbuf, q->query_pool, q->curr_query, flags);
   if (needs_stats_list(q))
      list_addtail(&q->stats_list, &ctx->primitives_generated_queries);
   p_atomic_inc(&q->fences);
   zink_batch_usage_set(&q->batch_id, batch->state);
   _mesa_set_add(batch->state->active_queries, q);
}

/* pipe_context::begin_query hook: starts a fresh result range at the current
 * slot, drops previously stored results and begins the query on the context's
 * batch. Always returns true.
 */
static bool
zink_begin_query(struct pipe_context *pctx,
                 struct pipe_query *q)
{
   struct zink_query *query = (struct zink_query *)q;
   struct zink_context *ctx = zink_context(pctx);
   struct zink_batch *batch = &ctx->batch;

   query->last_start = query->curr_query;
   /* drop all past results */
   reset_qbo(query);

   begin_query(ctx, batch, query);

   return true;
}

/* Ends the query on the given batch: records the end command(s) (or the
 * timestamp write for time queries), advances curr_query, and either copies
 * the result into the qbo right away or, inside a render pass, flags it for a
 * later update.
 */
static void
end_query(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   ASSERTED struct zink_query_buffer *qbo = q->curr_qbo;
   assert(qbo);
   batch->has_work = true;
   /* only PIPE_QUERY_TIMESTAMP queries stay flagged active after ending */
   q->active = q->type == PIPE_QUERY_TIMESTAMP;
   if (is_time_query(q)) {
      if (q->needs_reset)
         reset_pool(ctx, batch, q);
      vkCmdWriteTimestamp(batch->state->cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                          q->query_pool, q->curr_query);
      zink_batch_usage_set(&q->batch_id, batch->state);
   } else if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
            q->type == PIPE_QUERY_PRIMITIVES_GENERATED ||
            q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      screen->vk.CmdEndQueryIndexedEXT(batch->state->cmdbuf, q->xfb_query_pool[0] ? q->xfb_query_pool[0] :
                                                                                    q->query_pool,
                                       q->curr_query, q->index);
   }

   else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      screen->vk.CmdEndQueryIndexedEXT(batch->state->cmdbuf, q->query_pool, q->curr_query, 0);
      for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++) {
         screen->vk.CmdEndQueryIndexedEXT(batch->state->cmdbuf, q->xfb_query_pool[i], q->curr_query, i + 1);
      }
   }
   if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT && !is_time_query(q))
      vkCmdEndQuery(batch->state->cmdbuf, q->query_pool, q->curr_query);

   if (needs_stats_list(q))
      list_delinit(&q->stats_list);
   if (++q->curr_query == NUM_QUERIES) {
      /* always reset on start; this ensures we can actually submit the batch that the current query is on */
      q->needs_reset = true;
   }

   /* copy_pool_results_to_buffer() ends the render pass, so defer inside one */
   if (batch->in_rp)
      q->needs_update = true;
   else
      update_qbo(ctx, q);
}

/* pipe_context::end_query hook: syncs the threaded context, unlinks the query
 * from the stats list and ends it if it is active. Always returns true.
 */
static bool
zink_end_query(struct pipe_context *pctx,
               struct pipe_query *q)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_query *query = (struct zink_query *)q;
   struct zink_batch *batch = &ctx->batch;

   /* FIXME: this can be called from a thread, but it needs to write to the cmdbuf */
   threaded_context_unwrap_sync(pctx);

   if (needs_stats_list(query))
      list_delinit(&query->stats_list);
   if (query->active)
      end_query(ctx, batch, query);

   return true;
}

/* pipe_context::get_query_result hook: applies any pending qbo update,
 * flushes when the query's batch is still unflushed (returning false
 * immediately in the non-waiting case) and then reads the result back.
 */
static bool
zink_get_query_result(struct pipe_context *pctx,
                      struct pipe_query *q,
                      bool wait,
                      union pipe_query_result *result)
{
   struct zink_query *query = (void*)q;
   struct zink_context *ctx = zink_context(pctx);

   if (query->needs_update)
      update_qbo(ctx, query);

   if (zink_batch_usage_is_unflushed(query->batch_id)) {
      if (!threaded_query(q)->flushed)
         pctx->flush(pctx, NULL, 0);
      if (!wait)
         return false;
   }

   return get_query_result(pctx, q, wait, result);
}

/* Ends every active query registered on the batch and moves it onto
 * ctx->suspended_queries so zink_resume_queries() can restart it. Also
 * applies pending qbo updates and resets pools that are more than half used.
 */
void
zink_suspend_queries(struct zink_context *ctx, struct zink_batch *batch)
{
   set_foreach(batch->state->active_queries, entry) {
      struct zink_query *query = (void*)entry->key;
      /* if a query isn't active here then we don't need to reactivate it on the next batch */
      if (query->active) {
         end_query(ctx, batch, query);
         /* the fence is going to steal the set off the batch, so we have to copy
          * the active queries onto a list
          */
         list_addtail(&query->active_list, &ctx->suspended_queries);
      }
      if (query->needs_update)
         update_qbo(ctx, query);
      if (query->last_start && query->curr_query > NUM_QUERIES / 2)
         reset_pool(ctx, batch, query);
   }
}

/* Restarts every query on ctx->suspended_queries on the given batch and
 * removes it from that list.
 */
void
zink_resume_queries(struct zink_context *ctx, struct zink_batch *batch)
{
   struct zink_query *query, *next;
   LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
      begin_query(ctx, batch, query);
      list_delinit(&query->active_list);
   }
}

/* For every query on ctx->primitives_generated_queries, records for the
 * current pool slot whether a geometry shader is bound and whether any
 * stream-output targets are set.
 */
void
zink_query_update_gs_states(struct zink_context *ctx)
{
   struct zink_query *query;
   LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) {
      assert(query->curr_query < ARRAY_SIZE(query->have_gs));
      assert(query->active);
      query->have_gs[query->curr_query] = !!ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
      query->have_xfb[query->curr_query] = !!ctx->num_so_targets;
   }
}

/* pipe_context::set_active_query_state hook: suspends all queries when
 * disabled, resumes the suspended ones when enabled.
 */
static void
zink_set_active_query_state(struct pipe_context *pctx, bool enable)
{
   struct zink_context *ctx = zink_context(pctx);
   ctx->queries_disabled = !enable;

   struct zink_batch *batch = &ctx->batch;
   if (ctx->queries_disabled)
      zink_suspend_queries(ctx, batch);
   else
      zink_resume_queries(ctx, batch);
}

/* Records vkCmdBeginConditionalRenderingEXT using the predicate buffer of the
 * context's current render-condition query.
 */
void
zink_start_conditional_render(struct zink_context *ctx)
{
   struct zink_batch *batch = &ctx->batch;
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   VkConditionalRenderingFlagsEXT begin_flags = 0;
   if (ctx->render_condition.inverted)
      begin_flags = VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
   VkConditionalRenderingBeginInfoEXT begin_info = {0};
   begin_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
   begin_info.buffer = ctx->render_condition.query->predicate->obj->buffer;
   begin_info.flags = begin_flags;
   screen->vk.CmdBeginConditionalRenderingEXT(batch->state->cmdbuf, &begin_info);
   zink_batch_reference_resource_rw(batch, ctx->render_condition.query->predicate, false);
}

/* Applies pending conditional clears, then records
 * vkCmdEndConditionalRenderingEXT.
 */
void
zink_stop_conditional_render(struct zink_context *ctx)
{
   struct zink_batch *batch = &ctx->batch;
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   zink_clear_apply_conditionals(ctx);
   screen->vk.CmdEndConditionalRenderingEXT(batch->state->cmdbuf);
}

/* pipe_context::render_condition hook. A NULL query disables conditional
 * rendering. Otherwise the query's predicate buffer is created on first use,
 * refreshed when dirty, and installed as the context's render condition.
 */
static void
zink_render_condition(struct pipe_context *pctx,
                      struct pipe_query *pquery,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_query *query = (struct zink_query *)pquery;
   zink_batch_no_rp(ctx);
   VkQueryResultFlagBits flags = 0;

   if (query == NULL) {
      /* force conditional clears if they exist */
      if (ctx->clears_enabled && !ctx->batch.in_rp)
         zink_batch_rp(ctx);
      if (ctx->batch.in_rp)
         zink_stop_conditional_render(ctx);
      ctx->render_condition_active = false;
      ctx->render_condition.query = NULL;
      return;
   }

   if (!query->predicate) {
      struct pipe_resource *pres;

      /* need to create a vulkan buffer to copy the data into */
      pres = pipe_buffer_create(pctx->screen, PIPE_BIND_QUERY_BUFFER, PIPE_USAGE_DEFAULT, sizeof(uint64_t));
      if (!pres)
         return;

      query->predicate = zink_resource(pres);
   }
   if (query->predicate_dirty) {
      struct zink_resource *res = query->predicate;

      if (mode == PIPE_RENDER_COND_WAIT || mode == PIPE_RENDER_COND_BY_REGION_WAIT)
         flags |= VK_QUERY_RESULT_WAIT_BIT;

      flags |= VK_QUERY_RESULT_64_BIT;
      int num_results = query->curr_query - query->last_start;
      if (query->type != PIPE_QUERY_PRIMITIVES_GENERATED &&
          !is_so_overflow_query(query)) {
         copy_results_to_buffer(ctx, query, res, 0, num_results, flags);
      } else {
         /* these need special handling */
         force_cpu_read(ctx, pquery, PIPE_QUERY_TYPE_U32, &res->base.b, 0);
      }
      query->predicate_dirty = false;
   }
   ctx->render_condition.inverted = condition;
   ctx->render_condition_active = true;
   ctx->render_condition.query = query;
   if (ctx->batch.in_rp)
      zink_start_conditional_render(ctx);
}

/* pipe_context::get_query_result_resource hook: writes the query result (or,
 * for index == -1, its availability) into pres at offset. A GPU-side copy is
 * used where a single pool result can be copied directly; everything else
 * goes through force_cpu_read().
 */
static void
zink_get_query_result_resource(struct pipe_context *pctx,
                               struct pipe_query *pquery,
                               bool wait,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *pres,
                               unsigned offset)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query*)pquery;
   struct zink_resource *res = zink_resource(pres);
   unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
   VkQueryResultFlagBits size_flags = result_type <= PIPE_QUERY_TYPE_U32 ? 0 : VK_QUERY_RESULT_64_BIT;
   unsigned num_queries = query->curr_query - query->last_start;
   unsigned query_id = query->last_start;
   unsigned fences = p_atomic_read(&query->fences);

   if (index == -1) {
      /* VK_QUERY_RESULT_WITH_AVAILABILITY_BIT will ALWAYS write some kind of result data
       * in addition to the availability result, which is a problem if we're just trying to get availability data
       *
       * if we know that there's no valid buffer data in the preceding buffer range, then we can just
       * stomp on it with a glorious queued buffer copy instead of forcing a stall to manually write to the
       * buffer
       */

      VkQueryResultFlags flag = is_time_query(query) ? 0 : VK_QUERY_RESULT_PARTIAL_BIT;
      if (!fences) {
         uint64_t u64[2] = {0};
         if (vkGetQueryPoolResults(screen->dev, query->query_pool, query_id, 1, 2 * result_size, u64,
                                   0, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag) == VK_SUCCESS) {
            /* the availability value follows the result value */
            pipe_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + result_size);
            return;
         }
      }
      struct pipe_resource *staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, result_size * 2);
      if (!staging) {
         /* the copies below would dereference the NULL resource */
         debug_printf("zink: staging buffer alloc failed for query availability!\n");
         return;
      }
      copy_results_to_buffer(ctx, query, zink_resource(staging), 0, 1, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
      zink_copy_buffer(ctx, &ctx->batch, res, zink_resource(staging), offset, result_size, result_size);
      pipe_resource_reference(&staging, NULL);
      return;
   }

   if (!is_time_query(query) && !is_bool_query(query)) {
      if (num_queries == 1 && query->type != PIPE_QUERY_PRIMITIVES_GENERATED &&
                              query->type != PIPE_QUERY_PRIMITIVES_EMITTED &&
                              !is_bool_query(query)) {
         if (size_flags == VK_QUERY_RESULT_64_BIT) {
            if (query->needs_update)
               update_qbo(ctx, query);
            /* internal qbo always writes 64bit value so we can just direct copy */
            zink_copy_buffer(ctx, NULL, res, zink_resource(query->curr_qbo->buffer), offset,
                             get_buffer_offset(query, query->curr_qbo->buffer, query->last_start),
                             result_size);
         } else
            /* have to do a new copy for 32bit */
            copy_results_to_buffer(ctx, query, res, offset, 1, size_flags);
         return;
      }
   }

   /* TODO: use CS to aggregate results */

   /* unfortunately, there's no way to accumulate results from multiple queries on the gpu without either
    * clobbering all but the last result or writing the results sequentially, so we have to manually write the result
    */
   force_cpu_read(ctx, pquery, result_type, pres, offset);
}

/* pipe_context::get_timestamp hook: reads a calibrated device timestamp and
 * returns it in nanoseconds. Requires VK_EXT_calibrated_timestamps.
 */
static uint64_t
zink_get_timestamp(struct pipe_context *pctx)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   uint64_t timestamp, deviation;
   assert(screen->info.have_EXT_calibrated_timestamps);
   VkCalibratedTimestampInfoEXT cti = {0};
   cti.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT;
   cti.timeDomain = VK_TIME_DOMAIN_DEVICE_EXT;
   screen->vk.GetCalibratedTimestampsEXT(screen->dev, 1, &cti, &timestamp, &deviation);
   timestamp_to_nanoseconds(screen, &timestamp);
   return timestamp;
}

/* Initializes the context's query lists and installs the query-related
 * pipe_context hooks defined in this file.
 */
void
zink_context_query_init(struct pipe_context *pctx)
{
   struct zink_context *ctx = zink_context(pctx);
   list_inithead(&ctx->suspended_queries);
   list_inithead(&ctx->primitives_generated_queries);

   pctx->create_query = zink_create_query;
   pctx->destroy_query = zink_destroy_query;
   pctx->begin_query = zink_begin_query;
   pctx->end_query = zink_end_query;
   pctx->get_query_result = zink_get_query_result;
   pctx->get_query_result_resource = zink_get_query_result_resource;
   pctx->set_active_query_state = zink_set_active_query_state;
   pctx->render_condition = zink_render_condition;
   pctx->get_timestamp = zink_get_timestamp;
}