Path: blob/21.2-virgl/src/gallium/drivers/d3d12/d3d12_query.cpp
4570 views
/*1* Copyright © Microsoft Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "d3d12_query.h"24#include "d3d12_context.h"25#include "d3d12_resource.h"26#include "d3d12_screen.h"2728#include "util/u_dump.h"29#include "util/u_inlines.h"30#include "util/u_memory.h"3132#include <dxguids/dxguids.h>3334struct d3d12_query {35enum pipe_query_type type;3637ID3D12QueryHeap *query_heap;38unsigned curr_query, num_queries;39size_t query_size;40struct d3d12_query *subquery;4142D3D12_QUERY_TYPE d3d12qtype;4344pipe_resource *buffer;45unsigned buffer_offset;46uint64_t fence_value;4748struct list_head active_list;49struct d3d12_resource *predicate;50};5152static D3D12_QUERY_HEAP_TYPE53d3d12_query_heap_type(unsigned query_type)54{55switch (query_type) {56case PIPE_QUERY_OCCLUSION_COUNTER:57case PIPE_QUERY_OCCLUSION_PREDICATE:58case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:59return D3D12_QUERY_HEAP_TYPE_OCCLUSION;60case PIPE_QUERY_PIPELINE_STATISTICS:61return D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS;62case PIPE_QUERY_PRIMITIVES_GENERATED:63case PIPE_QUERY_PRIMITIVES_EMITTED:64case PIPE_QUERY_SO_STATISTICS:65return D3D12_QUERY_HEAP_TYPE_SO_STATISTICS;66case PIPE_QUERY_TIMESTAMP:67case PIPE_QUERY_TIME_ELAPSED:68return D3D12_QUERY_HEAP_TYPE_TIMESTAMP;6970default:71debug_printf("unknown query: %s\n",72util_str_query_type(query_type, true));73unreachable("d3d12: unknown query type");74}75}7677static D3D12_QUERY_TYPE78d3d12_query_type(unsigned query_type)79{80switch (query_type) {81case PIPE_QUERY_OCCLUSION_COUNTER:82return D3D12_QUERY_TYPE_OCCLUSION;83case PIPE_QUERY_OCCLUSION_PREDICATE:84case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:85return D3D12_QUERY_TYPE_BINARY_OCCLUSION;86case PIPE_QUERY_PIPELINE_STATISTICS:87return D3D12_QUERY_TYPE_PIPELINE_STATISTICS;88case PIPE_QUERY_PRIMITIVES_GENERATED:89case PIPE_QUERY_PRIMITIVES_EMITTED:90case PIPE_QUERY_SO_STATISTICS:91return D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0;92case PIPE_QUERY_TIMESTAMP:93case PIPE_QUERY_TIME_ELAPSED:94return D3D12_QUERY_TYPE_TIMESTAMP;95default:96debug_printf("unknown query: %s\n",97util_str_query_type(query_type, true));98unreachable("d3d12: unknown query type");99}100}101102static struct pipe_query *103d3d12_create_query(struct pipe_context *pctx,104unsigned query_type, unsigned index)105{106struct d3d12_context *ctx = d3d12_context(pctx);107struct d3d12_screen *screen = d3d12_screen(pctx->screen);108struct d3d12_query *query = CALLOC_STRUCT(d3d12_query);109D3D12_QUERY_HEAP_DESC desc = {};110111if (!query)112return NULL;113114query->type = (pipe_query_type)query_type;115query->d3d12qtype = d3d12_query_type(query_type);116query->num_queries = 16;117118/* With timer queries we want a few more queries, especially since we need two slots119* per query for TIME_ELAPSED queries */120if (unlikely(query->d3d12qtype == D3D12_QUERY_TYPE_TIMESTAMP))121query->num_queries = 64;122123query->curr_query = 0;124125switch (query->d3d12qtype) {126case D3D12_QUERY_TYPE_PIPELINE_STATISTICS:127query->query_size = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS);128break;129case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0:130query->query_size = sizeof(D3D12_QUERY_DATA_SO_STATISTICS);131break;132default:133query->query_size = sizeof(uint64_t);134break;135}136137desc.Count = query->num_queries;138desc.Type = d3d12_query_heap_type(query_type);139if (FAILED(screen->dev->CreateQueryHeap(&desc,140IID_PPV_ARGS(&query->query_heap)))) {141FREE(query);142return NULL;143}144145/* Query result goes into a readback buffer */146size_t buffer_size = query->query_size * query->num_queries;147u_suballocator_alloc(&ctx->query_allocator, buffer_size, 256,148&query->buffer_offset, &query->buffer);149150return (struct pipe_query *)query;151}152153static void154d3d12_destroy_query(struct pipe_context *pctx,155struct pipe_query *q)156{157struct d3d12_query *query = (struct d3d12_query *)q;158pipe_resource *predicate = &query->predicate->base;159if (query->subquery)160d3d12_destroy_query(pctx, (struct pipe_query *)query->subquery);161pipe_resource_reference(&predicate, NULL);162query->query_heap->Release();163FREE(query);164}165166static bool167accumulate_result(struct d3d12_context *ctx, struct d3d12_query *q,168union pipe_query_result *result, bool write)169{170struct pipe_transfer *transfer = NULL;171struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);172unsigned access = PIPE_MAP_READ;173void *results;174175if (write)176access |= PIPE_MAP_WRITE;177results = pipe_buffer_map_range(&ctx->base, q->buffer, q->buffer_offset,178q->num_queries * q->query_size,179access, &transfer);180181if (results == NULL)182return false;183184uint64_t *results_u64 = (uint64_t *)results;185D3D12_QUERY_DATA_PIPELINE_STATISTICS *results_stats = (D3D12_QUERY_DATA_PIPELINE_STATISTICS *)results;186D3D12_QUERY_DATA_SO_STATISTICS *results_so = (D3D12_QUERY_DATA_SO_STATISTICS *)results;187188util_query_clear_result(result, q->type);189for (unsigned i = 0; i < q->curr_query; ++i) {190switch (q->type) {191case PIPE_QUERY_OCCLUSION_PREDICATE:192case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:193result->b |= results_u64[i] != 0;194break;195196case PIPE_QUERY_OCCLUSION_COUNTER:197result->u64 += results_u64[i];198break;199case PIPE_QUERY_TIMESTAMP:200result->u64 = results_u64[i];201break;202203case PIPE_QUERY_PIPELINE_STATISTICS:204result->pipeline_statistics.ia_vertices += results_stats[i].IAVertices;205result->pipeline_statistics.ia_primitives += results_stats[i].IAPrimitives;206result->pipeline_statistics.vs_invocations += results_stats[i].VSInvocations;207result->pipeline_statistics.gs_invocations += results_stats[i].GSInvocations;208result->pipeline_statistics.gs_primitives += results_stats[i].GSPrimitives;209result->pipeline_statistics.c_invocations += results_stats[i].CInvocations;210result->pipeline_statistics.c_primitives += results_stats[i].CPrimitives;211result->pipeline_statistics.ps_invocations += results_stats[i].PSInvocations;212result->pipeline_statistics.hs_invocations += results_stats[i].HSInvocations;213result->pipeline_statistics.ds_invocations += results_stats[i].DSInvocations;214result->pipeline_statistics.cs_invocations += results_stats[i].CSInvocations;215break;216217case PIPE_QUERY_PRIMITIVES_GENERATED:218result->u64 += results_so[i].PrimitivesStorageNeeded;219break;220221case PIPE_QUERY_PRIMITIVES_EMITTED:222result->u64 += results_so[i].NumPrimitivesWritten;223break;224225case PIPE_QUERY_TIME_ELAPSED:226result->u64 += results_u64[2 * i + 1] - results_u64[2 * i];227break;228229case PIPE_QUERY_SO_STATISTICS:230result->so_statistics.num_primitives_written += results_so[i].NumPrimitivesWritten;231result->so_statistics.primitives_storage_needed += results_so[i].PrimitivesStorageNeeded;232break;233234default:235debug_printf("unsupported query type: %s\n",236util_str_query_type(q->type, true));237unreachable("unexpected query type");238}239}240241if (q->subquery) {242union pipe_query_result subresult;243244accumulate_result(ctx, q->subquery, &subresult, false);245q->subquery->curr_query = 0;246if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED)247result->u64 += subresult.pipeline_statistics.ia_primitives;248}249250if (write) {251if (q->type == PIPE_QUERY_PIPELINE_STATISTICS) {252results_stats[0].IAVertices = result->pipeline_statistics.ia_vertices;253results_stats[0].IAPrimitives = result->pipeline_statistics.ia_primitives;254results_stats[0].VSInvocations = result->pipeline_statistics.vs_invocations;255results_stats[0].GSInvocations = result->pipeline_statistics.gs_invocations;256results_stats[0].GSPrimitives = result->pipeline_statistics.gs_primitives;257results_stats[0].CInvocations = result->pipeline_statistics.c_invocations;258results_stats[0].CPrimitives = result->pipeline_statistics.c_primitives;259results_stats[0].PSInvocations = result->pipeline_statistics.ps_invocations;260results_stats[0].HSInvocations = result->pipeline_statistics.hs_invocations;261results_stats[0].DSInvocations = result->pipeline_statistics.ds_invocations;262results_stats[0].CSInvocations = result->pipeline_statistics.cs_invocations;263} else if (q->type == PIPE_QUERY_SO_STATISTICS) {264results_so[0].NumPrimitivesWritten = result->so_statistics.num_primitives_written;265results_so[0].PrimitivesStorageNeeded = result->so_statistics.primitives_storage_needed;266} else {267if (unlikely(q->d3d12qtype == D3D12_QUERY_TYPE_TIMESTAMP)) {268results_u64[0] = 0;269results_u64[1] = result->u64;270} else {271results_u64[0] = result->u64;272}273}274}275276pipe_buffer_unmap(&ctx->base, transfer);277278if (q->type == PIPE_QUERY_TIME_ELAPSED ||279q->type == PIPE_QUERY_TIMESTAMP)280result->u64 = static_cast<uint64_t>(screen->timestamp_multiplier * result->u64);281282return true;283}284285static void286begin_query(struct d3d12_context *ctx, struct d3d12_query *q, bool restart)287{288if (restart) {289q->curr_query = 0;290} else if (q->curr_query == q->num_queries) {291union pipe_query_result result;292293/* Accumulate current results and store in first slot */294d3d12_flush_cmdlist_and_wait(ctx);295accumulate_result(ctx, q, &result, true);296q->curr_query = 1;297}298299if (q->subquery)300begin_query(ctx, q->subquery, restart);301302ctx->cmdlist->BeginQuery(q->query_heap, q->d3d12qtype, q->curr_query);303}304305306static void307begin_timer_query(struct d3d12_context *ctx, struct d3d12_query *q, bool restart)308{309/* For PIPE_QUERY_TIME_ELAPSED we record one time with BeginQuery and one in310* EndQuery, so we need two query slots */311unsigned query_index = 2 * q->curr_query;312313if (restart) {314q->curr_query = 0;315query_index = 0;316} else if (query_index == q->num_queries) {317union pipe_query_result result;318319/* Accumulate current results and store in first slot */320d3d12_flush_cmdlist_and_wait(ctx);321accumulate_result(ctx, q, &result, true);322q->curr_query = 2;323}324325ctx->cmdlist->EndQuery(q->query_heap, q->d3d12qtype, query_index);326}327328static bool329d3d12_begin_query(struct pipe_context *pctx,330struct pipe_query *q)331{332struct d3d12_context *ctx = d3d12_context(pctx);333struct d3d12_query *query = (struct d3d12_query *)q;334335assert(query->type != PIPE_QUERY_TIMESTAMP);336337if (unlikely(query->type == PIPE_QUERY_TIME_ELAPSED))338begin_timer_query(ctx, query, true);339else {340begin_query(ctx, query, true);341list_addtail(&query->active_list, &ctx->active_queries);342}343344return true;345}346347static void348end_query(struct d3d12_context *ctx, struct d3d12_query *q)349{350uint64_t offset = 0;351struct d3d12_batch *batch = d3d12_current_batch(ctx);352struct d3d12_resource *res = (struct d3d12_resource *)q->buffer;353ID3D12Resource *d3d12_res = d3d12_resource_underlying(res, &offset);354355/* End subquery first so that we can use fence value from parent */356if (q->subquery)357end_query(ctx, q->subquery);358359/* With QUERY_TIME_ELAPSED we have recorded one value at360* (2 * q->curr_query), and now we record a value at (2 * q->curr_query + 1)361* and when resolving the query we subtract the latter from the former */362363unsigned resolve_count = q->type == PIPE_QUERY_TIME_ELAPSED ? 2 : 1;364unsigned resolve_index = resolve_count * q->curr_query;365unsigned end_index = resolve_index + resolve_count - 1;366367offset += q->buffer_offset + resolve_index * q->query_size;368ctx->cmdlist->EndQuery(q->query_heap, q->d3d12qtype, end_index);369d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_BIND_INVALIDATE_FULL);370d3d12_apply_resource_states(ctx);371ctx->cmdlist->ResolveQueryData(q->query_heap, q->d3d12qtype, resolve_index,372resolve_count, d3d12_res, offset);373374d3d12_batch_reference_object(batch, q->query_heap);375d3d12_batch_reference_resource(batch, res);376377assert(q->curr_query < q->num_queries);378q->curr_query++;379}380381static bool382d3d12_end_query(struct pipe_context *pctx,383struct pipe_query *q)384{385struct d3d12_context *ctx = d3d12_context(pctx);386struct d3d12_query *query = (struct d3d12_query *)q;387388end_query(ctx, query);389390if (query->type != PIPE_QUERY_TIMESTAMP &&391query->type != PIPE_QUERY_TIME_ELAPSED)392list_delinit(&query->active_list);393394query->fence_value = ctx->fence_value;395return true;396}397398static bool399d3d12_get_query_result(struct pipe_context *pctx,400struct pipe_query *q,401bool wait,402union pipe_query_result *result)403{404struct d3d12_context *ctx = d3d12_context(pctx);405struct d3d12_query *query = (struct d3d12_query *)q;406407if (ctx->cmdqueue_fence->GetCompletedValue() < query->fence_value) {408if (!wait)409return false;410d3d12_flush_cmdlist_and_wait(ctx);411}412413return accumulate_result(ctx, query, result, false);414}415416void417d3d12_suspend_queries(struct d3d12_context *ctx)418{419list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) {420end_query(ctx, query);421}422}423424void425d3d12_resume_queries(struct d3d12_context *ctx)426{427list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) {428begin_query(ctx, query, false);429}430}431432void433d3d12_validate_queries(struct d3d12_context *ctx)434{435bool have_xfb = !!ctx->gfx_pipeline_state.num_so_targets;436437list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) {438if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED && !have_xfb && !query->subquery) {439struct pipe_query *subquery = d3d12_create_query(&ctx->base, PIPE_QUERY_PIPELINE_STATISTICS, 0);440query->subquery = (struct d3d12_query *)subquery;441if (!ctx->queries_disabled)442begin_query(ctx, query->subquery, true);443}444}445}446447static void448d3d12_set_active_query_state(struct pipe_context *pctx, bool enable)449{450struct d3d12_context *ctx = d3d12_context(pctx);451ctx->queries_disabled = !enable;452453if (enable)454d3d12_resume_queries(ctx);455else456d3d12_suspend_queries(ctx);457}458459static void460d3d12_render_condition(struct pipe_context *pctx,461struct pipe_query *pquery,462bool condition,463enum pipe_render_cond_flag mode)464{465struct d3d12_context *ctx = d3d12_context(pctx);466struct d3d12_query *query = (struct d3d12_query *)pquery;467468if (query == nullptr) {469ctx->cmdlist->SetPredication(nullptr, 0, D3D12_PREDICATION_OP_EQUAL_ZERO);470ctx->current_predication = nullptr;471return;472}473474if (!query->predicate)475query->predicate = d3d12_resource(pipe_buffer_create(pctx->screen, 0,476PIPE_USAGE_DEFAULT, sizeof(uint64_t)));477478if (mode == PIPE_RENDER_COND_WAIT) {479d3d12_flush_cmdlist_and_wait(ctx);480union pipe_query_result result;481accumulate_result(ctx, (d3d12_query *)pquery, &result, true);482}483484struct d3d12_resource *res = (struct d3d12_resource *)query->buffer;485d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_BIND_INVALIDATE_FULL);486d3d12_transition_resource_state(ctx, query->predicate, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_BIND_INVALIDATE_NONE);487d3d12_apply_resource_states(ctx);488ctx->cmdlist->CopyBufferRegion(d3d12_resource_resource(query->predicate), 0,489d3d12_resource_resource(res), 0,490sizeof(uint64_t));491492d3d12_transition_resource_state(ctx, query->predicate, D3D12_RESOURCE_STATE_PREDICATION, D3D12_BIND_INVALIDATE_NONE);493d3d12_apply_resource_states(ctx);494495ctx->current_predication = query->predicate;496/* documentation of ID3D12GraphicsCommandList::SetPredication method:497* "resource manipulation commands are _not_ actually performed498* if the resulting predicate data of the predicate is equal to499* the operation specified."500*/501ctx->cmdlist->SetPredication(d3d12_resource_resource(query->predicate), 0,502condition ? D3D12_PREDICATION_OP_NOT_EQUAL_ZERO :503D3D12_PREDICATION_OP_EQUAL_ZERO);504}505506void507d3d12_context_query_init(struct pipe_context *pctx)508{509struct d3d12_context *ctx = d3d12_context(pctx);510list_inithead(&ctx->active_queries);511512u_suballocator_init(&ctx->query_allocator, &ctx->base, 4096, 0, PIPE_USAGE_STAGING,5130, true);514515pctx->create_query = d3d12_create_query;516pctx->destroy_query = d3d12_destroy_query;517pctx->begin_query = d3d12_begin_query;518pctx->end_query = d3d12_end_query;519pctx->get_query_result = d3d12_get_query_result;520pctx->set_active_query_state = d3d12_set_active_query_state;521pctx->render_condition = d3d12_render_condition;522}523524525