/* Path: blob/21.2-virgl/src/gallium/drivers/iris/iris_query.c */
/*1* Copyright © 2017 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included11* in all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS14* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER17* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING18* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER19* DEALINGS IN THE SOFTWARE.20*/2122/**23* @file iris_query.c24*25* ============================= GENXML CODE =============================26* [This file is compiled once per generation.]27* =======================================================================28*29* Query object support. This allows measuring various simple statistics30* via counters on the GPU. 
We use GenX code for MI_MATH calculations.31*/3233#include <stdio.h>34#include <errno.h>35#include "pipe/p_defines.h"36#include "pipe/p_state.h"37#include "pipe/p_context.h"38#include "pipe/p_screen.h"39#include "util/u_inlines.h"40#include "util/u_upload_mgr.h"41#include "iris_context.h"42#include "iris_defines.h"43#include "iris_fence.h"44#include "iris_monitor.h"45#include "iris_resource.h"46#include "iris_screen.h"4748#include "iris_genx_macros.h"4950#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)51#define SO_NUM_PRIMS_WRITTEN(n) (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)5253struct iris_query {54struct threaded_query b;5556enum pipe_query_type type;57int index;5859bool ready;6061bool stalled;6263uint64_t result;6465struct iris_state_ref query_state_ref;66struct iris_query_snapshots *map;67struct iris_syncobj *syncobj;6869int batch_idx;7071struct iris_monitor_object *monitor;7273/* Fence for PIPE_QUERY_GPU_FINISHED. */74struct pipe_fence_handle *fence;75};7677struct iris_query_snapshots {78/** iris_render_condition's saved MI_PREDICATE_RESULT value. */79uint64_t predicate_result;8081/** Have the start/end snapshots landed? 
*/82uint64_t snapshots_landed;8384/** Starting and ending counter snapshots */85uint64_t start;86uint64_t end;87};8889struct iris_query_so_overflow {90uint64_t predicate_result;91uint64_t snapshots_landed;9293struct {94uint64_t prim_storage_needed[2];95uint64_t num_prims[2];96} stream[4];97};9899static struct mi_value100query_mem64(struct iris_query *q, uint32_t offset)101{102struct iris_address addr = {103.bo = iris_resource_bo(q->query_state_ref.res),104.offset = q->query_state_ref.offset + offset,105.access = IRIS_DOMAIN_OTHER_WRITE106};107return mi_mem64(addr);108}109110/**111* Is this type of query written by PIPE_CONTROL?112*/113static bool114iris_is_query_pipelined(struct iris_query *q)115{116switch (q->type) {117case PIPE_QUERY_OCCLUSION_COUNTER:118case PIPE_QUERY_OCCLUSION_PREDICATE:119case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:120case PIPE_QUERY_TIMESTAMP:121case PIPE_QUERY_TIMESTAMP_DISJOINT:122case PIPE_QUERY_TIME_ELAPSED:123return true;124125default:126return false;127}128}129130static void131mark_available(struct iris_context *ice, struct iris_query *q)132{133struct iris_batch *batch = &ice->batches[q->batch_idx];134unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;135unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);136struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);137offset += q->query_state_ref.offset;138139if (!iris_is_query_pipelined(q)) {140batch->screen->vtbl.store_data_imm64(batch, bo, offset, true);141} else {142/* Order available *after* the query results. 
*/143flags |= PIPE_CONTROL_FLUSH_ENABLE;144iris_emit_pipe_control_write(batch, "query: mark available",145flags, bo, offset, true);146}147}148149/**150* Write PS_DEPTH_COUNT to q->(dest) via a PIPE_CONTROL.151*/152static void153iris_pipelined_write(struct iris_batch *batch,154struct iris_query *q,155enum pipe_control_flags flags,156unsigned offset)157{158const struct intel_device_info *devinfo = &batch->screen->devinfo;159const unsigned optional_cs_stall =160GFX_VER == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;161struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);162163iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",164flags | optional_cs_stall,165bo, offset, 0ull);166}167168static void169write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)170{171struct iris_batch *batch = &ice->batches[q->batch_idx];172struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);173174if (!iris_is_query_pipelined(q)) {175iris_emit_pipe_control_flush(batch,176"query: non-pipelined snapshot write",177PIPE_CONTROL_CS_STALL |178PIPE_CONTROL_STALL_AT_SCOREBOARD);179q->stalled = true;180}181182switch (q->type) {183case PIPE_QUERY_OCCLUSION_COUNTER:184case PIPE_QUERY_OCCLUSION_PREDICATE:185case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:186if (GFX_VER >= 10) {187/* "Driver must program PIPE_CONTROL with only Depth Stall Enable188* bit set prior to programming a PIPE_CONTROL with Write PS Depth189* Count sync operation."190*/191iris_emit_pipe_control_flush(batch,192"workaround: depth stall before writing "193"PS_DEPTH_COUNT",194PIPE_CONTROL_DEPTH_STALL);195}196iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,197PIPE_CONTROL_WRITE_DEPTH_COUNT |198PIPE_CONTROL_DEPTH_STALL,199offset);200break;201case PIPE_QUERY_TIME_ELAPSED:202case PIPE_QUERY_TIMESTAMP:203case PIPE_QUERY_TIMESTAMP_DISJOINT:204iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,205PIPE_CONTROL_WRITE_TIMESTAMP,206offset);207break;208case 
PIPE_QUERY_PRIMITIVES_GENERATED:209batch->screen->vtbl.store_register_mem64(batch,210q->index == 0 ?211GENX(CL_INVOCATION_COUNT_num) :212SO_PRIM_STORAGE_NEEDED(q->index),213bo, offset, false);214break;215case PIPE_QUERY_PRIMITIVES_EMITTED:216batch->screen->vtbl.store_register_mem64(batch,217SO_NUM_PRIMS_WRITTEN(q->index),218bo, offset, false);219break;220case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {221static const uint32_t index_to_reg[] = {222GENX(IA_VERTICES_COUNT_num),223GENX(IA_PRIMITIVES_COUNT_num),224GENX(VS_INVOCATION_COUNT_num),225GENX(GS_INVOCATION_COUNT_num),226GENX(GS_PRIMITIVES_COUNT_num),227GENX(CL_INVOCATION_COUNT_num),228GENX(CL_PRIMITIVES_COUNT_num),229GENX(PS_INVOCATION_COUNT_num),230GENX(HS_INVOCATION_COUNT_num),231GENX(DS_INVOCATION_COUNT_num),232GENX(CS_INVOCATION_COUNT_num),233};234const uint32_t reg = index_to_reg[q->index];235236batch->screen->vtbl.store_register_mem64(batch, reg, bo, offset, false);237break;238}239default:240assert(false);241}242}243244static void245write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)246{247struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];248uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 
1 : 4;249struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);250uint32_t offset = q->query_state_ref.offset;251252iris_emit_pipe_control_flush(batch,253"query: write SO overflow snapshots",254PIPE_CONTROL_CS_STALL |255PIPE_CONTROL_STALL_AT_SCOREBOARD);256for (uint32_t i = 0; i < count; i++) {257int s = q->index + i;258int g_idx = offset + offsetof(struct iris_query_so_overflow,259stream[s].num_prims[end]);260int w_idx = offset + offsetof(struct iris_query_so_overflow,261stream[s].prim_storage_needed[end]);262batch->screen->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),263bo, g_idx, false);264batch->screen->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),265bo, w_idx, false);266}267}268269static uint64_t270iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)271{272if (time0 > time1) {273return (1ULL << TIMESTAMP_BITS) + time1 - time0;274} else {275return time1 - time0;276}277}278279static bool280stream_overflowed(struct iris_query_so_overflow *so, int s)281{282return (so->stream[s].prim_storage_needed[1] -283so->stream[s].prim_storage_needed[0]) !=284(so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);285}286287static void288calculate_result_on_cpu(const struct intel_device_info *devinfo,289struct iris_query *q)290{291switch (q->type) {292case PIPE_QUERY_OCCLUSION_PREDICATE:293case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:294q->result = q->map->end != q->map->start;295break;296case PIPE_QUERY_TIMESTAMP:297case PIPE_QUERY_TIMESTAMP_DISJOINT:298/* The timestamp is the single starting snapshot. 
*/299q->result = intel_device_info_timebase_scale(devinfo, q->map->start);300q->result &= (1ull << TIMESTAMP_BITS) - 1;301break;302case PIPE_QUERY_TIME_ELAPSED:303q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);304q->result = intel_device_info_timebase_scale(devinfo, q->result);305q->result &= (1ull << TIMESTAMP_BITS) - 1;306break;307case PIPE_QUERY_SO_OVERFLOW_PREDICATE:308q->result = stream_overflowed((void *) q->map, q->index);309break;310case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:311q->result = false;312for (int i = 0; i < MAX_VERTEX_STREAMS; i++)313q->result |= stream_overflowed((void *) q->map, i);314break;315case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:316q->result = q->map->end - q->map->start;317318/* WaDividePSInvocationCountBy4:HSW,BDW */319if (GFX_VER == 8 && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)320q->result /= 4;321break;322case PIPE_QUERY_OCCLUSION_COUNTER:323case PIPE_QUERY_PRIMITIVES_GENERATED:324case PIPE_QUERY_PRIMITIVES_EMITTED:325default:326q->result = q->map->end - q->map->start;327break;328}329330q->ready = true;331}332333/**334* Calculate the streamout overflow for stream \p idx:335*336* (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])337*/338static struct mi_value339calc_overflow_for_stream(struct mi_builder *b,340struct iris_query *q,341int idx)342{343#define C(counter, i) query_mem64(q, \344offsetof(struct iris_query_so_overflow, stream[idx].counter[i]))345346return mi_isub(b, mi_isub(b, C(num_prims, 1), C(num_prims, 0)),347mi_isub(b, C(prim_storage_needed, 1),348C(prim_storage_needed, 0)));349#undef C350}351352/**353* Calculate whether any stream has overflowed.354*/355static struct mi_value356calc_overflow_any_stream(struct mi_builder *b, struct iris_query *q)357{358struct mi_value stream_result[MAX_VERTEX_STREAMS];359for (int i = 0; i < MAX_VERTEX_STREAMS; i++)360stream_result[i] = calc_overflow_for_stream(b, q, i);361362struct mi_value result = stream_result[0];363for (int i = 1; i < 
MAX_VERTEX_STREAMS; i++)364result = mi_ior(b, result, stream_result[i]);365366return result;367}368369static bool370query_is_boolean(enum pipe_query_type type)371{372switch (type) {373case PIPE_QUERY_OCCLUSION_PREDICATE:374case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:375case PIPE_QUERY_SO_OVERFLOW_PREDICATE:376case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:377return true;378default:379return false;380}381}382383/**384* Calculate the result using MI_MATH.385*/386static struct mi_value387calculate_result_on_gpu(const struct intel_device_info *devinfo,388struct mi_builder *b,389struct iris_query *q)390{391struct mi_value result;392struct mi_value start_val =393query_mem64(q, offsetof(struct iris_query_snapshots, start));394struct mi_value end_val =395query_mem64(q, offsetof(struct iris_query_snapshots, end));396397switch (q->type) {398case PIPE_QUERY_SO_OVERFLOW_PREDICATE:399result = calc_overflow_for_stream(b, q, q->index);400break;401case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:402result = calc_overflow_any_stream(b, q);403break;404case PIPE_QUERY_TIMESTAMP: {405/* TODO: This discards any fractional bits of the timebase scale.406* We would need to do a bit of fixed point math on the CS ALU, or407* launch an actual shader to calculate this with full precision.408*/409uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;410result = mi_iand(b, mi_imm((1ull << 36) - 1),411mi_imul_imm(b, start_val, scale));412break;413}414case PIPE_QUERY_TIME_ELAPSED: {415/* TODO: This discards fractional bits (see above). 
*/416uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;417result = mi_imul_imm(b, mi_isub(b, end_val, start_val), scale);418break;419}420default:421result = mi_isub(b, end_val, start_val);422break;423}424425/* WaDividePSInvocationCountBy4:HSW,BDW */426if (GFX_VER == 8 &&427q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&428q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)429result = mi_ushr32_imm(b, result, 2);430431if (query_is_boolean(q->type))432result = mi_iand(b, mi_nz(b, result), mi_imm(1));433434return result;435}436437static struct pipe_query *438iris_create_query(struct pipe_context *ctx,439unsigned query_type,440unsigned index)441{442struct iris_query *q = calloc(1, sizeof(struct iris_query));443444q->type = query_type;445q->index = index;446q->monitor = NULL;447448if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&449q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)450q->batch_idx = IRIS_BATCH_COMPUTE;451else452q->batch_idx = IRIS_BATCH_RENDER;453return (struct pipe_query *) q;454}455456static struct pipe_query *457iris_create_batch_query(struct pipe_context *ctx,458unsigned num_queries,459unsigned *query_types)460{461struct iris_context *ice = (void *) ctx;462struct iris_query *q = calloc(1, sizeof(struct iris_query));463if (unlikely(!q))464return NULL;465q->type = PIPE_QUERY_DRIVER_SPECIFIC;466q->index = -1;467q->monitor = iris_create_monitor_object(ice, num_queries, query_types);468if (unlikely(!q->monitor)) {469free(q);470return NULL;471}472473return (struct pipe_query *) q;474}475476static void477iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)478{479struct iris_query *query = (void *) p_query;480struct iris_screen *screen = (void *) ctx->screen;481if (query->monitor) {482iris_destroy_monitor_object(ctx, query->monitor);483query->monitor = NULL;484} else {485iris_syncobj_reference(screen, &query->syncobj, NULL);486screen->base.fence_reference(ctx->screen, &query->fence, 
NULL);487}488pipe_resource_reference(&query->query_state_ref.res, NULL);489free(query);490}491492493static bool494iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)495{496struct iris_context *ice = (void *) ctx;497struct iris_query *q = (void *) query;498499if (q->monitor)500return iris_begin_monitor(ctx, q->monitor);501502void *ptr = NULL;503uint32_t size;504505if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||506q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)507size = sizeof(struct iris_query_so_overflow);508else509size = sizeof(struct iris_query_snapshots);510511u_upload_alloc(ice->query_buffer_uploader, 0,512size, size, &q->query_state_ref.offset,513&q->query_state_ref.res, &ptr);514515if (!iris_resource_bo(q->query_state_ref.res))516return false;517518q->map = ptr;519if (!q->map)520return false;521522q->result = 0ull;523q->ready = false;524WRITE_ONCE(q->map->snapshots_landed, false);525526if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {527ice->state.prims_generated_query_active = true;528ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;529}530531if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||532q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)533write_overflow_values(ice, q, false);534else535write_value(ice, q,536q->query_state_ref.offset +537offsetof(struct iris_query_snapshots, start));538539return true;540}541542static bool543iris_end_query(struct pipe_context *ctx, struct pipe_query *query)544{545struct iris_context *ice = (void *) ctx;546struct iris_query *q = (void *) query;547548if (q->monitor)549return iris_end_monitor(ctx, q->monitor);550551if (q->type == PIPE_QUERY_GPU_FINISHED) {552ctx->flush(ctx, &q->fence, PIPE_FLUSH_DEFERRED);553return true;554}555556struct iris_batch *batch = &ice->batches[q->batch_idx];557558if (q->type == PIPE_QUERY_TIMESTAMP) {559iris_begin_query(ctx, query);560iris_batch_reference_signal_syncobj(batch, &q->syncobj);561mark_available(ice, q);562return true;563}564565if (q->type 
== PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {566ice->state.prims_generated_query_active = false;567ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;568}569570if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||571q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)572write_overflow_values(ice, q, true);573else574write_value(ice, q,575q->query_state_ref.offset +576offsetof(struct iris_query_snapshots, end));577578iris_batch_reference_signal_syncobj(batch, &q->syncobj);579mark_available(ice, q);580581return true;582}583584/**585* See if the snapshots have landed for a query, and if so, compute the586* result and mark it ready. Does not flush (unlike iris_get_query_result).587*/588static void589iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)590{591struct iris_screen *screen = (void *) ice->ctx.screen;592const struct intel_device_info *devinfo = &screen->devinfo;593594if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {595calculate_result_on_cpu(devinfo, q);596}597}598599static bool600iris_get_query_result(struct pipe_context *ctx,601struct pipe_query *query,602bool wait,603union pipe_query_result *result)604{605struct iris_context *ice = (void *) ctx;606struct iris_query *q = (void *) query;607608if (q->monitor)609return iris_get_monitor_result(ctx, q->monitor, wait, result->batch);610611struct iris_screen *screen = (void *) ctx->screen;612const struct intel_device_info *devinfo = &screen->devinfo;613614if (unlikely(screen->no_hw)) {615result->u64 = 0;616return true;617}618619if (q->type == PIPE_QUERY_GPU_FINISHED) {620struct pipe_screen *screen = ctx->screen;621622result->b = screen->fence_finish(screen, ctx, q->fence,623wait ? 
PIPE_TIMEOUT_INFINITE : 0);624return result->b;625}626627if (!q->ready) {628struct iris_batch *batch = &ice->batches[q->batch_idx];629if (q->syncobj == iris_batch_get_signal_syncobj(batch))630iris_batch_flush(batch);631632while (!READ_ONCE(q->map->snapshots_landed)) {633if (wait)634iris_wait_syncobj(ctx->screen, q->syncobj, INT64_MAX);635else636return false;637}638639assert(READ_ONCE(q->map->snapshots_landed));640calculate_result_on_cpu(devinfo, q);641}642643assert(q->ready);644645result->u64 = q->result;646647return true;648}649650static void651iris_get_query_result_resource(struct pipe_context *ctx,652struct pipe_query *query,653bool wait,654enum pipe_query_value_type result_type,655int index,656struct pipe_resource *p_res,657unsigned offset)658{659struct iris_context *ice = (void *) ctx;660struct iris_query *q = (void *) query;661struct iris_batch *batch = &ice->batches[q->batch_idx];662const struct intel_device_info *devinfo = &batch->screen->devinfo;663struct iris_resource *res = (void *) p_res;664struct iris_bo *query_bo = iris_resource_bo(q->query_state_ref.res);665struct iris_bo *dst_bo = iris_resource_bo(p_res);666unsigned snapshots_landed_offset =667offsetof(struct iris_query_snapshots, snapshots_landed);668669res->bind_history |= PIPE_BIND_QUERY_BUFFER;670671if (index == -1) {672/* They're asking for the availability of the result. If we still673* have commands queued up which produce the result, submit them674* now so that progress happens. Either way, copy the snapshots675* landed field to the destination resource.676*/677if (q->syncobj == iris_batch_get_signal_syncobj(batch))678iris_batch_flush(batch);679680batch->screen->vtbl.copy_mem_mem(batch, dst_bo, offset,681query_bo, snapshots_landed_offset,682result_type <= PIPE_QUERY_TYPE_U32 ? 
4 : 8);683return;684}685686if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {687/* The final snapshots happen to have landed, so let's just compute688* the result on the CPU now...689*/690calculate_result_on_cpu(devinfo, q);691}692693if (q->ready) {694/* We happen to have the result on the CPU, so just copy it. */695if (result_type <= PIPE_QUERY_TYPE_U32) {696batch->screen->vtbl.store_data_imm32(batch, dst_bo, offset, q->result);697} else {698batch->screen->vtbl.store_data_imm64(batch, dst_bo, offset, q->result);699}700701/* Make sure the result lands before they use bind the QBO elsewhere702* and use the result.703*/704// XXX: Why? i965 doesn't do this.705iris_emit_pipe_control_flush(batch,706"query: unknown QBO flushing hack",707PIPE_CONTROL_CS_STALL);708return;709}710711bool predicated = !wait && !q->stalled;712713struct mi_builder b;714mi_builder_init(&b, &batch->screen->devinfo, batch);715716iris_batch_sync_region_start(batch);717718struct mi_value result = calculate_result_on_gpu(devinfo, &b, q);719struct mi_value dst =720result_type <= PIPE_QUERY_TYPE_U32 ?721mi_mem32(rw_bo(dst_bo, offset, IRIS_DOMAIN_OTHER_WRITE)) :722mi_mem64(rw_bo(dst_bo, offset, IRIS_DOMAIN_OTHER_WRITE));723724if (predicated) {725mi_store(&b, mi_reg32(MI_PREDICATE_RESULT),726mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));727mi_store_if(&b, dst, result);728} else {729mi_store(&b, dst, result);730}731732iris_batch_sync_region_end(batch);733}734735static void736iris_set_active_query_state(struct pipe_context *ctx, bool enable)737{738struct iris_context *ice = (void *) ctx;739740if (ice->state.statistics_counters_enabled == enable)741return;742743// XXX: most packets aren't paying attention to this yet, because it'd744// have to be done dynamically at draw time, which is a pain745ice->state.statistics_counters_enabled = enable;746ice->state.dirty |= IRIS_DIRTY_CLIP |747IRIS_DIRTY_RASTER |748IRIS_DIRTY_STREAMOUT |749IRIS_DIRTY_WM;750ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS 
|751IRIS_STAGE_DIRTY_TCS |752IRIS_STAGE_DIRTY_TES |753IRIS_STAGE_DIRTY_VS;754}755756static void757set_predicate_enable(struct iris_context *ice, bool value)758{759if (value)760ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;761else762ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;763}764765static void766set_predicate_for_result(struct iris_context *ice,767struct iris_query *q,768bool inverted)769{770struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];771struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);772773iris_batch_sync_region_start(batch);774775/* The CPU doesn't have the query result yet; use hardware predication */776ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;777778/* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */779iris_emit_pipe_control_flush(batch,780"conditional rendering: set predicate",781PIPE_CONTROL_FLUSH_ENABLE);782q->stalled = true;783784struct mi_builder b;785mi_builder_init(&b, &batch->screen->devinfo, batch);786787struct mi_value result;788789switch (q->type) {790case PIPE_QUERY_SO_OVERFLOW_PREDICATE:791result = calc_overflow_for_stream(&b, q, q->index);792break;793case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:794result = calc_overflow_any_stream(&b, q);795break;796default: {797/* PIPE_QUERY_OCCLUSION_* */798struct mi_value start =799query_mem64(q, offsetof(struct iris_query_snapshots, start));800struct mi_value end =801query_mem64(q, offsetof(struct iris_query_snapshots, end));802result = mi_isub(&b, end, start);803break;804}805}806807result = inverted ? mi_z(&b, result) : mi_nz(&b, result);808result = mi_iand(&b, result, mi_imm(1));809810/* We immediately set the predicate on the render batch, as all the811* counters come from 3D operations. However, we may need to predicate812* a compute dispatch, which executes in a different GEM context and has813* a different MI_PREDICATE_RESULT register. 
So, we save the result to814* memory and reload it in iris_launch_grid.815*/816mi_value_ref(&b, result);817mi_store(&b, mi_reg32(MI_PREDICATE_RESULT), result);818mi_store(&b, query_mem64(q, offsetof(struct iris_query_snapshots,819predicate_result)), result);820ice->state.compute_predicate = bo;821822iris_batch_sync_region_end(batch);823}824825static void826iris_render_condition(struct pipe_context *ctx,827struct pipe_query *query,828bool condition,829enum pipe_render_cond_flag mode)830{831struct iris_context *ice = (void *) ctx;832struct iris_query *q = (void *) query;833834/* The old condition isn't relevant; we'll update it if necessary */835ice->state.compute_predicate = NULL;836837if (!q) {838ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;839return;840}841842iris_check_query_no_flush(ice, q);843844if (q->result || q->ready) {845set_predicate_enable(ice, (q->result != 0) ^ condition);846} else {847if (mode == PIPE_RENDER_COND_NO_WAIT ||848mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {849perf_debug(&ice->dbg, "Conditional rendering demoted from "850"\"no wait\" to \"wait\".");851}852set_predicate_for_result(ice, q, condition);853}854}855856void857genX(init_query)(struct iris_context *ice)858{859struct pipe_context *ctx = &ice->ctx;860861ctx->create_query = iris_create_query;862ctx->create_batch_query = iris_create_batch_query;863ctx->destroy_query = iris_destroy_query;864ctx->begin_query = iris_begin_query;865ctx->end_query = iris_end_query;866ctx->get_query_result = iris_get_query_result;867ctx->get_query_result_resource = iris_get_query_result_resource;868ctx->set_active_query_state = iris_set_active_query_state;869ctx->render_condition = iris_render_condition;870}871872873