Path: blob/21.2-virgl/src/gallium/drivers/radeonsi/si_build_pm4.h
4570 views
/*1* Copyright 2013 Advanced Micro Devices, Inc.2* All Rights Reserved.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* on the rights to use, copy, modify, merge, publish, distribute, sub8* license, and/or sell copies of the Software, and to permit persons to whom9* the Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL18* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,19* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR20* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE21* USE OR OTHER DEALINGS IN THE SOFTWARE.22*/2324/**25* This file contains helpers for writing commands to commands streams.26*/2728#ifndef SI_BUILD_PM4_H29#define SI_BUILD_PM4_H3031#include "si_pipe.h"32#include "sid.h"3334#if 035#include "ac_shadowed_regs.h"36#define SI_CHECK_SHADOWED_REGS(reg_offset, count) ac_check_shadowed_regs(GFX10, CHIP_NAVI14, reg_offset, count)37#else38#define SI_CHECK_SHADOWED_REGS(reg_offset, count)39#endif4041#define radeon_begin(cs) struct radeon_cmdbuf *__cs = (cs); \42unsigned __cs_num = __cs->current.cdw; \43UNUSED unsigned __cs_num_initial = __cs_num; \44uint32_t *__cs_buf = __cs->current.buf4546#define radeon_begin_again(cs) do { \47assert(__cs == NULL); \48__cs = (cs); \49__cs_num = __cs->current.cdw; \50__cs_num_initial = __cs_num; \51__cs_buf = __cs->current.buf; \52} while (0)5354#define radeon_end() do { \55__cs->current.cdw = __cs_num; \56assert(__cs->current.cdw <= __cs->current.max_dw); \57__cs = NULL; \58} while (0)5960#define radeon_emit(cs, value) __cs_buf[__cs_num++] = (value)61#define radeon_packets_added() (__cs_num != __cs_num_initial)6263#define radeon_end_update_context_roll(sctx) do { \64radeon_end(); \65if (radeon_packets_added()) \66(sctx)->context_roll = true; \67} while (0)6869#define radeon_emit_array(cs, values, num) do { \70unsigned __n = (num); \71memcpy(__cs_buf + __cs_num, (values), __n * 4); \72__cs_num += __n; \73} while (0)7475#define radeon_set_config_reg_seq(cs, reg, num) do { \76SI_CHECK_SHADOWED_REGS(reg, num); \77assert((reg) < SI_CONTEXT_REG_OFFSET); \78radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0)); \79radeon_emit(cs, ((reg) - SI_CONFIG_REG_OFFSET) >> 2); \80} while (0)8182#define radeon_set_config_reg(cs, reg, value) do { \83radeon_set_config_reg_seq(cs, reg, 1); \84radeon_emit(cs, value); \85} while (0)8687#define radeon_set_context_reg_seq(cs, reg, num) do { \88SI_CHECK_SHADOWED_REGS(reg, num); \89assert((reg) >= SI_CONTEXT_REG_OFFSET); \90radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0)); \91radeon_emit(cs, ((reg) - SI_CONTEXT_REG_OFFSET) >> 2); \92} while (0)9394#define radeon_set_context_reg(cs, reg, value) do { \95radeon_set_context_reg_seq(cs, reg, 1); \96radeon_emit(cs, value); \97} while (0)9899#define radeon_set_context_reg_seq_array(cs, reg, num, values) do { \100radeon_set_context_reg_seq(cs, reg, num); \101radeon_emit_array(cs, values, num); \102} while (0)103104#define radeon_set_context_reg_idx(cs, reg, idx, value) do { \105SI_CHECK_SHADOWED_REGS(reg, 1); \106assert((reg) >= SI_CONTEXT_REG_OFFSET); \107radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); \108radeon_emit(cs, ((reg) - SI_CONTEXT_REG_OFFSET) >> 2 | ((idx) << 28)); \109radeon_emit(cs, value); \110} while (0)111112#define radeon_set_sh_reg_seq(cs, reg, num) do { \113SI_CHECK_SHADOWED_REGS(reg, num); \114assert((reg) >= SI_SH_REG_OFFSET && (reg) < SI_SH_REG_END); \115radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0)); \116radeon_emit(cs, ((reg) - SI_SH_REG_OFFSET) >> 2); \117} while (0)118119#define radeon_set_sh_reg(cs, reg, value) do { \120radeon_set_sh_reg_seq(cs, reg, 1); \121radeon_emit(cs, value); \122} while (0)123124#define radeon_set_uconfig_reg_seq(cs, reg, num, perfctr) do { \125SI_CHECK_SHADOWED_REGS(reg, num); \126assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \127radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, perfctr)); \128radeon_emit(cs, ((reg) - CIK_UCONFIG_REG_OFFSET) >> 2); \129} while (0)130131#define radeon_set_uconfig_reg(cs, reg, value) do { \132radeon_set_uconfig_reg_seq(cs, reg, 1, false); \133radeon_emit(cs, value); \134} while (0)135136#define radeon_set_uconfig_reg_perfctr(cs, reg, value) do { \137radeon_set_uconfig_reg_seq(cs, reg, 1, true); \138radeon_emit(cs, value); \139} while (0)140141#define radeon_set_uconfig_reg_idx(cs, screen, chip_class, reg, idx, value) do { \142SI_CHECK_SHADOWED_REGS(reg, 1); \143assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \144assert((idx) != 0); \145unsigned __opcode = PKT3_SET_UCONFIG_REG_INDEX; \146if ((chip_class) < GFX9 || \147((chip_class) == GFX9 && (screen)->info.me_fw_version < 26)) \148__opcode = PKT3_SET_UCONFIG_REG; \149radeon_emit(cs, PKT3(__opcode, 1, 0)); \150radeon_emit(cs, ((reg) - CIK_UCONFIG_REG_OFFSET) >> 2 | ((idx) << 28)); \151radeon_emit(cs, value); \152} while (0)153154#define radeon_set_context_reg_rmw(cs, reg, value, mask) do { \155SI_CHECK_SHADOWED_REGS(reg, 1); \156assert((reg) >= SI_CONTEXT_REG_OFFSET); \157radeon_emit(cs, PKT3(PKT3_CONTEXT_REG_RMW, 2, 0)); \158radeon_emit(cs, ((reg) - SI_CONTEXT_REG_OFFSET) >> 2); \159radeon_emit(cs, mask); \160radeon_emit(cs, value); \161} while (0)162163/* Emit PKT3_CONTEXT_REG_RMW if the register value is different. */164#define radeon_opt_set_context_reg_rmw(sctx, offset, reg, val, mask) do { \165unsigned __value = (val); \166assert((__value & ~mask) == 0); \167__value &= mask; \168if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \169sctx->tracked_regs.reg_value[reg] != __value) { \170radeon_set_context_reg_rmw(&sctx->gfx_cs, offset, __value, mask); \171sctx->tracked_regs.reg_saved |= 0x1ull << (reg); \172sctx->tracked_regs.reg_value[reg] = __value; \173} \174} while (0)175176/* Emit PKT3_SET_CONTEXT_REG if the register value is different. */177#define radeon_opt_set_context_reg(sctx, offset, reg, val) do { \178unsigned __value = val; \179if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \180sctx->tracked_regs.reg_value[reg] != __value) { \181radeon_set_context_reg(&sctx->gfx_cs, offset, __value); \182sctx->tracked_regs.reg_saved |= 0x1ull << (reg); \183sctx->tracked_regs.reg_value[reg] = __value; \184} \185} while (0)186187/**188* Set 2 consecutive registers if any registers value is different.189* @param offset starting register offset190* @param val1 is written to first register191* @param val2 is written to second register192*/193#define radeon_opt_set_context_reg2(sctx, offset, reg, val1, val2) do { \194unsigned __value1 = (val1), __value2 = (val2); \195if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x3) != 0x3 || \196sctx->tracked_regs.reg_value[reg] != __value1 || \197sctx->tracked_regs.reg_value[(reg) + 1] != __value2) { \198radeon_set_context_reg_seq(&sctx->gfx_cs, offset, 2); \199radeon_emit(cs, __value1); \200radeon_emit(cs, __value2); \201sctx->tracked_regs.reg_value[reg] = __value1; \202sctx->tracked_regs.reg_value[(reg) + 1] = __value2; \203sctx->tracked_regs.reg_saved |= 0x3ull << (reg); \204} \205} while (0)206207/**208* Set 3 consecutive registers if any registers value is different.209*/210#define radeon_opt_set_context_reg3(sctx, offset, reg, val1, val2, val3) do { \211unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3); \212if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x7) != 0x7 || \213sctx->tracked_regs.reg_value[reg] != __value1 || \214sctx->tracked_regs.reg_value[(reg) + 1] != __value2 || \215sctx->tracked_regs.reg_value[(reg) + 2] != __value3) { \216radeon_set_context_reg_seq(&sctx->gfx_cs, offset, 3); \217radeon_emit(cs, __value1); \218radeon_emit(cs, __value2); \219radeon_emit(cs, __value3); \220sctx->tracked_regs.reg_value[reg] = __value1; \221sctx->tracked_regs.reg_value[(reg) + 1] = __value2; \222sctx->tracked_regs.reg_value[(reg) + 2] = __value3; \223sctx->tracked_regs.reg_saved |= 0x7ull << (reg); \224} \225} while (0)226227/**228* Set 4 consecutive registers if any registers value is different.229*/230#define radeon_opt_set_context_reg4(sctx, offset, reg, val1, val2, val3, val4) do { \231unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3), __value4 = (val4); \232if (((sctx->tracked_regs.reg_saved >> (reg)) & 0xf) != 0xf || \233sctx->tracked_regs.reg_value[reg] != __value1 || \234sctx->tracked_regs.reg_value[(reg) + 1] != __value2 || \235sctx->tracked_regs.reg_value[(reg) + 2] != __value3 || \236sctx->tracked_regs.reg_value[(reg) + 3] != __value4) { \237radeon_set_context_reg_seq(&sctx->gfx_cs, offset, 4); \238radeon_emit(cs, __value1); \239radeon_emit(cs, __value2); \240radeon_emit(cs, __value3); \241radeon_emit(cs, __value4); \242sctx->tracked_regs.reg_value[reg] = __value1; \243sctx->tracked_regs.reg_value[(reg) + 1] = __value2; \244sctx->tracked_regs.reg_value[(reg) + 2] = __value3; \245sctx->tracked_regs.reg_value[(reg) + 3] = __value4; \246sctx->tracked_regs.reg_saved |= 0xfull << (reg); \247} \248} while (0)249250/**251* Set consecutive registers if any registers value is different.252*/253#define radeon_opt_set_context_regn(sctx, offset, value, saved_val, num) do { \254for (unsigned i = 0; i < (num); i++) { \255if ((saved_val)[i] != (value)[i]) { \256radeon_set_context_reg_seq(&(sctx)->gfx_cs, offset, num); \257for (unsigned j = 0; j < (num); j++) \258radeon_emit(cs, value[j]); \259memcpy(saved_val, value, sizeof(uint32_t) * (num)); \260break; \261} \262} \263} while (0)264265#define radeon_set_privileged_config_reg(cs, reg, value) do { \266assert((reg) < CIK_UCONFIG_REG_OFFSET); \267radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); \268radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | \269COPY_DATA_DST_SEL(COPY_DATA_PERF)); \270radeon_emit(cs, value); \271radeon_emit(cs, 0); /* unused */ \272radeon_emit(cs, (reg) >> 2); \273radeon_emit(cs, 0); /* unused */ \274} while (0)275276#define radeon_emit_32bit_pointer(sscreen, cs, va) do { \277radeon_emit(cs, va); \278assert((va) == 0 || ((va) >> 32) == sscreen->info.address32_hi); \279} while (0)280281#define radeon_emit_one_32bit_pointer(sctx, desc, sh_base) do { \282unsigned sh_offset = (sh_base) + (desc)->shader_userdata_offset; \283radeon_set_sh_reg_seq(&sctx->gfx_cs, sh_offset, 1); \284radeon_emit_32bit_pointer(sctx->screen, cs, (desc)->gpu_address); \285} while (0)286287/* This should be evaluated at compile time if all parameters are constants. */288static ALWAYS_INLINE unsigned289si_get_user_data_base(enum chip_class chip_class, enum si_has_tess has_tess,290enum si_has_gs has_gs, enum si_has_ngg ngg,291enum pipe_shader_type shader)292{293switch (shader) {294case PIPE_SHADER_VERTEX:295/* VS can be bound as VS, ES, or LS. */296if (has_tess) {297if (chip_class >= GFX10) {298return R_00B430_SPI_SHADER_USER_DATA_HS_0;299} else if (chip_class == GFX9) {300return R_00B430_SPI_SHADER_USER_DATA_LS_0;301} else {302return R_00B530_SPI_SHADER_USER_DATA_LS_0;303}304} else if (chip_class >= GFX10) {305if (ngg || has_gs) {306return R_00B230_SPI_SHADER_USER_DATA_GS_0;307} else {308return R_00B130_SPI_SHADER_USER_DATA_VS_0;309}310} else if (has_gs) {311return R_00B330_SPI_SHADER_USER_DATA_ES_0;312} else {313return R_00B130_SPI_SHADER_USER_DATA_VS_0;314}315316case PIPE_SHADER_TESS_CTRL:317if (chip_class == GFX9) {318return R_00B430_SPI_SHADER_USER_DATA_LS_0;319} else {320return R_00B430_SPI_SHADER_USER_DATA_HS_0;321}322323case PIPE_SHADER_TESS_EVAL:324/* TES can be bound as ES, VS, or not bound. */325if (has_tess) {326if (chip_class >= GFX10) {327if (ngg || has_gs) {328return R_00B230_SPI_SHADER_USER_DATA_GS_0;329} else {330return R_00B130_SPI_SHADER_USER_DATA_VS_0;331}332} else if (has_gs) {333return R_00B330_SPI_SHADER_USER_DATA_ES_0;334} else {335return R_00B130_SPI_SHADER_USER_DATA_VS_0;336}337} else {338return 0;339}340341case PIPE_SHADER_GEOMETRY:342if (chip_class == GFX9) {343return R_00B330_SPI_SHADER_USER_DATA_ES_0;344} else {345return R_00B230_SPI_SHADER_USER_DATA_GS_0;346}347348default:349assert(0);350return 0;351}352}353354#endif355356357