Path: blob/21.2-virgl/src/gallium/drivers/radeonsi/si_state.c
4570 views
/*1* Copyright 2012 Advanced Micro Devices, Inc.2* All Rights Reserved.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* on the rights to use, copy, modify, merge, publish, distribute, sub8* license, and/or sell copies of the Software, and to permit persons to whom9* the Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL18* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,19* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR20* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE21* USE OR OTHER DEALINGS IN THE SOFTWARE.22*/2324#include "si_build_pm4.h"25#include "si_query.h"26#include "si_shader_internal.h"27#include "sid.h"28#include "util/fast_idiv_by_const.h"29#include "util/format/u_format.h"30#include "util/format/u_format_s3tc.h"31#include "util/u_dual_blend.h"32#include "util/u_memory.h"33#include "util/u_resource.h"34#include "util/u_upload_mgr.h"35#include "util/u_blend.h"3637#include "gfx10_format_table.h"3839static unsigned si_map_swizzle(unsigned swizzle)40{41switch (swizzle) {42case PIPE_SWIZZLE_Y:43return V_008F0C_SQ_SEL_Y;44case PIPE_SWIZZLE_Z:45return V_008F0C_SQ_SEL_Z;46case PIPE_SWIZZLE_W:47return V_008F0C_SQ_SEL_W;48case PIPE_SWIZZLE_0:49return V_008F0C_SQ_SEL_0;50case PIPE_SWIZZLE_1:51return V_008F0C_SQ_SEL_1;52default: /* PIPE_SWIZZLE_X */53return V_008F0C_SQ_SEL_X;54}55}5657/* 12.4 fixed-point 
*/58static unsigned si_pack_float_12p4(float x)59{60return x <= 0 ? 0 : x >= 4096 ? 0xffff : x * 16;61}6263/*64* Inferred framebuffer and blender state.65*66* CB_TARGET_MASK is emitted here to avoid a hang with dual source blending67* if there is not enough PS outputs.68*/69static void si_emit_cb_render_state(struct si_context *sctx)70{71struct radeon_cmdbuf *cs = &sctx->gfx_cs;72struct si_state_blend *blend = sctx->queued.named.blend;73/* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers,74* but you never know. */75uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit & blend->cb_target_mask;76unsigned i;7778/* Avoid a hang that happens when dual source blending is enabled79* but there is not enough color outputs. This is undefined behavior,80* so disable color writes completely.81*82* Reproducible with Unigine Heaven 4.0 and drirc missing.83*/84if (blend->dual_src_blend && sctx->shader.ps.cso &&85(sctx->shader.ps.cso->info.colors_written & 0x3) != 0x3)86cb_target_mask = 0;8788/* GFX9: Flush DFSM when CB_TARGET_MASK changes.89* I think we don't have to do anything between IBs.90*/91if (sctx->screen->dpbb_allowed && sctx->last_cb_target_mask != cb_target_mask) {92sctx->last_cb_target_mask = cb_target_mask;9394radeon_begin(cs);95radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));96radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));97radeon_end();98}99100radeon_begin(cs);101radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK,102cb_target_mask);103104if (sctx->chip_class >= GFX8) {105/* DCC MSAA workaround.106* Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_-107* COMBINER_DISABLE, but that would be more complicated.108*/109bool oc_disable =110blend->dcc_msaa_corruption_4bit & cb_target_mask && sctx->framebuffer.nr_samples >= 2;111unsigned watermark = sctx->framebuffer.dcc_overwrite_combiner_watermark;112113radeon_opt_set_context_reg(114sctx, R_028424_CB_DCC_CONTROL, 
SI_TRACKED_CB_DCC_CONTROL,115S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(sctx->chip_class <= GFX9) |116S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) |117S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable) |118S_028424_DISABLE_CONSTANT_ENCODE_REG(sctx->screen->info.has_dcc_constant_encode));119}120121/* RB+ register settings. */122if (sctx->screen->info.rbplus_allowed) {123unsigned spi_shader_col_format =124sctx->shader.ps.cso ? sctx->shader.ps.current->key.part.ps.epilog.spi_shader_col_format125: 0;126unsigned sx_ps_downconvert = 0;127unsigned sx_blend_opt_epsilon = 0;128unsigned sx_blend_opt_control = 0;129130for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {131struct si_surface *surf = (struct si_surface *)sctx->framebuffer.state.cbufs[i];132unsigned format, swap, spi_format, colormask;133bool has_alpha, has_rgb;134135if (!surf) {136/* If the color buffer is not set, the driver sets 32_R137* as the SPI color format, because the hw doesn't allow138* holes between color outputs, so also set this to139* enable RB+.140*/141sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);142continue;143}144145format = G_028C70_FORMAT(surf->cb_color_info);146swap = G_028C70_COMP_SWAP(surf->cb_color_info);147spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;148colormask = (cb_target_mask >> (i * 4)) & 0xf;149150/* Set if RGB and A are present. */151has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib);152153if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_16 ||154format == V_028C70_COLOR_32)155has_rgb = !has_alpha;156else157has_rgb = true;158159/* Check the colormask and export format. */160if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A)))161has_rgb = false;162if (!(colormask & PIPE_MASK_A))163has_alpha = false;164165if (spi_format == V_028714_SPI_SHADER_ZERO) {166has_rgb = false;167has_alpha = false;168}169170/* Disable value checking for disabled channels. 
*/171if (!has_rgb)172sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);173if (!has_alpha)174sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);175176/* Enable down-conversion for 32bpp and smaller formats. */177switch (format) {178case V_028C70_COLOR_8:179case V_028C70_COLOR_8_8:180case V_028C70_COLOR_8_8_8_8:181/* For 1 and 2-channel formats, use the superset thereof. */182if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||183spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||184spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {185sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);186sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);187}188break;189190case V_028C70_COLOR_5_6_5:191if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {192sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);193sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);194}195break;196197case V_028C70_COLOR_1_5_5_5:198if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {199sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);200sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);201}202break;203204case V_028C70_COLOR_4_4_4_4:205if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {206sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);207sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);208}209break;210211case V_028C70_COLOR_32:212if (swap == V_028C70_SWAP_STD && spi_format == V_028714_SPI_SHADER_32_R)213sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);214else if (swap == V_028C70_SWAP_ALT_REV && spi_format == V_028714_SPI_SHADER_32_AR)215sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);216break;217218case V_028C70_COLOR_16:219case V_028C70_COLOR_16_16:220/* For 1-channel formats, use the superset thereof. 
*/221if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||222spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||223spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||224spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {225if (swap == V_028C70_SWAP_STD || swap == V_028C70_SWAP_STD_REV)226sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);227else228sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);229}230break;231232case V_028C70_COLOR_10_11_11:233if (spi_format == V_028714_SPI_SHADER_FP16_ABGR)234sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);235break;236237case V_028C70_COLOR_2_10_10_10:238if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {239sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);240sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);241}242break;243244case V_028C70_COLOR_5_9_9_9:245if (spi_format == V_028714_SPI_SHADER_FP16_ABGR)246sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4);247break;248}249}250251/* If there are no color outputs, the first color export is252* always enabled as 32_R, so also set this to enable RB+.253*/254if (!sx_ps_downconvert)255sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_R;256257/* SX_PS_DOWNCONVERT, SX_BLEND_OPT_EPSILON, SX_BLEND_OPT_CONTROL */258radeon_opt_set_context_reg3(sctx, R_028754_SX_PS_DOWNCONVERT, SI_TRACKED_SX_PS_DOWNCONVERT,259sx_ps_downconvert, sx_blend_opt_epsilon, sx_blend_opt_control);260}261radeon_end_update_context_roll(sctx);262}263264/*265* Blender functions266*/267268static uint32_t si_translate_blend_function(int blend_func)269{270switch (blend_func) {271case PIPE_BLEND_ADD:272return V_028780_COMB_DST_PLUS_SRC;273case PIPE_BLEND_SUBTRACT:274return V_028780_COMB_SRC_MINUS_DST;275case PIPE_BLEND_REVERSE_SUBTRACT:276return V_028780_COMB_DST_MINUS_SRC;277case PIPE_BLEND_MIN:278return V_028780_COMB_MIN_DST_SRC;279case PIPE_BLEND_MAX:280return V_028780_COMB_MAX_DST_SRC;281default:282PRINT_ERR("Unknown blend function %d\n", 
blend_func);283assert(0);284break;285}286return 0;287}288289static uint32_t si_translate_blend_factor(int blend_fact)290{291switch (blend_fact) {292case PIPE_BLENDFACTOR_ONE:293return V_028780_BLEND_ONE;294case PIPE_BLENDFACTOR_SRC_COLOR:295return V_028780_BLEND_SRC_COLOR;296case PIPE_BLENDFACTOR_SRC_ALPHA:297return V_028780_BLEND_SRC_ALPHA;298case PIPE_BLENDFACTOR_DST_ALPHA:299return V_028780_BLEND_DST_ALPHA;300case PIPE_BLENDFACTOR_DST_COLOR:301return V_028780_BLEND_DST_COLOR;302case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:303return V_028780_BLEND_SRC_ALPHA_SATURATE;304case PIPE_BLENDFACTOR_CONST_COLOR:305return V_028780_BLEND_CONSTANT_COLOR;306case PIPE_BLENDFACTOR_CONST_ALPHA:307return V_028780_BLEND_CONSTANT_ALPHA;308case PIPE_BLENDFACTOR_ZERO:309return V_028780_BLEND_ZERO;310case PIPE_BLENDFACTOR_INV_SRC_COLOR:311return V_028780_BLEND_ONE_MINUS_SRC_COLOR;312case PIPE_BLENDFACTOR_INV_SRC_ALPHA:313return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;314case PIPE_BLENDFACTOR_INV_DST_ALPHA:315return V_028780_BLEND_ONE_MINUS_DST_ALPHA;316case PIPE_BLENDFACTOR_INV_DST_COLOR:317return V_028780_BLEND_ONE_MINUS_DST_COLOR;318case PIPE_BLENDFACTOR_INV_CONST_COLOR:319return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;320case PIPE_BLENDFACTOR_INV_CONST_ALPHA:321return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;322case PIPE_BLENDFACTOR_SRC1_COLOR:323return V_028780_BLEND_SRC1_COLOR;324case PIPE_BLENDFACTOR_SRC1_ALPHA:325return V_028780_BLEND_SRC1_ALPHA;326case PIPE_BLENDFACTOR_INV_SRC1_COLOR:327return V_028780_BLEND_INV_SRC1_COLOR;328case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:329return V_028780_BLEND_INV_SRC1_ALPHA;330default:331PRINT_ERR("Bad blend factor %d not supported!\n", blend_fact);332assert(0);333break;334}335return 0;336}337338static uint32_t si_translate_blend_opt_function(int blend_func)339{340switch (blend_func) {341case PIPE_BLEND_ADD:342return V_028760_OPT_COMB_ADD;343case PIPE_BLEND_SUBTRACT:344return V_028760_OPT_COMB_SUBTRACT;345case PIPE_BLEND_REVERSE_SUBTRACT:346return 
V_028760_OPT_COMB_REVSUBTRACT;347case PIPE_BLEND_MIN:348return V_028760_OPT_COMB_MIN;349case PIPE_BLEND_MAX:350return V_028760_OPT_COMB_MAX;351default:352return V_028760_OPT_COMB_BLEND_DISABLED;353}354}355356static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)357{358switch (blend_fact) {359case PIPE_BLENDFACTOR_ZERO:360return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;361case PIPE_BLENDFACTOR_ONE:362return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;363case PIPE_BLENDFACTOR_SRC_COLOR:364return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0365: V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;366case PIPE_BLENDFACTOR_INV_SRC_COLOR:367return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1368: V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;369case PIPE_BLENDFACTOR_SRC_ALPHA:370return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;371case PIPE_BLENDFACTOR_INV_SRC_ALPHA:372return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;373case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:374return is_alpha ? 
V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE375: V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;376default:377return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;378}379}380381static void si_blend_check_commutativity(struct si_screen *sscreen, struct si_state_blend *blend,382enum pipe_blend_func func, enum pipe_blendfactor src,383enum pipe_blendfactor dst, unsigned chanmask)384{385/* Src factor is allowed when it does not depend on Dst */386static const uint32_t src_allowed =387(1u << PIPE_BLENDFACTOR_ONE) | (1u << PIPE_BLENDFACTOR_SRC_COLOR) |388(1u << PIPE_BLENDFACTOR_SRC_ALPHA) | (1u << PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) |389(1u << PIPE_BLENDFACTOR_CONST_COLOR) | (1u << PIPE_BLENDFACTOR_CONST_ALPHA) |390(1u << PIPE_BLENDFACTOR_SRC1_COLOR) | (1u << PIPE_BLENDFACTOR_SRC1_ALPHA) |391(1u << PIPE_BLENDFACTOR_ZERO) | (1u << PIPE_BLENDFACTOR_INV_SRC_COLOR) |392(1u << PIPE_BLENDFACTOR_INV_SRC_ALPHA) | (1u << PIPE_BLENDFACTOR_INV_CONST_COLOR) |393(1u << PIPE_BLENDFACTOR_INV_CONST_ALPHA) | (1u << PIPE_BLENDFACTOR_INV_SRC1_COLOR) |394(1u << PIPE_BLENDFACTOR_INV_SRC1_ALPHA);395396if (dst == PIPE_BLENDFACTOR_ONE && (src_allowed & (1u << src))) {397/* Addition is commutative, but floating point addition isn't398* associative: subtle changes can be introduced via different399* rounding.400*401* Out-of-order is also non-deterministic, which means that402* this breaks OpenGL invariance requirements. 
So only enable403* out-of-order additive blending if explicitly allowed by a404* setting.405*/406if (func == PIPE_BLEND_MAX || func == PIPE_BLEND_MIN ||407(func == PIPE_BLEND_ADD && sscreen->commutative_blend_add))408blend->commutative_4bit |= chanmask;409}410}411412/**413* Get rid of DST in the blend factors by commuting the operands:414* func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)415*/416static void si_blend_remove_dst(unsigned *func, unsigned *src_factor, unsigned *dst_factor,417unsigned expected_dst, unsigned replacement_src)418{419if (*src_factor == expected_dst && *dst_factor == PIPE_BLENDFACTOR_ZERO) {420*src_factor = PIPE_BLENDFACTOR_ZERO;421*dst_factor = replacement_src;422423/* Commuting the operands requires reversing subtractions. */424if (*func == PIPE_BLEND_SUBTRACT)425*func = PIPE_BLEND_REVERSE_SUBTRACT;426else if (*func == PIPE_BLEND_REVERSE_SUBTRACT)427*func = PIPE_BLEND_SUBTRACT;428}429}430431static void *si_create_blend_state_mode(struct pipe_context *ctx,432const struct pipe_blend_state *state, unsigned mode)433{434struct si_context *sctx = (struct si_context *)ctx;435struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);436struct si_pm4_state *pm4 = &blend->pm4;437uint32_t sx_mrt_blend_opt[8] = {0};438uint32_t color_control = 0;439bool logicop_enable = state->logicop_enable && state->logicop_func != PIPE_LOGICOP_COPY;440441if (!blend)442return NULL;443444blend->alpha_to_coverage = state->alpha_to_coverage;445blend->alpha_to_one = state->alpha_to_one;446blend->dual_src_blend = util_blend_state_is_dual(state, 0);447blend->logicop_enable = logicop_enable;448blend->allows_noop_optimization =449state->rt[0].rgb_func == PIPE_BLEND_ADD &&450state->rt[0].alpha_func == PIPE_BLEND_ADD &&451state->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_DST_COLOR &&452state->rt[0].alpha_src_factor == PIPE_BLENDFACTOR_DST_COLOR &&453state->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ZERO &&454state->rt[0].alpha_dst_factor == PIPE_BLENDFACTOR_ZERO &&455mode 
== V_028808_CB_NORMAL;456457unsigned num_shader_outputs = state->max_rt + 1; /* estimate */458if (blend->dual_src_blend)459num_shader_outputs = MAX2(num_shader_outputs, 2);460461if (logicop_enable) {462color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));463} else {464color_control |= S_028808_ROP3(0xcc);465}466467if (state->alpha_to_coverage && state->alpha_to_coverage_dither) {468si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,469S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |470S_028B70_ALPHA_TO_MASK_OFFSET0(3) | S_028B70_ALPHA_TO_MASK_OFFSET1(1) |471S_028B70_ALPHA_TO_MASK_OFFSET2(0) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) |472S_028B70_OFFSET_ROUND(1));473} else {474si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,475S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |476S_028B70_ALPHA_TO_MASK_OFFSET0(2) | S_028B70_ALPHA_TO_MASK_OFFSET1(2) |477S_028B70_ALPHA_TO_MASK_OFFSET2(2) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) |478S_028B70_OFFSET_ROUND(0));479}480481if (state->alpha_to_coverage)482blend->need_src_alpha_4bit |= 0xf;483484blend->cb_target_mask = 0;485blend->cb_target_enabled_4bit = 0;486487for (int i = 0; i < num_shader_outputs; i++) {488/* state->rt entries > 0 only written if independent blending */489const int j = state->independent_blend_enable ? i : 0;490491unsigned eqRGB = state->rt[j].rgb_func;492unsigned srcRGB = state->rt[j].rgb_src_factor;493unsigned dstRGB = state->rt[j].rgb_dst_factor;494unsigned eqA = state->rt[j].alpha_func;495unsigned srcA = state->rt[j].alpha_src_factor;496unsigned dstA = state->rt[j].alpha_dst_factor;497498unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;499unsigned blend_cntl = 0;500501sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |502S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);503504/* Only set dual source blending for MRT0 to avoid a hang. */505if (i >= 1 && blend->dual_src_blend) {506/* Vulkan does this for dual source blending. 
*/507if (i == 1)508blend_cntl |= S_028780_ENABLE(1);509510si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);511continue;512}513514/* Only addition and subtraction equations are supported with515* dual source blending.516*/517if (blend->dual_src_blend && (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX ||518eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) {519assert(!"Unsupported equation for dual source blending");520si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);521continue;522}523524/* cb_render_state will disable unused ones */525blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i);526if (state->rt[j].colormask)527blend->cb_target_enabled_4bit |= 0xf << (4 * i);528529if (!state->rt[j].colormask || !state->rt[j].blend_enable) {530si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);531continue;532}533534si_blend_check_commutativity(sctx->screen, blend, eqRGB, srcRGB, dstRGB, 0x7 << (4 * i));535si_blend_check_commutativity(sctx->screen, blend, eqA, srcA, dstA, 0x8 << (4 * i));536537/* Blending optimizations for RB+.538* These transformations don't change the behavior.539*540* First, get rid of DST in the blend factors:541* func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)542*/543si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, PIPE_BLENDFACTOR_DST_COLOR,544PIPE_BLENDFACTOR_SRC_COLOR);545si_blend_remove_dst(&eqA, &srcA, &dstA, PIPE_BLENDFACTOR_DST_COLOR,546PIPE_BLENDFACTOR_SRC_COLOR);547si_blend_remove_dst(&eqA, &srcA, &dstA, PIPE_BLENDFACTOR_DST_ALPHA,548PIPE_BLENDFACTOR_SRC_ALPHA);549550/* Look up the ideal settings from tables. */551srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);552dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);553srcA_opt = si_translate_blend_opt_factor(srcA, true);554dstA_opt = si_translate_blend_opt_factor(dstA, true);555556/* Handle interdependencies. 
*/557if (util_blend_factor_uses_dest(srcRGB, false))558dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;559if (util_blend_factor_uses_dest(srcA, false))560dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;561562if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE &&563(dstRGB == PIPE_BLENDFACTOR_ZERO || dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||564dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE))565dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;566567/* Set the final value. */568sx_mrt_blend_opt[i] = S_028760_COLOR_SRC_OPT(srcRGB_opt) |569S_028760_COLOR_DST_OPT(dstRGB_opt) |570S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |571S_028760_ALPHA_SRC_OPT(srcA_opt) | S_028760_ALPHA_DST_OPT(dstA_opt) |572S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));573574/* Set blend state. */575blend_cntl |= S_028780_ENABLE(1);576blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));577blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));578blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));579580if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {581blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);582blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));583blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));584blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));585}586si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);587588blend->blend_enable_4bit |= 0xfu << (i * 4);589590if (sctx->chip_class >= GFX8 && sctx->chip_class <= GFX10)591blend->dcc_msaa_corruption_4bit |= 0xfu << (i * 4);592593/* This is only important for formats without alpha. 
*/594if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||595srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||596dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||597srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA)598blend->need_src_alpha_4bit |= 0xfu << (i * 4);599}600601if (sctx->chip_class >= GFX8 && sctx->chip_class <= GFX10 && logicop_enable)602blend->dcc_msaa_corruption_4bit |= blend->cb_target_enabled_4bit;603604if (blend->cb_target_mask) {605color_control |= S_028808_MODE(mode);606} else {607color_control |= S_028808_MODE(V_028808_CB_DISABLE);608}609610if (sctx->screen->info.rbplus_allowed) {611/* Disable RB+ blend optimizations for dual source blending.612* Vulkan does this.613*/614if (blend->dual_src_blend) {615for (int i = 0; i < num_shader_outputs; i++) {616sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |617S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);618}619}620621for (int i = 0; i < num_shader_outputs; i++)622si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, sx_mrt_blend_opt[i]);623624/* RB+ doesn't work with dual source blending, logic op, and RESOLVE. 
*/625if (blend->dual_src_blend || logicop_enable || mode == V_028808_CB_RESOLVE)626color_control |= S_028808_DISABLE_DUAL_QUAD(1);627}628629si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);630return blend;631}632633static void *si_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state)634{635return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);636}637638static void si_draw_blend_dst_sampler_noop(struct pipe_context *ctx,639const struct pipe_draw_info *info,640unsigned drawid_offset,641const struct pipe_draw_indirect_info *indirect,642const struct pipe_draw_start_count_bias *draws,643unsigned num_draws) {644struct si_context *sctx = (struct si_context *)ctx;645646if (sctx->framebuffer.state.nr_cbufs == 1) {647struct si_shader_selector *sel = sctx->shader.ps.cso;648bool free_nir;649if (unlikely(sel->info.writes_1_if_tex_is_1 == 0xff)) {650struct nir_shader *nir = si_get_nir_shader(sel, NULL, &free_nir);651652/* Determine if this fragment shader always writes vec4(1) if a specific texture653* is all 1s.654*/655float in[4] = { 1.0, 1.0, 1.0, 1.0 };656float out[4];657int texunit;658if (si_nir_is_output_const_if_tex_is_const(nir, in, out, &texunit) &&659!memcmp(in, out, 4 * sizeof(float))) {660sel->info.writes_1_if_tex_is_1 = 1 + texunit;661} else {662sel->info.writes_1_if_tex_is_1 = 0;663}664665if (free_nir)666ralloc_free(nir);667}668669if (sel->info.writes_1_if_tex_is_1 &&670sel->info.writes_1_if_tex_is_1 != 0xff) {671/* Now check if the texture is cleared to 1 */672int unit = sctx->shader.ps.cso->info.writes_1_if_tex_is_1 - 1;673struct si_samplers *samp = &sctx->samplers[PIPE_SHADER_FRAGMENT];674if ((1u << unit) & samp->enabled_mask) {675struct si_texture* tex = (struct si_texture*) samp->views[unit]->texture;676if (tex->is_depth &&677tex->depth_cleared_level_mask & BITFIELD_BIT(samp->views[unit]->u.tex.first_level) &&678tex->depth_clear_value[0] == 1) {679return;680}681/* TODO: handle color textures 
*/682}683}684}685686sctx->real_draw_vbo(ctx, info, drawid_offset, indirect, draws, num_draws);687}688689static void si_bind_blend_state(struct pipe_context *ctx, void *state)690{691struct si_context *sctx = (struct si_context *)ctx;692struct si_state_blend *old_blend = sctx->queued.named.blend;693struct si_state_blend *blend = (struct si_state_blend *)state;694695if (!blend)696blend = (struct si_state_blend *)sctx->noop_blend;697698si_pm4_bind_state(sctx, blend, blend);699700if (old_blend->cb_target_mask != blend->cb_target_mask ||701old_blend->dual_src_blend != blend->dual_src_blend ||702(old_blend->dcc_msaa_corruption_4bit != blend->dcc_msaa_corruption_4bit &&703sctx->framebuffer.has_dcc_msaa))704si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state);705706if (old_blend->cb_target_mask != blend->cb_target_mask ||707old_blend->alpha_to_coverage != blend->alpha_to_coverage ||708old_blend->alpha_to_one != blend->alpha_to_one ||709old_blend->dual_src_blend != blend->dual_src_blend ||710old_blend->blend_enable_4bit != blend->blend_enable_4bit ||711old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit)712sctx->do_update_shaders = true;713714if (sctx->screen->dpbb_allowed &&715(old_blend->alpha_to_coverage != blend->alpha_to_coverage ||716old_blend->blend_enable_4bit != blend->blend_enable_4bit ||717old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit))718si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);719720if (sctx->screen->has_out_of_order_rast &&721((old_blend->blend_enable_4bit != blend->blend_enable_4bit ||722old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit ||723old_blend->commutative_4bit != blend->commutative_4bit ||724old_blend->logicop_enable != blend->logicop_enable)))725si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);726727if (likely(!radeon_uses_secure_bos(sctx->ws))) {728if (unlikely(blend->allows_noop_optimization)) {729si_install_draw_wrapper(sctx, si_draw_blend_dst_sampler_noop);730} else 
{731si_install_draw_wrapper(sctx, NULL);732}733}734}735736static void si_delete_blend_state(struct pipe_context *ctx, void *state)737{738struct si_context *sctx = (struct si_context *)ctx;739740if (sctx->queued.named.blend == state)741si_bind_blend_state(ctx, sctx->noop_blend);742743si_pm4_free_state(sctx, (struct si_pm4_state*)state, SI_STATE_IDX(blend));744}745746static void si_set_blend_color(struct pipe_context *ctx, const struct pipe_blend_color *state)747{748struct si_context *sctx = (struct si_context *)ctx;749static const struct pipe_blend_color zeros;750751sctx->blend_color = *state;752sctx->blend_color_any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0;753si_mark_atom_dirty(sctx, &sctx->atoms.s.blend_color);754}755756static void si_emit_blend_color(struct si_context *sctx)757{758struct radeon_cmdbuf *cs = &sctx->gfx_cs;759760radeon_begin(cs);761radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);762radeon_emit_array(cs, (uint32_t *)sctx->blend_color.color, 4);763radeon_end();764}765766/*767* Clipping768*/769770static void si_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_state *state)771{772struct si_context *sctx = (struct si_context *)ctx;773struct pipe_constant_buffer cb;774static const struct pipe_clip_state zeros;775776if (memcmp(&sctx->clip_state, state, sizeof(*state)) == 0)777return;778779sctx->clip_state = *state;780sctx->clip_state_any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0;781si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_state);782783cb.buffer = NULL;784cb.user_buffer = state->ucp;785cb.buffer_offset = 0;786cb.buffer_size = 4 * 4 * 8;787si_set_internal_const_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb);788}789790static void si_emit_clip_state(struct si_context *sctx)791{792struct radeon_cmdbuf *cs = &sctx->gfx_cs;793794radeon_begin(cs);795radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6 * 4);796radeon_emit_array(cs, (uint32_t *)sctx->clip_state.ucp, 6 * 4);797radeon_end();798}799800static void 
si_emit_clip_regs(struct si_context *sctx)801{802struct si_shader *vs = si_get_vs(sctx)->current;803struct si_shader_selector *vs_sel = vs->selector;804struct si_shader_info *info = &vs_sel->info;805struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;806bool window_space = info->stage == MESA_SHADER_VERTEX ?807info->base.vs.window_space_position : 0;808unsigned clipdist_mask = vs_sel->clipdist_mask;809unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS;810unsigned culldist_mask = vs_sel->culldist_mask;811unsigned vs_out_mask = (clipdist_mask & ~vs->key.opt.kill_clip_distances) | culldist_mask;812813/* Clip distances on points have no effect, so need to be implemented814* as cull distances. This applies for the clipvertex case as well.815*816* Setting this for primitives other than points should have no adverse817* effects.818*/819clipdist_mask &= rs->clip_plane_enable;820culldist_mask |= clipdist_mask;821822unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((vs_out_mask & 0x0F) != 0) |823S_02881C_VS_OUT_CCDIST1_VEC_ENA((vs_out_mask & 0xF0) != 0) |824S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3 &&825!sctx->screen->options.vrs2x2) |826S_02881C_BYPASS_PRIM_RATE_COMBINER(sctx->chip_class >= GFX10_3) |827clipdist_mask | (culldist_mask << 8);828829radeon_begin(&sctx->gfx_cs);830831if (sctx->chip_class >= GFX10) {832radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,833SI_TRACKED_PA_CL_VS_OUT_CNTL__CL, pa_cl_cntl,834~SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);835} else {836radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, SI_TRACKED_PA_CL_VS_OUT_CNTL__CL,837vs_sel->pa_cl_vs_out_cntl | pa_cl_cntl);838}839radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL,840rs->pa_cl_clip_cntl | ucp_mask | S_028810_CLIP_DISABLE(window_space));841radeon_end_update_context_roll(sctx);842}843844/*845* inferred state between framebuffer and rasterizer846*/847static void 
si_update_poly_offset_state(struct si_context *sctx)848{849struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;850851if (!rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) {852si_pm4_bind_state(sctx, poly_offset, NULL);853return;854}855856/* Use the user format, not db_render_format, so that the polygon857* offset behaves as expected by applications.858*/859switch (sctx->framebuffer.state.zsbuf->texture->format) {860case PIPE_FORMAT_Z16_UNORM:861si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);862break;863default: /* 24-bit */864si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]);865break;866case PIPE_FORMAT_Z32_FLOAT:867case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:868si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]);869break;870}871}872873/*874* Rasterizer875*/876877static uint32_t si_translate_fill(uint32_t func)878{879switch (func) {880case PIPE_POLYGON_MODE_FILL:881return V_028814_X_DRAW_TRIANGLES;882case PIPE_POLYGON_MODE_LINE:883return V_028814_X_DRAW_LINES;884case PIPE_POLYGON_MODE_POINT:885return V_028814_X_DRAW_POINTS;886default:887assert(0);888return V_028814_X_DRAW_POINTS;889}890}891892static void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rasterizer_state *state)893{894struct si_screen *sscreen = ((struct si_context *)ctx)->screen;895struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);896struct si_pm4_state *pm4 = &rs->pm4;897unsigned tmp, i;898float psize_min, psize_max;899900if (!rs) {901return NULL;902}903904if (!state->front_ccw) {905rs->cull_front = !!(state->cull_face & PIPE_FACE_FRONT);906rs->cull_back = !!(state->cull_face & PIPE_FACE_BACK);907} else {908rs->cull_back = !!(state->cull_face & PIPE_FACE_FRONT);909rs->cull_front = !!(state->cull_face & PIPE_FACE_BACK);910}911rs->depth_clamp_any = !state->depth_clip_near || !state->depth_clip_far;912rs->provoking_vertex_first = state->flatshade_first;913rs->scissor_enable = state->scissor;914rs->clip_halfz = 
state->clip_halfz;915rs->two_side = state->light_twoside;916rs->multisample_enable = state->multisample;917rs->force_persample_interp = state->force_persample_interp;918rs->clip_plane_enable = state->clip_plane_enable;919rs->half_pixel_center = state->half_pixel_center;920rs->line_stipple_enable = state->line_stipple_enable;921rs->poly_stipple_enable = state->poly_stipple_enable;922rs->line_smooth = state->line_smooth;923rs->line_width = state->line_width;924rs->poly_smooth = state->poly_smooth;925rs->uses_poly_offset = state->offset_point || state->offset_line || state->offset_tri;926rs->clamp_fragment_color = state->clamp_fragment_color;927rs->clamp_vertex_color = state->clamp_vertex_color;928rs->flatshade = state->flatshade;929rs->flatshade_first = state->flatshade_first;930rs->sprite_coord_enable = state->sprite_coord_enable;931rs->rasterizer_discard = state->rasterizer_discard;932rs->polygon_mode_enabled =933(state->fill_front != PIPE_POLYGON_MODE_FILL && !(state->cull_face & PIPE_FACE_FRONT)) ||934(state->fill_back != PIPE_POLYGON_MODE_FILL && !(state->cull_face & PIPE_FACE_BACK));935rs->polygon_mode_is_lines =936(state->fill_front == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_FRONT)) ||937(state->fill_back == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_BACK));938rs->polygon_mode_is_points =939(state->fill_front == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_FRONT)) ||940(state->fill_back == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_BACK));941rs->pa_sc_line_stipple = state->line_stipple_enable942? 
S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |943S_028A0C_REPEAT_COUNT(state->line_stipple_factor)944: 0;945rs->pa_cl_clip_cntl = S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |946S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip_near) |947S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip_far) |948S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |949S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);950951if (rs->rasterizer_discard) {952rs->ngg_cull_flags = SI_NGG_CULL_FRONT_FACE | SI_NGG_CULL_BACK_FACE;953rs->ngg_cull_flags_y_inverted = rs->ngg_cull_flags;954} else {955/* Polygon mode can't use view and small primitive culling,956* because it draws points or lines where the culling depends957* on the point or line width.958*/959if (!rs->polygon_mode_enabled) {960rs->ngg_cull_flags |= SI_NGG_CULL_VIEW_SMALLPRIMS;961rs->ngg_cull_flags_y_inverted |= SI_NGG_CULL_VIEW_SMALLPRIMS;962}963964if (rs->cull_front) {965rs->ngg_cull_flags |= SI_NGG_CULL_FRONT_FACE;966rs->ngg_cull_flags_y_inverted |= SI_NGG_CULL_BACK_FACE;967}968969if (rs->cull_back) {970rs->ngg_cull_flags |= SI_NGG_CULL_BACK_FACE;971rs->ngg_cull_flags_y_inverted |= SI_NGG_CULL_FRONT_FACE;972}973}974975si_pm4_set_reg(976pm4, R_0286D4_SPI_INTERP_CONTROL_0,977S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(state->point_quad_rasterization) |978S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |979S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |980S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |981S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |982S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT));983984/* point size 12.4 fixed point */985tmp = (unsigned)(state->point_size * 8.0);986si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));987988if (state->point_size_per_vertex) {989psize_min = util_get_min_point_size(state);990psize_max = SI_MAX_POINT_SIZE;991} else {992/* Force the point size to be as if the 
vertex output was disabled. */993psize_min = state->point_size;994psize_max = state->point_size;995}996rs->max_point_size = psize_max;997998/* Divide by two, because 0.5 = 1 pixel. */999si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,1000S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min / 2)) |1001S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max / 2)));10021003si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL,1004S_028A08_WIDTH(si_pack_float_12p4(state->line_width / 2)));1005si_pm4_set_reg(1006pm4, R_028A48_PA_SC_MODE_CNTL_0,1007S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |1008S_028A48_MSAA_ENABLE(state->multisample || state->poly_smooth || state->line_smooth) |1009S_028A48_VPORT_SCISSOR_ENABLE(1) |1010S_028A48_ALTERNATE_RBS_PER_TILE(sscreen->info.chip_class >= GFX9));10111012si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));1013si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL,1014S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |1015S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |1016S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |1017S_028814_FACE(!state->front_ccw) |1018S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |1019S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |1020S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |1021S_028814_POLY_MODE(rs->polygon_mode_enabled) |1022S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |1023S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)) |1024/* this must be set if POLY_MODE or PERPENDICULAR_ENDCAP_ENA is set */1025S_028814_KEEP_TOGETHER_ENABLE(sscreen->info.chip_class >= GFX10 ? 
rs->polygon_mode_enabled : 0));10261027if (!rs->uses_poly_offset)1028return rs;10291030rs->pm4_poly_offset = CALLOC(3, sizeof(struct si_pm4_state));1031if (!rs->pm4_poly_offset) {1032FREE(rs);1033return NULL;1034}10351036/* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */1037for (i = 0; i < 3; i++) {1038struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i];1039float offset_units = state->offset_units;1040float offset_scale = state->offset_scale * 16.0f;1041uint32_t pa_su_poly_offset_db_fmt_cntl = 0;10421043if (!state->offset_units_unscaled) {1044switch (i) {1045case 0: /* 16-bit zbuffer */1046offset_units *= 4.0f;1047pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);1048break;1049case 1: /* 24-bit zbuffer */1050offset_units *= 2.0f;1051pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);1052break;1053case 2: /* 32-bit zbuffer */1054offset_units *= 1.0f;1055pa_su_poly_offset_db_fmt_cntl =1056S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);1057break;1058}1059}10601061si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, fui(offset_scale));1062si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units));1063si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, fui(offset_scale));1064si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units));1065si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, pa_su_poly_offset_db_fmt_cntl);1066}10671068return rs;1069}10701071static void si_bind_rs_state(struct pipe_context *ctx, void *state)1072{1073struct si_context *sctx = (struct si_context *)ctx;1074struct si_state_rasterizer *old_rs = (struct si_state_rasterizer *)sctx->queued.named.rasterizer;1075struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;10761077if (!rs)1078rs = (struct si_state_rasterizer *)sctx->discard_rasterizer_state;10791080if (old_rs->multisample_enable != 
rs->multisample_enable) {1081si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);10821083si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);10841085/* Update the small primitive filter workaround if necessary. */1086if (sctx->screen->info.has_msaa_sample_loc_bug && sctx->framebuffer.nr_samples > 1)1087si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs);10881089/* NGG cull state uses multisample_enable. */1090if (sctx->screen->use_ngg_culling)1091si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state);1092}10931094sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;1095sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color);10961097si_pm4_bind_state(sctx, rasterizer, rs);1098si_update_poly_offset_state(sctx);10991100if (old_rs->scissor_enable != rs->scissor_enable)1101si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors);11021103if (old_rs->line_width != rs->line_width || old_rs->max_point_size != rs->max_point_size ||1104old_rs->half_pixel_center != rs->half_pixel_center)1105si_mark_atom_dirty(sctx, &sctx->atoms.s.guardband);11061107if (old_rs->clip_halfz != rs->clip_halfz)1108si_mark_atom_dirty(sctx, &sctx->atoms.s.viewports);11091110if (old_rs->clip_plane_enable != rs->clip_plane_enable ||1111old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl)1112si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);11131114if (old_rs->clip_plane_enable != rs->clip_plane_enable ||1115old_rs->rasterizer_discard != rs->rasterizer_discard ||1116old_rs->sprite_coord_enable != rs->sprite_coord_enable ||1117old_rs->flatshade != rs->flatshade || old_rs->two_side != rs->two_side ||1118old_rs->multisample_enable != rs->multisample_enable ||1119old_rs->poly_stipple_enable != rs->poly_stipple_enable ||1120old_rs->poly_smooth != rs->poly_smooth || old_rs->line_smooth != rs->line_smooth ||1121old_rs->clamp_fragment_color != rs->clamp_fragment_color ||1122old_rs->force_persample_interp != rs->force_persample_interp ||1123old_rs->polygon_mode_is_points != 
rs->polygon_mode_is_points)1124sctx->do_update_shaders = true;1125}11261127static void si_delete_rs_state(struct pipe_context *ctx, void *state)1128{1129struct si_context *sctx = (struct si_context *)ctx;1130struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;11311132if (sctx->queued.named.rasterizer == state)1133si_bind_rs_state(ctx, sctx->discard_rasterizer_state);11341135FREE(rs->pm4_poly_offset);1136si_pm4_free_state(sctx, &rs->pm4, SI_STATE_IDX(rasterizer));1137}11381139/*1140* inferred state between dsa and stencil ref1141*/1142static void si_emit_stencil_ref(struct si_context *sctx)1143{1144struct radeon_cmdbuf *cs = &sctx->gfx_cs;1145struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;1146struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;11471148radeon_begin(cs);1149radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);1150radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) |1151S_028430_STENCILMASK(dsa->valuemask[0]) |1152S_028430_STENCILWRITEMASK(dsa->writemask[0]) | S_028430_STENCILOPVAL(1));1153radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |1154S_028434_STENCILMASK_BF(dsa->valuemask[1]) |1155S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |1156S_028434_STENCILOPVAL_BF(1));1157radeon_end();1158}11591160static void si_set_stencil_ref(struct pipe_context *ctx, const struct pipe_stencil_ref state)1161{1162struct si_context *sctx = (struct si_context *)ctx;11631164if (memcmp(&sctx->stencil_ref.state, &state, sizeof(state)) == 0)1165return;11661167sctx->stencil_ref.state = state;1168si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref);1169}11701171/*1172* DSA1173*/11741175static uint32_t si_translate_stencil_op(int s_op)1176{1177switch (s_op) {1178case PIPE_STENCIL_OP_KEEP:1179return V_02842C_STENCIL_KEEP;1180case PIPE_STENCIL_OP_ZERO:1181return V_02842C_STENCIL_ZERO;1182case PIPE_STENCIL_OP_REPLACE:1183return V_02842C_STENCIL_REPLACE_TEST;1184case PIPE_STENCIL_OP_INCR:1185return 
V_02842C_STENCIL_ADD_CLAMP;1186case PIPE_STENCIL_OP_DECR:1187return V_02842C_STENCIL_SUB_CLAMP;1188case PIPE_STENCIL_OP_INCR_WRAP:1189return V_02842C_STENCIL_ADD_WRAP;1190case PIPE_STENCIL_OP_DECR_WRAP:1191return V_02842C_STENCIL_SUB_WRAP;1192case PIPE_STENCIL_OP_INVERT:1193return V_02842C_STENCIL_INVERT;1194default:1195PRINT_ERR("Unknown stencil op %d", s_op);1196assert(0);1197break;1198}1199return 0;1200}12011202static bool si_order_invariant_stencil_op(enum pipe_stencil_op op)1203{1204/* REPLACE is normally order invariant, except when the stencil1205* reference value is written by the fragment shader. Tracking this1206* interaction does not seem worth the effort, so be conservative. */1207return op != PIPE_STENCIL_OP_INCR && op != PIPE_STENCIL_OP_DECR && op != PIPE_STENCIL_OP_REPLACE;1208}12091210/* Compute whether, assuming Z writes are disabled, this stencil state is order1211* invariant in the sense that the set of passing fragments as well as the1212* final stencil buffer result does not depend on the order of fragments. */1213static bool si_order_invariant_stencil_state(const struct pipe_stencil_state *state)1214{1215return !state->enabled || !state->writemask ||1216/* The following assumes that Z writes are disabled. 
*/1217(state->func == PIPE_FUNC_ALWAYS && si_order_invariant_stencil_op(state->zpass_op) &&1218si_order_invariant_stencil_op(state->zfail_op)) ||1219(state->func == PIPE_FUNC_NEVER && si_order_invariant_stencil_op(state->fail_op));1220}12211222static void *si_create_dsa_state(struct pipe_context *ctx,1223const struct pipe_depth_stencil_alpha_state *state)1224{1225struct si_context *sctx = (struct si_context *)ctx;1226struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);1227struct si_pm4_state *pm4 = &dsa->pm4;1228unsigned db_depth_control;1229uint32_t db_stencil_control = 0;12301231if (!dsa) {1232return NULL;1233}12341235dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask;1236dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask;1237dsa->stencil_ref.writemask[0] = state->stencil[0].writemask;1238dsa->stencil_ref.writemask[1] = state->stencil[1].writemask;12391240db_depth_control =1241S_028800_Z_ENABLE(state->depth_enabled) | S_028800_Z_WRITE_ENABLE(state->depth_writemask) |1242S_028800_ZFUNC(state->depth_func) | S_028800_DEPTH_BOUNDS_ENABLE(state->depth_bounds_test);12431244/* stencil */1245if (state->stencil[0].enabled) {1246db_depth_control |= S_028800_STENCIL_ENABLE(1);1247db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func);1248db_stencil_control |=1249S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op));1250db_stencil_control |=1251S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op));1252db_stencil_control |=1253S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op));12541255if (state->stencil[1].enabled) {1256db_depth_control |= S_028800_BACKFACE_ENABLE(1);1257db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func);1258db_stencil_control |=1259S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op));1260db_stencil_control |=1261S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op));1262db_stencil_control 
|=1263S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op));1264}1265}12661267/* alpha */1268if (state->alpha_enabled) {1269dsa->alpha_func = state->alpha_func;12701271si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + SI_SGPR_ALPHA_REF * 4,1272fui(state->alpha_ref_value));1273} else {1274dsa->alpha_func = PIPE_FUNC_ALWAYS;1275}12761277si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);1278if (state->stencil[0].enabled)1279si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);1280if (state->depth_bounds_test) {1281si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth_bounds_min));1282si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth_bounds_max));1283}12841285dsa->depth_enabled = state->depth_enabled;1286dsa->depth_write_enabled = state->depth_enabled && state->depth_writemask;1287dsa->stencil_enabled = state->stencil[0].enabled;1288dsa->stencil_write_enabled =1289(util_writes_stencil(&state->stencil[0]) || util_writes_stencil(&state->stencil[1]));1290dsa->db_can_write = dsa->depth_write_enabled || dsa->stencil_write_enabled;12911292bool zfunc_is_ordered =1293state->depth_func == PIPE_FUNC_NEVER || state->depth_func == PIPE_FUNC_LESS ||1294state->depth_func == PIPE_FUNC_LEQUAL || state->depth_func == PIPE_FUNC_GREATER ||1295state->depth_func == PIPE_FUNC_GEQUAL;12961297bool nozwrite_and_order_invariant_stencil =1298!dsa->db_can_write ||1299(!dsa->depth_write_enabled && si_order_invariant_stencil_state(&state->stencil[0]) &&1300si_order_invariant_stencil_state(&state->stencil[1]));13011302dsa->order_invariance[1].zs =1303nozwrite_and_order_invariant_stencil || (!dsa->stencil_write_enabled && zfunc_is_ordered);1304dsa->order_invariance[0].zs = !dsa->depth_write_enabled || zfunc_is_ordered;13051306dsa->order_invariance[1].pass_set =1307nozwrite_and_order_invariant_stencil ||1308(!dsa->stencil_write_enabled &&1309(state->depth_func == PIPE_FUNC_ALWAYS || state->depth_func == 
PIPE_FUNC_NEVER));1310dsa->order_invariance[0].pass_set =1311!dsa->depth_write_enabled ||1312(state->depth_func == PIPE_FUNC_ALWAYS || state->depth_func == PIPE_FUNC_NEVER);13131314dsa->order_invariance[1].pass_last = sctx->screen->assume_no_z_fights &&1315!dsa->stencil_write_enabled && dsa->depth_write_enabled &&1316zfunc_is_ordered;1317dsa->order_invariance[0].pass_last =1318sctx->screen->assume_no_z_fights && dsa->depth_write_enabled && zfunc_is_ordered;13191320return dsa;1321}13221323static void si_bind_dsa_state(struct pipe_context *ctx, void *state)1324{1325struct si_context *sctx = (struct si_context *)ctx;1326struct si_state_dsa *old_dsa = sctx->queued.named.dsa;1327struct si_state_dsa *dsa = state;13281329if (!dsa)1330dsa = (struct si_state_dsa *)sctx->noop_dsa;13311332si_pm4_bind_state(sctx, dsa, dsa);13331334if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,1335sizeof(struct si_dsa_stencil_ref_part)) != 0) {1336sctx->stencil_ref.dsa_part = dsa->stencil_ref;1337si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref);1338}13391340if (old_dsa->alpha_func != dsa->alpha_func)1341sctx->do_update_shaders = true;13421343if (sctx->screen->dpbb_allowed && ((old_dsa->depth_enabled != dsa->depth_enabled ||1344old_dsa->stencil_enabled != dsa->stencil_enabled ||1345old_dsa->db_can_write != dsa->db_can_write)))1346si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);13471348if (sctx->screen->has_out_of_order_rast &&1349(memcmp(old_dsa->order_invariance, dsa->order_invariance,1350sizeof(old_dsa->order_invariance))))1351si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);1352}13531354static void si_delete_dsa_state(struct pipe_context *ctx, void *state)1355{1356struct si_context *sctx = (struct si_context *)ctx;13571358if (sctx->queued.named.dsa == state)1359si_bind_dsa_state(ctx, sctx->noop_dsa);13601361si_pm4_free_state(sctx, (struct si_pm4_state*)state, SI_STATE_IDX(dsa));1362}13631364static void *si_create_db_flush_dsa(struct si_context *sctx)1365{1366struct 
pipe_depth_stencil_alpha_state dsa = {};13671368return sctx->b.create_depth_stencil_alpha_state(&sctx->b, &dsa);1369}13701371/* DB RENDER STATE */13721373static void si_set_active_query_state(struct pipe_context *ctx, bool enable)1374{1375struct si_context *sctx = (struct si_context *)ctx;13761377/* Pipeline stat & streamout queries. */1378if (enable) {1379sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;1380sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS;1381} else {1382sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS;1383sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS;1384}13851386/* Occlusion queries. */1387if (sctx->occlusion_queries_disabled != !enable) {1388sctx->occlusion_queries_disabled = !enable;1389si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);1390}1391}13921393void si_set_occlusion_query_state(struct si_context *sctx, bool old_perfect_enable)1394{1395si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);13961397bool perfect_enable = sctx->num_perfect_occlusion_queries != 0;13981399if (perfect_enable != old_perfect_enable)1400si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);1401}14021403void si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st)1404{1405si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);1406}14071408void si_restore_qbo_state(struct si_context *sctx, struct si_qbo_state *st)1409{1410sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, true, &st->saved_const0);1411}14121413static void si_emit_db_render_state(struct si_context *sctx)1414{1415struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;1416unsigned db_shader_control, db_render_control, db_count_control;14171418/* DB_RENDER_CONTROL */1419if (sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled) {1420db_render_control = S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |1421S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |1422S_028000_COPY_CENTROID(1) | 
S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample);1423} else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {1424db_render_control = S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |1425S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace);1426} else {1427db_render_control = S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |1428S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear);1429}14301431/* DB_COUNT_CONTROL (occlusion queries) */1432if (sctx->num_occlusion_queries > 0 && !sctx->occlusion_queries_disabled) {1433bool perfect = sctx->num_perfect_occlusion_queries > 0;1434bool gfx10_perfect = sctx->chip_class >= GFX10 && perfect;14351436if (sctx->chip_class >= GFX7) {1437unsigned log_sample_rate = sctx->framebuffer.log_samples;14381439db_count_control = S_028004_PERFECT_ZPASS_COUNTS(perfect) |1440S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) |1441S_028004_SAMPLE_RATE(log_sample_rate) | S_028004_ZPASS_ENABLE(1) |1442S_028004_SLICE_EVEN_ENABLE(1) | S_028004_SLICE_ODD_ENABLE(1);1443} else {1444db_count_control = S_028004_PERFECT_ZPASS_COUNTS(perfect) |1445S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples);1446}1447} else {1448/* Disable occlusion queries. */1449if (sctx->chip_class >= GFX7) {1450db_count_control = 0;1451} else {1452db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1);1453}1454}14551456radeon_begin(&sctx->gfx_cs);1457radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL, SI_TRACKED_DB_RENDER_CONTROL,1458db_render_control, db_count_control);14591460/* DB_RENDER_OVERRIDE2 */1461radeon_opt_set_context_reg(1462sctx, R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2,1463S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |1464S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |1465S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) |1466S_028010_CENTROID_COMPUTATION_MODE(sctx->chip_class >= GFX10_3 ? 
1 : 0));14671468db_shader_control = sctx->ps_db_shader_control;14691470/* Bug workaround for smoothing (overrasterization) on GFX6. */1471if (sctx->chip_class == GFX6 && sctx->smoothing_enabled) {1472db_shader_control &= C_02880C_Z_ORDER;1473db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);1474}14751476/* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */1477if (!rs->multisample_enable)1478db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;14791480if (sctx->screen->info.has_rbplus && !sctx->screen->info.rbplus_allowed)1481db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);14821483radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL, SI_TRACKED_DB_SHADER_CONTROL,1484db_shader_control);14851486if (sctx->chip_class >= GFX10_3) {1487if (sctx->allow_flat_shading) {1488radeon_opt_set_context_reg(sctx, R_028064_DB_VRS_OVERRIDE_CNTL,1489SI_TRACKED_DB_VRS_OVERRIDE_CNTL,1490S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(1491V_028064_VRS_COMB_MODE_OVERRIDE) |1492S_028064_VRS_OVERRIDE_RATE_X(1) |1493S_028064_VRS_OVERRIDE_RATE_Y(1));1494} else {1495/* If the shader is using discard, turn off coarse shading because1496* discard at 2x2 pixel granularity degrades quality too much.1497*1498* MIN allows sample shading but not coarse shading.1499*/1500unsigned mode = sctx->screen->options.vrs2x2 && G_02880C_KILL_ENABLE(db_shader_control) ?1501V_028064_VRS_COMB_MODE_MIN : V_028064_VRS_COMB_MODE_PASSTHRU;15021503radeon_opt_set_context_reg(sctx, R_028064_DB_VRS_OVERRIDE_CNTL,1504SI_TRACKED_DB_VRS_OVERRIDE_CNTL,1505S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) |1506S_028064_VRS_OVERRIDE_RATE_X(0) |1507S_028064_VRS_OVERRIDE_RATE_Y(0));1508}1509}1510radeon_end_update_context_roll(sctx);1511}15121513/*1514* format translation1515*/1516static uint32_t si_translate_colorformat(enum chip_class chip_class,1517enum pipe_format format)1518{1519const struct util_format_description *desc = util_format_description(format);1520if (!desc)1521return 
V_028C70_COLOR_INVALID;15221523#define HAS_SIZE(x, y, z, w) \1524(desc->channel[0].size == (x) && desc->channel[1].size == (y) && \1525desc->channel[2].size == (z) && desc->channel[3].size == (w))15261527if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */1528return V_028C70_COLOR_10_11_11;15291530if (chip_class >= GFX10_3 &&1531format == PIPE_FORMAT_R9G9B9E5_FLOAT) /* isn't plain */1532return V_028C70_COLOR_5_9_9_9;15331534if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)1535return V_028C70_COLOR_INVALID;15361537/* hw cannot support mixed formats (except depth/stencil, since1538* stencil is not written to). */1539if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)1540return V_028C70_COLOR_INVALID;15411542switch (desc->nr_channels) {1543case 1:1544switch (desc->channel[0].size) {1545case 8:1546return V_028C70_COLOR_8;1547case 16:1548return V_028C70_COLOR_16;1549case 32:1550return V_028C70_COLOR_32;1551}1552break;1553case 2:1554if (desc->channel[0].size == desc->channel[1].size) {1555switch (desc->channel[0].size) {1556case 8:1557return V_028C70_COLOR_8_8;1558case 16:1559return V_028C70_COLOR_16_16;1560case 32:1561return V_028C70_COLOR_32_32;1562}1563} else if (HAS_SIZE(8, 24, 0, 0)) {1564return V_028C70_COLOR_24_8;1565} else if (HAS_SIZE(24, 8, 0, 0)) {1566return V_028C70_COLOR_8_24;1567}1568break;1569case 3:1570if (HAS_SIZE(5, 6, 5, 0)) {1571return V_028C70_COLOR_5_6_5;1572} else if (HAS_SIZE(32, 8, 24, 0)) {1573return V_028C70_COLOR_X24_8_32_FLOAT;1574}1575break;1576case 4:1577if (desc->channel[0].size == desc->channel[1].size &&1578desc->channel[0].size == desc->channel[2].size &&1579desc->channel[0].size == desc->channel[3].size) {1580switch (desc->channel[0].size) {1581case 4:1582return V_028C70_COLOR_4_4_4_4;1583case 8:1584return V_028C70_COLOR_8_8_8_8;1585case 16:1586return V_028C70_COLOR_16_16_16_16;1587case 32:1588return V_028C70_COLOR_32_32_32_32;1589}1590} else if (HAS_SIZE(5, 5, 5, 1)) {1591return V_028C70_COLOR_1_5_5_5;1592} else 
if (HAS_SIZE(1, 5, 5, 5)) {1593return V_028C70_COLOR_5_5_5_1;1594} else if (HAS_SIZE(10, 10, 10, 2)) {1595return V_028C70_COLOR_2_10_10_10;1596}1597break;1598}1599return V_028C70_COLOR_INVALID;1600}16011602static uint32_t si_colorformat_endian_swap(uint32_t colorformat)1603{1604if (SI_BIG_ENDIAN) {1605switch (colorformat) {1606/* 8-bit buffers. */1607case V_028C70_COLOR_8:1608return V_028C70_ENDIAN_NONE;16091610/* 16-bit buffers. */1611case V_028C70_COLOR_5_6_5:1612case V_028C70_COLOR_1_5_5_5:1613case V_028C70_COLOR_4_4_4_4:1614case V_028C70_COLOR_16:1615case V_028C70_COLOR_8_8:1616return V_028C70_ENDIAN_8IN16;16171618/* 32-bit buffers. */1619case V_028C70_COLOR_8_8_8_8:1620case V_028C70_COLOR_2_10_10_10:1621case V_028C70_COLOR_8_24:1622case V_028C70_COLOR_24_8:1623case V_028C70_COLOR_16_16:1624return V_028C70_ENDIAN_8IN32;16251626/* 64-bit buffers. */1627case V_028C70_COLOR_16_16_16_16:1628return V_028C70_ENDIAN_8IN16;16291630case V_028C70_COLOR_32_32:1631return V_028C70_ENDIAN_8IN32;16321633/* 128-bit buffers. */1634case V_028C70_COLOR_32_32_32_32:1635return V_028C70_ENDIAN_8IN32;1636default:1637return V_028C70_ENDIAN_NONE; /* Unsupported. 
*/1638}1639} else {1640return V_028C70_ENDIAN_NONE;1641}1642}16431644static uint32_t si_translate_dbformat(enum pipe_format format)1645{1646switch (format) {1647case PIPE_FORMAT_Z16_UNORM:1648return V_028040_Z_16;1649case PIPE_FORMAT_S8_UINT_Z24_UNORM:1650case PIPE_FORMAT_X8Z24_UNORM:1651case PIPE_FORMAT_Z24X8_UNORM:1652case PIPE_FORMAT_Z24_UNORM_S8_UINT:1653return V_028040_Z_24; /* deprecated on AMD GCN */1654case PIPE_FORMAT_Z32_FLOAT:1655case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:1656return V_028040_Z_32_FLOAT;1657default:1658return V_028040_Z_INVALID;1659}1660}16611662/*1663* Texture translation1664*/16651666static uint32_t si_translate_texformat(struct pipe_screen *screen, enum pipe_format format,1667const struct util_format_description *desc,1668int first_non_void)1669{1670struct si_screen *sscreen = (struct si_screen *)screen;1671bool uniform = true;1672int i;16731674assert(sscreen->info.chip_class <= GFX9);16751676/* Colorspace (return non-RGB formats directly). */1677switch (desc->colorspace) {1678/* Depth stencil formats */1679case UTIL_FORMAT_COLORSPACE_ZS:1680switch (format) {1681case PIPE_FORMAT_Z16_UNORM:1682return V_008F14_IMG_DATA_FORMAT_16;1683case PIPE_FORMAT_X24S8_UINT:1684case PIPE_FORMAT_S8X24_UINT:1685/*1686* Implemented as an 8_8_8_8 data format to fix texture1687* gathers in stencil sampling. 
This affects at least1688* GL45-CTS.texture_cube_map_array.sampling on GFX8.1689*/1690if (sscreen->info.chip_class <= GFX8)1691return V_008F14_IMG_DATA_FORMAT_8_8_8_8;16921693if (format == PIPE_FORMAT_X24S8_UINT)1694return V_008F14_IMG_DATA_FORMAT_8_24;1695else1696return V_008F14_IMG_DATA_FORMAT_24_8;1697case PIPE_FORMAT_Z24X8_UNORM:1698case PIPE_FORMAT_Z24_UNORM_S8_UINT:1699return V_008F14_IMG_DATA_FORMAT_8_24;1700case PIPE_FORMAT_X8Z24_UNORM:1701case PIPE_FORMAT_S8_UINT_Z24_UNORM:1702return V_008F14_IMG_DATA_FORMAT_24_8;1703case PIPE_FORMAT_S8_UINT:1704return V_008F14_IMG_DATA_FORMAT_8;1705case PIPE_FORMAT_Z32_FLOAT:1706return V_008F14_IMG_DATA_FORMAT_32;1707case PIPE_FORMAT_X32_S8X24_UINT:1708case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:1709return V_008F14_IMG_DATA_FORMAT_X24_8_32;1710default:1711goto out_unknown;1712}17131714case UTIL_FORMAT_COLORSPACE_YUV:1715goto out_unknown; /* TODO */17161717case UTIL_FORMAT_COLORSPACE_SRGB:1718if (desc->nr_channels != 4 && desc->nr_channels != 1)1719goto out_unknown;1720break;17211722default:1723break;1724}17251726if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {1727if (!sscreen->info.has_format_bc1_through_bc7)1728goto out_unknown;17291730switch (format) {1731case PIPE_FORMAT_RGTC1_SNORM:1732case PIPE_FORMAT_LATC1_SNORM:1733case PIPE_FORMAT_RGTC1_UNORM:1734case PIPE_FORMAT_LATC1_UNORM:1735return V_008F14_IMG_DATA_FORMAT_BC4;1736case PIPE_FORMAT_RGTC2_SNORM:1737case PIPE_FORMAT_LATC2_SNORM:1738case PIPE_FORMAT_RGTC2_UNORM:1739case PIPE_FORMAT_LATC2_UNORM:1740return V_008F14_IMG_DATA_FORMAT_BC5;1741default:1742goto out_unknown;1743}1744}17451746if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&1747(sscreen->info.family == CHIP_STONEY || sscreen->info.family == CHIP_VEGA10 ||1748sscreen->info.family == CHIP_RAVEN || sscreen->info.family == CHIP_RAVEN2)) {1749switch (format) {1750case PIPE_FORMAT_ETC1_RGB8:1751case PIPE_FORMAT_ETC2_RGB8:1752case PIPE_FORMAT_ETC2_SRGB8:1753return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;1754case 
PIPE_FORMAT_ETC2_RGB8A1:1755case PIPE_FORMAT_ETC2_SRGB8A1:1756return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;1757case PIPE_FORMAT_ETC2_RGBA8:1758case PIPE_FORMAT_ETC2_SRGBA8:1759return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA;1760case PIPE_FORMAT_ETC2_R11_UNORM:1761case PIPE_FORMAT_ETC2_R11_SNORM:1762return V_008F14_IMG_DATA_FORMAT_ETC2_R;1763case PIPE_FORMAT_ETC2_RG11_UNORM:1764case PIPE_FORMAT_ETC2_RG11_SNORM:1765return V_008F14_IMG_DATA_FORMAT_ETC2_RG;1766default:1767goto out_unknown;1768}1769}17701771if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {1772if (!sscreen->info.has_format_bc1_through_bc7)1773goto out_unknown;17741775switch (format) {1776case PIPE_FORMAT_BPTC_RGBA_UNORM:1777case PIPE_FORMAT_BPTC_SRGBA:1778return V_008F14_IMG_DATA_FORMAT_BC7;1779case PIPE_FORMAT_BPTC_RGB_FLOAT:1780case PIPE_FORMAT_BPTC_RGB_UFLOAT:1781return V_008F14_IMG_DATA_FORMAT_BC6;1782default:1783goto out_unknown;1784}1785}17861787if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {1788switch (format) {1789case PIPE_FORMAT_R8G8_B8G8_UNORM:1790case PIPE_FORMAT_G8R8_B8R8_UNORM:1791return V_008F14_IMG_DATA_FORMAT_GB_GR;1792case PIPE_FORMAT_G8R8_G8B8_UNORM:1793case PIPE_FORMAT_R8G8_R8B8_UNORM:1794return V_008F14_IMG_DATA_FORMAT_BG_RG;1795default:1796goto out_unknown;1797}1798}17991800if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {1801if (!sscreen->info.has_format_bc1_through_bc7)1802goto out_unknown;18031804switch (format) {1805case PIPE_FORMAT_DXT1_RGB:1806case PIPE_FORMAT_DXT1_RGBA:1807case PIPE_FORMAT_DXT1_SRGB:1808case PIPE_FORMAT_DXT1_SRGBA:1809return V_008F14_IMG_DATA_FORMAT_BC1;1810case PIPE_FORMAT_DXT3_RGBA:1811case PIPE_FORMAT_DXT3_SRGBA:1812return V_008F14_IMG_DATA_FORMAT_BC2;1813case PIPE_FORMAT_DXT5_RGBA:1814case PIPE_FORMAT_DXT5_SRGBA:1815return V_008F14_IMG_DATA_FORMAT_BC3;1816default:1817goto out_unknown;1818}1819}18201821if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {1822return V_008F14_IMG_DATA_FORMAT_5_9_9_9;1823} else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {1824return 
V_008F14_IMG_DATA_FORMAT_10_11_11;1825}18261827/* R8G8Bx_SNORM - TODO CxV8U8 */18281829/* hw cannot support mixed formats (except depth/stencil, since only1830* depth is read).*/1831if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)1832goto out_unknown;18331834/* See whether the components are of the same size. */1835for (i = 1; i < desc->nr_channels; i++) {1836uniform = uniform && desc->channel[0].size == desc->channel[i].size;1837}18381839/* Non-uniform formats. */1840if (!uniform) {1841switch (desc->nr_channels) {1842case 3:1843if (desc->channel[0].size == 5 && desc->channel[1].size == 6 &&1844desc->channel[2].size == 5) {1845return V_008F14_IMG_DATA_FORMAT_5_6_5;1846}1847goto out_unknown;1848case 4:1849if (desc->channel[0].size == 5 && desc->channel[1].size == 5 &&1850desc->channel[2].size == 5 && desc->channel[3].size == 1) {1851return V_008F14_IMG_DATA_FORMAT_1_5_5_5;1852}1853if (desc->channel[0].size == 1 && desc->channel[1].size == 5 &&1854desc->channel[2].size == 5 && desc->channel[3].size == 5) {1855return V_008F14_IMG_DATA_FORMAT_5_5_5_1;1856}1857if (desc->channel[0].size == 10 && desc->channel[1].size == 10 &&1858desc->channel[2].size == 10 && desc->channel[3].size == 2) {1859return V_008F14_IMG_DATA_FORMAT_2_10_10_10;1860}1861goto out_unknown;1862}1863goto out_unknown;1864}18651866if (first_non_void < 0 || first_non_void > 3)1867goto out_unknown;18681869/* uniform formats */1870switch (desc->channel[first_non_void].size) {1871case 4:1872switch (desc->nr_channels) {1873#if 0 /* Not supported for render targets */1874case 2:1875return V_008F14_IMG_DATA_FORMAT_4_4;1876#endif1877case 4:1878return V_008F14_IMG_DATA_FORMAT_4_4_4_4;1879}1880break;1881case 8:1882switch (desc->nr_channels) {1883case 1:1884return V_008F14_IMG_DATA_FORMAT_8;1885case 2:1886return V_008F14_IMG_DATA_FORMAT_8_8;1887case 4:1888return V_008F14_IMG_DATA_FORMAT_8_8_8_8;1889}1890break;1891case 16:1892switch (desc->nr_channels) {1893case 1:1894return 
V_008F14_IMG_DATA_FORMAT_16;1895case 2:1896return V_008F14_IMG_DATA_FORMAT_16_16;1897case 4:1898return V_008F14_IMG_DATA_FORMAT_16_16_16_16;1899}1900break;1901case 32:1902switch (desc->nr_channels) {1903case 1:1904return V_008F14_IMG_DATA_FORMAT_32;1905case 2:1906return V_008F14_IMG_DATA_FORMAT_32_32;1907#if 0 /* Not supported for render targets */1908case 3:1909return V_008F14_IMG_DATA_FORMAT_32_32_32;1910#endif1911case 4:1912return V_008F14_IMG_DATA_FORMAT_32_32_32_32;1913}1914}19151916out_unknown:1917return ~0;1918}19191920static unsigned is_wrap_mode_legal(struct si_screen *screen, unsigned wrap)1921{1922if (!screen->info.has_3d_cube_border_color_mipmap) {1923switch (wrap) {1924case PIPE_TEX_WRAP_CLAMP:1925case PIPE_TEX_WRAP_CLAMP_TO_BORDER:1926case PIPE_TEX_WRAP_MIRROR_CLAMP:1927case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:1928return false;1929}1930}1931return true;1932}19331934static unsigned si_tex_wrap(unsigned wrap)1935{1936switch (wrap) {1937default:1938case PIPE_TEX_WRAP_REPEAT:1939return V_008F30_SQ_TEX_WRAP;1940case PIPE_TEX_WRAP_CLAMP:1941return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;1942case PIPE_TEX_WRAP_CLAMP_TO_EDGE:1943return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;1944case PIPE_TEX_WRAP_CLAMP_TO_BORDER:1945return V_008F30_SQ_TEX_CLAMP_BORDER;1946case PIPE_TEX_WRAP_MIRROR_REPEAT:1947return V_008F30_SQ_TEX_MIRROR;1948case PIPE_TEX_WRAP_MIRROR_CLAMP:1949return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;1950case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:1951return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;1952case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:1953return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;1954}1955}19561957static unsigned si_tex_mipfilter(unsigned filter)1958{1959switch (filter) {1960case PIPE_TEX_MIPFILTER_NEAREST:1961return V_008F38_SQ_TEX_Z_FILTER_POINT;1962case PIPE_TEX_MIPFILTER_LINEAR:1963return V_008F38_SQ_TEX_Z_FILTER_LINEAR;1964default:1965case PIPE_TEX_MIPFILTER_NONE:1966return V_008F38_SQ_TEX_Z_FILTER_NONE;1967}1968}19691970static unsigned 
si_tex_compare(unsigned compare)1971{1972switch (compare) {1973default:1974case PIPE_FUNC_NEVER:1975return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;1976case PIPE_FUNC_LESS:1977return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;1978case PIPE_FUNC_EQUAL:1979return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;1980case PIPE_FUNC_LEQUAL:1981return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;1982case PIPE_FUNC_GREATER:1983return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;1984case PIPE_FUNC_NOTEQUAL:1985return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;1986case PIPE_FUNC_GEQUAL:1987return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;1988case PIPE_FUNC_ALWAYS:1989return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;1990}1991}19921993static unsigned si_tex_dim(struct si_screen *sscreen, struct si_texture *tex, unsigned view_target,1994unsigned nr_samples)1995{1996unsigned res_target = tex->buffer.b.b.target;19971998if (view_target == PIPE_TEXTURE_CUBE || view_target == PIPE_TEXTURE_CUBE_ARRAY)1999res_target = view_target;2000/* If interpreting cubemaps as something else, set 2D_ARRAY. */2001else if (res_target == PIPE_TEXTURE_CUBE || res_target == PIPE_TEXTURE_CUBE_ARRAY)2002res_target = PIPE_TEXTURE_2D_ARRAY;20032004/* GFX9 allocates 1D textures as 2D. */2005if ((res_target == PIPE_TEXTURE_1D || res_target == PIPE_TEXTURE_1D_ARRAY) &&2006sscreen->info.chip_class == GFX9 &&2007tex->surface.u.gfx9.resource_type == RADEON_RESOURCE_2D) {2008if (res_target == PIPE_TEXTURE_1D)2009res_target = PIPE_TEXTURE_2D;2010else2011res_target = PIPE_TEXTURE_2D_ARRAY;2012}20132014switch (res_target) {2015default:2016case PIPE_TEXTURE_1D:2017return V_008F1C_SQ_RSRC_IMG_1D;2018case PIPE_TEXTURE_1D_ARRAY:2019return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;2020case PIPE_TEXTURE_2D:2021case PIPE_TEXTURE_RECT:2022return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : V_008F1C_SQ_RSRC_IMG_2D;2023case PIPE_TEXTURE_2D_ARRAY:2024return nr_samples > 1 ? 
V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_ARRAY;2025case PIPE_TEXTURE_3D:2026return V_008F1C_SQ_RSRC_IMG_3D;2027case PIPE_TEXTURE_CUBE:2028case PIPE_TEXTURE_CUBE_ARRAY:2029return V_008F1C_SQ_RSRC_IMG_CUBE;2030}2031}20322033/*2034* Format support testing2035*/20362037static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)2038{2039struct si_screen *sscreen = (struct si_screen *)screen;20402041if (sscreen->info.chip_class >= GFX10) {2042const struct gfx10_format *fmt = &gfx10_format_table[format];2043if (!fmt->img_format || fmt->buffers_only)2044return false;2045return true;2046}20472048const struct util_format_description *desc = util_format_description(format);2049if (!desc)2050return false;20512052return si_translate_texformat(screen, format, desc,2053util_format_get_first_non_void_channel(format)) != ~0U;2054}20552056static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,2057const struct util_format_description *desc,2058int first_non_void)2059{2060int i;20612062assert(((struct si_screen *)screen)->info.chip_class <= GFX9);20632064if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)2065return V_008F0C_BUF_DATA_FORMAT_10_11_11;20662067assert(first_non_void >= 0);20682069if (desc->nr_channels == 4 && desc->channel[0].size == 10 && desc->channel[1].size == 10 &&2070desc->channel[2].size == 10 && desc->channel[3].size == 2)2071return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;20722073/* See whether the components are of the same size. 
*/2074for (i = 0; i < desc->nr_channels; i++) {2075if (desc->channel[first_non_void].size != desc->channel[i].size)2076return V_008F0C_BUF_DATA_FORMAT_INVALID;2077}20782079switch (desc->channel[first_non_void].size) {2080case 8:2081switch (desc->nr_channels) {2082case 1:2083case 3: /* 3 loads */2084return V_008F0C_BUF_DATA_FORMAT_8;2085case 2:2086return V_008F0C_BUF_DATA_FORMAT_8_8;2087case 4:2088return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;2089}2090break;2091case 16:2092switch (desc->nr_channels) {2093case 1:2094case 3: /* 3 loads */2095return V_008F0C_BUF_DATA_FORMAT_16;2096case 2:2097return V_008F0C_BUF_DATA_FORMAT_16_16;2098case 4:2099return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;2100}2101break;2102case 32:2103switch (desc->nr_channels) {2104case 1:2105return V_008F0C_BUF_DATA_FORMAT_32;2106case 2:2107return V_008F0C_BUF_DATA_FORMAT_32_32;2108case 3:2109return V_008F0C_BUF_DATA_FORMAT_32_32_32;2110case 4:2111return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;2112}2113break;2114case 64:2115/* Legacy double formats. 
*/2116switch (desc->nr_channels) {2117case 1: /* 1 load */2118return V_008F0C_BUF_DATA_FORMAT_32_32;2119case 2: /* 1 load */2120return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;2121case 3: /* 3 loads */2122return V_008F0C_BUF_DATA_FORMAT_32_32;2123case 4: /* 2 loads */2124return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;2125}2126break;2127}21282129return V_008F0C_BUF_DATA_FORMAT_INVALID;2130}21312132static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,2133const struct util_format_description *desc,2134int first_non_void)2135{2136assert(((struct si_screen *)screen)->info.chip_class <= GFX9);21372138if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)2139return V_008F0C_BUF_NUM_FORMAT_FLOAT;21402141assert(first_non_void >= 0);21422143switch (desc->channel[first_non_void].type) {2144case UTIL_FORMAT_TYPE_SIGNED:2145case UTIL_FORMAT_TYPE_FIXED:2146if (desc->channel[first_non_void].size >= 32 || desc->channel[first_non_void].pure_integer)2147return V_008F0C_BUF_NUM_FORMAT_SINT;2148else if (desc->channel[first_non_void].normalized)2149return V_008F0C_BUF_NUM_FORMAT_SNORM;2150else2151return V_008F0C_BUF_NUM_FORMAT_SSCALED;2152break;2153case UTIL_FORMAT_TYPE_UNSIGNED:2154if (desc->channel[first_non_void].size >= 32 || desc->channel[first_non_void].pure_integer)2155return V_008F0C_BUF_NUM_FORMAT_UINT;2156else if (desc->channel[first_non_void].normalized)2157return V_008F0C_BUF_NUM_FORMAT_UNORM;2158else2159return V_008F0C_BUF_NUM_FORMAT_USCALED;2160break;2161case UTIL_FORMAT_TYPE_FLOAT:2162default:2163return V_008F0C_BUF_NUM_FORMAT_FLOAT;2164}2165}21662167static unsigned si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format,2168unsigned usage)2169{2170struct si_screen *sscreen = (struct si_screen *)screen;2171const struct util_format_description *desc;2172int first_non_void;2173unsigned data_format;21742175assert((usage & ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_VERTEX_BUFFER)) ==21760);21772178desc = 
util_format_description(format);2179if (!desc)2180return 0;21812182/* There are no native 8_8_8 or 16_16_16 data formats, and we currently2183* select 8_8_8_8 and 16_16_16_16 instead. This works reasonably well2184* for read-only access (with caveats surrounding bounds checks), but2185* obviously fails for write access which we have to implement for2186* shader images. Luckily, OpenGL doesn't expect this to be supported2187* anyway, and so the only impact is on PBO uploads / downloads, which2188* shouldn't be expected to be fast for GL_RGB anyway.2189*/2190if (desc->block.bits == 3 * 8 || desc->block.bits == 3 * 16) {2191if (usage & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW)) {2192usage &= ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW);2193if (!usage)2194return 0;2195}2196}21972198if (sscreen->info.chip_class >= GFX10) {2199const struct gfx10_format *fmt = &gfx10_format_table[format];2200if (!fmt->img_format || fmt->img_format >= 128)2201return 0;2202return usage;2203}22042205first_non_void = util_format_get_first_non_void_channel(format);2206data_format = si_translate_buffer_dataformat(screen, desc, first_non_void);2207if (data_format == V_008F0C_BUF_DATA_FORMAT_INVALID)2208return 0;22092210return usage;2211}22122213static bool si_is_colorbuffer_format_supported(enum chip_class chip_class,2214enum pipe_format format)2215{2216return si_translate_colorformat(chip_class, format) != V_028C70_COLOR_INVALID &&2217si_translate_colorswap(format, false) != ~0U;2218}22192220static bool si_is_zs_format_supported(enum pipe_format format)2221{2222return si_translate_dbformat(format) != V_028040_Z_INVALID;2223}22242225static bool si_is_format_supported(struct pipe_screen *screen, enum pipe_format format,2226enum pipe_texture_target target, unsigned sample_count,2227unsigned storage_sample_count, unsigned usage)2228{2229struct si_screen *sscreen = (struct si_screen *)screen;2230unsigned retval = 0;22312232if (target >= PIPE_MAX_TEXTURE_TYPES) {2233PRINT_ERR("radeonsi: 
unsupported texture type %d\n", target);2234return false;2235}22362237if ((target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE) &&2238!sscreen->info.has_3d_cube_border_color_mipmap)2239return false;22402241if (util_format_get_num_planes(format) >= 2)2242return false;22432244if (MAX2(1, sample_count) < MAX2(1, storage_sample_count))2245return false;22462247if (sample_count > 1) {2248if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))2249return false;22502251/* Only power-of-two sample counts are supported. */2252if (!util_is_power_of_two_or_zero(sample_count) ||2253!util_is_power_of_two_or_zero(storage_sample_count))2254return false;22552256/* Chips with 1 RB don't increment occlusion queries at 16x MSAA sample rate,2257* so don't expose 16 samples there.2258*/2259const unsigned max_eqaa_samples = util_bitcount(sscreen->info.enabled_rb_mask) <= 1 ? 8 : 16;2260const unsigned max_samples = 8;22612262/* MSAA support without framebuffer attachments. */2263if (format == PIPE_FORMAT_NONE && sample_count <= max_eqaa_samples)2264return true;22652266if (!sscreen->info.has_eqaa_surface_allocator || util_format_is_depth_or_stencil(format)) {2267/* Color without EQAA or depth/stencil. */2268if (sample_count > max_samples || sample_count != storage_sample_count)2269return false;2270} else {2271/* Color with EQAA. 
*/2272if (sample_count > max_eqaa_samples || storage_sample_count > max_samples)2273return false;2274}2275}22762277if (usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)) {2278if (target == PIPE_BUFFER) {2279retval |= si_is_vertex_format_supported(2280screen, format, usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE));2281} else {2282if (si_is_sampler_format_supported(screen, format))2283retval |= usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE);2284}2285}22862287if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |2288PIPE_BIND_SHARED | PIPE_BIND_BLENDABLE)) &&2289si_is_colorbuffer_format_supported(sscreen->info.chip_class, format)) {2290retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |2291PIPE_BIND_SHARED);2292if (!util_format_is_pure_integer(format) && !util_format_is_depth_or_stencil(format))2293retval |= usage & PIPE_BIND_BLENDABLE;2294}22952296if ((usage & PIPE_BIND_DEPTH_STENCIL) && si_is_zs_format_supported(format)) {2297retval |= PIPE_BIND_DEPTH_STENCIL;2298}22992300if (usage & PIPE_BIND_VERTEX_BUFFER) {2301retval |= si_is_vertex_format_supported(screen, format, PIPE_BIND_VERTEX_BUFFER);2302}23032304if (usage & PIPE_BIND_INDEX_BUFFER) {2305if (format == PIPE_FORMAT_R8_UINT ||2306format == PIPE_FORMAT_R16_UINT ||2307format == PIPE_FORMAT_R32_UINT)2308retval |= PIPE_BIND_INDEX_BUFFER;2309}23102311if ((usage & PIPE_BIND_LINEAR) && !util_format_is_compressed(format) &&2312!(usage & PIPE_BIND_DEPTH_STENCIL))2313retval |= PIPE_BIND_LINEAR;23142315return retval == usage;2316}23172318/*2319* framebuffer handling2320*/23212322static void si_choose_spi_color_formats(struct si_surface *surf, unsigned format, unsigned swap,2323unsigned ntype, bool is_depth)2324{2325struct ac_spi_color_formats formats = {};23262327ac_choose_spi_color_formats(format, swap, ntype, is_depth, true, &formats);23282329surf->spi_shader_col_format = 
formats.normal;2330surf->spi_shader_col_format_alpha = formats.alpha;2331surf->spi_shader_col_format_blend = formats.blend;2332surf->spi_shader_col_format_blend_alpha = formats.blend_alpha;2333}23342335static void si_initialize_color_surface(struct si_context *sctx, struct si_surface *surf)2336{2337struct si_texture *tex = (struct si_texture *)surf->base.texture;2338unsigned color_info, color_attrib;2339unsigned format, swap, ntype, endian;2340const struct util_format_description *desc;2341int firstchan;2342unsigned blend_clamp = 0, blend_bypass = 0;23432344desc = util_format_description(surf->base.format);2345for (firstchan = 0; firstchan < 4; firstchan++) {2346if (desc->channel[firstchan].type != UTIL_FORMAT_TYPE_VOID) {2347break;2348}2349}2350if (firstchan == 4 || desc->channel[firstchan].type == UTIL_FORMAT_TYPE_FLOAT) {2351ntype = V_028C70_NUMBER_FLOAT;2352} else {2353ntype = V_028C70_NUMBER_UNORM;2354if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)2355ntype = V_028C70_NUMBER_SRGB;2356else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_SIGNED) {2357if (desc->channel[firstchan].pure_integer) {2358ntype = V_028C70_NUMBER_SINT;2359} else {2360assert(desc->channel[firstchan].normalized);2361ntype = V_028C70_NUMBER_SNORM;2362}2363} else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_UNSIGNED) {2364if (desc->channel[firstchan].pure_integer) {2365ntype = V_028C70_NUMBER_UINT;2366} else {2367assert(desc->channel[firstchan].normalized);2368ntype = V_028C70_NUMBER_UNORM;2369}2370}2371}23722373format = si_translate_colorformat(sctx->chip_class, surf->base.format);2374if (format == V_028C70_COLOR_INVALID) {2375PRINT_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);2376}2377assert(format != V_028C70_COLOR_INVALID);2378swap = si_translate_colorswap(surf->base.format, false);2379endian = si_colorformat_endian_swap(format);23802381/* blend clamp should be set for all NORM/SRGB types */2382if (ntype == V_028C70_NUMBER_UNORM || ntype == 
V_028C70_NUMBER_SNORM ||2383ntype == V_028C70_NUMBER_SRGB)2384blend_clamp = 1;23852386/* set blend bypass according to docs if SINT/UINT or23878/24 COLOR variants */2388if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||2389format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||2390format == V_028C70_COLOR_X24_8_32_FLOAT) {2391blend_clamp = 0;2392blend_bypass = 1;2393}23942395if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) {2396if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_8_8 ||2397format == V_028C70_COLOR_8_8_8_8)2398surf->color_is_int8 = true;2399else if (format == V_028C70_COLOR_10_10_10_2 || format == V_028C70_COLOR_2_10_10_10)2400surf->color_is_int10 = true;2401}24022403color_info =2404S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) |2405S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) |2406S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM &&2407ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 &&2408format != V_028C70_COLOR_24_8) |2409S_028C70_NUMBER_TYPE(ntype) | S_028C70_ENDIAN(endian);24102411/* Intensity is implemented as Red, so treat it that way. 
*/2412color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 ||2413util_format_is_intensity(surf->base.format));24142415if (tex->buffer.b.b.nr_samples > 1) {2416unsigned log_samples = util_logbase2(tex->buffer.b.b.nr_samples);2417unsigned log_fragments = util_logbase2(tex->buffer.b.b.nr_storage_samples);24182419color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS(log_fragments);24202421if (tex->surface.fmask_offset) {2422color_info |= S_028C70_COMPRESSION(1);2423unsigned fmask_bankh = util_logbase2(tex->surface.u.legacy.color.fmask.bankh);24242425if (sctx->chip_class == GFX6) {2426/* due to a hw bug, FMASK_BANK_HEIGHT must be set on GFX6 too */2427color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);2428}2429}2430}24312432/* amdvlk: [min-compressed-block-size] should be set to 32 for dGPU and2433* 64 for APU because all of our APUs to date use DIMMs which have2434* a request granularity size of 64B while all other chips have a2435* 32B request size */2436unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;2437if (!sctx->screen->info.has_dedicated_vram)2438min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;24392440if (sctx->chip_class >= GFX10) {2441surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |2442S_028C78_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.color.dcc.max_compressed_block_size) |2443S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |2444S_028C78_INDEPENDENT_64B_BLOCKS(tex->surface.u.gfx9.color.dcc.independent_64B_blocks) |2445S_028C78_INDEPENDENT_128B_BLOCKS(tex->surface.u.gfx9.color.dcc.independent_128B_blocks);2446} else if (sctx->chip_class >= GFX8) {2447unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;24482449if (tex->buffer.b.b.nr_storage_samples > 1) {2450if (tex->surface.bpe == 1)2451max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;2452else if (tex->surface.bpe == 2)2453max_uncompressed_block_size = 
V_028C78_MAX_BLOCK_SIZE_128B;2454}24552456surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |2457S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |2458S_028C78_INDEPENDENT_64B_BLOCKS(1);2459}24602461/* This must be set for fast clear to work without FMASK. */2462if (!tex->surface.fmask_size && sctx->chip_class == GFX6) {2463unsigned bankh = util_logbase2(tex->surface.u.legacy.bankh);2464color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);2465}24662467/* GFX10 field has the same base shift as the GFX6 field */2468unsigned color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |2469S_028C6C_SLICE_MAX_GFX10(surf->base.u.tex.last_layer);2470unsigned mip0_depth = util_max_layer(&tex->buffer.b.b, 0);24712472if (sctx->chip_class >= GFX10) {2473color_view |= S_028C6C_MIP_LEVEL_GFX10(surf->base.u.tex.level);24742475surf->cb_color_attrib3 = S_028EE0_MIP0_DEPTH(mip0_depth) |2476S_028EE0_RESOURCE_TYPE(tex->surface.u.gfx9.resource_type) |2477S_028EE0_RESOURCE_LEVEL(1);2478} else if (sctx->chip_class == GFX9) {2479color_view |= S_028C6C_MIP_LEVEL_GFX9(surf->base.u.tex.level);2480color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |2481S_028C74_RESOURCE_TYPE(tex->surface.u.gfx9.resource_type);2482}24832484if (sctx->chip_class >= GFX9) {2485surf->cb_color_attrib2 = S_028C68_MIP0_WIDTH(surf->width0 - 1) |2486S_028C68_MIP0_HEIGHT(surf->height0 - 1) |2487S_028C68_MAX_MIP(tex->buffer.b.b.last_level);2488}24892490surf->cb_color_view = color_view;2491surf->cb_color_info = color_info;2492surf->cb_color_attrib = color_attrib;24932494/* Determine pixel shader export format */2495si_choose_spi_color_formats(surf, format, swap, ntype, tex->is_depth);24962497surf->color_initialized = true;2498}24992500static void si_init_depth_surface(struct si_context *sctx, struct si_surface *surf)2501{2502struct si_texture *tex = (struct si_texture *)surf->base.texture;2503unsigned level = surf->base.u.tex.level;2504unsigned format, 
stencil_format;2505uint32_t z_info, s_info;25062507format = si_translate_dbformat(tex->db_render_format);2508stencil_format = tex->surface.has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;25092510assert(format != V_028040_Z_INVALID);2511if (format == V_028040_Z_INVALID)2512PRINT_ERR("Invalid DB format: %d, disabling DB.\n", tex->buffer.b.b.format);25132514surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |2515S_028008_SLICE_MAX(surf->base.u.tex.last_layer);2516surf->db_htile_data_base = 0;2517surf->db_htile_surface = 0;25182519if (sctx->chip_class >= GFX10) {2520surf->db_depth_view |= S_028008_SLICE_START_HI(surf->base.u.tex.first_layer >> 11) |2521S_028008_SLICE_MAX_HI(surf->base.u.tex.last_layer >> 11);2522}25232524if (sctx->chip_class >= GFX9) {2525assert(tex->surface.u.gfx9.surf_offset == 0);2526surf->db_depth_base = tex->buffer.gpu_address >> 8;2527surf->db_stencil_base = (tex->buffer.gpu_address + tex->surface.u.gfx9.zs.stencil_offset) >> 8;2528z_info = S_028038_FORMAT(format) |2529S_028038_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples)) |2530S_028038_SW_MODE(tex->surface.u.gfx9.swizzle_mode) |2531S_028038_MAXMIP(tex->buffer.b.b.last_level);2532s_info = S_02803C_FORMAT(stencil_format) |2533S_02803C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode);25342535if (sctx->chip_class == GFX9) {2536surf->db_z_info2 = S_028068_EPITCH(tex->surface.u.gfx9.epitch);2537surf->db_stencil_info2 = S_02806C_EPITCH(tex->surface.u.gfx9.zs.stencil_epitch);2538}2539surf->db_depth_view |= S_028008_MIPID(level);2540surf->db_depth_size =2541S_02801C_X_MAX(tex->buffer.b.b.width0 - 1) | S_02801C_Y_MAX(tex->buffer.b.b.height0 - 1);25422543if (si_htile_enabled(tex, level, PIPE_MASK_ZS)) {2544z_info |= S_028038_TILE_SURFACE_ENABLE(1) | S_028038_ALLOW_EXPCLEAR(1);2545s_info |= S_02803C_TILE_STENCIL_DISABLE(tex->htile_stencil_disabled);25462547if (tex->surface.has_stencil && !tex->htile_stencil_disabled) {2548/* Stencil buffer workaround ported 
from the GFX6-GFX8 code.2549* See that for explanation.2550*/2551s_info |= S_02803C_ALLOW_EXPCLEAR(tex->buffer.b.b.nr_samples <= 1);2552}25532554surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8;2555surf->db_htile_surface =2556S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);2557if (sctx->chip_class == GFX9) {2558surf->db_htile_surface |= S_028ABC_RB_ALIGNED(1);2559}2560}2561} else {2562/* GFX6-GFX8 */2563struct legacy_surf_level *levelinfo = &tex->surface.u.legacy.level[level];25642565assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);25662567surf->db_depth_base =2568(tex->buffer.gpu_address >> 8) + tex->surface.u.legacy.level[level].offset_256B;2569surf->db_stencil_base =2570(tex->buffer.gpu_address >> 8) + tex->surface.u.legacy.zs.stencil_level[level].offset_256B;25712572z_info =2573S_028040_FORMAT(format) | S_028040_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples));2574s_info = S_028044_FORMAT(stencil_format);2575surf->db_depth_info = 0;25762577if (sctx->chip_class >= GFX7) {2578struct radeon_info *info = &sctx->screen->info;2579unsigned index = tex->surface.u.legacy.tiling_index[level];2580unsigned stencil_index = tex->surface.u.legacy.zs.stencil_tiling_index[level];2581unsigned macro_index = tex->surface.u.legacy.macro_tile_index;2582unsigned tile_mode = info->si_tile_mode_array[index];2583unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];2584unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];25852586surf->db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |2587S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |2588S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |2589S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |2590S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |2591S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));2592z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));2593s_info |= 
S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));2594} else {2595unsigned tile_mode_index = si_tile_mode_index(tex, level, false);2596z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);2597tile_mode_index = si_tile_mode_index(tex, level, true);2598s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);2599}26002601surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |2602S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);2603surf->db_depth_slice =2604S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x * levelinfo->nblk_y) / 64 - 1);26052606if (si_htile_enabled(tex, level, PIPE_MASK_ZS)) {2607z_info |= S_028040_TILE_SURFACE_ENABLE(1) | S_028040_ALLOW_EXPCLEAR(1);2608s_info |= S_028044_TILE_STENCIL_DISABLE(tex->htile_stencil_disabled);26092610if (tex->surface.has_stencil) {2611/* Workaround: For a not yet understood reason, the2612* combination of MSAA, fast stencil clear and stencil2613* decompress messes with subsequent stencil buffer2614* uses. Problem was reproduced on Verde, Bonaire,2615* Tonga, and Carrizo.2616*2617* Disabling EXPCLEAR works around the problem.2618*2619* Check piglit's arb_texture_multisample-stencil-clear2620* test if you want to try changing this.2621*/2622if (tex->buffer.b.b.nr_samples <= 1)2623s_info |= S_028044_ALLOW_EXPCLEAR(1);2624}26252626surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8;2627surf->db_htile_surface = S_028ABC_FULL_CACHE(1);2628}2629}26302631surf->db_z_info = z_info;2632surf->db_stencil_info = s_info;26332634surf->depth_initialized = true;2635}26362637void si_update_fb_dirtiness_after_rendering(struct si_context *sctx)2638{2639if (sctx->decompression_enabled)2640return;26412642if (sctx->framebuffer.state.zsbuf) {2643struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;2644struct si_texture *tex = (struct si_texture *)surf->texture;26452646tex->dirty_level_mask |= 1 << surf->u.tex.level;26472648if (tex->surface.has_stencil)2649tex->stencil_dirty_level_mask |= 
1 << surf->u.tex.level;2650}26512652unsigned compressed_cb_mask = sctx->framebuffer.compressed_cb_mask;2653while (compressed_cb_mask) {2654unsigned i = u_bit_scan(&compressed_cb_mask);2655struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i];2656struct si_texture *tex = (struct si_texture *)surf->texture;26572658if (tex->surface.fmask_offset) {2659tex->dirty_level_mask |= 1 << surf->u.tex.level;2660tex->fmask_is_identity = false;2661}2662}2663}26642665static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)2666{2667for (int i = 0; i < state->nr_cbufs; ++i) {2668struct si_surface *surf = NULL;2669struct si_texture *tex;26702671if (!state->cbufs[i])2672continue;2673surf = (struct si_surface *)state->cbufs[i];2674tex = (struct si_texture *)surf->base.texture;26752676p_atomic_dec(&tex->framebuffers_bound);2677}2678}26792680void si_mark_display_dcc_dirty(struct si_context *sctx, struct si_texture *tex)2681{2682if (!tex->surface.display_dcc_offset || tex->displayable_dcc_dirty)2683return;26842685if (!(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) {2686struct hash_entry *entry = _mesa_hash_table_search(sctx->dirty_implicit_resources, tex);2687if (!entry) {2688struct pipe_resource *dummy = NULL;2689pipe_resource_reference(&dummy, &tex->buffer.b.b);2690_mesa_hash_table_insert(sctx->dirty_implicit_resources, tex, tex);2691}2692}2693tex->displayable_dcc_dirty = true;2694}26952696static void si_update_display_dcc_dirty(struct si_context *sctx)2697{2698const struct pipe_framebuffer_state *state = &sctx->framebuffer.state;26992700for (unsigned i = 0; i < state->nr_cbufs; i++) {2701if (state->cbufs[i])2702si_mark_display_dcc_dirty(sctx, (struct si_texture *)state->cbufs[i]->texture);2703}2704}27052706static void si_set_framebuffer_state(struct pipe_context *ctx,2707const struct pipe_framebuffer_state *state)2708{2709struct si_context *sctx = (struct si_context *)ctx;2710struct si_surface *surf = NULL;2711struct si_texture 
*tex;2712bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;2713unsigned old_nr_samples = sctx->framebuffer.nr_samples;2714unsigned old_colorbuf_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit;2715bool old_has_zsbuf = !!sctx->framebuffer.state.zsbuf;2716bool old_has_stencil =2717old_has_zsbuf &&2718((struct si_texture *)sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil;2719bool unbound = false;2720int i;27212722/* Reject zero-sized framebuffers due to a hw bug on GFX6 that occurs2723* when PA_SU_HARDWARE_SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0.2724* We could implement the full workaround here, but it's a useless case.2725*/2726if ((!state->width || !state->height) && (state->nr_cbufs || state->zsbuf)) {2727unreachable("the framebuffer shouldn't have zero area");2728return;2729}27302731si_update_fb_dirtiness_after_rendering(sctx);27322733/* Disable DCC if the formats are incompatible. */2734for (i = 0; i < state->nr_cbufs; i++) {2735if (!state->cbufs[i])2736continue;27372738surf = (struct si_surface *)state->cbufs[i];2739tex = (struct si_texture *)surf->base.texture;27402741if (!surf->dcc_incompatible)2742continue;27432744/* Since the DCC decompression calls back into set_framebuffer-2745* _state, we need to unbind the framebuffer, so that2746* vi_separate_dcc_stop_query isn't called twice with the same2747* color buffer.2748*/2749if (!unbound) {2750util_copy_framebuffer_state(&sctx->framebuffer.state, NULL);2751unbound = true;2752}27532754if (vi_dcc_enabled(tex, surf->base.u.tex.level))2755if (!si_texture_disable_dcc(sctx, tex))2756si_decompress_dcc(sctx, tex);27572758surf->dcc_incompatible = false;2759}27602761/* Only flush TC when changing the framebuffer state, because2762* the only client not using TC that can change textures is2763* the framebuffer.2764*2765* Wait for compute shaders because of possible transitions:2766* - FB write -> shader read2767* - shader write -> FB read2768*2769* DB caches are flushed on demand (using 
si_decompress_textures).2770*2771* When MSAA is enabled, CB and TC caches are flushed on demand2772* (after FMASK decompression). Shader write -> FB read transitions2773* cannot happen for MSAA textures, because MSAA shader images are2774* not supported.2775*2776* Only flush and wait for CB if there is actually a bound color buffer.2777*/2778if (sctx->framebuffer.uncompressed_cb_mask) {2779si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,2780sctx->framebuffer.CB_has_shader_readable_metadata,2781sctx->framebuffer.all_DCC_pipe_aligned);2782}27832784sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;27852786/* u_blitter doesn't invoke depth decompression when it does multiple2787* blits in a row, but the only case when it matters for DB is when2788* doing generate_mipmap. So here we flush DB manually between2789* individual generate_mipmap blits.2790* Note that lower mipmap levels aren't compressed.2791*/2792if (sctx->generate_mipmap_for_depth) {2793si_make_DB_shader_coherent(sctx, 1, false, sctx->framebuffer.DB_has_shader_readable_metadata);2794} else if (sctx->chip_class == GFX9) {2795/* It appears that DB metadata "leaks" in a sequence of:2796* - depth clear2797* - DCC decompress for shader image writes (with DB disabled)2798* - render with DEPTH_BEFORE_SHADER=12799* Flushing DB metadata works around the problem.2800*/2801sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META;2802}28032804/* Take the maximum of the old and new count. 
If the new count is lower,2805* dirtying is needed to disable the unbound colorbuffers.2806*/2807sctx->framebuffer.dirty_cbufs |=2808(1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;2809sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;28102811si_dec_framebuffer_counters(&sctx->framebuffer.state);2812util_copy_framebuffer_state(&sctx->framebuffer.state, state);28132814sctx->framebuffer.colorbuf_enabled_4bit = 0;2815sctx->framebuffer.spi_shader_col_format = 0;2816sctx->framebuffer.spi_shader_col_format_alpha = 0;2817sctx->framebuffer.spi_shader_col_format_blend = 0;2818sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;2819sctx->framebuffer.color_is_int8 = 0;2820sctx->framebuffer.color_is_int10 = 0;28212822sctx->framebuffer.compressed_cb_mask = 0;2823sctx->framebuffer.uncompressed_cb_mask = 0;2824sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);2825sctx->framebuffer.nr_color_samples = sctx->framebuffer.nr_samples;2826sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);2827sctx->framebuffer.any_dst_linear = false;2828sctx->framebuffer.CB_has_shader_readable_metadata = false;2829sctx->framebuffer.DB_has_shader_readable_metadata = false;2830sctx->framebuffer.all_DCC_pipe_aligned = true;2831sctx->framebuffer.has_dcc_msaa = false;2832sctx->framebuffer.min_bytes_per_pixel = 0;28332834for (i = 0; i < state->nr_cbufs; i++) {2835if (!state->cbufs[i])2836continue;28372838surf = (struct si_surface *)state->cbufs[i];2839tex = (struct si_texture *)surf->base.texture;28402841if (!surf->color_initialized) {2842si_initialize_color_surface(sctx, surf);2843}28442845sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4);2846sctx->framebuffer.spi_shader_col_format |= surf->spi_shader_col_format << (i * 4);2847sctx->framebuffer.spi_shader_col_format_alpha |= surf->spi_shader_col_format_alpha << (i * 4);2848sctx->framebuffer.spi_shader_col_format_blend |= surf->spi_shader_col_format_blend 
<< (i * 4);2849sctx->framebuffer.spi_shader_col_format_blend_alpha |= surf->spi_shader_col_format_blend_alpha2850<< (i * 4);28512852if (surf->color_is_int8)2853sctx->framebuffer.color_is_int8 |= 1 << i;2854if (surf->color_is_int10)2855sctx->framebuffer.color_is_int10 |= 1 << i;28562857if (tex->surface.fmask_offset)2858sctx->framebuffer.compressed_cb_mask |= 1 << i;2859else2860sctx->framebuffer.uncompressed_cb_mask |= 1 << i;28612862/* Don't update nr_color_samples for non-AA buffers.2863* (e.g. destination of MSAA resolve)2864*/2865if (tex->buffer.b.b.nr_samples >= 2 &&2866tex->buffer.b.b.nr_storage_samples < tex->buffer.b.b.nr_samples) {2867sctx->framebuffer.nr_color_samples =2868MIN2(sctx->framebuffer.nr_color_samples, tex->buffer.b.b.nr_storage_samples);2869sctx->framebuffer.nr_color_samples = MAX2(1, sctx->framebuffer.nr_color_samples);2870}28712872if (tex->surface.is_linear)2873sctx->framebuffer.any_dst_linear = true;28742875if (vi_dcc_enabled(tex, surf->base.u.tex.level)) {2876sctx->framebuffer.CB_has_shader_readable_metadata = true;28772878if (sctx->chip_class >= GFX9 && !tex->surface.u.gfx9.color.dcc.pipe_aligned)2879sctx->framebuffer.all_DCC_pipe_aligned = false;28802881if (tex->buffer.b.b.nr_storage_samples >= 2)2882sctx->framebuffer.has_dcc_msaa = true;2883}28842885si_context_add_resource_size(sctx, surf->base.texture);28862887p_atomic_inc(&tex->framebuffers_bound);28882889/* Update the minimum but don't keep 0. */2890if (!sctx->framebuffer.min_bytes_per_pixel ||2891tex->surface.bpe < sctx->framebuffer.min_bytes_per_pixel)2892sctx->framebuffer.min_bytes_per_pixel = tex->surface.bpe;2893}28942895/* For optimal DCC performance. 
*/2896if (sctx->chip_class >= GFX10)2897sctx->framebuffer.dcc_overwrite_combiner_watermark = 6;2898else2899sctx->framebuffer.dcc_overwrite_combiner_watermark = 4;29002901struct si_texture *zstex = NULL;29022903if (state->zsbuf) {2904surf = (struct si_surface *)state->zsbuf;2905zstex = (struct si_texture *)surf->base.texture;29062907if (!surf->depth_initialized) {2908si_init_depth_surface(sctx, surf);2909}29102911if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level, PIPE_MASK_ZS))2912sctx->framebuffer.DB_has_shader_readable_metadata = true;29132914si_context_add_resource_size(sctx, surf->base.texture);29152916/* Update the minimum but don't keep 0. */2917if (!sctx->framebuffer.min_bytes_per_pixel ||2918zstex->surface.bpe < sctx->framebuffer.min_bytes_per_pixel)2919sctx->framebuffer.min_bytes_per_pixel = zstex->surface.bpe;2920}29212922si_update_ps_colorbuf0_slot(sctx);2923si_update_poly_offset_state(sctx);2924si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state);2925si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);29262927/* NGG cull state uses the sample count. 
*/2928if (sctx->screen->use_ngg_culling)2929si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state);29302931if (sctx->screen->dpbb_allowed)2932si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);29332934if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)2935si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);29362937if (sctx->screen->has_out_of_order_rast &&2938(sctx->framebuffer.colorbuf_enabled_4bit != old_colorbuf_enabled_4bit ||2939!!sctx->framebuffer.state.zsbuf != old_has_zsbuf ||2940(zstex && zstex->surface.has_stencil != old_has_stencil)))2941si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);29422943if (sctx->framebuffer.nr_samples != old_nr_samples) {2944struct pipe_constant_buffer constbuf = {0};29452946si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);2947si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);29482949if (!sctx->sample_pos_buffer) {2950sctx->sample_pos_buffer = pipe_buffer_create_with_data(&sctx->b, 0, PIPE_USAGE_DEFAULT,2951sizeof(sctx->sample_positions),2952&sctx->sample_positions);2953}2954constbuf.buffer = sctx->sample_pos_buffer;29552956/* Set sample locations as fragment shader constants. 
*/2957switch (sctx->framebuffer.nr_samples) {2958case 1:2959constbuf.buffer_offset = 0;2960break;2961case 2:2962constbuf.buffer_offset =2963(ubyte *)sctx->sample_positions.x2 - (ubyte *)sctx->sample_positions.x1;2964break;2965case 4:2966constbuf.buffer_offset =2967(ubyte *)sctx->sample_positions.x4 - (ubyte *)sctx->sample_positions.x1;2968break;2969case 8:2970constbuf.buffer_offset =2971(ubyte *)sctx->sample_positions.x8 - (ubyte *)sctx->sample_positions.x1;2972break;2973case 16:2974constbuf.buffer_offset =2975(ubyte *)sctx->sample_positions.x16 - (ubyte *)sctx->sample_positions.x1;2976break;2977default:2978PRINT_ERR("Requested an invalid number of samples %i.\n", sctx->framebuffer.nr_samples);2979assert(0);2980}2981constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;2982si_set_internal_const_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);29832984si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs);2985}29862987sctx->do_update_shaders = true;29882989if (!sctx->decompression_enabled) {2990/* Prevent textures decompression when the framebuffer state2991* changes come from the decompression passes themselves.2992*/2993sctx->need_check_render_feedback = true;2994}2995}29962997static void si_emit_framebuffer_state(struct si_context *sctx)2998{2999struct radeon_cmdbuf *cs = &sctx->gfx_cs;3000struct pipe_framebuffer_state *state = &sctx->framebuffer.state;3001unsigned i, nr_cbufs = state->nr_cbufs;3002struct si_texture *tex = NULL;3003struct si_surface *cb = NULL;3004unsigned cb_color_info = 0;30053006radeon_begin(cs);30073008/* Colorbuffers. 
*/3009for (i = 0; i < nr_cbufs; i++) {3010uint64_t cb_color_base, cb_color_fmask, cb_color_cmask, cb_dcc_base;3011unsigned cb_color_attrib;30123013if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))3014continue;30153016cb = (struct si_surface *)state->cbufs[i];3017if (!cb) {3018radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,3019S_028C70_FORMAT(V_028C70_COLOR_INVALID));3020continue;3021}30223023tex = (struct si_texture *)cb->base.texture;3024radeon_add_to_buffer_list(3025sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC,3026tex->buffer.b.b.nr_samples > 1 ? RADEON_PRIO_COLOR_BUFFER_MSAA : RADEON_PRIO_COLOR_BUFFER);30273028if (tex->cmask_buffer && tex->cmask_buffer != &tex->buffer) {3029radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->cmask_buffer,3030RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC,3031RADEON_PRIO_SEPARATE_META);3032}30333034/* Compute mutable surface parameters. */3035cb_color_base = tex->buffer.gpu_address >> 8;3036cb_color_fmask = 0;3037cb_color_cmask = tex->cmask_base_address_reg;3038cb_dcc_base = 0;3039cb_color_info = cb->cb_color_info | tex->cb_color_info;3040cb_color_attrib = cb->cb_color_attrib;30413042if (tex->swap_rgb_to_bgr) {3043/* Swap R and B channels. */3044static unsigned rgb_to_bgr[4] = {3045[V_028C70_SWAP_STD] = V_028C70_SWAP_ALT,3046[V_028C70_SWAP_ALT] = V_028C70_SWAP_STD,3047[V_028C70_SWAP_STD_REV] = V_028C70_SWAP_ALT_REV,3048[V_028C70_SWAP_ALT_REV] = V_028C70_SWAP_STD_REV,3049};3050unsigned swap = rgb_to_bgr[G_028C70_COMP_SWAP(cb_color_info)];30513052cb_color_info &= C_028C70_COMP_SWAP;3053cb_color_info |= S_028C70_COMP_SWAP(swap);3054}30553056if (cb->base.u.tex.level > 0)3057cb_color_info &= C_028C70_FAST_CLEAR;30583059if (tex->surface.fmask_offset) {3060cb_color_fmask = (tex->buffer.gpu_address + tex->surface.fmask_offset) >> 8;3061cb_color_fmask |= tex->surface.fmask_tile_swizzle;3062}30633064/* Set up DCC. 
*/3065if (vi_dcc_enabled(tex, cb->base.u.tex.level)) {3066bool is_msaa_resolve_dst = state->cbufs[0] && state->cbufs[0]->texture->nr_samples > 1 &&3067state->cbufs[1] == &cb->base &&3068state->cbufs[1]->texture->nr_samples <= 1;30693070if (!is_msaa_resolve_dst)3071cb_color_info |= S_028C70_DCC_ENABLE(1);30723073cb_dcc_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8;30743075unsigned dcc_tile_swizzle = tex->surface.tile_swizzle;3076dcc_tile_swizzle &= ((1 << tex->surface.meta_alignment_log2) - 1) >> 8;3077cb_dcc_base |= dcc_tile_swizzle;3078}30793080if (sctx->chip_class >= GFX10) {3081unsigned cb_color_attrib3;30823083/* Set mutable surface parameters. */3084cb_color_base += tex->surface.u.gfx9.surf_offset >> 8;3085cb_color_base |= tex->surface.tile_swizzle;3086if (!tex->surface.fmask_offset)3087cb_color_fmask = cb_color_base;3088if (cb->base.u.tex.level > 0)3089cb_color_cmask = cb_color_base;30903091cb_color_attrib3 = cb->cb_color_attrib3 |3092S_028EE0_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) |3093S_028EE0_FMASK_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) |3094S_028EE0_CMASK_PIPE_ALIGNED(1) |3095S_028EE0_DCC_PIPE_ALIGNED(tex->surface.u.gfx9.color.dcc.pipe_aligned);30963097radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 14);3098radeon_emit(cs, cb_color_base); /* CB_COLOR0_BASE */3099radeon_emit(cs, 0); /* hole */3100radeon_emit(cs, 0); /* hole */3101radeon_emit(cs, cb->cb_color_view); /* CB_COLOR0_VIEW */3102radeon_emit(cs, cb_color_info); /* CB_COLOR0_INFO */3103radeon_emit(cs, cb_color_attrib); /* CB_COLOR0_ATTRIB */3104radeon_emit(cs, cb->cb_dcc_control); /* CB_COLOR0_DCC_CONTROL */3105radeon_emit(cs, cb_color_cmask); /* CB_COLOR0_CMASK */3106radeon_emit(cs, 0); /* hole */3107radeon_emit(cs, cb_color_fmask); /* CB_COLOR0_FMASK */3108radeon_emit(cs, 0); /* hole */3109radeon_emit(cs, tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */3110radeon_emit(cs, tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 
*/3111radeon_emit(cs, cb_dcc_base); /* CB_COLOR0_DCC_BASE */31123113radeon_set_context_reg(cs, R_028E40_CB_COLOR0_BASE_EXT + i * 4, cb_color_base >> 32);3114radeon_set_context_reg(cs, R_028E60_CB_COLOR0_CMASK_BASE_EXT + i * 4,3115cb_color_cmask >> 32);3116radeon_set_context_reg(cs, R_028E80_CB_COLOR0_FMASK_BASE_EXT + i * 4,3117cb_color_fmask >> 32);3118radeon_set_context_reg(cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + i * 4, cb_dcc_base >> 32);3119radeon_set_context_reg(cs, R_028EC0_CB_COLOR0_ATTRIB2 + i * 4, cb->cb_color_attrib2);3120radeon_set_context_reg(cs, R_028EE0_CB_COLOR0_ATTRIB3 + i * 4, cb_color_attrib3);3121} else if (sctx->chip_class == GFX9) {3122struct gfx9_surf_meta_flags meta = {3123.rb_aligned = 1,3124.pipe_aligned = 1,3125};31263127if (!tex->is_depth && tex->surface.meta_offset)3128meta = tex->surface.u.gfx9.color.dcc;31293130/* Set mutable surface parameters. */3131cb_color_base += tex->surface.u.gfx9.surf_offset >> 8;3132cb_color_base |= tex->surface.tile_swizzle;3133if (!tex->surface.fmask_offset)3134cb_color_fmask = cb_color_base;3135if (cb->base.u.tex.level > 0)3136cb_color_cmask = cb_color_base;3137cb_color_attrib |= S_028C74_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) |3138S_028C74_FMASK_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) |3139S_028C74_RB_ALIGNED(meta.rb_aligned) |3140S_028C74_PIPE_ALIGNED(meta.pipe_aligned);31413142radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 15);3143radeon_emit(cs, cb_color_base); /* CB_COLOR0_BASE */3144radeon_emit(cs, S_028C64_BASE_256B(cb_color_base >> 32)); /* CB_COLOR0_BASE_EXT */3145radeon_emit(cs, cb->cb_color_attrib2); /* CB_COLOR0_ATTRIB2 */3146radeon_emit(cs, cb->cb_color_view); /* CB_COLOR0_VIEW */3147radeon_emit(cs, cb_color_info); /* CB_COLOR0_INFO */3148radeon_emit(cs, cb_color_attrib); /* CB_COLOR0_ATTRIB */3149radeon_emit(cs, cb->cb_dcc_control); /* CB_COLOR0_DCC_CONTROL */3150radeon_emit(cs, cb_color_cmask); /* CB_COLOR0_CMASK */3151radeon_emit(cs, 
S_028C80_BASE_256B(cb_color_cmask >> 32)); /* CB_COLOR0_CMASK_BASE_EXT */3152radeon_emit(cs, cb_color_fmask); /* CB_COLOR0_FMASK */3153radeon_emit(cs, S_028C88_BASE_256B(cb_color_fmask >> 32)); /* CB_COLOR0_FMASK_BASE_EXT */3154radeon_emit(cs, tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */3155radeon_emit(cs, tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */3156radeon_emit(cs, cb_dcc_base); /* CB_COLOR0_DCC_BASE */3157radeon_emit(cs, S_028C98_BASE_256B(cb_dcc_base >> 32)); /* CB_COLOR0_DCC_BASE_EXT */31583159radeon_set_context_reg(cs, R_0287A0_CB_MRT0_EPITCH + i * 4,3160S_0287A0_EPITCH(tex->surface.u.gfx9.epitch));3161} else {3162/* Compute mutable surface parameters (GFX6-GFX8). */3163const struct legacy_surf_level *level_info =3164&tex->surface.u.legacy.level[cb->base.u.tex.level];3165unsigned pitch_tile_max, slice_tile_max, tile_mode_index;3166unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice;31673168cb_color_base += level_info->offset_256B;3169/* Only macrotiled modes can set tile swizzle. 
*/3170if (level_info->mode == RADEON_SURF_MODE_2D)3171cb_color_base |= tex->surface.tile_swizzle;31723173if (!tex->surface.fmask_offset)3174cb_color_fmask = cb_color_base;3175if (cb->base.u.tex.level > 0)3176cb_color_cmask = cb_color_base;3177if (cb_dcc_base)3178cb_dcc_base += tex->surface.u.legacy.color.dcc_level[cb->base.u.tex.level].dcc_offset >> 8;31793180pitch_tile_max = level_info->nblk_x / 8 - 1;3181slice_tile_max = level_info->nblk_x * level_info->nblk_y / 64 - 1;3182tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false);31833184cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);3185cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);3186cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);31873188if (tex->surface.fmask_offset) {3189if (sctx->chip_class >= GFX7)3190cb_color_pitch |=3191S_028C64_FMASK_TILE_MAX(tex->surface.u.legacy.color.fmask.pitch_in_pixels / 8 - 1);3192cb_color_attrib |=3193S_028C74_FMASK_TILE_MODE_INDEX(tex->surface.u.legacy.color.fmask.tiling_index);3194cb_color_fmask_slice = S_028C88_TILE_MAX(tex->surface.u.legacy.color.fmask.slice_tile_max);3195} else {3196/* This must be set for fast clear to work without FMASK. */3197if (sctx->chip_class >= GFX7)3198cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);3199cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);3200cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);3201}32023203radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,3204sctx->chip_class >= GFX8 ? 
14 : 13);3205radeon_emit(cs, cb_color_base); /* CB_COLOR0_BASE */3206radeon_emit(cs, cb_color_pitch); /* CB_COLOR0_PITCH */3207radeon_emit(cs, cb_color_slice); /* CB_COLOR0_SLICE */3208radeon_emit(cs, cb->cb_color_view); /* CB_COLOR0_VIEW */3209radeon_emit(cs, cb_color_info); /* CB_COLOR0_INFO */3210radeon_emit(cs, cb_color_attrib); /* CB_COLOR0_ATTRIB */3211radeon_emit(cs, cb->cb_dcc_control); /* CB_COLOR0_DCC_CONTROL */3212radeon_emit(cs, cb_color_cmask); /* CB_COLOR0_CMASK */3213radeon_emit(cs, tex->surface.u.legacy.color.cmask_slice_tile_max); /* CB_COLOR0_CMASK_SLICE */3214radeon_emit(cs, cb_color_fmask); /* CB_COLOR0_FMASK */3215radeon_emit(cs, cb_color_fmask_slice); /* CB_COLOR0_FMASK_SLICE */3216radeon_emit(cs, tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */3217radeon_emit(cs, tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */32183219if (sctx->chip_class >= GFX8) /* R_028C94_CB_COLOR0_DCC_BASE */3220radeon_emit(cs, cb_dcc_base);3221}3222}3223for (; i < 8; i++)3224if (sctx->framebuffer.dirty_cbufs & (1 << i))3225radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);32263227/* ZS buffer. */3228if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {3229struct si_surface *zb = (struct si_surface *)state->zsbuf;3230struct si_texture *tex = (struct si_texture *)zb->base.texture;3231unsigned db_z_info = zb->db_z_info;3232unsigned db_stencil_info = zb->db_stencil_info;3233unsigned db_htile_surface = zb->db_htile_surface;32343235radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE,3236zb->base.texture->nr_samples > 1 ? RADEON_PRIO_DEPTH_BUFFER_MSAA3237: RADEON_PRIO_DEPTH_BUFFER);32383239/* Set fields dependent on tc_compatile_htile. 
*/3240if (sctx->chip_class >= GFX9 &&3241vi_tc_compat_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS)) {3242unsigned max_zplanes = 4;32433244if (tex->db_render_format == PIPE_FORMAT_Z16_UNORM && tex->buffer.b.b.nr_samples > 1)3245max_zplanes = 2;32463247db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1);32483249if (sctx->chip_class >= GFX10) {3250db_z_info |= S_028040_ITERATE_FLUSH(1);3251db_stencil_info |= S_028044_ITERATE_FLUSH(!tex->htile_stencil_disabled);3252} else {3253db_z_info |= S_028038_ITERATE_FLUSH(1);3254db_stencil_info |= S_02803C_ITERATE_FLUSH(1);3255}3256}32573258unsigned level = zb->base.u.tex.level;32593260if (sctx->chip_class >= GFX10) {3261radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);3262radeon_set_context_reg(cs, R_02801C_DB_DEPTH_SIZE_XY, zb->db_depth_size);32633264radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 7);3265radeon_emit(cs, S_02803C_RESOURCE_LEVEL(1)); /* DB_DEPTH_INFO */3266radeon_emit(cs, db_z_info | /* DB_Z_INFO */3267S_028038_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0));3268radeon_emit(cs, db_stencil_info); /* DB_STENCIL_INFO */3269radeon_emit(cs, zb->db_depth_base); /* DB_Z_READ_BASE */3270radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_READ_BASE */3271radeon_emit(cs, zb->db_depth_base); /* DB_Z_WRITE_BASE */3272radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */32733274radeon_set_context_reg_seq(cs, R_028068_DB_Z_READ_BASE_HI, 5);3275radeon_emit(cs, zb->db_depth_base >> 32); /* DB_Z_READ_BASE_HI */3276radeon_emit(cs, zb->db_stencil_base >> 32); /* DB_STENCIL_READ_BASE_HI */3277radeon_emit(cs, zb->db_depth_base >> 32); /* DB_Z_WRITE_BASE_HI */3278radeon_emit(cs, zb->db_stencil_base >> 32); /* DB_STENCIL_WRITE_BASE_HI */3279radeon_emit(cs, zb->db_htile_data_base >> 32); /* DB_HTILE_DATA_BASE_HI */3280} else if (sctx->chip_class == GFX9) {3281radeon_set_context_reg_seq(cs, R_028014_DB_HTILE_DATA_BASE, 3);3282radeon_emit(cs, 
zb->db_htile_data_base); /* DB_HTILE_DATA_BASE */3283radeon_emit(cs,3284S_028018_BASE_HI(zb->db_htile_data_base >> 32)); /* DB_HTILE_DATA_BASE_HI */3285radeon_emit(cs, zb->db_depth_size); /* DB_DEPTH_SIZE */32863287radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 10);3288radeon_emit(cs, db_z_info | /* DB_Z_INFO */3289S_028038_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0));3290radeon_emit(cs, db_stencil_info); /* DB_STENCIL_INFO */3291radeon_emit(cs, zb->db_depth_base); /* DB_Z_READ_BASE */3292radeon_emit(cs, S_028044_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_READ_BASE_HI */3293radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_READ_BASE */3294radeon_emit(cs, S_02804C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_READ_BASE_HI */3295radeon_emit(cs, zb->db_depth_base); /* DB_Z_WRITE_BASE */3296radeon_emit(cs, S_028054_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_WRITE_BASE_HI */3297radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */3298radeon_emit(cs,3299S_02805C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */33003301radeon_set_context_reg_seq(cs, R_028068_DB_Z_INFO2, 2);3302radeon_emit(cs, zb->db_z_info2); /* DB_Z_INFO2 */3303radeon_emit(cs, zb->db_stencil_info2); /* DB_STENCIL_INFO2 */3304} else {3305/* GFX6-GFX8 */3306/* Set fields dependent on tc_compatile_htile. */3307if (si_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS)) {3308if (tex->tc_compatible_htile) {3309db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);33103311/* 0 = full compression. N = only compress up to N-1 Z planes. 
*/3312if (tex->buffer.b.b.nr_samples <= 1)3313db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);3314else if (tex->buffer.b.b.nr_samples <= 4)3315db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);3316else3317db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);3318}3319}33203321radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);33223323radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);3324radeon_emit(cs, zb->db_depth_info | /* DB_DEPTH_INFO */3325S_02803C_ADDR5_SWIZZLE_MASK(!tex->tc_compatible_htile));3326radeon_emit(cs, db_z_info | /* DB_Z_INFO */3327S_028040_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0));3328radeon_emit(cs, db_stencil_info); /* DB_STENCIL_INFO */3329radeon_emit(cs, zb->db_depth_base); /* DB_Z_READ_BASE */3330radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_READ_BASE */3331radeon_emit(cs, zb->db_depth_base); /* DB_Z_WRITE_BASE */3332radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */3333radeon_emit(cs, zb->db_depth_size); /* DB_DEPTH_SIZE */3334radeon_emit(cs, zb->db_depth_slice); /* DB_DEPTH_SLICE */3335}33363337radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);3338radeon_emit(cs, tex->stencil_clear_value[level]); /* R_028028_DB_STENCIL_CLEAR */3339radeon_emit(cs, fui(tex->depth_clear_value[level])); /* R_02802C_DB_DEPTH_CLEAR */33403341radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);3342radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, db_htile_surface);3343} else if (sctx->framebuffer.dirty_zsbuf) {3344if (sctx->chip_class == GFX9)3345radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 2);3346else3347radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);33483349radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */3350radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */3351}33523353/* Framebuffer dimensions. 
*/3354/* PA_SC_WINDOW_SCISSOR_TL is set in si_init_cs_preamble_state */3355radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,3356S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));33573358if (sctx->screen->dpbb_allowed) {3359radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));3360radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));3361}3362radeon_end();33633364si_update_display_dcc_dirty(sctx);33653366sctx->framebuffer.dirty_cbufs = 0;3367sctx->framebuffer.dirty_zsbuf = false;3368}33693370static void si_emit_msaa_sample_locs(struct si_context *sctx)3371{3372struct radeon_cmdbuf *cs = &sctx->gfx_cs;3373struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;3374unsigned nr_samples = sctx->framebuffer.nr_samples;3375bool has_msaa_sample_loc_bug = sctx->screen->info.has_msaa_sample_loc_bug;33763377/* Smoothing (only possible with nr_samples == 1) uses the same3378* sample locations as the MSAA it simulates.3379*/3380if (nr_samples <= 1 && sctx->smoothing_enabled)3381nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;33823383/* On Polaris, the small primitive filter uses the sample locations3384* even when MSAA is off, so we need to make sure they're set to 0.3385*3386* GFX10 uses sample locations unconditionally, so they always need3387* to be set up.3388*/3389if ((nr_samples >= 2 || has_msaa_sample_loc_bug || sctx->chip_class >= GFX10) &&3390nr_samples != sctx->sample_locs_num_samples) {3391sctx->sample_locs_num_samples = nr_samples;3392si_emit_sample_locations(cs, nr_samples);3393}33943395radeon_begin(cs);33963397if (sctx->family >= CHIP_POLARIS10) {3398unsigned small_prim_filter_cntl =3399S_028830_SMALL_PRIM_FILTER_ENABLE(1) |3400/* line bug */3401S_028830_LINE_FILTER_DISABLE(sctx->family <= CHIP_POLARIS12);34023403/* For hardware with the sample location bug, the problem is that in order to use the small3404* primitive filter, we need to explicitly set the sample locations to 0. 
But the DB doesn't3405* properly process the change of sample locations without a flush, and so we can end up3406* with incorrect Z values.3407*3408* Instead of doing a flush, just disable the small primitive filter when MSAA is3409* force-disabled.3410*3411* The alternative of setting sample locations to 0 would require a DB flush to avoid3412* Z errors, see https://bugs.freedesktop.org/show_bug.cgi?id=969083413*/3414if (has_msaa_sample_loc_bug && sctx->framebuffer.nr_samples > 1 && !rs->multisample_enable)3415small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;34163417radeon_opt_set_context_reg(sctx, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,3418SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL, small_prim_filter_cntl);3419}34203421/* The exclusion bits can be set to improve rasterization efficiency3422* if no sample lies on the pixel boundary (-8 sample offset).3423*/3424bool exclusion = sctx->chip_class >= GFX7 && (!rs->multisample_enable || nr_samples != 16);3425radeon_opt_set_context_reg(3426sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL, SI_TRACKED_PA_SU_PRIM_FILTER_CNTL,3427S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));3428radeon_end();3429}34303431static bool si_out_of_order_rasterization(struct si_context *sctx)3432{3433struct si_state_blend *blend = sctx->queued.named.blend;3434struct si_state_dsa *dsa = sctx->queued.named.dsa;34353436if (!sctx->screen->has_out_of_order_rast)3437return false;34383439unsigned colormask = sctx->framebuffer.colorbuf_enabled_4bit;34403441colormask &= blend->cb_target_enabled_4bit;34423443/* Conservative: No logic op. 
*/3444if (colormask && blend->logicop_enable)3445return false;34463447struct si_dsa_order_invariance dsa_order_invariant = {.zs = true,3448.pass_set = true,3449.pass_last = false};34503451if (sctx->framebuffer.state.zsbuf) {3452struct si_texture *zstex = (struct si_texture *)sctx->framebuffer.state.zsbuf->texture;3453bool has_stencil = zstex->surface.has_stencil;3454dsa_order_invariant = dsa->order_invariance[has_stencil];3455if (!dsa_order_invariant.zs)3456return false;34573458/* The set of PS invocations is always order invariant,3459* except when early Z/S tests are requested. */3460if (sctx->shader.ps.cso && sctx->shader.ps.cso->info.base.writes_memory &&3461sctx->shader.ps.cso->info.base.fs.early_fragment_tests &&3462!dsa_order_invariant.pass_set)3463return false;34643465if (sctx->num_perfect_occlusion_queries != 0 && !dsa_order_invariant.pass_set)3466return false;3467}34683469if (!colormask)3470return true;34713472unsigned blendmask = colormask & blend->blend_enable_4bit;34733474if (blendmask) {3475/* Only commutative blending. */3476if (blendmask & ~blend->commutative_4bit)3477return false;34783479if (!dsa_order_invariant.pass_set)3480return false;3481}34823483if (colormask & ~blendmask) {3484if (!dsa_order_invariant.pass_last)3485return false;3486}34873488return true;3489}34903491static void si_emit_msaa_config(struct si_context *sctx)3492{3493struct radeon_cmdbuf *cs = &sctx->gfx_cs;3494unsigned num_tile_pipes = sctx->screen->info.num_tile_pipes;3495/* 33% faster rendering to linear color buffers */3496bool dst_is_linear = sctx->framebuffer.any_dst_linear;3497bool out_of_order_rast = si_out_of_order_rasterization(sctx);3498unsigned sc_mode_cntl_1 =3499S_028A4C_WALK_SIZE(dst_is_linear) | S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) |3500S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 
2 : 3) |3501S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) |3502S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) |3503/* always 1: */3504S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) | S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |3505S_028A4C_TILE_WALK_ORDER_ENABLE(1) | S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |3506S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | S_028A4C_FORCE_EOV_REZ_ENABLE(1);3507unsigned db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) | S_028804_INCOHERENT_EQAA_READS(1) |3508S_028804_INTERPOLATE_COMP_Z(1) | S_028804_STATIC_ANCHOR_ASSOCIATIONS(1);3509unsigned coverage_samples, color_samples, z_samples;3510struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;35113512/* S: Coverage samples (up to 16x):3513* - Scan conversion samples (PA_SC_AA_CONFIG.MSAA_NUM_SAMPLES)3514* - CB FMASK samples (CB_COLORi_ATTRIB.NUM_SAMPLES)3515*3516* Z: Z/S samples (up to 8x, must be <= coverage samples and >= color samples):3517* - Value seen by DB (DB_Z_INFO.NUM_SAMPLES)3518* - Value seen by CB, must be correct even if Z/S is unbound (DB_EQAA.MAX_ANCHOR_SAMPLES)3519* # Missing samples are derived from Z planes if Z is compressed (up to 16x quality), or3520* # from the closest defined sample if Z is uncompressed (same quality as the number of3521* # Z samples).3522*3523* F: Color samples (up to 8x, must be <= coverage samples):3524* - CB color samples (CB_COLORi_ATTRIB.NUM_FRAGMENTS)3525* - PS iter samples (DB_EQAA.PS_ITER_SAMPLES)3526*3527* Can be anything between coverage and color samples:3528* - SampleMaskIn samples (PA_SC_AA_CONFIG.MSAA_EXPOSED_SAMPLES)3529* - SampleMaskOut samples (DB_EQAA.MASK_EXPORT_NUM_SAMPLES)3530* - Alpha-to-coverage samples (DB_EQAA.ALPHA_TO_MASK_NUM_SAMPLES)3531* - Occlusion query samples (DB_COUNT_CONTROL.SAMPLE_RATE)3532* # All are currently set the same as coverage samples.3533*3534* If color samples < coverage samples, FMASK has a higher bpp to store an "unknown"3535* flag for undefined color samples. 
A shader-based resolve must handle unknowns3536* or mask them out with AND. Unknowns can also be guessed from neighbors via3537* an edge-detect shader-based resolve, which is required to make "color samples = 1"3538* useful. The CB resolve always drops unknowns.3539*3540* Sensible AA configurations:3541* EQAA 16s 8z 8f - might look the same as 16x MSAA if Z is compressed3542* EQAA 16s 8z 4f - might look the same as 16x MSAA if Z is compressed3543* EQAA 16s 4z 4f - might look the same as 16x MSAA if Z is compressed3544* EQAA 8s 8z 8f = 8x MSAA3545* EQAA 8s 8z 4f - might look the same as 8x MSAA3546* EQAA 8s 8z 2f - might look the same as 8x MSAA with low-density geometry3547* EQAA 8s 4z 4f - might look the same as 8x MSAA if Z is compressed3548* EQAA 8s 4z 2f - might look the same as 8x MSAA with low-density geometry if Z is compressed3549* EQAA 4s 4z 4f = 4x MSAA3550* EQAA 4s 4z 2f - might look the same as 4x MSAA with low-density geometry3551* EQAA 2s 2z 2f = 2x MSAA3552*/3553coverage_samples = color_samples = z_samples = si_get_num_coverage_samples(sctx);35543555if (sctx->framebuffer.nr_samples > 1 && rs->multisample_enable) {3556color_samples = sctx->framebuffer.nr_color_samples;35573558if (sctx->framebuffer.state.zsbuf) {3559z_samples = sctx->framebuffer.state.zsbuf->texture->nr_samples;3560z_samples = MAX2(1, z_samples);3561} else {3562z_samples = coverage_samples;3563}3564}35653566/* Required by OpenGL line rasterization.3567*3568* TODO: We should also enable perpendicular endcaps for AA lines,3569* but that requires implementing line stippling in the pixel3570* shader. 
SC can only do line stippling with axis-aligned3571* endcaps.3572*/3573unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);3574unsigned sc_aa_config = 0;35753576if (coverage_samples > 1) {3577/* distance from the pixel center, indexed by log2(nr_samples) */3578static unsigned max_dist[] = {35790, /* unused */35804, /* 2x MSAA */35816, /* 4x MSAA */35827, /* 8x MSAA */35838, /* 16x MSAA */3584};3585unsigned log_samples = util_logbase2(coverage_samples);3586unsigned log_z_samples = util_logbase2(z_samples);3587unsigned ps_iter_samples = si_get_ps_iter_samples(sctx);3588unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples);35893590sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1);3591sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) |3592S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |3593S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) |3594S_028BE0_COVERED_CENTROID_IS_CENTER(sctx->chip_class >= GFX10_3);35953596if (sctx->framebuffer.nr_samples > 1) {3597db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |3598S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |3599S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |3600S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples);3601sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1);3602} else if (sctx->smoothing_enabled) {3603db_eqaa |= S_028804_OVERRASTERIZATION_AMOUNT(log_samples);3604}3605}36063607radeon_begin(cs);36083609/* R_028BDC_PA_SC_LINE_CNTL, R_028BE0_PA_SC_AA_CONFIG */3610radeon_opt_set_context_reg2(sctx, R_028BDC_PA_SC_LINE_CNTL, SI_TRACKED_PA_SC_LINE_CNTL,3611sc_line_cntl, sc_aa_config);3612/* R_028804_DB_EQAA */3613radeon_opt_set_context_reg(sctx, R_028804_DB_EQAA, SI_TRACKED_DB_EQAA, db_eqaa);3614/* R_028A4C_PA_SC_MODE_CNTL_1 */3615radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1,3616sc_mode_cntl_1);3617radeon_end_update_context_roll(sctx);3618}36193620void si_update_ps_iter_samples(struct si_context *sctx)3621{3622if (sctx->framebuffer.nr_samples > 
1)3623si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);3624if (sctx->screen->dpbb_allowed)3625si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state);3626}36273628static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)3629{3630struct si_context *sctx = (struct si_context *)ctx;36313632/* The hardware can only do sample shading with 2^n samples. */3633min_samples = util_next_power_of_two(min_samples);36343635if (sctx->ps_iter_samples == min_samples)3636return;36373638sctx->ps_iter_samples = min_samples;3639sctx->do_update_shaders = true;36403641si_update_ps_iter_samples(sctx);3642}36433644/*3645* Samplers3646*/36473648/**3649* Build the sampler view descriptor for a buffer texture.3650* @param state 256-bit descriptor; only the high 128 bits are filled in3651*/3652void si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf,3653enum pipe_format format, unsigned offset, unsigned size,3654uint32_t *state)3655{3656const struct util_format_description *desc;3657unsigned stride;3658unsigned num_records;36593660desc = util_format_description(format);3661stride = desc->block.bits / 8;36623663num_records = size / stride;3664num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride);36653666/* The NUM_RECORDS field has a different meaning depending on the chip,3667* instruction type, STRIDE, and SWIZZLE_ENABLE.3668*3669* GFX6-7,10:3670* - If STRIDE == 0, it's in byte units.3671* - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN.3672*3673* GFX8:3674* - For SMEM and STRIDE == 0, it's in byte units.3675* - For SMEM and STRIDE != 0, it's in units of STRIDE.3676* - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units.3677* - For VMEM and STRIDE != 0 and SWIZZLE_ENABLE == 1, it's in units of STRIDE.3678* NOTE: There is incompatibility between VMEM and SMEM opcodes due to SWIZZLE_-3679* ENABLE. The workaround is to set STRIDE = 0 if SWIZZLE_ENABLE == 0 when3680* using SMEM. 
This can be done in the shader by clearing STRIDE with s_and.3681* That way the same descriptor can be used by both SMEM and VMEM.3682*3683* GFX9:3684* - For SMEM and STRIDE == 0, it's in byte units.3685* - For SMEM and STRIDE != 0, it's in units of STRIDE.3686* - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units.3687* - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE.3688*/3689if (screen->info.chip_class == GFX8)3690num_records *= stride;36913692state[4] = 0;3693state[5] = S_008F04_STRIDE(stride);3694state[6] = num_records;3695state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |3696S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |3697S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |3698S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3]));36993700if (screen->info.chip_class >= GFX10) {3701const struct gfx10_format *fmt = &gfx10_format_table[format];37023703/* OOB_SELECT chooses the out-of-bounds check:3704* - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)3705* - 1: index >= NUM_RECORDS3706* - 2: NUM_RECORDS == 03707* - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS3708* else: swizzle_address >= NUM_RECORDS3709*/3710state[7] |= S_008F0C_FORMAT(fmt->img_format) |3711S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |3712S_008F0C_RESOURCE_LEVEL(1);3713} else {3714int first_non_void;3715unsigned num_format, data_format;37163717first_non_void = util_format_get_first_non_void_channel(format);3718num_format = si_translate_buffer_numformat(&screen->b, desc, first_non_void);3719data_format = si_translate_buffer_dataformat(&screen->b, desc, first_non_void);37203721state[7] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format);3722}3723}37243725static unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4])3726{3727unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;37283729if (swizzle[3] == PIPE_SWIZZLE_X) {3730/* For the pre-defined border color values (white, opaque3731* 
/**
 * Build the sampler view descriptor for a texture.
 *
 * Writes all 8 dwords of @p state. The 8 dwords of @p fmask_state are written
 * only when tex->surface.fmask_offset is non-zero.
 *
 * @param screen         screen, passed on to si_tex_dim() and vi_alpha_is_on_msb()
 * @param tex            texture the descriptor is built for
 * @param sampler        false selects the shader-image handling of cube maps
 *                       and 3D textures; true is the sampler-view path
 * @param target         view target, passed on to si_tex_dim()
 * @param pipe_format    format of the view
 * @param state_swizzle  view swizzle, composed on top of the format swizzle
 * @param first_level    first mip level (ignored for the level fields if MSAA)
 * @param last_level     last mip level (ignored for the level fields if MSAA)
 * @param first_layer    first array layer
 * @param last_layer     last array layer
 * @param width          width written to the descriptor (stored minus one)
 * @param height         height written to the descriptor (forced to 1 for 1D arrays)
 * @param depth          depth; replaced by values derived from res->array_size
 *                       for array and cube resource types
 * @param state          output: 8-dword image descriptor
 * @param fmask_state    output: 8-dword FMASK descriptor (see above)
 */
static void gfx10_make_texture_descriptor(
   struct si_screen *screen, struct si_texture *tex, bool sampler, enum pipe_texture_target target,
   enum pipe_format pipe_format, const unsigned char state_swizzle[4], unsigned first_level,
   unsigned last_level, unsigned first_layer, unsigned last_layer, unsigned width, unsigned height,
   unsigned depth, uint32_t *state, uint32_t *fmask_state)
{
   struct pipe_resource *res = &tex->buffer.b.b;
   const struct util_format_description *desc;
   unsigned img_format;
   unsigned char swizzle[4];
   unsigned type;
   uint64_t va;

   desc = util_format_description(pipe_format);
   img_format = gfx10_format_table[pipe_format].img_format;

   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
      /* Depth/stencil formats replicate one source channel to all outputs
       * before the view swizzle is applied.
       */
      const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
      const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
      const unsigned char swizzle_wwww[4] = {3, 3, 3, 3};
      bool is_stencil = false;

      switch (pipe_format) {
      case PIPE_FORMAT_S8_UINT_Z24_UNORM:
      case PIPE_FORMAT_X32_S8X24_UINT:
      case PIPE_FORMAT_X8Z24_UNORM:
         util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
         is_stencil = true;
         break;
      case PIPE_FORMAT_X24S8_UINT:
         /*
          * X24S8 is implemented as an 8_8_8_8 data format, to
          * fix texture gathers. This affects at least
          * GL45-CTS.texture_cube_map_array.sampling on GFX8.
          */
         util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle);
         is_stencil = true;
         break;
      default:
         util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
         is_stencil = pipe_format == PIPE_FORMAT_S8_UINT;
      }

      /* Non-stencil views of an upgraded depth texture use the clamping
       * variant of the 32-bit float format.
       */
      if (tex->upgraded_depth && !is_stencil) {
         assert(img_format == V_008F0C_GFX10_FORMAT_32_FLOAT);
         img_format = V_008F0C_GFX10_FORMAT_32_FLOAT_CLAMP;
      }
   } else {
      util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
   }

   if (!sampler && (res->target == PIPE_TEXTURE_CUBE || res->target == PIPE_TEXTURE_CUBE_ARRAY)) {
      /* For the purpose of shader images, treat cube maps as 2D
       * arrays.
       */
      type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
   } else {
      type = si_tex_dim(screen, tex, target, res->nr_samples);
   }

   /* Derive height/depth from the resource for array-like types. */
   if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
      height = 1;
      depth = res->array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
      if (sampler || res->target != PIPE_TEXTURE_3D)
         depth = res->array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
      depth = res->array_size / 6;

   /* state[0] is zeroed and no address bits are written to state[1] here.
    * NOTE(review): presumably the address is filled in elsewhere - confirm.
    */
   state[0] = 0;
   state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1);
   /* WIDTH is split: WIDTH_LO lives in dword 1, WIDTH_HI (value >> 2) in dword 2. */
   state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
              S_00A008_RESOURCE_LEVEL(1);
   /* For multisampled resources the level fields carry 0 and
    * log2(nr_samples) instead of the mip range.
    */
   state[3] =
      S_00A00C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
      S_00A00C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
      S_00A00C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
      S_00A00C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
      S_00A00C_BASE_LEVEL(res->nr_samples > 1 ? 0 : first_level) |
      S_00A00C_LAST_LEVEL(res->nr_samples > 1 ? util_logbase2(res->nr_samples) : last_level) |
      S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc->swizzle)) | S_00A00C_TYPE(type);
   /* Depth is the last accessible layer on gfx9+. The hw doesn't need
    * to know the total number of layers.
    */
   state[4] =
      S_00A010_DEPTH((type == V_008F1C_SQ_RSRC_IMG_3D && sampler) ? depth - 1 : last_layer) |
      S_00A010_BASE_ARRAY(first_layer);
   /* ARRAY_PITCH is set to 1 only for 3D resources accessed as shader images. */
   state[5] = S_00A014_ARRAY_PITCH(!!(type == V_008F1C_SQ_RSRC_IMG_3D && !sampler)) |
              S_00A014_MAX_MIP(res->nr_samples > 1 ? util_logbase2(res->nr_samples)
                                                   : tex->buffer.b.b.last_level) |
              S_00A014_PERF_MOD(4);
   state[6] = 0;
   state[7] = 0;

   /* DCC-related fields are only set when DCC is enabled for first_level. */
   if (vi_dcc_enabled(tex, first_level)) {
      state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
                  S_00A018_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.color.dcc.max_compressed_block_size) |
                  S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format));
   }

   /* Initialize the sampler view for FMASK. */
   if (tex->surface.fmask_offset) {
      uint32_t format;

      va = tex->buffer.gpu_address + tex->surface.fmask_offset;

/* Packs (samples, fragments), each clamped to at least 1, into one switch key. */
#define FMASK(s, f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f)))
      switch (FMASK(res->nr_samples, res->nr_storage_samples)) {
      case FMASK(2, 1):
         format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F1;
         break;
      case FMASK(2, 2):
         format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F2;
         break;
      case FMASK(4, 1):
         format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F1;
         break;
      case FMASK(4, 2):
         format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F2;
         break;
      case FMASK(4, 4):
         format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F4;
         break;
      case FMASK(8, 1):
         format = V_008F0C_GFX10_FORMAT_FMASK8_S8_F1;
         break;
      case FMASK(8, 2):
         format = V_008F0C_GFX10_FORMAT_FMASK16_S8_F2;
         break;
      case FMASK(8, 4):
         format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F4;
         break;
      case FMASK(8, 8):
         format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F8;
         break;
      case FMASK(16, 1):
         format = V_008F0C_GFX10_FORMAT_FMASK16_S16_F1;
         break;
      case FMASK(16, 2):
         format = V_008F0C_GFX10_FORMAT_FMASK32_S16_F2;
         break;
      case FMASK(16, 4):
         format = V_008F0C_GFX10_FORMAT_FMASK64_S16_F4;
         break;
      case FMASK(16, 8):
         format = V_008F0C_GFX10_FORMAT_FMASK64_S16_F8;
         break;
      default:
         unreachable("invalid nr_samples");
      }
#undef FMASK
      /* Unlike the image descriptor above, the FMASK descriptor gets its
       * address written here (va >> 8 plus the tile swizzle, and va >> 40).
       */
      fmask_state[0] = (va >> 8) | tex->surface.fmask_tile_swizzle;
      fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) |
                       S_00A004_WIDTH_LO(width - 1);
      fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
                       S_00A008_RESOURCE_LEVEL(1);
      /* Every destination channel reads X. */
      fmask_state[3] =
         S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
         S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
         S_00A00C_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) |
         S_00A00C_TYPE(si_tex_dim(screen, tex, target, 0));
      fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer);
      fmask_state[5] = 0;
      fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
      fmask_state[7] = 0;
   }
}
This affects at least3963* GL45-CTS.texture_cube_map_array.sampling on GFX8.3964*/3965if (screen->info.chip_class <= GFX8)3966util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle);3967else3968util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);3969break;3970default:3971util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);3972}3973} else {3974util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);3975}39763977first_non_void = util_format_get_first_non_void_channel(pipe_format);39783979switch (pipe_format) {3980case PIPE_FORMAT_S8_UINT_Z24_UNORM:3981num_format = V_008F14_IMG_NUM_FORMAT_UNORM;3982break;3983default:3984if (first_non_void < 0) {3985if (util_format_is_compressed(pipe_format)) {3986switch (pipe_format) {3987case PIPE_FORMAT_DXT1_SRGB:3988case PIPE_FORMAT_DXT1_SRGBA:3989case PIPE_FORMAT_DXT3_SRGBA:3990case PIPE_FORMAT_DXT5_SRGBA:3991case PIPE_FORMAT_BPTC_SRGBA:3992case PIPE_FORMAT_ETC2_SRGB8:3993case PIPE_FORMAT_ETC2_SRGB8A1:3994case PIPE_FORMAT_ETC2_SRGBA8:3995num_format = V_008F14_IMG_NUM_FORMAT_SRGB;3996break;3997case PIPE_FORMAT_RGTC1_SNORM:3998case PIPE_FORMAT_LATC1_SNORM:3999case PIPE_FORMAT_RGTC2_SNORM:4000case PIPE_FORMAT_LATC2_SNORM:4001case PIPE_FORMAT_ETC2_R11_SNORM:4002case PIPE_FORMAT_ETC2_RG11_SNORM:4003/* implies float, so use SNORM/UNORM to determine4004whether data is signed or not */4005case PIPE_FORMAT_BPTC_RGB_FLOAT:4006num_format = V_008F14_IMG_NUM_FORMAT_SNORM;4007break;4008default:4009num_format = V_008F14_IMG_NUM_FORMAT_UNORM;4010break;4011}4012} else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {4013num_format = V_008F14_IMG_NUM_FORMAT_UNORM;4014} else {4015num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;4016}4017} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {4018num_format = V_008F14_IMG_NUM_FORMAT_SRGB;4019} else {4020num_format = V_008F14_IMG_NUM_FORMAT_UNORM;40214022switch (desc->channel[first_non_void].type) {4023case UTIL_FORMAT_TYPE_FLOAT:4024num_format = 
V_008F14_IMG_NUM_FORMAT_FLOAT;4025break;4026case UTIL_FORMAT_TYPE_SIGNED:4027if (desc->channel[first_non_void].normalized)4028num_format = V_008F14_IMG_NUM_FORMAT_SNORM;4029else if (desc->channel[first_non_void].pure_integer)4030num_format = V_008F14_IMG_NUM_FORMAT_SINT;4031else4032num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;4033break;4034case UTIL_FORMAT_TYPE_UNSIGNED:4035if (desc->channel[first_non_void].normalized)4036num_format = V_008F14_IMG_NUM_FORMAT_UNORM;4037else if (desc->channel[first_non_void].pure_integer)4038num_format = V_008F14_IMG_NUM_FORMAT_UINT;4039else4040num_format = V_008F14_IMG_NUM_FORMAT_USCALED;4041}4042}4043}40444045data_format = si_translate_texformat(&screen->b, pipe_format, desc, first_non_void);4046if (data_format == ~0) {4047data_format = 0;4048}40494050/* S8 with Z32 HTILE needs a special format. */4051if (screen->info.chip_class == GFX9 && pipe_format == PIPE_FORMAT_S8_UINT)4052data_format = V_008F14_IMG_DATA_FORMAT_S8_32;40534054if (!sampler && (res->target == PIPE_TEXTURE_CUBE || res->target == PIPE_TEXTURE_CUBE_ARRAY ||4055(screen->info.chip_class <= GFX8 && res->target == PIPE_TEXTURE_3D))) {4056/* For the purpose of shader images, treat cube maps and 3D4057* textures as 2D arrays. 
For 3D textures, the address4058* calculations for mipmaps are different, so we rely on the4059* caller to effectively disable mipmaps.4060*/4061type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;40624063assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0));4064} else {4065type = si_tex_dim(screen, tex, target, num_samples);4066}40674068if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {4069height = 1;4070depth = res->array_size;4071} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {4072if (sampler || res->target != PIPE_TEXTURE_3D)4073depth = res->array_size;4074} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)4075depth = res->array_size / 6;40764077state[0] = 0;4078state[1] = (S_008F14_DATA_FORMAT(data_format) | S_008F14_NUM_FORMAT(num_format));4079state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4));4080state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |4081S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |4082S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |4083S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |4084S_008F1C_BASE_LEVEL(num_samples > 1 ? 0 : first_level) |4085S_008F1C_LAST_LEVEL(num_samples > 1 ? util_logbase2(num_samples) : last_level) |4086S_008F1C_TYPE(type));4087state[4] = 0;4088state[5] = S_008F24_BASE_ARRAY(first_layer);4089state[6] = 0;4090state[7] = 0;40914092if (screen->info.chip_class == GFX9) {4093unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle);40944095/* Depth is the the last accessible layer on Gfx9.4096* The hw doesn't need to know the total number of layers.4097*/4098if (type == V_008F1C_SQ_RSRC_IMG_3D)4099state[4] |= S_008F20_DEPTH(depth - 1);4100else4101state[4] |= S_008F20_DEPTH(last_layer);41024103state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);4104state[5] |= S_008F24_MAX_MIP(num_samples > 1 ? 
util_logbase2(num_samples)4105: tex->buffer.b.b.last_level);4106} else {4107state[3] |= S_008F1C_POW2_PAD(res->last_level > 0);4108state[4] |= S_008F20_DEPTH(depth - 1);4109state[5] |= S_008F24_LAST_ARRAY(last_layer);4110}41114112if (vi_dcc_enabled(tex, first_level)) {4113state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format));4114} else {4115/* The last dword is unused by hw. The shader uses it to clear4116* bits in the first dword of sampler state.4117*/4118if (screen->info.chip_class <= GFX7 && res->nr_samples <= 1) {4119if (first_level == last_level)4120state[7] = C_008F30_MAX_ANISO_RATIO;4121else4122state[7] = 0xffffffff;4123}4124}41254126/* Initialize the sampler view for FMASK. */4127if (tex->surface.fmask_offset) {4128uint32_t data_format, num_format;41294130va = tex->buffer.gpu_address + tex->surface.fmask_offset;41314132#define FMASK(s, f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f)))4133if (screen->info.chip_class == GFX9) {4134data_format = V_008F14_IMG_DATA_FORMAT_FMASK;4135switch (FMASK(res->nr_samples, res->nr_storage_samples)) {4136case FMASK(2, 1):4137num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_1;4138break;4139case FMASK(2, 2):4140num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;4141break;4142case FMASK(4, 1):4143num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_1;4144break;4145case FMASK(4, 2):4146num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_2;4147break;4148case FMASK(4, 4):4149num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;4150break;4151case FMASK(8, 1):4152num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_8_1;4153break;4154case FMASK(8, 2):4155num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_8_2;4156break;4157case FMASK(8, 4):4158num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_4;4159break;4160case FMASK(8, 8):4161num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;4162break;4163case FMASK(16, 1):4164num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_16_1;4165break;4166case FMASK(16, 2):4167num_format = 
V_008F14_IMG_NUM_FORMAT_FMASK_32_16_2;4168break;4169case FMASK(16, 4):4170num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_4;4171break;4172case FMASK(16, 8):4173num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_8;4174break;4175default:4176unreachable("invalid nr_samples");4177}4178} else {4179switch (FMASK(res->nr_samples, res->nr_storage_samples)) {4180case FMASK(2, 1):4181data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F1;4182break;4183case FMASK(2, 2):4184data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;4185break;4186case FMASK(4, 1):4187data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F1;4188break;4189case FMASK(4, 2):4190data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F2;4191break;4192case FMASK(4, 4):4193data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;4194break;4195case FMASK(8, 1):4196data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S8_F1;4197break;4198case FMASK(8, 2):4199data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S8_F2;4200break;4201case FMASK(8, 4):4202data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F4;4203break;4204case FMASK(8, 8):4205data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;4206break;4207case FMASK(16, 1):4208data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S16_F1;4209break;4210case FMASK(16, 2):4211data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S16_F2;4212break;4213case FMASK(16, 4):4214data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F4;4215break;4216case FMASK(16, 8):4217data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F8;4218break;4219default:4220unreachable("invalid nr_samples");4221}4222num_format = V_008F14_IMG_NUM_FORMAT_UINT;4223}4224#undef FMASK42254226fmask_state[0] = (va >> 8) | tex->surface.fmask_tile_swizzle;4227fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(data_format) |4228S_008F14_NUM_FORMAT(num_format);4229fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1);4230fmask_state[3] =4231S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) 
/**
 * Create a sampler view.
 *
 * For PIPE_BUFFER resources only the buffer descriptor is built and the
 * function returns early. For textures, the view format may be replaced
 * (flushed depth texture, DB-compatible layouts) before the screen's
 * make_texture_descriptor hook fills view->state and view->fmask_state.
 *
 * @param ctx          context
 * @param texture      texture (must not be NULL; asserted)
 * @param state        sampler view template
 * @param width0       width0 override (for compressed textures as int)
 * @param height0      height0 override (for compressed textures as int)
 * @param force_level  set the base address to the level (for compressed textures);
 *                     only acted upon when chip_class <= GFX8
 * @return the new view, or NULL if allocating the view or initializing the
 *         flushed depth texture fails
 */
struct pipe_sampler_view *si_create_sampler_view_custom(struct pipe_context *ctx,
                                                        struct pipe_resource *texture,
                                                        const struct pipe_sampler_view *state,
                                                        unsigned width0, unsigned height0,
                                                        unsigned force_level)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view);
   struct si_texture *tex = (struct si_texture *)texture;
   unsigned base_level, first_level, last_level;
   unsigned char state_swizzle[4];
   unsigned height, depth, width;
   unsigned last_layer = state->u.tex.last_layer;
   enum pipe_format pipe_format;
   const struct legacy_surf_level *surflevel;

   if (!view)
      return NULL;

   /* initialize base object */
   view->base = *state;
   /* Clear the copied pointer so pipe_resource_reference() below installs
    * the texture without first dropping a reference the view never held.
    */
   view->base.texture = NULL;
   view->base.reference.count = 1;
   view->base.context = ctx;

   assert(texture);
   pipe_resource_reference(&view->base.texture, texture);

   if (state->format == PIPE_FORMAT_X24S8_UINT || state->format == PIPE_FORMAT_S8X24_UINT ||
       state->format == PIPE_FORMAT_X32_S8X24_UINT || state->format == PIPE_FORMAT_S8_UINT)
      view->is_stencil_sampler = true;

   /* Buffer resource. */
   if (texture->target == PIPE_BUFFER) {
      si_make_buffer_descriptor(sctx->screen, si_resource(texture), state->format,
                                state->u.buf.offset, state->u.buf.size, view->state);
      return &view->base;
   }

   state_swizzle[0] = state->swizzle_r;
   state_swizzle[1] = state->swizzle_g;
   state_swizzle[2] = state->swizzle_b;
   state_swizzle[3] = state->swizzle_a;

   base_level = 0;
   first_level = state->u.tex.first_level;
   last_level = state->u.tex.last_level;
   width = width0;
   height = height0;
   depth = texture->depth0;

   /* On GFX8 and older, a forced level becomes the base level: the view is
    * then described as a single-level texture with minified dimensions.
    */
   if (sctx->chip_class <= GFX8 && force_level) {
      assert(force_level == first_level && force_level == last_level);
      base_level = force_level;
      first_level = 0;
      last_level = 0;
      width = u_minify(width, force_level);
      height = u_minify(height, force_level);
      depth = u_minify(depth, force_level);
   }

   /* This is not needed if gallium frontends set last_layer correctly. */
   if (state->target == PIPE_TEXTURE_1D || state->target == PIPE_TEXTURE_2D ||
       state->target == PIPE_TEXTURE_RECT || state->target == PIPE_TEXTURE_CUBE)
      last_layer = state->u.tex.first_layer;

   /* Texturing with separate depth and stencil. */
   pipe_format = state->format;

   /* Depth/stencil texturing sometimes needs separate texture. */
   if (tex->is_depth && !si_can_sample_zs(tex, view->is_stencil_sampler)) {
      if (!tex->flushed_depth_texture && !si_init_flushed_depth_texture(ctx, texture)) {
         /* Undo the reference taken above before freeing the view. */
         pipe_resource_reference(&view->base.texture, NULL);
         FREE(view);
         return NULL;
      }

      assert(tex->flushed_depth_texture);

      /* Override format for the case where the flushed texture
       * contains only Z or only S.
       */
      if (tex->flushed_depth_texture->buffer.b.b.format != tex->buffer.b.b.format)
         pipe_format = tex->flushed_depth_texture->buffer.b.b.format;

      /* From here on the descriptor is built from the flushed copy;
       * view->base.texture still references the original texture.
       */
      tex = tex->flushed_depth_texture;
   }

   surflevel = tex->surface.u.legacy.level;

   if (tex->db_compatible) {
      if (!view->is_stencil_sampler)
         pipe_format = tex->db_render_format;

      switch (pipe_format) {
      case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
         pipe_format = PIPE_FORMAT_Z32_FLOAT;
         break;
      case PIPE_FORMAT_X8Z24_UNORM:
      case PIPE_FORMAT_S8_UINT_Z24_UNORM:
         /* Z24 is always stored like this for DB
          * compatibility.
          */
         pipe_format = PIPE_FORMAT_Z24X8_UNORM;
         break;
      case PIPE_FORMAT_X24S8_UINT:
      case PIPE_FORMAT_S8X24_UINT:
      case PIPE_FORMAT_X32_S8X24_UINT:
         /* Stencil views use the stencil level table. */
         pipe_format = PIPE_FORMAT_S8_UINT;
         surflevel = tex->surface.u.legacy.zs.stencil_level;
         break;
      default:;
      }
   }

   /* Computed from the original texture and the template's format, not from
    * the possibly overridden tex/pipe_format.
    */
   view->dcc_incompatible =
      vi_dcc_formats_are_incompatible(texture, state->u.tex.first_level, state->format);

   sctx->screen->make_texture_descriptor(
      sctx->screen, tex, true, state->target, pipe_format, state_swizzle, first_level, last_level,
      state->u.tex.first_layer, last_layer, width, height, depth, view->state, view->fmask_state);

   view->base_level_info = &surflevel[base_level];
   view->base_level = base_level;
   view->block_width = util_format_get_blockwidth(pipe_format);
   return &view->base;
}
*state)4395{4396return si_create_sampler_view_custom(ctx, texture, state, texture ? texture->width0 : 0,4397texture ? texture->height0 : 0, 0);4398}43994400static void si_sampler_view_destroy(struct pipe_context *ctx, struct pipe_sampler_view *state)4401{4402struct si_sampler_view *view = (struct si_sampler_view *)state;44034404pipe_resource_reference(&state->texture, NULL);4405FREE(view);4406}44074408static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)4409{4410return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||4411(linear_filter && (wrap == PIPE_TEX_WRAP_CLAMP || wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));4412}44134414static uint32_t si_translate_border_color(struct si_context *sctx,4415const struct pipe_sampler_state *state,4416const union pipe_color_union *color, bool is_integer)4417{4418bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||4419state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;44204421if (!wrap_mode_uses_border_color(state->wrap_s, linear_filter) &&4422!wrap_mode_uses_border_color(state->wrap_t, linear_filter) &&4423!wrap_mode_uses_border_color(state->wrap_r, linear_filter))4424return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK);44254426#define simple_border_types(elt) \4427do { \4428if (color->elt[0] == 0 && color->elt[1] == 0 && color->elt[2] == 0 && color->elt[3] == 0) \4429return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); \4430if (color->elt[0] == 0 && color->elt[1] == 0 && color->elt[2] == 0 && color->elt[3] == 1) \4431return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK); \4432if (color->elt[0] == 1 && color->elt[1] == 1 && color->elt[2] == 1 && color->elt[3] == 1) \4433return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE); \4434} while (false)44354436if (is_integer)4437simple_border_types(ui);4438else4439simple_border_types(f);44404441#undef 
/**
 * Convert a maximum anisotropy value into the ratio index used by the
 * sampler state.
 *
 * @param filter  maximum anisotropy
 * @return 0 for values below 2, 1 for 2..3, 2 for 4..7, 3 for 8..15 and
 *         4 for 16 and above
 */
static inline unsigned si_tex_aniso_filter(unsigned filter)
{
   unsigned ratio = 0;

   /* Count how many of the thresholds 2, 4, 8 and 16 the value reaches. */
   for (unsigned threshold = 2; threshold <= 16 && filter >= threshold; threshold <<= 1)
      ratio++;

   return ratio;
}
sscreen->force_aniso : state->max_anisotropy;4504unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso);4505bool trunc_coord = state->min_img_filter == PIPE_TEX_FILTER_NEAREST &&4506state->mag_img_filter == PIPE_TEX_FILTER_NEAREST &&4507state->compare_mode == PIPE_TEX_COMPARE_NONE;4508union pipe_color_union clamped_border_color;45094510if (!rstate) {4511return NULL;4512}45134514/* Validate inputs. */4515if (!is_wrap_mode_legal(sscreen, state->wrap_s) ||4516!is_wrap_mode_legal(sscreen, state->wrap_t) ||4517!is_wrap_mode_legal(sscreen, state->wrap_r) ||4518(!sscreen->info.has_3d_cube_border_color_mipmap &&4519(state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE ||4520state->max_anisotropy > 0))) {4521assert(0);4522return NULL;4523}45244525#ifndef NDEBUG4526rstate->magic = SI_SAMPLER_STATE_MAGIC;4527#endif4528rstate->val[0] =4529(S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |4530S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |4531S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |4532S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |4533S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) |4534S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |4535S_008F30_TRUNC_COORD(trunc_coord) |4536S_008F30_COMPAT_MODE(sctx->chip_class == GFX8 || sctx->chip_class == GFX9));4537rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |4538S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) |4539S_008F34_PERF_MIP(max_aniso_ratio ? 
max_aniso_ratio + 6 : 0));4540rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |4541S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter, max_aniso)) |4542S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter, max_aniso)) |4543S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) |4544S_008F38_MIP_POINT_PRECLAMP(0));4545rstate->val[3] = si_translate_border_color(sctx, state, &state->border_color,4546state->border_color_is_integer);45474548if (sscreen->info.chip_class >= GFX10) {4549rstate->val[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1);4550} else {4551rstate->val[2] |= S_008F38_DISABLE_LSB_CEIL(sctx->chip_class <= GFX8) |4552S_008F38_FILTER_PREC_FIX(1) |4553S_008F38_ANISO_OVERRIDE_GFX8(sctx->chip_class >= GFX8);4554}45554556/* Create sampler resource for upgraded depth textures. */4557memcpy(rstate->upgraded_depth_val, rstate->val, sizeof(rstate->val));45584559for (unsigned i = 0; i < 4; ++i) {4560/* Use channel 0 on purpose, so that we can use OPAQUE_WHITE4561* when the border color is 1.0. 
*/4562clamped_border_color.f[i] = CLAMP(state->border_color.f[0], 0, 1);4563}45644565if (memcmp(&state->border_color, &clamped_border_color, sizeof(clamped_border_color)) == 0) {4566if (sscreen->info.chip_class <= GFX9)4567rstate->upgraded_depth_val[3] |= S_008F3C_UPGRADED_DEPTH(1);4568} else {4569rstate->upgraded_depth_val[3] =4570si_translate_border_color(sctx, state, &clamped_border_color, false);4571}45724573return rstate;4574}45754576static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)4577{4578struct si_context *sctx = (struct si_context *)ctx;45794580if (sctx->sample_mask == (uint16_t)sample_mask)4581return;45824583sctx->sample_mask = sample_mask;4584si_mark_atom_dirty(sctx, &sctx->atoms.s.sample_mask);4585}45864587static void si_emit_sample_mask(struct si_context *sctx)4588{4589struct radeon_cmdbuf *cs = &sctx->gfx_cs;4590unsigned mask = sctx->sample_mask;45914592/* Needed for line and polygon smoothing as well as for the Polaris4593* small primitive filter. 
We expect the gallium frontend to take care of4594* this for us.4595*/4596assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 ||4597(mask & 1 && sctx->blitter_running));45984599radeon_begin(cs);4600radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);4601radeon_emit(cs, mask | (mask << 16));4602radeon_emit(cs, mask | (mask << 16));4603radeon_end();4604}46054606static void si_delete_sampler_state(struct pipe_context *ctx, void *state)4607{4608#ifndef NDEBUG4609struct si_sampler_state *s = state;46104611assert(s->magic == SI_SAMPLER_STATE_MAGIC);4612s->magic = 0;4613#endif4614free(state);4615}46164617/*4618* Vertex elements & buffers4619*/46204621struct si_fast_udiv_info32 si_compute_fast_udiv_info32(uint32_t D, unsigned num_bits)4622{4623struct util_fast_udiv_info info = util_compute_fast_udiv_info(D, num_bits, 32);46244625struct si_fast_udiv_info32 result = {4626info.multiplier,4627info.pre_shift,4628info.post_shift,4629info.increment,4630};4631return result;4632}46334634static void *si_create_vertex_elements(struct pipe_context *ctx, unsigned count,4635const struct pipe_vertex_element *elements)4636{4637struct si_screen *sscreen = (struct si_screen *)ctx->screen;4638struct si_vertex_elements *v = CALLOC_STRUCT(si_vertex_elements);4639bool used[SI_NUM_VERTEX_BUFFERS] = {};4640struct si_fast_udiv_info32 divisor_factors[SI_MAX_ATTRIBS] = {};4641STATIC_ASSERT(sizeof(struct si_fast_udiv_info32) == 16);4642STATIC_ASSERT(sizeof(divisor_factors[0].multiplier) == 4);4643STATIC_ASSERT(sizeof(divisor_factors[0].pre_shift) == 4);4644STATIC_ASSERT(sizeof(divisor_factors[0].post_shift) == 4);4645STATIC_ASSERT(sizeof(divisor_factors[0].increment) == 4);4646int i;46474648assert(count <= SI_MAX_ATTRIBS);4649if (!v)4650return NULL;46514652v->count = count;46534654unsigned alloc_count =4655count > sscreen->num_vbos_in_user_sgprs ? 
count - sscreen->num_vbos_in_user_sgprs : 0;4656v->vb_desc_list_alloc_size = align(alloc_count * 16, SI_CPDMA_ALIGNMENT);46574658for (i = 0; i < count; ++i) {4659const struct util_format_description *desc;4660const struct util_format_channel_description *channel;4661int first_non_void;4662unsigned vbo_index = elements[i].vertex_buffer_index;46634664if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {4665FREE(v);4666return NULL;4667}46684669unsigned instance_divisor = elements[i].instance_divisor;4670if (instance_divisor) {4671v->uses_instance_divisors = true;46724673if (instance_divisor == 1) {4674v->instance_divisor_is_one |= 1u << i;4675} else {4676v->instance_divisor_is_fetched |= 1u << i;4677divisor_factors[i] = si_compute_fast_udiv_info32(instance_divisor, 32);4678}4679}46804681if (!used[vbo_index]) {4682v->first_vb_use_mask |= 1 << i;4683used[vbo_index] = true;4684}46854686desc = util_format_description(elements[i].src_format);4687first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);4688channel = first_non_void >= 0 ? 
&desc->channel[first_non_void] : NULL;46894690v->format_size[i] = desc->block.bits / 8;4691v->src_offset[i] = elements[i].src_offset;4692v->vertex_buffer_index[i] = vbo_index;46934694bool always_fix = false;4695union si_vs_fix_fetch fix_fetch;4696unsigned log_hw_load_size; /* the load element size as seen by the hardware */46974698fix_fetch.bits = 0;4699log_hw_load_size = MIN2(2, util_logbase2(desc->block.bits) - 3);47004701if (channel) {4702switch (channel->type) {4703case UTIL_FORMAT_TYPE_FLOAT:4704fix_fetch.u.format = AC_FETCH_FORMAT_FLOAT;4705break;4706case UTIL_FORMAT_TYPE_FIXED:4707fix_fetch.u.format = AC_FETCH_FORMAT_FIXED;4708break;4709case UTIL_FORMAT_TYPE_SIGNED: {4710if (channel->pure_integer)4711fix_fetch.u.format = AC_FETCH_FORMAT_SINT;4712else if (channel->normalized)4713fix_fetch.u.format = AC_FETCH_FORMAT_SNORM;4714else4715fix_fetch.u.format = AC_FETCH_FORMAT_SSCALED;4716break;4717}4718case UTIL_FORMAT_TYPE_UNSIGNED: {4719if (channel->pure_integer)4720fix_fetch.u.format = AC_FETCH_FORMAT_UINT;4721else if (channel->normalized)4722fix_fetch.u.format = AC_FETCH_FORMAT_UNORM;4723else4724fix_fetch.u.format = AC_FETCH_FORMAT_USCALED;4725break;4726}4727default:4728unreachable("bad format type");4729}4730} else {4731switch (elements[i].src_format) {4732case PIPE_FORMAT_R11G11B10_FLOAT:4733fix_fetch.u.format = AC_FETCH_FORMAT_FLOAT;4734break;4735default:4736unreachable("bad other format");4737}4738}47394740if (desc->channel[0].size == 10) {4741fix_fetch.u.log_size = 3; /* special encoding for 2_10_10_10 */4742log_hw_load_size = 2;47434744/* The hardware always treats the 2-bit alpha channel as4745* unsigned, so a shader workaround is needed. 
The affected4746* chips are GFX8 and older except Stoney (GFX8.1).4747*/4748always_fix = sscreen->info.chip_class <= GFX8 && sscreen->info.family != CHIP_STONEY &&4749channel->type == UTIL_FORMAT_TYPE_SIGNED;4750} else if (elements[i].src_format == PIPE_FORMAT_R11G11B10_FLOAT) {4751fix_fetch.u.log_size = 3; /* special encoding */4752fix_fetch.u.format = AC_FETCH_FORMAT_FIXED;4753log_hw_load_size = 2;4754} else {4755fix_fetch.u.log_size = util_logbase2(channel->size) - 3;4756fix_fetch.u.num_channels_m1 = desc->nr_channels - 1;47574758/* Always fix up:4759* - doubles (multiple loads + truncate to float)4760* - 32-bit requiring a conversion4761*/4762always_fix = (fix_fetch.u.log_size == 3) ||4763(fix_fetch.u.log_size == 2 && fix_fetch.u.format != AC_FETCH_FORMAT_FLOAT &&4764fix_fetch.u.format != AC_FETCH_FORMAT_UINT &&4765fix_fetch.u.format != AC_FETCH_FORMAT_SINT);47664767/* Also fixup 8_8_8 and 16_16_16. */4768if (desc->nr_channels == 3 && fix_fetch.u.log_size <= 1) {4769always_fix = true;4770log_hw_load_size = fix_fetch.u.log_size;4771}4772}47734774if (desc->swizzle[0] != PIPE_SWIZZLE_X) {4775assert(desc->swizzle[0] == PIPE_SWIZZLE_Z &&4776(desc->swizzle[2] == PIPE_SWIZZLE_X || desc->swizzle[2] == PIPE_SWIZZLE_0));4777fix_fetch.u.reverse = 1;4778}47794780/* Force the workaround for unaligned access here already if the4781* offset relative to the vertex buffer base is unaligned.4782*4783* There is a theoretical case in which this is too conservative:4784* if the vertex buffer's offset is also unaligned in just the4785* right way, we end up with an aligned address after all.4786* However, this case should be extremely rare in practice (it4787* won't happen in well-behaved applications), and taking it4788* into account would complicate the fast path (where everything4789* is nicely aligned).4790*/4791bool check_alignment =4792log_hw_load_size >= 1 &&4793(sscreen->info.chip_class == GFX6 || sscreen->info.chip_class >= GFX10);4794bool opencode = 
sscreen->options.vs_fetch_always_opencode;47954796if (check_alignment && (elements[i].src_offset & ((1 << log_hw_load_size) - 1)) != 0)4797opencode = true;47984799if (always_fix || check_alignment || opencode)4800v->fix_fetch[i] = fix_fetch.bits;48014802if (opencode)4803v->fix_fetch_opencode |= 1 << i;4804if (opencode || always_fix)4805v->fix_fetch_always |= 1 << i;48064807if (check_alignment && !opencode) {4808assert(log_hw_load_size == 1 || log_hw_load_size == 2);48094810v->fix_fetch_unaligned |= 1 << i;4811v->hw_load_is_dword |= (log_hw_load_size - 1) << i;4812v->vb_alignment_check_mask |= 1 << vbo_index;4813}48144815v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |4816S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |4817S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |4818S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3]));48194820if (sscreen->info.chip_class >= GFX10) {4821const struct gfx10_format *fmt = &gfx10_format_table[elements[i].src_format];4822assert(fmt->img_format != 0 && fmt->img_format < 128);4823v->rsrc_word3[i] |= S_008F0C_FORMAT(fmt->img_format) | S_008F0C_RESOURCE_LEVEL(1);4824} else {4825unsigned data_format, num_format;4826data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);4827num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);4828v->rsrc_word3[i] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format);4829}4830}48314832if (v->instance_divisor_is_fetched) {4833unsigned num_divisors = util_last_bit(v->instance_divisor_is_fetched);48344835v->instance_divisor_factor_buffer = (struct si_resource *)pipe_buffer_create(4836&sscreen->b, 0, PIPE_USAGE_DEFAULT, num_divisors * sizeof(divisor_factors[0]));4837if (!v->instance_divisor_factor_buffer) {4838FREE(v);4839return NULL;4840}4841void *map =4842sscreen->ws->buffer_map(sscreen->ws, v->instance_divisor_factor_buffer->buf, NULL, PIPE_MAP_WRITE);4843memcpy(map, divisor_factors, num_divisors * 
sizeof(divisor_factors[0]));4844}4845return v;4846}48474848static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)4849{4850struct si_context *sctx = (struct si_context *)ctx;4851struct si_vertex_elements *old = sctx->vertex_elements;4852struct si_vertex_elements *v = (struct si_vertex_elements *)state;48534854if (!v)4855v = sctx->no_velems_state;48564857sctx->vertex_elements = v;4858sctx->num_vertex_elements = v->count;48594860if (sctx->num_vertex_elements) {4861sctx->vertex_buffers_dirty = true;4862} else {4863sctx->vertex_buffers_dirty = false;4864sctx->vertex_buffer_pointer_dirty = false;4865sctx->vertex_buffer_user_sgprs_dirty = false;4866}48674868if (old->count != v->count ||4869old->uses_instance_divisors != v->uses_instance_divisors ||4870/* we don't check which divisors changed */4871v->uses_instance_divisors ||4872(old->vb_alignment_check_mask ^ v->vb_alignment_check_mask) &4873sctx->vertex_buffer_unaligned ||4874((v->vb_alignment_check_mask & sctx->vertex_buffer_unaligned) &&4875memcmp(old->vertex_buffer_index, v->vertex_buffer_index,4876sizeof(v->vertex_buffer_index[0]) * v->count)) ||4877/* fix_fetch_{always,opencode,unaligned} and hw_load_is_dword are4878* functions of fix_fetch and the src_offset alignment.4879* If they change and fix_fetch doesn't, it must be due to different4880* src_offset alignment, which is reflected in fix_fetch_opencode. 
*/4881old->fix_fetch_opencode != v->fix_fetch_opencode ||4882memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * v->count))4883sctx->do_update_shaders = true;48844885if (v->instance_divisor_is_fetched) {4886struct pipe_constant_buffer cb;48874888cb.buffer = &v->instance_divisor_factor_buffer->b.b;4889cb.user_buffer = NULL;4890cb.buffer_offset = 0;4891cb.buffer_size = 0xffffffff;4892si_set_internal_const_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS, &cb);4893}4894}48954896static void si_delete_vertex_element(struct pipe_context *ctx, void *state)4897{4898struct si_context *sctx = (struct si_context *)ctx;4899struct si_vertex_elements *v = (struct si_vertex_elements *)state;49004901if (sctx->vertex_elements == state)4902si_bind_vertex_elements(ctx, sctx->no_velems_state);49034904si_resource_reference(&v->instance_divisor_factor_buffer, NULL);4905FREE(state);4906}49074908static void si_set_vertex_buffers(struct pipe_context *ctx, unsigned start_slot, unsigned count,4909unsigned unbind_num_trailing_slots, bool take_ownership,4910const struct pipe_vertex_buffer *buffers)4911{4912struct si_context *sctx = (struct si_context *)ctx;4913struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot;4914unsigned updated_mask = u_bit_consecutive(start_slot, count + unbind_num_trailing_slots);4915uint32_t orig_unaligned = sctx->vertex_buffer_unaligned;4916uint32_t unaligned = 0;4917int i;49184919assert(start_slot + count + unbind_num_trailing_slots <= ARRAY_SIZE(sctx->vertex_buffer));49204921if (buffers) {4922if (take_ownership) {4923for (i = 0; i < count; i++) {4924const struct pipe_vertex_buffer *src = buffers + i;4925struct pipe_vertex_buffer *dsti = dst + i;4926struct pipe_resource *buf = src->buffer.resource;4927unsigned slot_bit = 1 << (start_slot + i);49284929/* Only unreference bound vertex buffers. 
(take_ownership) */4930pipe_resource_reference(&dsti->buffer.resource, NULL);49314932if (src->buffer_offset & 3 || src->stride & 3)4933unaligned |= slot_bit;49344935si_context_add_resource_size(sctx, buf);4936if (buf)4937si_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER;4938}4939/* take_ownership allows us to copy pipe_resource pointers without refcounting. */4940memcpy(dst, buffers, count * sizeof(struct pipe_vertex_buffer));4941} else {4942for (i = 0; i < count; i++) {4943const struct pipe_vertex_buffer *src = buffers + i;4944struct pipe_vertex_buffer *dsti = dst + i;4945struct pipe_resource *buf = src->buffer.resource;4946unsigned slot_bit = 1 << (start_slot + i);49474948pipe_resource_reference(&dsti->buffer.resource, buf);4949dsti->buffer_offset = src->buffer_offset;4950dsti->stride = src->stride;49514952if (dsti->buffer_offset & 3 || dsti->stride & 3)4953unaligned |= slot_bit;49544955si_context_add_resource_size(sctx, buf);4956if (buf)4957si_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER;4958}4959}4960} else {4961for (i = 0; i < count; i++)4962pipe_resource_reference(&dst[i].buffer.resource, NULL);4963}49644965for (i = 0; i < unbind_num_trailing_slots; i++)4966pipe_resource_reference(&dst[count + i].buffer.resource, NULL);49674968sctx->vertex_buffers_dirty = sctx->num_vertex_elements > 0;4969sctx->vertex_buffer_unaligned = (orig_unaligned & ~updated_mask) | unaligned;49704971/* Check whether alignment may have changed in a way that requires4972* shader changes. 
This check is conservative: a vertex buffer can only4973* trigger a shader change if the misalignment amount changes (e.g.4974* from byte-aligned to short-aligned), but we only keep track of4975* whether buffers are at least dword-aligned, since that should always4976* be the case in well-behaved applications anyway.4977*/4978if ((sctx->vertex_elements->vb_alignment_check_mask &4979(unaligned | orig_unaligned) & updated_mask))4980sctx->do_update_shaders = true;4981}49824983/*4984* Misc4985*/49864987static void si_set_tess_state(struct pipe_context *ctx, const float default_outer_level[4],4988const float default_inner_level[2])4989{4990struct si_context *sctx = (struct si_context *)ctx;4991struct pipe_constant_buffer cb;4992float array[8];49934994memcpy(array, default_outer_level, sizeof(float) * 4);4995memcpy(array + 4, default_inner_level, sizeof(float) * 2);49964997cb.buffer = NULL;4998cb.user_buffer = array;4999cb.buffer_offset = 0;5000cb.buffer_size = sizeof(array);50015002si_set_internal_const_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);5003}50045005static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)5006{5007struct si_context *sctx = (struct si_context *)ctx;50085009si_update_fb_dirtiness_after_rendering(sctx);50105011/* Multisample surfaces are flushed in si_decompress_textures. */5012if (sctx->framebuffer.uncompressed_cb_mask) {5013si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,5014sctx->framebuffer.CB_has_shader_readable_metadata,5015sctx->framebuffer.all_DCC_pipe_aligned);5016}5017}50185019/* This only ensures coherency for shader image/buffer stores. */5020static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)5021{5022struct si_context *sctx = (struct si_context *)ctx;50235024if (!(flags & ~PIPE_BARRIER_UPDATE))5025return;50265027/* Subsequent commands must wait for all shader invocations to5028* complete. 
*/5029sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |5030SI_CONTEXT_PFP_SYNC_ME;50315032if (flags & PIPE_BARRIER_CONSTANT_BUFFER)5033sctx->flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE;50345035if (flags & (PIPE_BARRIER_VERTEX_BUFFER | PIPE_BARRIER_SHADER_BUFFER | PIPE_BARRIER_TEXTURE |5036PIPE_BARRIER_IMAGE | PIPE_BARRIER_STREAMOUT_BUFFER | PIPE_BARRIER_GLOBAL_BUFFER)) {5037/* As far as I can tell, L1 contents are written back to L25038* automatically at end of shader, but the contents of other5039* L1 caches might still be stale. */5040sctx->flags |= SI_CONTEXT_INV_VCACHE;5041}50425043if (flags & PIPE_BARRIER_INDEX_BUFFER) {5044/* Indices are read through TC L2 since GFX8.5045* L1 isn't used.5046*/5047if (sctx->screen->info.chip_class <= GFX7)5048sctx->flags |= SI_CONTEXT_WB_L2;5049}50505051/* MSAA color, any depth and any stencil are flushed in5052* si_decompress_textures when needed.5053*/5054if (flags & PIPE_BARRIER_FRAMEBUFFER && sctx->framebuffer.uncompressed_cb_mask) {5055sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;50565057if (sctx->chip_class <= GFX8)5058sctx->flags |= SI_CONTEXT_WB_L2;5059}50605061/* Indirect buffers use TC L2 on GFX9, but not older hw. 
*/5062if (sctx->screen->info.chip_class <= GFX8 && flags & PIPE_BARRIER_INDIRECT_BUFFER)5063sctx->flags |= SI_CONTEXT_WB_L2;5064}50655066static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)5067{5068struct pipe_blend_state blend;50695070memset(&blend, 0, sizeof(blend));5071blend.independent_blend_enable = true;5072blend.rt[0].colormask = 0xf;5073return si_create_blend_state_mode(&sctx->b, &blend, mode);5074}50755076void si_init_state_compute_functions(struct si_context *sctx)5077{5078sctx->b.create_sampler_state = si_create_sampler_state;5079sctx->b.delete_sampler_state = si_delete_sampler_state;5080sctx->b.create_sampler_view = si_create_sampler_view;5081sctx->b.sampler_view_destroy = si_sampler_view_destroy;5082sctx->b.memory_barrier = si_memory_barrier;5083}50845085void si_init_state_functions(struct si_context *sctx)5086{5087sctx->atoms.s.framebuffer.emit = si_emit_framebuffer_state;5088sctx->atoms.s.msaa_sample_locs.emit = si_emit_msaa_sample_locs;5089sctx->atoms.s.db_render_state.emit = si_emit_db_render_state;5090sctx->atoms.s.dpbb_state.emit = si_emit_dpbb_state;5091sctx->atoms.s.msaa_config.emit = si_emit_msaa_config;5092sctx->atoms.s.sample_mask.emit = si_emit_sample_mask;5093sctx->atoms.s.cb_render_state.emit = si_emit_cb_render_state;5094sctx->atoms.s.blend_color.emit = si_emit_blend_color;5095sctx->atoms.s.clip_regs.emit = si_emit_clip_regs;5096sctx->atoms.s.clip_state.emit = si_emit_clip_state;5097sctx->atoms.s.stencil_ref.emit = si_emit_stencil_ref;50985099sctx->b.create_blend_state = si_create_blend_state;5100sctx->b.bind_blend_state = si_bind_blend_state;5101sctx->b.delete_blend_state = si_delete_blend_state;5102sctx->b.set_blend_color = si_set_blend_color;51035104sctx->b.create_rasterizer_state = si_create_rs_state;5105sctx->b.bind_rasterizer_state = si_bind_rs_state;5106sctx->b.delete_rasterizer_state = si_delete_rs_state;51075108sctx->b.create_depth_stencil_alpha_state = 
si_create_dsa_state;5109sctx->b.bind_depth_stencil_alpha_state = si_bind_dsa_state;5110sctx->b.delete_depth_stencil_alpha_state = si_delete_dsa_state;51115112sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);5113sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);5114sctx->custom_blend_fmask_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);5115sctx->custom_blend_eliminate_fastclear =5116si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);5117sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS);51185119sctx->b.set_clip_state = si_set_clip_state;5120sctx->b.set_stencil_ref = si_set_stencil_ref;51215122sctx->b.set_framebuffer_state = si_set_framebuffer_state;51235124sctx->b.set_sample_mask = si_set_sample_mask;51255126sctx->b.create_vertex_elements_state = si_create_vertex_elements;5127sctx->b.bind_vertex_elements_state = si_bind_vertex_elements;5128sctx->b.delete_vertex_elements_state = si_delete_vertex_element;5129sctx->b.set_vertex_buffers = si_set_vertex_buffers;51305131sctx->b.texture_barrier = si_texture_barrier;5132sctx->b.set_min_samples = si_set_min_samples;5133sctx->b.set_tess_state = si_set_tess_state;51345135sctx->b.set_active_query_state = si_set_active_query_state;5136}51375138void si_init_screen_state_functions(struct si_screen *sscreen)5139{5140sscreen->b.is_format_supported = si_is_format_supported;51415142if (sscreen->info.chip_class >= GFX10) {5143sscreen->make_texture_descriptor = gfx10_make_texture_descriptor;5144} else {5145sscreen->make_texture_descriptor = si_make_texture_descriptor;5146}5147}51485149static void si_set_grbm_gfx_index(struct si_context *sctx, struct si_pm4_state *pm4, unsigned value)5150{5151unsigned reg = sctx->chip_class >= GFX7 ? 
R_030800_GRBM_GFX_INDEX : R_00802C_GRBM_GFX_INDEX;5152si_pm4_set_reg(pm4, reg, value);5153}51545155static void si_set_grbm_gfx_index_se(struct si_context *sctx, struct si_pm4_state *pm4, unsigned se)5156{5157assert(se == ~0 || se < sctx->screen->info.max_se);5158si_set_grbm_gfx_index(sctx, pm4,5159(se == ~0 ? S_030800_SE_BROADCAST_WRITES(1) : S_030800_SE_INDEX(se)) |5160S_030800_SH_BROADCAST_WRITES(1) |5161S_030800_INSTANCE_BROADCAST_WRITES(1));5162}51635164static void si_write_harvested_raster_configs(struct si_context *sctx, struct si_pm4_state *pm4,5165unsigned raster_config, unsigned raster_config_1)5166{5167unsigned num_se = MAX2(sctx->screen->info.max_se, 1);5168unsigned raster_config_se[4];5169unsigned se;51705171ac_get_harvested_configs(&sctx->screen->info, raster_config, &raster_config_1, raster_config_se);51725173for (se = 0; se < num_se; se++) {5174si_set_grbm_gfx_index_se(sctx, pm4, se);5175si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]);5176}5177si_set_grbm_gfx_index(sctx, pm4, ~0);51785179if (sctx->chip_class >= GFX7) {5180si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);5181}5182}51835184static void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *pm4)5185{5186struct si_screen *sscreen = sctx->screen;5187unsigned num_rb = MIN2(sscreen->info.max_render_backends, 16);5188unsigned rb_mask = sscreen->info.enabled_rb_mask;5189unsigned raster_config = sscreen->pa_sc_raster_config;5190unsigned raster_config_1 = sscreen->pa_sc_raster_config_1;51915192if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {5193/* Always use the default config when all backends are enabled5194* (or when we failed to determine the enabled backends).5195*/5196si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config);5197if (sctx->chip_class >= GFX7)5198si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);5199} else {5200si_write_harvested_raster_configs(sctx, pm4, raster_config, 
raster_config_1);5201}5202}52035204void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)5205{5206struct si_screen *sscreen = sctx->screen;5207uint64_t border_color_va = sctx->border_color_buffer->gpu_address;5208bool has_clear_state = sscreen->info.has_clear_state;5209struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);52105211if (!pm4)5212return;52135214if (!uses_reg_shadowing) {5215si_pm4_cmd_add(pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));5216si_pm4_cmd_add(pm4, CC0_UPDATE_LOAD_ENABLES(1));5217si_pm4_cmd_add(pm4, CC1_UPDATE_SHADOW_ENABLES(1));52185219if (has_clear_state) {5220si_pm4_cmd_add(pm4, PKT3(PKT3_CLEAR_STATE, 0, 0));5221si_pm4_cmd_add(pm4, 0);5222}5223}52245225/* CLEAR_STATE doesn't restore these correctly. */5226si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));5227si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,5228S_028244_BR_X(16384) | S_028244_BR_Y(16384));52295230si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));5231if (!has_clear_state)5232si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));52335234if (!has_clear_state) {5235si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE,5236S_028230_ER_TRI(0xA) | S_028230_ER_POINT(0xA) | S_028230_ER_RECT(0xA) |5237/* Required by DX10_DIAMOND_TEST_ENA: */5238S_028230_ER_LINE_LR(0x1A) | S_028230_ER_LINE_RL(0x26) |5239S_028230_ER_LINE_TB(0xA) | S_028230_ER_LINE_BT(0xA));5240si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);5241si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);5242si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);5243si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);5244si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0);5245si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);5246si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);5247si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);5248si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);5249}52505251si_pm4_set_reg(pm4, 
R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);5252if (sctx->chip_class >= GFX7)5253si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40));52545255if (sctx->chip_class == GFX6) {5256si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE,5257S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1));5258}52595260if (sctx->chip_class <= GFX7 || !has_clear_state) {5261si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);5262si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);52635264/* CLEAR_STATE doesn't clear these correctly on certain generations.5265* I don't know why. Deduced by trial and error.5266*/5267si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);5268si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));5269si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);5270si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,5271S_028034_BR_X(16384) | S_028034_BR_Y(16384));5272}52735274if (sctx->chip_class >= GFX10) {5275si_pm4_set_reg(pm4, R_028038_DB_DFSM_CONTROL,5276S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF) |5277S_028038_POPS_DRAIN_PS_ON_OVERLAP(1));5278}52795280unsigned cu_mask_ps = 0xffffffff;52815282/* It's wasteful to enable all CUs for PS if shader arrays have a different5283* number of CUs. The reason is that the hardware sends the same number of PS5284* waves to each shader array, so the slowest shader array limits the performance.5285* Disable the extra CUs for PS in other shader arrays to save power and thus5286* increase clocks for busy CUs. 
In the future, we might disable or enable this5287* tweak only for certain apps.5288*/5289if (sctx->chip_class >= GFX10_3)5290cu_mask_ps = u_bit_consecutive(0, sscreen->info.min_good_cu_per_sa);52915292if (sctx->chip_class >= GFX7) {5293si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,5294S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F));5295}52965297if (sctx->chip_class <= GFX8) {5298si_set_raster_config(sctx, pm4);52995300/* FIXME calculate these values somehow ??? */5301si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);5302si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);53035304/* These registers, when written, also overwrite the CLEAR_STATE5305* context, so we can't rely on CLEAR_STATE setting them.5306* It would be an issue if there was another UMD changing them.5307*/5308si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);5309si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);5310si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);5311}53125313if (sctx->chip_class >= GFX7 && sctx->chip_class <= GFX8) {5314si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,5315S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));5316si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F));5317si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,5318S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));53195320/* If this is 0, Bonaire can hang even if GS isn't being used.5321* Other chips are unaffected. 
These are suboptimal values,5322* but we don't use on-chip GS.5323*/5324si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,5325S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4));5326}53275328if (sctx->chip_class == GFX8) {5329unsigned vgt_tess_distribution;53305331vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) |5332S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT_GFX81(16);53335334/* Testing with Unigine Heaven extreme tesselation yielded best results5335* with TRAP_SPLIT = 3.5336*/5337if (sctx->family == CHIP_FIJI || sctx->family >= CHIP_POLARIS10)5338vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);53395340si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);5341}53425343if (sscreen->info.chip_class <= GFX9) {5344si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);5345}53465347if (sctx->chip_class == GFX9) {5348si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0);5349si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0);5350si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0);53515352si_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL,5353S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) |5354S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));5355}53565357if (sctx->chip_class >= GFX9) {5358si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,5359S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));53605361si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,5362S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) | S_028B50_ACCUM_QUAD(24) |5363S_028B50_DONUT_SPLIT_GFX9(24) | S_028B50_TRAP_SPLIT(6));5364si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,5365S_028C48_MAX_ALLOC_COUNT(sscreen->info.pbb_max_alloc_count - 1) |5366S_028C48_MAX_PRIM_PER_BATCH(1023));5367si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,5368S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));53695370si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);5371si_pm4_set_reg(pm4, R_0301EC_CP_COHER_START_DELAY,5372sctx->chip_class >= GFX10 ? 
0x20 : 0);5373}53745375if (sctx->chip_class >= GFX10) {5376/* Logical CUs 16 - 31 */5377si_pm4_set_reg(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16));5378si_pm4_set_reg(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff));5379si_pm4_set_reg(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff));53805381si_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);5382si_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0);5383si_pm4_set_reg(pm4, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0);5384si_pm4_set_reg(pm4, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0);5385si_pm4_set_reg(pm4, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0);5386si_pm4_set_reg(pm4, R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1, 0);5387si_pm4_set_reg(pm4, R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2, 0);5388si_pm4_set_reg(pm4, R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3, 0);5389si_pm4_set_reg(pm4, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0);5390si_pm4_set_reg(pm4, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0);5391si_pm4_set_reg(pm4, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0);5392si_pm4_set_reg(pm4, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0);5393si_pm4_set_reg(pm4, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0);5394si_pm4_set_reg(pm4, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0);5395si_pm4_set_reg(pm4, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0);5396si_pm4_set_reg(pm4, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0);53975398si_pm4_set_reg(pm4, R_00B0C0_SPI_SHADER_REQ_CTRL_PS,5399S_00B0C0_SOFT_GROUPING_EN(1) |5400S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));5401si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);54025403/* Enable CMASK/HTILE/DCC caching in L2 for small chips. 
*/5404unsigned meta_write_policy, meta_read_policy;5405if (sscreen->info.max_render_backends <= 4) {5406meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */5407meta_read_policy = V_02807C_CACHE_LRU_RD; /* cache reads */5408} else {5409meta_write_policy = V_02807C_CACHE_STREAM; /* write combine */5410meta_read_policy = V_02807C_CACHE_NOA; /* don't cache reads */5411}54125413si_pm4_set_reg(pm4, R_02807C_DB_RMI_L2_CACHE_CONTROL,5414S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM) |5415S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM) |5416S_02807C_HTILE_WR_POLICY(meta_write_policy) |5417S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) |5418S_02807C_Z_RD_POLICY(V_02807C_CACHE_NOA) |5419S_02807C_S_RD_POLICY(V_02807C_CACHE_NOA) |5420S_02807C_HTILE_RD_POLICY(meta_read_policy));5421si_pm4_set_reg(pm4, R_028410_CB_RMI_GL2_CACHE_CONTROL,5422S_028410_CMASK_WR_POLICY(meta_write_policy) |5423S_028410_FMASK_WR_POLICY(V_028410_CACHE_STREAM) |5424S_028410_DCC_WR_POLICY(meta_write_policy) |5425S_028410_COLOR_WR_POLICY(V_028410_CACHE_STREAM) |5426S_028410_CMASK_RD_POLICY(meta_read_policy) |5427S_028410_FMASK_RD_POLICY(V_028410_CACHE_NOA) |5428S_028410_DCC_RD_POLICY(meta_read_policy) |5429S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA));54305431si_pm4_set_reg(pm4, R_028428_CB_COVERAGE_OUT_CONTROL, 0);5432si_pm4_set_reg(pm4, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 0);54335434/* Break up a pixel wave if it contains deallocs for more than5435* half the parameter cache.5436*5437* To avoid a deadlock where pixel waves aren't launched5438* because they're waiting for more pixels while the frontend5439* is stuck waiting for PC space, the maximum allowed value is5440* the size of the PC minus the largest possible allocation for5441* a single primitive shader subgroup.5442*/5443si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512));5444/* Reuse for legacy (non-NGG) only. 
*/5445si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);54465447if (!has_clear_state) {5448si_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE,5449sscreen->info.pa_sc_tile_steering_override);5450}545154525453si_pm4_set_reg(pm4, R_030964_GE_MAX_VTX_INDX, ~0);5454si_pm4_set_reg(pm4, R_030924_GE_MIN_VTX_INDX, 0);5455si_pm4_set_reg(pm4, R_030928_GE_INDX_OFFSET, 0);5456si_pm4_set_reg(pm4, R_03097C_GE_STEREO_CNTL, 0);5457si_pm4_set_reg(pm4, R_030988_GE_USER_VGPR_EN, 0);5458}54595460if (sctx->chip_class >= GFX10_3) {5461si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff);5462/* The rate combiners have no effect if they are disabled like this:5463* VERTEX_RATE: BYPASS_VTX_RATE_COMBINER = 15464* PRIMITIVE_RATE: BYPASS_PRIM_RATE_COMBINER = 15465* HTILE_RATE: VRS_HTILE_ENCODING = 05466* SAMPLE_ITER: PS_ITER_SAMPLE = 05467*5468* Use OVERRIDE, which will ignore results from previous combiners.5469* (e.g. enabled sample shading overrides the vertex rate)5470*/5471si_pm4_set_reg(pm4, R_028848_PA_CL_VRS_CNTL,5472S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) |5473S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE));5474}54755476sctx->cs_preamble_state = pm4;5477}547854795480