/* Path: blob/21.2-virgl/src/intel/vulkan/gfx8_cmd_buffer.c (4547 views) */
/*1* Copyright © 2015 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include <assert.h>24#include <stdbool.h>25#include <string.h>26#include <unistd.h>27#include <fcntl.h>2829#include "anv_private.h"3031#include "genxml/gen_macros.h"32#include "genxml/genX_pack.h"33#include "common/intel_guardband.h"3435#if GFX_VER == 836void37gfx8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)38{39struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;40uint32_t count = cmd_buffer->state.gfx.dynamic.viewport.count;41const VkViewport *viewports =42cmd_buffer->state.gfx.dynamic.viewport.viewports;43struct anv_state sf_clip_state =44anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64);4546for (uint32_t i = 0; i < count; i++) {47const VkViewport *vp = &viewports[i];4849/* The gfx7 state struct has just the matrix and guardband fields, the50* gfx8 struct adds the min/max viewport fields. 
*/51struct GENX(SF_CLIP_VIEWPORT) sfv = {52.ViewportMatrixElementm00 = vp->width / 2,53.ViewportMatrixElementm11 = vp->height / 2,54.ViewportMatrixElementm22 = vp->maxDepth - vp->minDepth,55.ViewportMatrixElementm30 = vp->x + vp->width / 2,56.ViewportMatrixElementm31 = vp->y + vp->height / 2,57.ViewportMatrixElementm32 = vp->minDepth,58.XMinClipGuardband = -1.0f,59.XMaxClipGuardband = 1.0f,60.YMinClipGuardband = -1.0f,61.YMaxClipGuardband = 1.0f,62.XMinViewPort = vp->x,63.XMaxViewPort = vp->x + vp->width - 1,64.YMinViewPort = MIN2(vp->y, vp->y + vp->height),65.YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1,66};6768if (fb) {69/* We can only calculate a "real" guardband clip if we know the70* framebuffer at the time we emit the packet. Otherwise, we have71* fall back to a worst-case guardband of [-1, 1].72*/73intel_calculate_guardband_size(fb->width, fb->height,74sfv.ViewportMatrixElementm00,75sfv.ViewportMatrixElementm11,76sfv.ViewportMatrixElementm30,77sfv.ViewportMatrixElementm31,78&sfv.XMinClipGuardband,79&sfv.XMaxClipGuardband,80&sfv.YMinClipGuardband,81&sfv.YMaxClipGuardband);82}8384GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sfv);85}8687anv_batch_emit(&cmd_buffer->batch,88GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {89clip.SFClipViewportPointer = sf_clip_state.offset;90}91}9293void94gfx8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer,95bool depth_clamp_enable)96{97uint32_t count = cmd_buffer->state.gfx.dynamic.viewport.count;98const VkViewport *viewports =99cmd_buffer->state.gfx.dynamic.viewport.viewports;100struct anv_state cc_state =101anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);102103for (uint32_t i = 0; i < count; i++) {104const VkViewport *vp = &viewports[i];105106/* From the Vulkan spec:107*108* "It is valid for minDepth to be greater than or equal to109* maxDepth."110*/111float min_depth = MIN2(vp->minDepth, vp->maxDepth);112float max_depth = MAX2(vp->minDepth, 
vp->maxDepth);113114struct GENX(CC_VIEWPORT) cc_viewport = {115.MinimumDepth = depth_clamp_enable ? min_depth : 0.0f,116.MaximumDepth = depth_clamp_enable ? max_depth : 1.0f,117};118119GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);120}121122anv_batch_emit(&cmd_buffer->batch,123GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {124cc.CCViewportPointer = cc_state.offset;125}126}127#endif128129void130genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)131{132if (cmd_buffer->state.pma_fix_enabled == enable)133return;134135cmd_buffer->state.pma_fix_enabled = enable;136137/* According to the Broadwell PIPE_CONTROL documentation, software should138* emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set139* prior to the LRI. If stencil buffer writes are enabled, then a Render140* Cache Flush is also necessary.141*142* The Skylake docs say to use a depth stall rather than a command143* streamer stall. However, the hardware seems to violently disagree.144* A full command streamer stall seems to be needed in both cases.145*/146anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {147pc.DepthCacheFlushEnable = true;148pc.CommandStreamerStallEnable = true;149pc.RenderTargetCacheFlushEnable = true;150#if GFX_VER >= 12151pc.TileCacheFlushEnable = true;152153/* Wa_1409600907: "PIPE_CONTROL with Depth Stall Enable bit must154* be set with any PIPE_CONTROL with Depth Flush Enable bit set.155*/156pc.DepthStallEnable = true;157#endif158}159160#if GFX_VER == 9161162uint32_t cache_mode;163anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0),164.STCPMAOptimizationEnable = enable,165.STCPMAOptimizationEnableMask = true);166anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {167lri.RegisterOffset = GENX(CACHE_MODE_0_num);168lri.DataDWord = cache_mode;169}170171#elif GFX_VER == 8172173uint32_t cache_mode;174anv_pack_struct(&cache_mode, GENX(CACHE_MODE_1),175.NPPMAFixEnable = enable,176.NPEarlyZFailsDisable = 
enable,177.NPPMAFixEnableMask = true,178.NPEarlyZFailsDisableMask = true);179anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {180lri.RegisterOffset = GENX(CACHE_MODE_1_num);181lri.DataDWord = cache_mode;182}183184#endif /* GFX_VER == 8 */185186/* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache187* Flush bits is often necessary. We do it regardless because it's easier.188* The render cache flush is also necessary if stencil writes are enabled.189*190* Again, the Skylake docs give a different set of flushes but the BDW191* flushes seem to work just as well.192*/193anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {194pc.DepthStallEnable = true;195pc.DepthCacheFlushEnable = true;196pc.RenderTargetCacheFlushEnable = true;197#if GFX_VER >= 12198pc.TileCacheFlushEnable = true;199#endif200}201}202203UNUSED static bool204want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer)205{206assert(GFX_VER == 8);207208/* From the Broadwell PRM Vol. 2c CACHE_MODE_1::NP_PMA_FIX_ENABLE:209*210* SW must set this bit in order to enable this fix when following211* expression is TRUE.212*213* 3DSTATE_WM::ForceThreadDispatch != 1 &&214* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&215* (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&216* (3DSTATE_DEPTH_BUFFER::HIZ Enable) &&217* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) &&218* (3DSTATE_PS_EXTRA::PixelShaderValid) &&219* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||220* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||221* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||222* 3DSTATE_WM_HZ_OP::StencilBufferClear) &&223* (3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable) &&224* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||225* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||226* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||227* 3DSTATE_PS_BLEND::AlphaTestEnable ||228* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&229* 3DSTATE_WM::ForceKillPix != ForceOff &&230* 
((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&231* 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||232* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&233* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&234* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||235* (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))236*/237238/* These are always true:239* 3DSTATE_WM::ForceThreadDispatch != 1 &&240* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)241*/242243/* We only enable the PMA fix if we know for certain that HiZ is enabled.244* If we don't know whether HiZ is enabled or not, we disable the PMA fix245* and there is no harm.246*247* (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&248* 3DSTATE_DEPTH_BUFFER::HIZ Enable249*/250if (!cmd_buffer->state.hiz_enabled)251return false;252253/* 3DSTATE_PS_EXTRA::PixelShaderValid */254struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;255if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))256return false;257258/* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) */259const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);260if (wm_prog_data->early_fragment_tests)261return false;262263/* We never use anv_pipeline for HiZ ops so this is trivially true:264* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||265* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||266* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||267* 3DSTATE_WM_HZ_OP::StencilBufferClear)268*/269270/* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable */271if (!pipeline->depth_test_enable)272return false;273274/* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||275* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||276* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||277* 3DSTATE_PS_BLEND::AlphaTestEnable ||278* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&279* 3DSTATE_WM::ForceKillPix != ForceOff &&280* ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&281* 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||282* 
(3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&283* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&284* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||285* (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))286*/287return (pipeline->kill_pixel && (pipeline->writes_depth ||288pipeline->writes_stencil)) ||289wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;290}291292UNUSED static bool293want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer)294{295if (GFX_VER > 9)296return false;297assert(GFX_VER == 9);298299/* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable:300*301* Clearing this bit will force the STC cache to wait for pending302* retirement of pixels at the HZ-read stage and do the STC-test for303* Non-promoted, R-computed and Computed depth modes instead of304* postponing the STC-test to RCPFE.305*306* STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&307* 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable308*309* STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&310* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&311* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)312*313* COMP_STC_EN = STC_TEST_EN &&314* 3DSTATE_PS_EXTRA::PixelShaderComputesStencil315*316* SW parses the pipeline states to generate the following logical317* signal indicating if PMA FIX can be enabled.318*319* STC_PMA_OPT =320* 3DSTATE_WM::ForceThreadDispatch != 1 &&321* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&322* 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&323* 3DSTATE_DEPTH_BUFFER::HIZ Enable &&324* !(3DSTATE_WM::EDSC_Mode == 2) &&325* 3DSTATE_PS_EXTRA::PixelShaderValid &&326* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||327* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||328* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||329* 3DSTATE_WM_HZ_OP::StencilBufferClear) &&330* (COMP_STC_EN || STC_WRITE_EN) &&331* ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||332* 3DSTATE_WM::ForceKillPix == ON ||333* 
3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||334* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||335* 3DSTATE_PS_BLEND::AlphaTestEnable ||336* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||337* (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF))338*/339340/* These are always true:341* 3DSTATE_WM::ForceThreadDispatch != 1 &&342* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)343*/344345/* We only enable the PMA fix if we know for certain that HiZ is enabled.346* If we don't know whether HiZ is enabled or not, we disable the PMA fix347* and there is no harm.348*349* (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&350* 3DSTATE_DEPTH_BUFFER::HIZ Enable351*/352if (!cmd_buffer->state.hiz_enabled)353return false;354355/* We can't possibly know if HiZ is enabled without the framebuffer */356assert(cmd_buffer->state.framebuffer);357358/* HiZ is enabled so we had better have a depth buffer with HiZ */359const struct anv_image_view *ds_iview =360anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);361assert(ds_iview && ds_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ);362363/* 3DSTATE_PS_EXTRA::PixelShaderValid */364struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;365if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))366return false;367368/* !(3DSTATE_WM::EDSC_Mode == 2) */369const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);370if (wm_prog_data->early_fragment_tests)371return false;372373/* We never use anv_pipeline for HiZ ops so this is trivially true:374* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||375* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||376* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||377* 3DSTATE_WM_HZ_OP::StencilBufferClear)378*/379380/* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&381* 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable382*/383const bool stc_test_en =384(ds_iview->image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&385pipeline->stencil_test_enable;386387/* 
3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&388* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&389* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)390*/391const bool stc_write_en =392(ds_iview->image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&393(cmd_buffer->state.gfx.dynamic.stencil_write_mask.front ||394cmd_buffer->state.gfx.dynamic.stencil_write_mask.back) &&395pipeline->writes_stencil;396397/* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */398const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil;399400/* COMP_STC_EN || STC_WRITE_EN */401if (!(comp_stc_en || stc_write_en))402return false;403404/* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||405* 3DSTATE_WM::ForceKillPix == ON ||406* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||407* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||408* 3DSTATE_PS_BLEND::AlphaTestEnable ||409* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||410* (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)411*/412return pipeline->kill_pixel ||413wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;414}415416void417genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)418{419struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;420struct anv_dynamic_state *d = &cmd_buffer->state.gfx.dynamic;421422if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) {423uint32_t topology;424if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))425topology = pipeline->topology;426else427topology = genX(vk_to_intel_primitive_type)[d->primitive_topology];428429cmd_buffer->state.gfx.primitive_topology = topology;430431anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) {432vft.PrimitiveTopologyType = topology;433}434}435436if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |437ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) {438uint32_t sf_dw[GENX(3DSTATE_SF_length)];439struct GENX(3DSTATE_SF) sf = {440GENX(3DSTATE_SF_header),441};442#if 
GFX_VER == 8443if (cmd_buffer->device->info.is_cherryview) {444sf.CHVLineWidth = d->line_width;445} else {446sf.LineWidth = d->line_width;447}448#else449sf.LineWidth = d->line_width,450#endif451GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);452anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gfx8.sf);453}454455if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |456ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS |457ANV_CMD_DIRTY_DYNAMIC_CULL_MODE |458ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE |459ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE |460ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY)) {461/* Take dynamic primitive topology in to account with462* 3DSTATE_RASTER::APIMode463* 3DSTATE_RASTER::DXMultisampleRasterizationEnable464* 3DSTATE_RASTER::AntialiasingEnable465*/466uint32_t api_mode = 0;467bool msaa_raster_enable = false;468bool aa_enable = 0;469470if (cmd_buffer->state.gfx.pipeline->dynamic_states &471ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) {472VkPrimitiveTopology primitive_topology =473cmd_buffer->state.gfx.dynamic.primitive_topology;474475VkPolygonMode dynamic_raster_mode =476genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,477primitive_topology);478479genX(rasterization_mode)(480dynamic_raster_mode, pipeline->line_mode, &api_mode,481&msaa_raster_enable);482483aa_enable =484anv_rasterization_aa_mode(dynamic_raster_mode,485pipeline->line_mode);486}487488uint32_t raster_dw[GENX(3DSTATE_RASTER_length)];489struct GENX(3DSTATE_RASTER) raster = {490GENX(3DSTATE_RASTER_header),491.APIMode = api_mode,492.DXMultisampleRasterizationEnable = msaa_raster_enable,493.AntialiasingEnable = aa_enable,494.GlobalDepthOffsetConstant = d->depth_bias.bias,495.GlobalDepthOffsetScale = d->depth_bias.slope,496.GlobalDepthOffsetClamp = d->depth_bias.clamp,497.CullMode = genX(vk_to_intel_cullmode)[d->cull_mode],498.FrontWinding = genX(vk_to_intel_front_face)[d->front_face],499.GlobalDepthOffsetEnableSolid = d->depth_bias_enable,500.GlobalDepthOffsetEnableWireframe = 
d->depth_bias_enable,501.GlobalDepthOffsetEnablePoint = d->depth_bias_enable,502};503GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster);504anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,505pipeline->gfx8.raster);506}507508/* Stencil reference values moved from COLOR_CALC_STATE in gfx8 to509* 3DSTATE_WM_DEPTH_STENCIL in gfx9. That means the dirty bits gets split510* across different state packets for gfx8 and gfx9. We handle that by511* using a big old #if switch here.512*/513#if GFX_VER == 8514if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |515ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {516struct anv_state cc_state =517anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,518GENX(COLOR_CALC_STATE_length) * 4,51964);520struct GENX(COLOR_CALC_STATE) cc = {521.BlendConstantColorRed = d->blend_constants[0],522.BlendConstantColorGreen = d->blend_constants[1],523.BlendConstantColorBlue = d->blend_constants[2],524.BlendConstantColorAlpha = d->blend_constants[3],525.StencilReferenceValue = d->stencil_reference.front & 0xff,526.BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff,527};528GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);529530anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {531ccp.ColorCalcStatePointer = cc_state.offset;532ccp.ColorCalcStatePointerValid = true;533}534}535536if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |537ANV_CMD_DIRTY_RENDER_TARGETS |538ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |539ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |540ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE |541ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |542ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |543ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |544ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP)) {545uint32_t wm_depth_stencil_dw[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];546547struct GENX(3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil) = {548GENX(3DSTATE_WM_DEPTH_STENCIL_header),549550.StencilTestMask = d->stencil_compare_mask.front & 
0xff,551.StencilWriteMask = d->stencil_write_mask.front & 0xff,552553.BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,554.BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,555556.StencilBufferWriteEnable =557(d->stencil_write_mask.front || d->stencil_write_mask.back) &&558d->stencil_test_enable,559560.DepthTestEnable = d->depth_test_enable,561.DepthBufferWriteEnable = d->depth_test_enable && d->depth_write_enable,562.DepthTestFunction = genX(vk_to_intel_compare_op)[d->depth_compare_op],563.StencilTestEnable = d->stencil_test_enable,564.StencilFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.front.fail_op],565.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[d->stencil_op.front.pass_op],566.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.front.depth_fail_op],567.StencilTestFunction = genX(vk_to_intel_compare_op)[d->stencil_op.front.compare_op],568.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.back.fail_op],569.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[d->stencil_op.back.pass_op],570.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.back.depth_fail_op],571.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[d->stencil_op.back.compare_op],572};573GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, wm_depth_stencil_dw,574&wm_depth_stencil);575576anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw,577pipeline->gfx8.wm_depth_stencil);578579genX(cmd_buffer_enable_pma_fix)(cmd_buffer,580want_depth_pma_fix(cmd_buffer));581}582#else583if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) {584struct anv_state cc_state =585anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,586GENX(COLOR_CALC_STATE_length) * 4,58764);588struct GENX(COLOR_CALC_STATE) cc = {589.BlendConstantColorRed = d->blend_constants[0],590.BlendConstantColorGreen = d->blend_constants[1],591.BlendConstantColorBlue = d->blend_constants[2],592.BlendConstantColorAlpha = 
d->blend_constants[3],593};594GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);595596anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {597ccp.ColorCalcStatePointer = cc_state.offset;598ccp.ColorCalcStatePointerValid = true;599}600}601602if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |603ANV_CMD_DIRTY_RENDER_TARGETS |604ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |605ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |606ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE |607ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE |608ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |609ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |610ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |611ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP)) {612uint32_t dwords[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];613struct GENX(3DSTATE_WM_DEPTH_STENCIL) wm_depth_stencil = {614GENX(3DSTATE_WM_DEPTH_STENCIL_header),615616.StencilTestMask = d->stencil_compare_mask.front & 0xff,617.StencilWriteMask = d->stencil_write_mask.front & 0xff,618619.BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,620.BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,621622.StencilReferenceValue = d->stencil_reference.front & 0xff,623.BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff,624625.StencilBufferWriteEnable =626(d->stencil_write_mask.front || d->stencil_write_mask.back) &&627d->stencil_test_enable,628629.DepthTestEnable = d->depth_test_enable,630.DepthBufferWriteEnable = d->depth_test_enable && d->depth_write_enable,631.DepthTestFunction = genX(vk_to_intel_compare_op)[d->depth_compare_op],632.StencilTestEnable = d->stencil_test_enable,633.StencilFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.front.fail_op],634.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[d->stencil_op.front.pass_op],635.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.front.depth_fail_op],636.StencilTestFunction = genX(vk_to_intel_compare_op)[d->stencil_op.front.compare_op],637.BackfaceStencilFailOp = 
genX(vk_to_intel_stencil_op)[d->stencil_op.back.fail_op],638.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[d->stencil_op.back.pass_op],639.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.back.depth_fail_op],640.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[d->stencil_op.back.compare_op],641642};643GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dwords, &wm_depth_stencil);644645anv_batch_emit_merge(&cmd_buffer->batch, dwords,646pipeline->gfx9.wm_depth_stencil);647648genX(cmd_buffer_enable_pma_fix)(cmd_buffer,649want_stencil_pma_fix(cmd_buffer));650}651#endif652653#if GFX_VER >= 12654if(cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |655ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS |656ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE)) {657anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {658db.DepthBoundsTestValueModifyDisable = false;659db.DepthBoundsTestEnableModifyDisable = false;660db.DepthBoundsTestEnable = d->depth_bounds_test_enable;661db.DepthBoundsTestMinValue = d->depth_bounds.min;662db.DepthBoundsTestMaxValue = d->depth_bounds.max;663}664}665#endif666667if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) {668anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {669ls.LineStipplePattern = d->line_stipple.pattern;670ls.LineStippleInverseRepeatCount =6711.0f / MAX2(1, d->line_stipple.factor);672ls.LineStippleRepeatCount = d->line_stipple.factor;673}674}675676if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |677ANV_CMD_DIRTY_INDEX_BUFFER |678ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE)) {679anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {680vf.IndexedDrawCutIndexEnable = d->primitive_restart_enable;681vf.CutIndex = cmd_buffer->state.restart_index;682}683}684685if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) 
{686genX(emit_sample_pattern)(&cmd_buffer->batch,687cmd_buffer->state.gfx.dynamic.sample_locations.samples,688cmd_buffer->state.gfx.dynamic.sample_locations.locations);689}690691if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE ||692cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP) {693const uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes;694/* 3DSTATE_WM in the hope we can avoid spawning fragment shaders695* threads.696*/697bool dirty_color_blend =698cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;699700if (dirty_color_blend) {701uint32_t dwords[MAX2(GENX(3DSTATE_WM_length),702GENX(3DSTATE_PS_BLEND_length))];703struct GENX(3DSTATE_WM) wm = {704GENX(3DSTATE_WM_header),705706.ForceThreadDispatchEnable = (pipeline->force_fragment_thread_dispatch ||707!color_writes) ? ForceON : 0,708};709GENX(3DSTATE_WM_pack)(NULL, dwords, &wm);710711anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx8.wm);712713/* 3DSTATE_PS_BLEND to be consistent with the rest of the714* BLEND_STATE_ENTRY.715*/716struct GENX(3DSTATE_PS_BLEND) ps_blend = {717GENX(3DSTATE_PS_BLEND_header),718.HasWriteableRT = color_writes != 0,719};720GENX(3DSTATE_PS_BLEND_pack)(NULL, dwords, &ps_blend);721anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx8.ps_blend);722}723724/* Blend states of each RT */725uint32_t surface_count = 0;726struct anv_pipeline_bind_map *map;727if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {728map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;729surface_count = map->surface_count;730}731732uint32_t blend_dws[GENX(BLEND_STATE_length) +733MAX_RTS * GENX(BLEND_STATE_ENTRY_length)];734uint32_t *dws = blend_dws;735memset(blend_dws, 0, sizeof(blend_dws));736737/* Skip this part */738dws += GENX(BLEND_STATE_length);739740bool dirty_logic_op =741cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP;742743for (uint32_t i = 0; i < surface_count; i++) {744struct 
anv_pipeline_binding *binding = &map->surface_to_descriptor[i];745bool write_disabled =746dirty_color_blend && (color_writes & (1u << binding->index)) == 0;747struct GENX(BLEND_STATE_ENTRY) entry = {748.WriteDisableAlpha = write_disabled,749.WriteDisableRed = write_disabled,750.WriteDisableGreen = write_disabled,751.WriteDisableBlue = write_disabled,752.LogicOpFunction =753dirty_logic_op ? genX(vk_to_intel_logic_op)[d->logic_op] : 0,754};755GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);756dws += GENX(BLEND_STATE_ENTRY_length);757}758759uint32_t num_dwords = GENX(BLEND_STATE_length) +760GENX(BLEND_STATE_ENTRY_length) * surface_count;761762struct anv_state blend_states =763anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,764pipeline->gfx8.blend_state, num_dwords, 64);765anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {766bsp.BlendStatePointer = blend_states.offset;767bsp.BlendStatePointerValid = true;768}769}770771#if GFX_VER >= 11772if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) {773struct anv_state cps_states = ANV_STATE_NULL;774775#if GFX_VER >= 12776uint32_t count = cmd_buffer->state.gfx.dynamic.viewport.count;777cps_states =778anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,779GENX(CPS_STATE_length) * 4 * count,78032);781#endif /* GFX_VER >= 12 */782783genX(emit_shading_rate)(&cmd_buffer->batch, pipeline, cps_states,784&cmd_buffer->state.gfx.dynamic);785}786#endif /* GFX_VER >= 11 */787788cmd_buffer->state.gfx.dirty = 0;789}790791static uint32_t vk_to_intel_index_type(VkIndexType type)792{793switch (type) {794case VK_INDEX_TYPE_UINT8_EXT:795return INDEX_BYTE;796case VK_INDEX_TYPE_UINT16:797return INDEX_WORD;798case VK_INDEX_TYPE_UINT32:799return INDEX_DWORD;800default:801unreachable("invalid index type");802}803}804805static uint32_t restart_index_for_type(VkIndexType type)806{807switch (type) {808case VK_INDEX_TYPE_UINT8_EXT:809return UINT8_MAX;810case VK_INDEX_TYPE_UINT16:811return UINT16_MAX;812case 
VK_INDEX_TYPE_UINT32:813return UINT32_MAX;814default:815unreachable("invalid index type");816}817}818819void genX(CmdBindIndexBuffer)(820VkCommandBuffer commandBuffer,821VkBuffer _buffer,822VkDeviceSize offset,823VkIndexType indexType)824{825ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);826ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);827828cmd_buffer->state.restart_index = restart_index_for_type(indexType);829830anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {831ib.IndexFormat = vk_to_intel_index_type(indexType);832ib.MOCS = anv_mocs(cmd_buffer->device,833buffer->address.bo,834ISL_SURF_USAGE_INDEX_BUFFER_BIT);835#if GFX_VER >= 12836ib.L3BypassDisable = true;837#endif838ib.BufferStartingAddress = anv_address_add(buffer->address, offset);839ib.BufferSize = buffer->size - offset;840}841842cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;843}844845846