/* Path: blob/21.2-virgl/src/intel/blorp/blorp_genX_exec.h (7227 views) */
/*1* Copyright © 2016 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#ifndef BLORP_GENX_EXEC_H24#define BLORP_GENX_EXEC_H2526#include "blorp_priv.h"27#include "dev/intel_device_info.h"28#include "common/intel_sample_positions.h"29#include "common/intel_l3_config.h"30#include "genxml/gen_macros.h"3132/**33* This file provides the blorp pipeline setup and execution functionality.34* It defines the following function:35*36* static void37* blorp_exec(struct blorp_context *blorp, void *batch_data,38* const struct blorp_params *params);39*40* It is the job of whoever includes this header to wrap this in something41* to get an externally visible symbol.42*43* In order for the blorp_exec function to work, the driver must provide44* implementations of the following static helper functions.45*/4647static void *48blorp_emit_dwords(struct blorp_batch *batch, unsigned n);4950static uint64_t51blorp_emit_reloc(struct 
blorp_batch *batch,52void *location, struct blorp_address address, uint32_t delta);5354static void55blorp_measure_start(struct blorp_batch *batch,56const struct blorp_params *params);5758static void *59blorp_alloc_dynamic_state(struct blorp_batch *batch,60uint32_t size,61uint32_t alignment,62uint32_t *offset);63static void *64blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,65struct blorp_address *addr);66static void67blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,68const struct blorp_address *addrs,69uint32_t *sizes,70unsigned num_vbs);7172UNUSED static struct blorp_address73blorp_get_workaround_address(struct blorp_batch *batch);7475static void76blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries,77unsigned state_size, unsigned state_alignment,78uint32_t *bt_offset, uint32_t *surface_offsets,79void **surface_maps);8081static void82blorp_flush_range(struct blorp_batch *batch, void *start, size_t size);8384static void85blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,86struct blorp_address address, uint32_t delta);8788static uint64_t89blorp_get_surface_address(struct blorp_batch *batch,90struct blorp_address address);9192#if GFX_VER >= 7 && GFX_VER < 1093static struct blorp_address94blorp_get_surface_base_address(struct blorp_batch *batch);95#endif9697#if GFX_VER >= 798static const struct intel_l3_config *99blorp_get_l3_config(struct blorp_batch *batch);100# else101static void102blorp_emit_urb_config(struct blorp_batch *batch,103unsigned vs_entry_size, unsigned sf_entry_size);104#endif105106static void107blorp_emit_pipeline(struct blorp_batch *batch,108const struct blorp_params *params);109110/***** BEGIN blorp_exec implementation ******/111112static uint64_t113_blorp_combine_address(struct blorp_batch *batch, void *location,114struct blorp_address address, uint32_t delta)115{116if (address.buffer == NULL) {117return address.offset + delta;118} else {119return blorp_emit_reloc(batch, 
location, address, delta);120}121}122123#define __gen_address_type struct blorp_address124#define __gen_user_data struct blorp_batch125#define __gen_combine_address _blorp_combine_address126127#include "genxml/genX_pack.h"128129#define _blorp_cmd_length(cmd) cmd ## _length130#define _blorp_cmd_length_bias(cmd) cmd ## _length_bias131#define _blorp_cmd_header(cmd) cmd ## _header132#define _blorp_cmd_pack(cmd) cmd ## _pack133134#define blorp_emit(batch, cmd, name) \135for (struct cmd name = { _blorp_cmd_header(cmd) }, \136*_dst = blorp_emit_dwords(batch, _blorp_cmd_length(cmd)); \137__builtin_expect(_dst != NULL, 1); \138_blorp_cmd_pack(cmd)(batch, (void *)_dst, &name), \139_dst = NULL)140141#define blorp_emitn(batch, cmd, n, ...) ({ \142uint32_t *_dw = blorp_emit_dwords(batch, n); \143if (_dw) { \144struct cmd template = { \145_blorp_cmd_header(cmd), \146.DWordLength = n - _blorp_cmd_length_bias(cmd), \147__VA_ARGS__ \148}; \149_blorp_cmd_pack(cmd)(batch, _dw, &template); \150} \151_dw ? _dw + 1 : NULL; /* Array starts at dw[1] */ \152})153154#define STRUCT_ZERO(S) ({ struct S t; memset(&t, 0, sizeof(t)); t; })155156#define blorp_emit_dynamic(batch, state, name, align, offset) \157for (struct state name = STRUCT_ZERO(state), \158*_dst = blorp_alloc_dynamic_state(batch, \159_blorp_cmd_length(state) * 4, \160align, offset); \161__builtin_expect(_dst != NULL, 1); \162_blorp_cmd_pack(state)(batch, (void *)_dst, &name), \163blorp_flush_range(batch, _dst, _blorp_cmd_length(state) * 4), \164_dst = NULL)165166/* 3DSTATE_URB167* 3DSTATE_URB_VS168* 3DSTATE_URB_HS169* 3DSTATE_URB_DS170* 3DSTATE_URB_GS171*172* Assign the entire URB to the VS. Even though the VS disabled, URB space173* is still needed because the clipper loads the VUE's from the URB. 
From174* the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,175* Dword 1.15:0 "VS Number of URB Entries":176* This field is always used (even if VS Function Enable is DISABLED).177*178* The warning below appears in the PRM (Section 3DSTATE_URB), but we can179* safely ignore it because this batch contains only one draw call.180* Because of URB corruption caused by allocating a previous GS unit181* URB entry to the VS unit, software is required to send a “GS NULL182* Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0)183* plus a dummy DRAW call before any case where VS will be taking over184* GS URB space.185*186* If the 3DSTATE_URB_VS is emitted, than the others must be also.187* From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS:188*189* 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be190* programmed in order for the programming of this state to be191* valid.192*/193static void194emit_urb_config(struct blorp_batch *batch,195const struct blorp_params *params,196UNUSED enum intel_urb_deref_block_size *deref_block_size)197{198/* Once vertex fetcher has written full VUE entries with complete199* header the space requirement is as follows per vertex (in bytes):200*201* Header Position Program constants202* +--------+------------+-------------------+203* | 16 | 16 | n x 16 |204* +--------+------------+-------------------+205*206* where 'n' stands for number of varying inputs expressed as vec4s.207*/208const unsigned num_varyings =209params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;210const unsigned total_needed = 16 + 16 + num_varyings * 16;211212/* The URB size is expressed in units of 64 bytes (512 bits) */213const unsigned vs_entry_size = DIV_ROUND_UP(total_needed, 64);214215ASSERTED const unsigned sf_entry_size =216params->sf_prog_data ? 
params->sf_prog_data->urb_entry_size : 0;217218#if GFX_VER >= 7219assert(sf_entry_size == 0);220const unsigned entry_size[4] = { vs_entry_size, 1, 1, 1 };221222unsigned entries[4], start[4];223bool constrained;224intel_get_urb_config(batch->blorp->compiler->devinfo,225blorp_get_l3_config(batch),226false, false, entry_size,227entries, start, deref_block_size, &constrained);228229#if GFX_VERx10 == 70230/* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:231*232* "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall233* needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,234* 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,235* 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL236* needs to be sent before any combination of VS associated 3DSTATE."237*/238blorp_emit(batch, GENX(PIPE_CONTROL), pc) {239pc.DepthStallEnable = true;240pc.PostSyncOperation = WriteImmediateData;241pc.Address = blorp_get_workaround_address(batch);242}243#endif244245for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {246blorp_emit(batch, GENX(3DSTATE_URB_VS), urb) {247urb._3DCommandSubOpcode += i;248urb.VSURBStartingAddress = start[i];249urb.VSURBEntryAllocationSize = entry_size[i] - 1;250urb.VSNumberofURBEntries = entries[i];251}252}253#else /* GFX_VER < 7 */254blorp_emit_urb_config(batch, vs_entry_size, sf_entry_size);255#endif256}257258#if GFX_VER >= 7259static void260blorp_emit_memcpy(struct blorp_batch *batch,261struct blorp_address dst,262struct blorp_address src,263uint32_t size);264#endif265266static void267blorp_emit_vertex_data(struct blorp_batch *batch,268const struct blorp_params *params,269struct blorp_address *addr,270uint32_t *size)271{272const float vertices[] = {273/* v0 */ (float)params->x1, (float)params->y1, params->z,274/* v1 */ (float)params->x0, (float)params->y1, params->z,275/* v2 */ (float)params->x0, (float)params->y0, params->z,276};277278void *data = blorp_alloc_vertex_buffer(batch, sizeof(vertices), addr);279memcpy(data, 
vertices, sizeof(vertices));280*size = sizeof(vertices);281blorp_flush_range(batch, data, *size);282}283284static void285blorp_emit_input_varying_data(struct blorp_batch *batch,286const struct blorp_params *params,287struct blorp_address *addr,288uint32_t *size)289{290const unsigned vec4_size_in_bytes = 4 * sizeof(float);291const unsigned max_num_varyings =292DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes);293const unsigned num_varyings =294params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;295296*size = 16 + num_varyings * vec4_size_in_bytes;297298const uint32_t *const inputs_src = (const uint32_t *)¶ms->wm_inputs;299void *data = blorp_alloc_vertex_buffer(batch, *size, addr);300uint32_t *inputs = data;301302/* Copy in the VS inputs */303assert(sizeof(params->vs_inputs) == 16);304memcpy(inputs, ¶ms->vs_inputs, sizeof(params->vs_inputs));305inputs += 4;306307if (params->wm_prog_data) {308/* Walk over the attribute slots, determine if the attribute is used by309* the program and when necessary copy the values from the input storage310* to the vertex data buffer.311*/312for (unsigned i = 0; i < max_num_varyings; i++) {313const gl_varying_slot attr = VARYING_SLOT_VAR0 + i;314315const int input_index = params->wm_prog_data->urb_setup[attr];316if (input_index < 0)317continue;318319memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes);320321inputs += 4;322}323}324325blorp_flush_range(batch, data, *size);326327if (params->dst_clear_color_as_input) {328#if GFX_VER >= 7329/* In this case, the clear color isn't known statically and instead330* comes in through an indirect which we have to copy into the vertex331* buffer before we execute the 3DPRIMITIVE. We already copied the332* value of params->wm_inputs.clear_color into the vertex buffer in the333* loop above. 
Now we emit code to stomp it from the GPU with the334* actual clear color value.335*/336assert(num_varyings == 1);337338/* The clear color is the first thing after the header */339struct blorp_address clear_color_input_addr = *addr;340clear_color_input_addr.offset += 16;341342const unsigned clear_color_size =343GFX_VER < 10 ? batch->blorp->isl_dev->ss.clear_value_size : 4 * 4;344blorp_emit_memcpy(batch, clear_color_input_addr,345params->dst.clear_color_addr,346clear_color_size);347#else348unreachable("MCS partial resolve is not a thing on SNB and earlier");349#endif350}351}352353static void354blorp_fill_vertex_buffer_state(struct GENX(VERTEX_BUFFER_STATE) *vb,355unsigned idx,356struct blorp_address addr, uint32_t size,357uint32_t stride)358{359vb[idx].VertexBufferIndex = idx;360vb[idx].BufferStartingAddress = addr;361vb[idx].BufferPitch = stride;362363#if GFX_VER >= 6364vb[idx].MOCS = addr.mocs;365#endif366367#if GFX_VER >= 7368vb[idx].AddressModifyEnable = true;369#endif370371#if GFX_VER >= 8372vb[idx].BufferSize = size;373#elif GFX_VER >= 5374vb[idx].BufferAccessType = stride > 0 ? VERTEXDATA : INSTANCEDATA;375vb[idx].EndAddress = vb[idx].BufferStartingAddress;376vb[idx].EndAddress.offset += size - 1;377#elif GFX_VER == 4378vb[idx].BufferAccessType = stride > 0 ? VERTEXDATA : INSTANCEDATA;379vb[idx].MaxIndex = stride > 0 ? 
size / stride : 0;380#endif381382#if GFX_VER >= 12383vb[idx].L3BypassDisable = true;384#endif385}386387static void388blorp_emit_vertex_buffers(struct blorp_batch *batch,389const struct blorp_params *params)390{391struct GENX(VERTEX_BUFFER_STATE) vb[3];392uint32_t num_vbs = 2;393memset(vb, 0, sizeof(vb));394395struct blorp_address addrs[2] = {};396uint32_t sizes[2];397blorp_emit_vertex_data(batch, params, &addrs[0], &sizes[0]);398blorp_fill_vertex_buffer_state(vb, 0, addrs[0], sizes[0],3993 * sizeof(float));400401blorp_emit_input_varying_data(batch, params, &addrs[1], &sizes[1]);402blorp_fill_vertex_buffer_state(vb, 1, addrs[1], sizes[1], 0);403404blorp_vf_invalidate_for_vb_48b_transitions(batch, addrs, sizes, num_vbs);405406const unsigned num_dwords = 1 + num_vbs * GENX(VERTEX_BUFFER_STATE_length);407uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords);408if (!dw)409return;410411for (unsigned i = 0; i < num_vbs; i++) {412GENX(VERTEX_BUFFER_STATE_pack)(batch, dw, &vb[i]);413dw += GENX(VERTEX_BUFFER_STATE_length);414}415}416417static void418blorp_emit_vertex_elements(struct blorp_batch *batch,419const struct blorp_params *params)420{421const unsigned num_varyings =422params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;423bool need_ndc = batch->blorp->compiler->devinfo->ver <= 5;424const unsigned num_elements = 2 + need_ndc + num_varyings;425426struct GENX(VERTEX_ELEMENT_STATE) ve[num_elements];427memset(ve, 0, num_elements * sizeof(*ve));428429/* Setup VBO for the rectangle primitive..430*431* A rectangle primitive (3DPRIM_RECTLIST) consists of only three432* vertices. The vertices reside in screen space with DirectX433* coordinates (that is, (0, 0) is the upper left corner).434*435* v2 ------ implied436* | |437* | |438* v1 ----- v0439*440* Since the VS is disabled, the clipper loads each VUE directly from441* the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and442* 3DSTATE_VERTEX_ELEMENTS packets below. 
The VUE contents are as follows:443* dw0: Reserved, MBZ.444* dw1: Render Target Array Index. Below vertex fetcher gets programmed445* to assign this with primitive instance identifier which will be446* used for layered clears. All other renders have only one instance447* and therefore the value will be effectively zero.448* dw2: Viewport Index. The HiZ op disables viewport mapping and449* scissoring, so set the dword to 0.450* dw3: Point Width: The HiZ op does not emit the POINTLIST primitive,451* so set the dword to 0.452* dw4: Vertex Position X.453* dw5: Vertex Position Y.454* dw6: Vertex Position Z.455* dw7: Vertex Position W.456*457* dw8: Flat vertex input 0458* dw9: Flat vertex input 1459* ...460* dwn: Flat vertex input n - 8461*462* For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1463* "Vertex URB Entry (VUE) Formats".464*465* Only vertex position X and Y are going to be variable, Z is fixed to466* zero and W to one. Header words dw0,2,3 are zero. There is no need to467* include the fixed values in the vertex buffer. Vertex fetcher can be468* instructed to fill vertex elements with constant values of one and zero469* instead of reading them from the buffer.470* Flat inputs are program constants that are not interpolated. Moreover471* their values will be the same between vertices.472*473* See the vertex element setup below.474*/475unsigned slot = 0;476477ve[slot] = (struct GENX(VERTEX_ELEMENT_STATE)) {478.VertexBufferIndex = 1,479.Valid = true,480.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,481.SourceElementOffset = 0,482.Component0Control = VFCOMP_STORE_SRC,483484/* From Gfx8 onwards hardware is no more instructed to overwrite485* components using an element specifier. 
Instead one has separate486* 3DSTATE_VF_SGVS (System Generated Value Setup) state packet for it.487*/488#if GFX_VER >= 8489.Component1Control = VFCOMP_STORE_0,490#elif GFX_VER >= 5491.Component1Control = VFCOMP_STORE_IID,492#else493.Component1Control = VFCOMP_STORE_0,494#endif495.Component2Control = VFCOMP_STORE_0,496.Component3Control = VFCOMP_STORE_0,497#if GFX_VER <= 5498.DestinationElementOffset = slot * 4,499#endif500};501slot++;502503#if GFX_VER <= 5504/* On Iron Lake and earlier, a native device coordinates version of the505* position goes right after the normal VUE header and before position.506* Since w == 1 for all of our coordinates, this is just a copy of the507* position.508*/509ve[slot] = (struct GENX(VERTEX_ELEMENT_STATE)) {510.VertexBufferIndex = 0,511.Valid = true,512.SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT,513.SourceElementOffset = 0,514.Component0Control = VFCOMP_STORE_SRC,515.Component1Control = VFCOMP_STORE_SRC,516.Component2Control = VFCOMP_STORE_SRC,517.Component3Control = VFCOMP_STORE_1_FP,518.DestinationElementOffset = slot * 4,519};520slot++;521#endif522523ve[slot] = (struct GENX(VERTEX_ELEMENT_STATE)) {524.VertexBufferIndex = 0,525.Valid = true,526.SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT,527.SourceElementOffset = 0,528.Component0Control = VFCOMP_STORE_SRC,529.Component1Control = VFCOMP_STORE_SRC,530.Component2Control = VFCOMP_STORE_SRC,531.Component3Control = VFCOMP_STORE_1_FP,532#if GFX_VER <= 5533.DestinationElementOffset = slot * 4,534#endif535};536slot++;537538for (unsigned i = 0; i < num_varyings; ++i) {539ve[slot] = (struct GENX(VERTEX_ELEMENT_STATE)) {540.VertexBufferIndex = 1,541.Valid = true,542.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,543.SourceElementOffset = 16 + i * 4 * sizeof(float),544.Component0Control = VFCOMP_STORE_SRC,545.Component1Control = VFCOMP_STORE_SRC,546.Component2Control = VFCOMP_STORE_SRC,547.Component3Control = VFCOMP_STORE_SRC,548#if GFX_VER <= 5549.DestinationElementOffset = 
slot * 4,550#endif551};552slot++;553}554555const unsigned num_dwords =5561 + GENX(VERTEX_ELEMENT_STATE_length) * num_elements;557uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_ELEMENTS), num_dwords);558if (!dw)559return;560561for (unsigned i = 0; i < num_elements; i++) {562GENX(VERTEX_ELEMENT_STATE_pack)(batch, dw, &ve[i]);563dw += GENX(VERTEX_ELEMENT_STATE_length);564}565566blorp_emit(batch, GENX(3DSTATE_VF_STATISTICS), vf) {567vf.StatisticsEnable = false;568}569570#if GFX_VER >= 8571/* Overwrite Render Target Array Index (2nd dword) in the VUE header with572* primitive instance identifier. This is used for layered clears.573*/574blorp_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs) {575sgvs.InstanceIDEnable = true;576sgvs.InstanceIDComponentNumber = COMP_1;577sgvs.InstanceIDElementOffset = 0;578}579580for (unsigned i = 0; i < num_elements; i++) {581blorp_emit(batch, GENX(3DSTATE_VF_INSTANCING), vf) {582vf.VertexElementIndex = i;583vf.InstancingEnable = false;584}585}586587blorp_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {588topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;589}590#endif591}592593/* 3DSTATE_VIEWPORT_STATE_POINTERS */594static uint32_t595blorp_emit_cc_viewport(struct blorp_batch *batch)596{597uint32_t cc_vp_offset;598blorp_emit_dynamic(batch, GENX(CC_VIEWPORT), vp, 32, &cc_vp_offset) {599vp.MinimumDepth = 0.0;600vp.MaximumDepth = 1.0;601}602603#if GFX_VER >= 7604blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) {605vsp.CCViewportPointer = cc_vp_offset;606}607#elif GFX_VER == 6608blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) {609vsp.CCViewportStateChange = true;610vsp.PointertoCC_VIEWPORT = cc_vp_offset;611}612#endif613614return cc_vp_offset;615}616617static uint32_t618blorp_emit_sampler_state(struct blorp_batch *batch)619{620uint32_t offset;621blorp_emit_dynamic(batch, GENX(SAMPLER_STATE), sampler, 32, &offset) {622sampler.MipModeFilter = MIPFILTER_NONE;623sampler.MagModeFilter = 
MAPFILTER_LINEAR;624sampler.MinModeFilter = MAPFILTER_LINEAR;625sampler.MinLOD = 0;626sampler.MaxLOD = 0;627sampler.TCXAddressControlMode = TCM_CLAMP;628sampler.TCYAddressControlMode = TCM_CLAMP;629sampler.TCZAddressControlMode = TCM_CLAMP;630sampler.MaximumAnisotropy = RATIO21;631sampler.RAddressMinFilterRoundingEnable = true;632sampler.RAddressMagFilterRoundingEnable = true;633sampler.VAddressMinFilterRoundingEnable = true;634sampler.VAddressMagFilterRoundingEnable = true;635sampler.UAddressMinFilterRoundingEnable = true;636sampler.UAddressMagFilterRoundingEnable = true;637#if GFX_VER > 6638sampler.NonnormalizedCoordinateEnable = true;639#endif640}641642#if GFX_VER >= 7643blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) {644ssp.PointertoPSSamplerState = offset;645}646#elif GFX_VER == 6647blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) {648ssp.VSSamplerStateChange = true;649ssp.GSSamplerStateChange = true;650ssp.PSSamplerStateChange = true;651ssp.PointertoPSSamplerState = offset;652}653#endif654655return offset;656}657658/* What follows is the code for setting up a "pipeline" on Sandy Bridge and659* later hardware. 
This file will be included by i965 for gfx4-5 as well, so660* this code is guarded by GFX_VER >= 6.661*/662#if GFX_VER >= 6663664static void665blorp_emit_vs_config(struct blorp_batch *batch,666const struct blorp_params *params)667{668struct brw_vs_prog_data *vs_prog_data = params->vs_prog_data;669assert(!vs_prog_data || GFX_VER < 11 ||670vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8);671672blorp_emit(batch, GENX(3DSTATE_VS), vs) {673if (vs_prog_data) {674vs.Enable = true;675676vs.KernelStartPointer = params->vs_prog_kernel;677678vs.DispatchGRFStartRegisterForURBData =679vs_prog_data->base.base.dispatch_grf_start_reg;680vs.VertexURBEntryReadLength =681vs_prog_data->base.urb_read_length;682vs.VertexURBEntryReadOffset = 0;683684vs.MaximumNumberofThreads =685batch->blorp->isl_dev->info->max_vs_threads - 1;686687#if GFX_VER >= 8688vs.SIMD8DispatchEnable =689vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8;690#endif691}692}693}694695static void696blorp_emit_sf_config(struct blorp_batch *batch,697const struct blorp_params *params,698UNUSED enum intel_urb_deref_block_size urb_deref_block_size)699{700const struct brw_wm_prog_data *prog_data = params->wm_prog_data;701702/* 3DSTATE_SF703*704* Disable ViewportTransformEnable (dw2.1)705*706* From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D707* Primitives Overview":708* RECTLIST: Viewport Mapping must be DISABLED (as is typical with the709* use of screen- space coordinates).710*711* A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)712* and BackFaceFillMode (dw2.5:6) to SOLID(0).713*714* From the Sandy Bridge PRM, Volume 2, Part 1, Section715* 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:716* SOLID: Any triangle or rectangle object found to be front-facing717* is rendered as a solid object. 
This setting is required when718* (rendering rectangle (RECTLIST) objects.719*/720721#if GFX_VER >= 8722723blorp_emit(batch, GENX(3DSTATE_SF), sf) {724#if GFX_VER >= 12725sf.DerefBlockSize = urb_deref_block_size;726#endif727}728729blorp_emit(batch, GENX(3DSTATE_RASTER), raster) {730raster.CullMode = CULLMODE_NONE;731}732733blorp_emit(batch, GENX(3DSTATE_SBE), sbe) {734sbe.VertexURBEntryReadOffset = 1;735if (prog_data) {736sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;737sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);738sbe.ConstantInterpolationEnable = prog_data->flat_inputs;739} else {740sbe.NumberofSFOutputAttributes = 0;741sbe.VertexURBEntryReadLength = 1;742}743sbe.ForceVertexURBEntryReadLength = true;744sbe.ForceVertexURBEntryReadOffset = true;745746#if GFX_VER >= 9747for (unsigned i = 0; i < 32; i++)748sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;749#endif750}751752#elif GFX_VER >= 7753754blorp_emit(batch, GENX(3DSTATE_SF), sf) {755sf.FrontFaceFillMode = FILL_MODE_SOLID;756sf.BackFaceFillMode = FILL_MODE_SOLID;757758sf.MultisampleRasterizationMode = params->num_samples > 1 ?759MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;760761#if GFX_VER == 7762sf.DepthBufferSurfaceFormat = params->depth_format;763#endif764}765766blorp_emit(batch, GENX(3DSTATE_SBE), sbe) {767sbe.VertexURBEntryReadOffset = 1;768if (prog_data) {769sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;770sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);771sbe.ConstantInterpolationEnable = prog_data->flat_inputs;772} else {773sbe.NumberofSFOutputAttributes = 0;774sbe.VertexURBEntryReadLength = 1;775}776}777778#else /* GFX_VER <= 6 */779780blorp_emit(batch, GENX(3DSTATE_SF), sf) {781sf.FrontFaceFillMode = FILL_MODE_SOLID;782sf.BackFaceFillMode = FILL_MODE_SOLID;783784sf.MultisampleRasterizationMode = params->num_samples > 1 ?785MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;786787sf.VertexURBEntryReadOffset = 1;788if (prog_data) 
{789sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs;790sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);791sf.ConstantInterpolationEnable = prog_data->flat_inputs;792} else {793sf.NumberofSFOutputAttributes = 0;794sf.VertexURBEntryReadLength = 1;795}796}797798#endif /* GFX_VER */799}800801static void802blorp_emit_ps_config(struct blorp_batch *batch,803const struct blorp_params *params)804{805const struct brw_wm_prog_data *prog_data = params->wm_prog_data;806807/* Even when thread dispatch is disabled, max threads (dw5.25:31) must be808* nonzero to prevent the GPU from hanging. While the documentation doesn't809* mention this explicitly, it notes that the valid range for the field is810* [1,39] = [2,40] threads, which excludes zero.811*812* To be safe (and to minimize extraneous code) we go ahead and fully813* configure the WM state whether or not there is a WM program.814*/815816#if GFX_VER >= 8817818blorp_emit(batch, GENX(3DSTATE_WM), wm);819820blorp_emit(batch, GENX(3DSTATE_PS), ps) {821if (params->src.enabled) {822ps.SamplerCount = 1; /* Up to 4 samplers */823ps.BindingTableEntryCount = 2;824} else {825ps.BindingTableEntryCount = 1;826}827828/* SAMPLER_STATE prefetching is broken on Gfx11 - Wa_1606682166 */829if (GFX_VER == 11)830ps.SamplerCount = 0;831832if (prog_data) {833ps._8PixelDispatchEnable = prog_data->dispatch_8;834ps._16PixelDispatchEnable = prog_data->dispatch_16;835ps._32PixelDispatchEnable = prog_data->dispatch_32;836837/* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable:838*839* "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32840* Dispatch must not be enabled for PER_PIXEL dispatch mode."841*842* Since 16x MSAA is first introduced on SKL, we don't need to apply843* the workaround on any older hardware.844*/845if (GFX_VER >= 9 && !prog_data->persample_dispatch &&846params->num_samples == 16) {847assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable);848ps._32PixelDispatchEnable = 
false;849}850851ps.DispatchGRFStartRegisterForConstantSetupData0 =852brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);853ps.DispatchGRFStartRegisterForConstantSetupData1 =854brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);855ps.DispatchGRFStartRegisterForConstantSetupData2 =856brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);857858ps.KernelStartPointer0 = params->wm_prog_kernel +859brw_wm_prog_data_prog_offset(prog_data, ps, 0);860ps.KernelStartPointer1 = params->wm_prog_kernel +861brw_wm_prog_data_prog_offset(prog_data, ps, 1);862ps.KernelStartPointer2 = params->wm_prog_kernel +863brw_wm_prog_data_prog_offset(prog_data, ps, 2);864}865866/* 3DSTATE_PS expects the number of threads per PSD, which is always 64867* for pre Gfx11 and 128 for gfx11+; On gfx11+ If a programmed value is868* k, it implies 2(k+1) threads. It implicitly scales for different GT869* levels (which have some # of PSDs).870*871* In Gfx8 the format is U8-2 whereas in Gfx9+ it is U9-1.872*/873if (GFX_VER >= 9)874ps.MaximumNumberofThreadsPerPSD = 64 - 1;875else876ps.MaximumNumberofThreadsPerPSD = 64 - 2;877878switch (params->fast_clear_op) {879case ISL_AUX_OP_NONE:880break;881#if GFX_VER >= 10882case ISL_AUX_OP_AMBIGUATE:883ps.RenderTargetFastClearEnable = true;884ps.RenderTargetResolveType = FAST_CLEAR_0;885break;886#endif887#if GFX_VER >= 9888case ISL_AUX_OP_PARTIAL_RESOLVE:889ps.RenderTargetResolveType = RESOLVE_PARTIAL;890break;891case ISL_AUX_OP_FULL_RESOLVE:892ps.RenderTargetResolveType = RESOLVE_FULL;893break;894#else895case ISL_AUX_OP_FULL_RESOLVE:896ps.RenderTargetResolveEnable = true;897break;898#endif899case ISL_AUX_OP_FAST_CLEAR:900ps.RenderTargetFastClearEnable = true;901break;902default:903unreachable("Invalid fast clear op");904}905}906907blorp_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) {908if (prog_data) {909psx.PixelShaderValid = true;910psx.AttributeEnable = prog_data->num_varying_inputs > 0;911psx.PixelShaderIsPerSample = 
prog_data->persample_dispatch;912psx.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;913#if GFX_VER >= 9914psx.PixelShaderComputesStencil = prog_data->computed_stencil;915#endif916}917918if (params->src.enabled)919psx.PixelShaderKillsPixel = true;920}921922#elif GFX_VER >= 7923924blorp_emit(batch, GENX(3DSTATE_WM), wm) {925switch (params->hiz_op) {926case ISL_AUX_OP_FAST_CLEAR:927wm.DepthBufferClear = true;928break;929case ISL_AUX_OP_FULL_RESOLVE:930wm.DepthBufferResolveEnable = true;931break;932case ISL_AUX_OP_AMBIGUATE:933wm.HierarchicalDepthBufferResolveEnable = true;934break;935case ISL_AUX_OP_NONE:936break;937default:938unreachable("not reached");939}940941if (prog_data) {942wm.ThreadDispatchEnable = true;943wm.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;944}945946if (params->src.enabled)947wm.PixelShaderKillsPixel = true;948949if (params->num_samples > 1) {950wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;951wm.MultisampleDispatchMode =952(prog_data && prog_data->persample_dispatch) ?953MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;954} else {955wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;956wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;957}958}959960blorp_emit(batch, GENX(3DSTATE_PS), ps) {961ps.MaximumNumberofThreads =962batch->blorp->isl_dev->info->max_wm_threads - 1;963964#if GFX_VERx10 == 75965ps.SampleMask = 1;966#endif967968if (prog_data) {969ps._8PixelDispatchEnable = prog_data->dispatch_8;970ps._16PixelDispatchEnable = prog_data->dispatch_16;971ps._32PixelDispatchEnable = prog_data->dispatch_32;972973ps.DispatchGRFStartRegisterForConstantSetupData0 =974brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);975ps.DispatchGRFStartRegisterForConstantSetupData1 =976brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);977ps.DispatchGRFStartRegisterForConstantSetupData2 =978brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);979980ps.KernelStartPointer0 = params->wm_prog_kernel 
+981brw_wm_prog_data_prog_offset(prog_data, ps, 0);982ps.KernelStartPointer1 = params->wm_prog_kernel +983brw_wm_prog_data_prog_offset(prog_data, ps, 1);984ps.KernelStartPointer2 = params->wm_prog_kernel +985brw_wm_prog_data_prog_offset(prog_data, ps, 2);986987ps.AttributeEnable = prog_data->num_varying_inputs > 0;988} else {989/* Gfx7 hardware gets angry if we don't enable at least one dispatch990* mode, so just enable 16-pixel dispatch if we don't have a program.991*/992ps._16PixelDispatchEnable = true;993}994995if (params->src.enabled)996ps.SamplerCount = 1; /* Up to 4 samplers */997998switch (params->fast_clear_op) {999case ISL_AUX_OP_NONE:1000break;1001case ISL_AUX_OP_FULL_RESOLVE:1002ps.RenderTargetResolveEnable = true;1003break;1004case ISL_AUX_OP_FAST_CLEAR:1005ps.RenderTargetFastClearEnable = true;1006break;1007default:1008unreachable("Invalid fast clear op");1009}1010}10111012#else /* GFX_VER <= 6 */10131014blorp_emit(batch, GENX(3DSTATE_WM), wm) {1015wm.MaximumNumberofThreads =1016batch->blorp->isl_dev->info->max_wm_threads - 1;10171018switch (params->hiz_op) {1019case ISL_AUX_OP_FAST_CLEAR:1020wm.DepthBufferClear = true;1021break;1022case ISL_AUX_OP_FULL_RESOLVE:1023wm.DepthBufferResolveEnable = true;1024break;1025case ISL_AUX_OP_AMBIGUATE:1026wm.HierarchicalDepthBufferResolveEnable = true;1027break;1028case ISL_AUX_OP_NONE:1029break;1030default:1031unreachable("not reached");1032}10331034if (prog_data) {1035wm.ThreadDispatchEnable = true;10361037wm._8PixelDispatchEnable = prog_data->dispatch_8;1038wm._16PixelDispatchEnable = prog_data->dispatch_16;1039wm._32PixelDispatchEnable = prog_data->dispatch_32;10401041wm.DispatchGRFStartRegisterForConstantSetupData0 =1042brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm, 0);1043wm.DispatchGRFStartRegisterForConstantSetupData1 =1044brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm, 1);1045wm.DispatchGRFStartRegisterForConstantSetupData2 =1046brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm, 
2);10471048wm.KernelStartPointer0 = params->wm_prog_kernel +1049brw_wm_prog_data_prog_offset(prog_data, wm, 0);1050wm.KernelStartPointer1 = params->wm_prog_kernel +1051brw_wm_prog_data_prog_offset(prog_data, wm, 1);1052wm.KernelStartPointer2 = params->wm_prog_kernel +1053brw_wm_prog_data_prog_offset(prog_data, wm, 2);10541055wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs;1056}10571058if (params->src.enabled) {1059wm.SamplerCount = 1; /* Up to 4 samplers */1060wm.PixelShaderKillsPixel = true; /* TODO: temporarily smash on */1061}10621063if (params->num_samples > 1) {1064wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;1065wm.MultisampleDispatchMode =1066(prog_data && prog_data->persample_dispatch) ?1067MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;1068} else {1069wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;1070wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;1071}1072}10731074#endif /* GFX_VER */1075}10761077static uint32_t1078blorp_emit_blend_state(struct blorp_batch *batch,1079const struct blorp_params *params)1080{1081struct GENX(BLEND_STATE) blend = { };10821083uint32_t offset;1084int size = GENX(BLEND_STATE_length) * 4;1085size += GENX(BLEND_STATE_ENTRY_length) * 4 * params->num_draw_buffers;1086uint32_t *state = blorp_alloc_dynamic_state(batch, size, 64, &offset);1087uint32_t *pos = state;10881089GENX(BLEND_STATE_pack)(NULL, pos, &blend);1090pos += GENX(BLEND_STATE_length);10911092for (unsigned i = 0; i < params->num_draw_buffers; ++i) {1093struct GENX(BLEND_STATE_ENTRY) entry = {1094.PreBlendColorClampEnable = true,1095.PostBlendColorClampEnable = true,1096.ColorClampRange = COLORCLAMP_RTFORMAT,10971098.WriteDisableRed = params->color_write_disable[0],1099.WriteDisableGreen = params->color_write_disable[1],1100.WriteDisableBlue = params->color_write_disable[2],1101.WriteDisableAlpha = params->color_write_disable[3],1102};1103GENX(BLEND_STATE_ENTRY_pack)(NULL, pos, &entry);1104pos += 
GENX(BLEND_STATE_ENTRY_length);1105}11061107blorp_flush_range(batch, state, size);11081109#if GFX_VER >= 71110blorp_emit(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) {1111sp.BlendStatePointer = offset;1112#if GFX_VER >= 81113sp.BlendStatePointerValid = true;1114#endif1115}1116#endif11171118#if GFX_VER >= 81119blorp_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {1120ps_blend.HasWriteableRT = true;1121}1122#endif11231124return offset;1125}11261127static uint32_t1128blorp_emit_color_calc_state(struct blorp_batch *batch,1129UNUSED const struct blorp_params *params)1130{1131uint32_t offset;1132blorp_emit_dynamic(batch, GENX(COLOR_CALC_STATE), cc, 64, &offset) {1133#if GFX_VER <= 81134cc.StencilReferenceValue = params->stencil_ref;1135#endif1136}11371138#if GFX_VER >= 71139blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), sp) {1140sp.ColorCalcStatePointer = offset;1141#if GFX_VER >= 81142sp.ColorCalcStatePointerValid = true;1143#endif1144}1145#endif11461147return offset;1148}11491150static uint32_t1151blorp_emit_depth_stencil_state(struct blorp_batch *batch,1152const struct blorp_params *params)1153{1154#if GFX_VER >= 81155struct GENX(3DSTATE_WM_DEPTH_STENCIL) ds = {1156GENX(3DSTATE_WM_DEPTH_STENCIL_header),1157};1158#else1159struct GENX(DEPTH_STENCIL_STATE) ds = { 0 };1160#endif11611162if (params->depth.enabled) {1163ds.DepthBufferWriteEnable = true;11641165switch (params->hiz_op) {1166/* See the following sections of the Sandy Bridge PRM, Volume 2, Part1:1167* - 7.5.3.1 Depth Buffer Clear1168* - 7.5.3.2 Depth Buffer Resolve1169* - 7.5.3.3 Hierarchical Depth Buffer Resolve1170*/1171case ISL_AUX_OP_FULL_RESOLVE:1172ds.DepthTestEnable = true;1173ds.DepthTestFunction = COMPAREFUNCTION_NEVER;1174break;11751176case ISL_AUX_OP_NONE:1177case ISL_AUX_OP_FAST_CLEAR:1178case ISL_AUX_OP_AMBIGUATE:1179ds.DepthTestEnable = false;1180break;1181case ISL_AUX_OP_PARTIAL_RESOLVE:1182unreachable("Invalid HIZ op");1183}1184}11851186if (params->stencil.enabled) 
{1187ds.StencilBufferWriteEnable = true;1188ds.StencilTestEnable = true;1189ds.DoubleSidedStencilEnable = false;11901191ds.StencilTestFunction = COMPAREFUNCTION_ALWAYS;1192ds.StencilPassDepthPassOp = STENCILOP_REPLACE;11931194ds.StencilWriteMask = params->stencil_mask;1195#if GFX_VER >= 91196ds.StencilReferenceValue = params->stencil_ref;1197#endif1198}11991200#if GFX_VER >= 81201uint32_t offset = 0;1202uint32_t *dw = blorp_emit_dwords(batch,1203GENX(3DSTATE_WM_DEPTH_STENCIL_length));1204if (!dw)1205return 0;12061207GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dw, &ds);1208#else1209uint32_t offset;1210void *state = blorp_alloc_dynamic_state(batch,1211GENX(DEPTH_STENCIL_STATE_length) * 4,121264, &offset);1213GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds);1214blorp_flush_range(batch, state, GENX(DEPTH_STENCIL_STATE_length) * 4);1215#endif12161217#if GFX_VER == 71218blorp_emit(batch, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), sp) {1219sp.PointertoDEPTH_STENCIL_STATE = offset;1220}1221#endif12221223return offset;1224}12251226static void1227blorp_emit_3dstate_multisample(struct blorp_batch *batch,1228const struct blorp_params *params)1229{1230blorp_emit(batch, GENX(3DSTATE_MULTISAMPLE), ms) {1231ms.NumberofMultisamples = __builtin_ffs(params->num_samples) - 1;12321233#if GFX_VER >= 81234/* The PRM says that this bit is valid only for DX9:1235*1236* SW can choose to set this bit only for DX9 API. 
DX10/OGL API's1237* should not have any effect by setting or not setting this bit.1238*/1239ms.PixelPositionOffsetEnable = false;1240#elif GFX_VER >= 712411242switch (params->num_samples) {1243case 1:1244INTEL_SAMPLE_POS_1X(ms.Sample);1245break;1246case 2:1247INTEL_SAMPLE_POS_2X(ms.Sample);1248break;1249case 4:1250INTEL_SAMPLE_POS_4X(ms.Sample);1251break;1252case 8:1253INTEL_SAMPLE_POS_8X(ms.Sample);1254break;1255default:1256break;1257}1258#else1259INTEL_SAMPLE_POS_4X(ms.Sample);1260#endif1261ms.PixelLocation = CENTER;1262}1263}12641265static void1266blorp_emit_pipeline(struct blorp_batch *batch,1267const struct blorp_params *params)1268{1269uint32_t blend_state_offset = 0;1270uint32_t color_calc_state_offset;1271uint32_t depth_stencil_state_offset;12721273enum intel_urb_deref_block_size urb_deref_block_size;1274emit_urb_config(batch, params, &urb_deref_block_size);12751276if (params->wm_prog_data) {1277blend_state_offset = blorp_emit_blend_state(batch, params);1278}1279color_calc_state_offset = blorp_emit_color_calc_state(batch, params);1280depth_stencil_state_offset = blorp_emit_depth_stencil_state(batch, params);12811282#if GFX_VER == 61283/* 3DSTATE_CC_STATE_POINTERS1284*1285* The pointer offsets are relative to1286* CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.1287*1288* The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.1289*1290* The dynamic state emit helpers emit their own STATE_POINTERS packets on1291* gfx7+. 
However, on gfx6 and earlier, they're all lumpped together in1292* one CC_STATE_POINTERS packet so we have to emit that here.1293*/1294blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), cc) {1295cc.BLEND_STATEChange = true;1296cc.ColorCalcStatePointerValid = true;1297cc.DEPTH_STENCIL_STATEChange = true;1298cc.PointertoBLEND_STATE = blend_state_offset;1299cc.ColorCalcStatePointer = color_calc_state_offset;1300cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset;1301}1302#else1303(void)blend_state_offset;1304(void)color_calc_state_offset;1305(void)depth_stencil_state_offset;1306#endif13071308#if GFX_VER >= 121309blorp_emit(batch, GENX(3DSTATE_CONSTANT_ALL), pc) {1310/* Update empty push constants for all stages (bitmask = 11111b) */1311pc.ShaderUpdateEnable = 0x1f;1312}1313#else1314blorp_emit(batch, GENX(3DSTATE_CONSTANT_VS), vs);1315#if GFX_VER >= 71316blorp_emit(batch, GENX(3DSTATE_CONSTANT_HS), hs);1317blorp_emit(batch, GENX(3DSTATE_CONSTANT_DS), DS);1318#endif1319blorp_emit(batch, GENX(3DSTATE_CONSTANT_GS), gs);1320blorp_emit(batch, GENX(3DSTATE_CONSTANT_PS), ps);1321#endif13221323if (params->src.enabled)1324blorp_emit_sampler_state(batch);13251326blorp_emit_3dstate_multisample(batch, params);13271328blorp_emit(batch, GENX(3DSTATE_SAMPLE_MASK), mask) {1329mask.SampleMask = (1 << params->num_samples) - 1;1330}13311332/* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,1333* 3DSTATE_VS, Dword 5.0 "VS Function Enable":1334*1335* [DevSNB] A pipeline flush must be programmed prior to a1336* 3DSTATE_VS command that causes the VS Function Enable to1337* toggle. 
Pipeline flush can be executed by sending a PIPE_CONTROL1338* command with CS stall bit set and a post sync operation.1339*1340* We've already done one at the start of the BLORP operation.1341*/1342blorp_emit_vs_config(batch, params);1343#if GFX_VER >= 71344blorp_emit(batch, GENX(3DSTATE_HS), hs);1345blorp_emit(batch, GENX(3DSTATE_TE), te);1346blorp_emit(batch, GENX(3DSTATE_DS), DS);1347blorp_emit(batch, GENX(3DSTATE_STREAMOUT), so);1348#endif1349blorp_emit(batch, GENX(3DSTATE_GS), gs);13501351blorp_emit(batch, GENX(3DSTATE_CLIP), clip) {1352clip.PerspectiveDivideDisable = true;1353}13541355blorp_emit_sf_config(batch, params, urb_deref_block_size);1356blorp_emit_ps_config(batch, params);13571358blorp_emit_cc_viewport(batch);13591360#if GFX_VER >= 121361/* Disable Primitive Replication. */1362blorp_emit(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);1363#endif1364}13651366/******** This is the end of the pipeline setup code ********/13671368#endif /* GFX_VER >= 6 */13691370#if GFX_VER >= 71371static void1372blorp_emit_memcpy(struct blorp_batch *batch,1373struct blorp_address dst,1374struct blorp_address src,1375uint32_t size)1376{1377assert(size % 4 == 0);13781379for (unsigned dw = 0; dw < size; dw += 4) {1380#if GFX_VER >= 81381blorp_emit(batch, GENX(MI_COPY_MEM_MEM), cp) {1382cp.DestinationMemoryAddress = dst;1383cp.SourceMemoryAddress = src;1384}1385#else1386/* IVB does not have a general purpose register for command streamer1387* commands. 
Therefore, we use an alternate temporary register.1388*/1389#define BLORP_TEMP_REG 0x2440 /* GFX7_3DPRIM_BASE_VERTEX */1390blorp_emit(batch, GENX(MI_LOAD_REGISTER_MEM), load) {1391load.RegisterAddress = BLORP_TEMP_REG;1392load.MemoryAddress = src;1393}1394blorp_emit(batch, GENX(MI_STORE_REGISTER_MEM), store) {1395store.RegisterAddress = BLORP_TEMP_REG;1396store.MemoryAddress = dst;1397}1398#undef BLORP_TEMP_REG1399#endif1400dst.offset += 4;1401src.offset += 4;1402}1403}1404#endif14051406static void1407blorp_emit_surface_state(struct blorp_batch *batch,1408const struct brw_blorp_surface_info *surface,1409UNUSED enum isl_aux_op aux_op,1410void *state, uint32_t state_offset,1411const bool color_write_disables[4],1412bool is_render_target)1413{1414const struct isl_device *isl_dev = batch->blorp->isl_dev;1415struct isl_surf surf = surface->surf;14161417if (surf.dim == ISL_SURF_DIM_1D &&1418surf.dim_layout == ISL_DIM_LAYOUT_GFX4_2D) {1419assert(surf.logical_level0_px.height == 1);1420surf.dim = ISL_SURF_DIM_2D;1421}14221423if (isl_aux_usage_has_hiz(surface->aux_usage)) {1424/* BLORP doesn't render with depth so we can't use HiZ */1425assert(!is_render_target);1426/* We can't reinterpret HiZ */1427assert(surface->surf.format == surface->view.format);1428}14291430enum isl_aux_usage aux_usage = surface->aux_usage;14311432/* On gfx12, implicit CCS has no aux buffer */1433bool use_aux_address = (aux_usage != ISL_AUX_USAGE_NONE) &&1434(surface->aux_addr.buffer != NULL);14351436isl_channel_mask_t write_disable_mask = 0;1437if (is_render_target && GFX_VER <= 5) {1438if (color_write_disables[0])1439write_disable_mask |= ISL_CHANNEL_RED_BIT;1440if (color_write_disables[1])1441write_disable_mask |= ISL_CHANNEL_GREEN_BIT;1442if (color_write_disables[2])1443write_disable_mask |= ISL_CHANNEL_BLUE_BIT;1444if (color_write_disables[3])1445write_disable_mask |= ISL_CHANNEL_ALPHA_BIT;1446}14471448const bool use_clear_address =1449GFX_VER >= 10 && (surface->clear_color_addr.buffer != 
NULL);14501451isl_surf_fill_state(batch->blorp->isl_dev, state,1452.surf = &surf, .view = &surface->view,1453.aux_surf = &surface->aux_surf, .aux_usage = aux_usage,1454.address =1455blorp_get_surface_address(batch, surface->addr),1456.aux_address = !use_aux_address ? 0 :1457blorp_get_surface_address(batch, surface->aux_addr),1458.clear_address = !use_clear_address ? 0 :1459blorp_get_surface_address(batch,1460surface->clear_color_addr),1461.mocs = surface->addr.mocs,1462.clear_color = surface->clear_color,1463.use_clear_address = use_clear_address,1464.write_disables = write_disable_mask);14651466blorp_surface_reloc(batch, state_offset + isl_dev->ss.addr_offset,1467surface->addr, 0);14681469if (use_aux_address) {1470/* On gfx7 and prior, the bottom 12 bits of the MCS base address are1471* used to store other information. This should be ok, however, because1472* surface buffer addresses are always 4K page alinged.1473*/1474assert((surface->aux_addr.offset & 0xfff) == 0);1475uint32_t *aux_addr = state + isl_dev->ss.aux_addr_offset;1476blorp_surface_reloc(batch, state_offset + isl_dev->ss.aux_addr_offset,1477surface->aux_addr, *aux_addr);1478}14791480if (aux_usage != ISL_AUX_USAGE_NONE && surface->clear_color_addr.buffer) {1481#if GFX_VER >= 101482assert((surface->clear_color_addr.offset & 0x3f) == 0);1483uint32_t *clear_addr = state + isl_dev->ss.clear_color_state_offset;1484blorp_surface_reloc(batch, state_offset +1485isl_dev->ss.clear_color_state_offset,1486surface->clear_color_addr, *clear_addr);1487#elif GFX_VER >= 71488/* Fast clears just whack the AUX surface and don't actually use the1489* clear color for anything. 
We can avoid the MI memcpy on that case.1490*/1491if (aux_op != ISL_AUX_OP_FAST_CLEAR) {1492struct blorp_address dst_addr = blorp_get_surface_base_address(batch);1493dst_addr.offset += state_offset + isl_dev->ss.clear_value_offset;1494blorp_emit_memcpy(batch, dst_addr, surface->clear_color_addr,1495isl_dev->ss.clear_value_size);1496}1497#else1498unreachable("Fast clears are only supported on gfx7+");1499#endif1500}15011502blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * 4);1503}15041505static void1506blorp_emit_null_surface_state(struct blorp_batch *batch,1507const struct brw_blorp_surface_info *surface,1508uint32_t *state)1509{1510struct GENX(RENDER_SURFACE_STATE) ss = {1511.SurfaceType = SURFTYPE_NULL,1512.SurfaceFormat = ISL_FORMAT_R8G8B8A8_UNORM,1513.Width = surface->surf.logical_level0_px.width - 1,1514.Height = surface->surf.logical_level0_px.height - 1,1515.MIPCountLOD = surface->view.base_level,1516.MinimumArrayElement = surface->view.base_array_layer,1517.Depth = surface->view.array_len - 1,1518.RenderTargetViewExtent = surface->view.array_len - 1,1519#if GFX_VER >= 61520.NumberofMultisamples = ffs(surface->surf.samples) - 1,1521#endif15221523#if GFX_VER >= 71524.SurfaceArray = surface->surf.dim != ISL_SURF_DIM_3D,1525#endif15261527#if GFX_VER >= 81528.TileMode = YMAJOR,1529#else1530.TiledSurface = true,1531#endif1532};15331534GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &ss);15351536blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * 4);1537}15381539static void1540blorp_emit_surface_states(struct blorp_batch *batch,1541const struct blorp_params *params)1542{1543const struct isl_device *isl_dev = batch->blorp->isl_dev;1544uint32_t bind_offset = 0, surface_offsets[2];1545void *surface_maps[2];15461547UNUSED bool has_indirect_clear_color = false;1548if (params->use_pre_baked_binding_table) {1549bind_offset = params->pre_baked_binding_table_offset;1550} else {1551unsigned num_surfaces = 1 + 
params->src.enabled;1552blorp_alloc_binding_table(batch, num_surfaces,1553isl_dev->ss.size, isl_dev->ss.align,1554&bind_offset, surface_offsets, surface_maps);15551556if (params->dst.enabled) {1557blorp_emit_surface_state(batch, ¶ms->dst,1558params->fast_clear_op,1559surface_maps[BLORP_RENDERBUFFER_BT_INDEX],1560surface_offsets[BLORP_RENDERBUFFER_BT_INDEX],1561params->color_write_disable, true);1562if (params->dst.clear_color_addr.buffer != NULL)1563has_indirect_clear_color = true;1564} else {1565assert(params->depth.enabled || params->stencil.enabled);1566const struct brw_blorp_surface_info *surface =1567params->depth.enabled ? ¶ms->depth : ¶ms->stencil;1568blorp_emit_null_surface_state(batch, surface,1569surface_maps[BLORP_RENDERBUFFER_BT_INDEX]);1570}15711572if (params->src.enabled) {1573blorp_emit_surface_state(batch, ¶ms->src,1574params->fast_clear_op,1575surface_maps[BLORP_TEXTURE_BT_INDEX],1576surface_offsets[BLORP_TEXTURE_BT_INDEX],1577NULL, false);1578if (params->src.clear_color_addr.buffer != NULL)1579has_indirect_clear_color = true;1580}1581}15821583#if GFX_VER >= 71584if (has_indirect_clear_color) {1585/* Updating a surface state object may require that the state cache be1586* invalidated. 
From the SKL PRM, Shared Functions -> State -> State1587* Caching:1588*1589* Whenever the RENDER_SURFACE_STATE object in memory pointed to by1590* the Binding Table Pointer (BTP) and Binding Table Index (BTI) is1591* modified [...], the L1 state cache must be invalidated to ensure1592* the new surface or sampler state is fetched from system memory.1593*/1594blorp_emit(batch, GENX(PIPE_CONTROL), pipe) {1595pipe.StateCacheInvalidationEnable = true;1596}1597}1598#endif15991600#if GFX_VER >= 71601blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), bt);1602blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_HS), bt);1603blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_DS), bt);1604blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_GS), bt);16051606blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), bt) {1607bt.PointertoPSBindingTable = bind_offset;1608}1609#elif GFX_VER >= 61610blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) {1611bt.PSBindingTableChange = true;1612bt.PointertoPSBindingTable = bind_offset;1613}1614#else1615blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) {1616bt.PointertoPSBindingTable = bind_offset;1617}1618#endif1619}16201621static void1622blorp_emit_depth_stencil_config(struct blorp_batch *batch,1623const struct blorp_params *params)1624{1625const struct isl_device *isl_dev = batch->blorp->isl_dev;16261627uint32_t *dw = blorp_emit_dwords(batch, isl_dev->ds.size / 4);1628if (dw == NULL)1629return;16301631struct isl_depth_stencil_hiz_emit_info info = { };16321633if (params->depth.enabled) {1634info.view = ¶ms->depth.view;1635info.mocs = params->depth.addr.mocs;1636} else if (params->stencil.enabled) {1637info.view = ¶ms->stencil.view;1638info.mocs = params->stencil.addr.mocs;1639}16401641if (params->depth.enabled) {1642info.depth_surf = ¶ms->depth.surf;16431644info.depth_address =1645blorp_emit_reloc(batch, dw + isl_dev->ds.depth_offset / 4,1646params->depth.addr, 0);16471648info.hiz_usage = 
params->depth.aux_usage;1649if (isl_aux_usage_has_hiz(info.hiz_usage)) {1650info.hiz_surf = ¶ms->depth.aux_surf;16511652struct blorp_address hiz_address = params->depth.aux_addr;1653#if GFX_VER == 61654/* Sandy bridge hardware does not technically support mipmapped HiZ.1655* However, we have a special layout that allows us to make it work1656* anyway by manually offsetting to the specified miplevel.1657*/1658assert(info.hiz_surf->dim_layout == ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ);1659uint32_t offset_B;1660isl_surf_get_image_offset_B_tile_sa(info.hiz_surf,1661info.view->base_level, 0, 0,1662&offset_B, NULL, NULL);1663hiz_address.offset += offset_B;1664#endif16651666info.hiz_address =1667blorp_emit_reloc(batch, dw + isl_dev->ds.hiz_offset / 4,1668hiz_address, 0);16691670info.depth_clear_value = params->depth.clear_color.f32[0];1671}1672}16731674if (params->stencil.enabled) {1675info.stencil_surf = ¶ms->stencil.surf;16761677info.stencil_aux_usage = params->stencil.aux_usage;1678struct blorp_address stencil_address = params->stencil.addr;1679#if GFX_VER == 61680/* Sandy bridge hardware does not technically support mipmapped stencil.1681* However, we have a special layout that allows us to make it work1682* anyway by manually offsetting to the specified miplevel.1683*/1684assert(info.stencil_surf->dim_layout == ISL_DIM_LAYOUT_GFX6_STENCIL_HIZ);1685uint32_t offset_B;1686isl_surf_get_image_offset_B_tile_sa(info.stencil_surf,1687info.view->base_level, 0, 0,1688&offset_B, NULL, NULL);1689stencil_address.offset += offset_B;1690#endif16911692info.stencil_address =1693blorp_emit_reloc(batch, dw + isl_dev->ds.stencil_offset / 4,1694stencil_address, 0);1695}16961697isl_emit_depth_stencil_hiz_s(isl_dev, dw, &info);16981699#if GFX_VER >= 121700/* Wa_14082245811701*1702* Workaround: Gfx12LP Astep only An additional pipe control with1703* post-sync = store dword operation would be required.( w/a is to1704* have an additional pipe control after the stencil state whenever1705* the surface 
state bits of this state is changing).1706*/1707blorp_emit(batch, GENX(PIPE_CONTROL), pc) {1708pc.PostSyncOperation = WriteImmediateData;1709pc.Address = blorp_get_workaround_address(batch);1710}1711#endif1712}17131714#if GFX_VER >= 81715/* Emits the Optimized HiZ sequence specified in the BDW+ PRMs. The1716* depth/stencil buffer extents are ignored to handle APIs which perform1717* clearing operations without such information.1718* */1719static void1720blorp_emit_gfx8_hiz_op(struct blorp_batch *batch,1721const struct blorp_params *params)1722{1723/* We should be performing an operation on a depth or stencil buffer.1724*/1725assert(params->depth.enabled || params->stencil.enabled);17261727/* The stencil buffer should only be enabled if a fast clear operation is1728* requested.1729*/1730if (params->stencil.enabled)1731assert(params->hiz_op == ISL_AUX_OP_FAST_CLEAR);17321733/* From the BDW PRM Volume 2, 3DSTATE_WM_HZ_OP:1734*1735* 3DSTATE_MULTISAMPLE packet must be used prior to this packet to change1736* the Number of Multisamples. This packet must not be used to change1737* Number of Multisamples in a rendering sequence.1738*1739* Since HIZ may be the first thing in a batch buffer, play safe and always1740* emit 3DSTATE_MULTISAMPLE.1741*/1742blorp_emit_3dstate_multisample(batch, params);17431744/* From the BDW PRM Volume 7, Depth Buffer Clear:1745*1746* The clear value must be between the min and max depth values1747* (inclusive) defined in the CC_VIEWPORT. 
If the depth buffer format is1748* D32_FLOAT, then +/-DENORM values are also allowed.1749*1750* Set the bounds to match our hardware limits, [0.0, 1.0].1751*/1752if (params->depth.enabled && params->hiz_op == ISL_AUX_OP_FAST_CLEAR) {1753assert(params->depth.clear_color.f32[0] >= 0.0f);1754assert(params->depth.clear_color.f32[0] <= 1.0f);1755blorp_emit_cc_viewport(batch);1756}17571758/* According to the SKL PRM formula for WM_INT::ThreadDispatchEnable, the1759* 3DSTATE_WM::ForceThreadDispatchEnable field can force WM thread dispatch1760* even when WM_HZ_OP is active. However, WM thread dispatch is normally1761* disabled for HiZ ops and it appears that force-enabling it can lead to1762* GPU hangs on at least Skylake. Since we don't know the current state of1763* the 3DSTATE_WM packet, just emit a dummy one prior to 3DSTATE_WM_HZ_OP.1764*/1765blorp_emit(batch, GENX(3DSTATE_WM), wm);17661767/* If we can't alter the depth stencil config and multiple layers are1768* involved, the HiZ op will fail. 
This is because the op requires that a1769* new config is emitted for each additional layer.1770*/1771if (batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL) {1772assert(params->num_layers <= 1);1773} else {1774blorp_emit_depth_stencil_config(batch, params);1775}17761777blorp_measure_start(batch, params);17781779blorp_emit(batch, GENX(3DSTATE_WM_HZ_OP), hzp) {1780switch (params->hiz_op) {1781case ISL_AUX_OP_FAST_CLEAR:1782hzp.StencilBufferClearEnable = params->stencil.enabled;1783hzp.DepthBufferClearEnable = params->depth.enabled;1784hzp.StencilClearValue = params->stencil_ref;1785hzp.FullSurfaceDepthandStencilClear = params->full_surface_hiz_op;1786break;1787case ISL_AUX_OP_FULL_RESOLVE:1788assert(params->full_surface_hiz_op);1789hzp.DepthBufferResolveEnable = true;1790break;1791case ISL_AUX_OP_AMBIGUATE:1792assert(params->full_surface_hiz_op);1793hzp.HierarchicalDepthBufferResolveEnable = true;1794break;1795case ISL_AUX_OP_PARTIAL_RESOLVE:1796case ISL_AUX_OP_NONE:1797unreachable("Invalid HIZ op");1798}17991800hzp.NumberofMultisamples = ffs(params->num_samples) - 1;1801hzp.SampleMask = 0xFFFF;18021803/* Due to a hardware issue, this bit MBZ */1804assert(hzp.ScissorRectangleEnable == false);18051806/* Contrary to the HW docs both fields are inclusive */1807hzp.ClearRectangleXMin = params->x0;1808hzp.ClearRectangleYMin = params->y0;18091810/* Contrary to the HW docs both fields are exclusive */1811hzp.ClearRectangleXMax = params->x1;1812hzp.ClearRectangleYMax = params->y1;1813}18141815/* PIPE_CONTROL w/ all bits clear except for “Post-Sync Operation” must set1816* to “Write Immediate Data” enabled.1817*/1818blorp_emit(batch, GENX(PIPE_CONTROL), pc) {1819pc.PostSyncOperation = WriteImmediateData;1820pc.Address = blorp_get_workaround_address(batch);1821}18221823blorp_emit(batch, GENX(3DSTATE_WM_HZ_OP), hzp);1824}1825#endif18261827static void1828blorp_update_clear_color(UNUSED struct blorp_batch *batch,1829const struct brw_blorp_surface_info *info,1830enum isl_aux_op 
op)1831{1832if (info->clear_color_addr.buffer && op == ISL_AUX_OP_FAST_CLEAR) {1833#if GFX_VER == 111834blorp_emit(batch, GENX(PIPE_CONTROL), pipe) {1835pipe.CommandStreamerStallEnable = true;1836}18371838/* 2 QWORDS */1839const unsigned inlinedata_dw = 2 * 2;1840const unsigned num_dwords = GENX(MI_ATOMIC_length) + inlinedata_dw;18411842struct blorp_address clear_addr = info->clear_color_addr;1843uint32_t *dw = blorp_emitn(batch, GENX(MI_ATOMIC), num_dwords,1844.DataSize = MI_ATOMIC_QWORD,1845.ATOMICOPCODE = MI_ATOMIC_OP_MOVE8B,1846.InlineData = true,1847.MemoryAddress = clear_addr);1848/* dw starts at dword 1, but we need to fill dwords 3 and 5 */1849dw[2] = info->clear_color.u32[0];1850dw[3] = 0;1851dw[4] = info->clear_color.u32[1];1852dw[5] = 0;18531854clear_addr.offset += 8;1855dw = blorp_emitn(batch, GENX(MI_ATOMIC), num_dwords,1856.DataSize = MI_ATOMIC_QWORD,1857.ATOMICOPCODE = MI_ATOMIC_OP_MOVE8B,1858.CSSTALL = true,1859.ReturnDataControl = true,1860.InlineData = true,1861.MemoryAddress = clear_addr);1862/* dw starts at dword 1, but we need to fill dwords 3 and 5 */1863dw[2] = info->clear_color.u32[2];1864dw[3] = 0;1865dw[4] = info->clear_color.u32[3];1866dw[5] = 0;18671868blorp_emit(batch, GENX(PIPE_CONTROL), pipe) {1869pipe.StateCacheInvalidationEnable = true;1870pipe.TextureCacheInvalidationEnable = true;1871}1872#elif GFX_VER >= 918731874/* According to Wa_2201730850, in the Clear Color Programming Note1875* under the Red channel, "Software shall write the converted Depth1876* Clear to this dword." The only depth formats listed under the red1877* channel are IEEE_FP and UNORM24_X8. 
These two requirements are1878* incompatible with the UNORM16 depth format, so just ignore that case1879* and simply perform the conversion for all depth formats.1880*/1881union isl_color_value fixed_color = info->clear_color;1882if (GFX_VER == 12 && isl_surf_usage_is_depth(info->surf.usage)) {1883isl_color_value_pack(&info->clear_color, info->surf.format,1884fixed_color.u32);1885}18861887for (int i = 0; i < 4; i++) {1888blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) {1889sdi.Address = info->clear_color_addr;1890sdi.Address.offset += i * 4;1891sdi.ImmediateData = fixed_color.u32[i];1892#if GFX_VER >= 121893if (i == 3)1894sdi.ForceWriteCompletionCheck = true;1895#endif1896}1897}18981899/* The RENDER_SURFACE_STATE::ClearColor field states that software should1900* write the converted depth value 16B after the clear address:1901*1902* 3D Sampler will always fetch clear depth from the location 16-bytes1903* above this address, where the clear depth, converted to native1904* surface format by software, will be stored.1905*1906*/1907#if GFX_VER >= 121908if (isl_surf_usage_is_depth(info->surf.usage)) {1909blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) {1910sdi.Address = info->clear_color_addr;1911sdi.Address.offset += 4 * 4;1912sdi.ImmediateData = fixed_color.u32[0];1913sdi.ForceWriteCompletionCheck = true;1914}1915}1916#endif19171918#elif GFX_VER >= 71919blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) {1920sdi.Address = info->clear_color_addr;1921sdi.ImmediateData = ISL_CHANNEL_SELECT_RED << 25 |1922ISL_CHANNEL_SELECT_GREEN << 22 |1923ISL_CHANNEL_SELECT_BLUE << 19 |1924ISL_CHANNEL_SELECT_ALPHA << 16;1925if (isl_format_has_int_channel(info->view.format)) {1926for (unsigned i = 0; i < 4; i++) {1927assert(info->clear_color.u32[i] == 0 ||1928info->clear_color.u32[i] == 1);1929}1930sdi.ImmediateData |= (info->clear_color.u32[0] != 0) << 31;1931sdi.ImmediateData |= (info->clear_color.u32[1] != 0) << 30;1932sdi.ImmediateData |= (info->clear_color.u32[2] != 0) << 
29;1933sdi.ImmediateData |= (info->clear_color.u32[3] != 0) << 28;1934} else {1935for (unsigned i = 0; i < 4; i++) {1936assert(info->clear_color.f32[i] == 0.0f ||1937info->clear_color.f32[i] == 1.0f);1938}1939sdi.ImmediateData |= (info->clear_color.f32[0] != 0.0f) << 31;1940sdi.ImmediateData |= (info->clear_color.f32[1] != 0.0f) << 30;1941sdi.ImmediateData |= (info->clear_color.f32[2] != 0.0f) << 29;1942sdi.ImmediateData |= (info->clear_color.f32[3] != 0.0f) << 28;1943}1944}1945#endif1946}1947}19481949/**1950* \brief Execute a blit or render pass operation.1951*1952* To execute the operation, this function manually constructs and emits a1953* batch to draw a rectangle primitive. The batchbuffer is flushed before1954* constructing and after emitting the batch.1955*1956* This function alters no GL state.1957*/1958static void1959blorp_exec(struct blorp_batch *batch, const struct blorp_params *params)1960{1961if (!(batch->flags & BLORP_BATCH_NO_UPDATE_CLEAR_COLOR)) {1962blorp_update_clear_color(batch, ¶ms->dst, params->fast_clear_op);1963blorp_update_clear_color(batch, ¶ms->depth, params->hiz_op);1964}19651966#if GFX_VER >= 81967if (params->hiz_op != ISL_AUX_OP_NONE) {1968blorp_emit_gfx8_hiz_op(batch, params);1969return;1970}1971#endif19721973blorp_emit_vertex_buffers(batch, params);1974blorp_emit_vertex_elements(batch, params);19751976blorp_emit_pipeline(batch, params);19771978blorp_emit_surface_states(batch, params);19791980if (!(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL))1981blorp_emit_depth_stencil_config(batch, params);19821983blorp_measure_start(batch, params);19841985blorp_emit(batch, GENX(3DPRIMITIVE), prim) {1986prim.VertexAccessType = SEQUENTIAL;1987prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;1988#if GFX_VER >= 71989prim.PredicateEnable = batch->flags & BLORP_BATCH_PREDICATE_ENABLE;1990#endif1991prim.VertexCountPerInstance = 3;1992prim.InstanceCount = params->num_layers;1993}1994}19951996#endif /* BLORP_GENX_EXEC_H */199719981999