Path: blob/21.2-virgl/src/gallium/drivers/crocus/crocus_state.c
4570 views
/*1* Copyright © 2017 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included11* in all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS14* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER17* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING18* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER19* DEALINGS IN THE SOFTWARE.20*/2122/**23* @file crocus_state.c24*25* ============================= GENXML CODE =============================26* [This file is compiled once per generation.]27* =======================================================================28*29* This is the main state upload code.30*31* Gallium uses Constant State Objects, or CSOs, for most state. Large,32* complex, or highly reusable state can be created once, and bound and33* rebound multiple times. This is modeled with the pipe->create_*_state()34* and pipe->bind_*_state() hooks. Highly dynamic or inexpensive state is35* streamed out on the fly, via pipe->set_*_state() hooks.36*37* OpenGL involves frequently mutating context state, which is mirrored in38* core Mesa by highly mutable data structures. 
However, most applications39* typically draw the same things over and over - from frame to frame, most40* of the same objects are still visible and need to be redrawn. So, rather41* than inventing new state all the time, applications usually mutate to swap42* between known states that we've seen before.43*44* Gallium isolates us from this mutation by tracking API state, and45* distilling it into a set of Constant State Objects, or CSOs. Large,46* complex, or typically reusable state can be created once, then reused47* multiple times. Drivers can create and store their own associated data.48* This create/bind model corresponds to the pipe->create_*_state() and49* pipe->bind_*_state() driver hooks.50*51* Some state is cheap to create, or expected to be highly dynamic. Rather52* than creating and caching piles of CSOs for these, Gallium simply streams53* them out, via the pipe->set_*_state() driver hooks.54*55* To reduce draw time overhead, we try to compute as much state at create56* time as possible. Wherever possible, we translate the Gallium pipe state57* to 3DSTATE commands, and store those commands in the CSO. At draw time,58* we can simply memcpy them into a batch buffer.59*60* No hardware matches the abstraction perfectly, so some commands require61* information from multiple CSOs. In this case, we can store two copies62* of the packet (one in each CSO), and simply | together their DWords at63* draw time. Sometimes the second set is trivial (one or two fields), so64* we simply pack it at draw time.65*66* There are two main components in the file below. First, the CSO hooks67* create/bind/track state. 
The second are the draw-time upload functions,68* crocus_upload_render_state() and crocus_upload_compute_state(), which read69* the context state and emit the commands into the actual batch.70*/7172#include <errno.h>73#include <stdio.h>7475#if HAVE_VALGRIND76#include <memcheck.h>77#include <valgrind.h>78#define VG(x) x79#ifdef DEBUG80#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))81#endif82#else83#define VG(x)84#endif8586#include "drm-uapi/i915_drm.h"87#include "intel/common/intel_l3_config.h"88#include "intel/common/intel_sample_positions.h"89#include "intel/compiler/brw_compiler.h"90#include "pipe/p_context.h"91#include "pipe/p_defines.h"92#include "pipe/p_screen.h"93#include "pipe/p_state.h"94#include "util/format/u_format.h"95#include "util/half_float.h"96#include "util/u_dual_blend.h"97#include "util/u_framebuffer.h"98#include "util/u_helpers.h"99#include "util/u_inlines.h"100#include "util/u_memory.h"101#include "util/u_prim.h"102#include "util/u_transfer.h"103#include "util/u_upload_mgr.h"104#include "util/u_viewport.h"105#include "crocus_batch.h"106#include "crocus_context.h"107#include "crocus_defines.h"108#include "crocus_pipe.h"109#include "crocus_resource.h"110111#include "crocus_genx_macros.h"112#include "intel/common/intel_guardband.h"113114/**115* Statically assert that PIPE_* enums match the hardware packets.116* (As long as they match, we don't need to translate them.)117*/118UNUSED static void pipe_asserts()119{120#define PIPE_ASSERT(x) STATIC_ASSERT((int)x)121122/* pipe_logicop happens to match the hardware. 
*/123PIPE_ASSERT(PIPE_LOGICOP_CLEAR == LOGICOP_CLEAR);124PIPE_ASSERT(PIPE_LOGICOP_NOR == LOGICOP_NOR);125PIPE_ASSERT(PIPE_LOGICOP_AND_INVERTED == LOGICOP_AND_INVERTED);126PIPE_ASSERT(PIPE_LOGICOP_COPY_INVERTED == LOGICOP_COPY_INVERTED);127PIPE_ASSERT(PIPE_LOGICOP_AND_REVERSE == LOGICOP_AND_REVERSE);128PIPE_ASSERT(PIPE_LOGICOP_INVERT == LOGICOP_INVERT);129PIPE_ASSERT(PIPE_LOGICOP_XOR == LOGICOP_XOR);130PIPE_ASSERT(PIPE_LOGICOP_NAND == LOGICOP_NAND);131PIPE_ASSERT(PIPE_LOGICOP_AND == LOGICOP_AND);132PIPE_ASSERT(PIPE_LOGICOP_EQUIV == LOGICOP_EQUIV);133PIPE_ASSERT(PIPE_LOGICOP_NOOP == LOGICOP_NOOP);134PIPE_ASSERT(PIPE_LOGICOP_OR_INVERTED == LOGICOP_OR_INVERTED);135PIPE_ASSERT(PIPE_LOGICOP_COPY == LOGICOP_COPY);136PIPE_ASSERT(PIPE_LOGICOP_OR_REVERSE == LOGICOP_OR_REVERSE);137PIPE_ASSERT(PIPE_LOGICOP_OR == LOGICOP_OR);138PIPE_ASSERT(PIPE_LOGICOP_SET == LOGICOP_SET);139140/* pipe_blend_func happens to match the hardware. */141PIPE_ASSERT(PIPE_BLENDFACTOR_ONE == BLENDFACTOR_ONE);142PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_COLOR == BLENDFACTOR_SRC_COLOR);143PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA == BLENDFACTOR_SRC_ALPHA);144PIPE_ASSERT(PIPE_BLENDFACTOR_DST_ALPHA == BLENDFACTOR_DST_ALPHA);145PIPE_ASSERT(PIPE_BLENDFACTOR_DST_COLOR == BLENDFACTOR_DST_COLOR);146PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE == BLENDFACTOR_SRC_ALPHA_SATURATE);147PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_COLOR == BLENDFACTOR_CONST_COLOR);148PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_ALPHA == BLENDFACTOR_CONST_ALPHA);149PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_COLOR == BLENDFACTOR_SRC1_COLOR);150PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_ALPHA == BLENDFACTOR_SRC1_ALPHA);151PIPE_ASSERT(PIPE_BLENDFACTOR_ZERO == BLENDFACTOR_ZERO);152PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_COLOR == BLENDFACTOR_INV_SRC_COLOR);153PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_ALPHA == BLENDFACTOR_INV_SRC_ALPHA);154PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_ALPHA == BLENDFACTOR_INV_DST_ALPHA);155PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_COLOR == 
BLENDFACTOR_INV_DST_COLOR);156PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_COLOR == BLENDFACTOR_INV_CONST_COLOR);157PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_ALPHA == BLENDFACTOR_INV_CONST_ALPHA);158PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_COLOR == BLENDFACTOR_INV_SRC1_COLOR);159PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_ALPHA == BLENDFACTOR_INV_SRC1_ALPHA);160161/* pipe_blend_func happens to match the hardware. */162PIPE_ASSERT(PIPE_BLEND_ADD == BLENDFUNCTION_ADD);163PIPE_ASSERT(PIPE_BLEND_SUBTRACT == BLENDFUNCTION_SUBTRACT);164PIPE_ASSERT(PIPE_BLEND_REVERSE_SUBTRACT == BLENDFUNCTION_REVERSE_SUBTRACT);165PIPE_ASSERT(PIPE_BLEND_MIN == BLENDFUNCTION_MIN);166PIPE_ASSERT(PIPE_BLEND_MAX == BLENDFUNCTION_MAX);167168/* pipe_stencil_op happens to match the hardware. */169PIPE_ASSERT(PIPE_STENCIL_OP_KEEP == STENCILOP_KEEP);170PIPE_ASSERT(PIPE_STENCIL_OP_ZERO == STENCILOP_ZERO);171PIPE_ASSERT(PIPE_STENCIL_OP_REPLACE == STENCILOP_REPLACE);172PIPE_ASSERT(PIPE_STENCIL_OP_INCR == STENCILOP_INCRSAT);173PIPE_ASSERT(PIPE_STENCIL_OP_DECR == STENCILOP_DECRSAT);174PIPE_ASSERT(PIPE_STENCIL_OP_INCR_WRAP == STENCILOP_INCR);175PIPE_ASSERT(PIPE_STENCIL_OP_DECR_WRAP == STENCILOP_DECR);176PIPE_ASSERT(PIPE_STENCIL_OP_INVERT == STENCILOP_INVERT);177178#if GFX_VER >= 6179/* pipe_sprite_coord_mode happens to match 3DSTATE_SBE */180PIPE_ASSERT(PIPE_SPRITE_COORD_UPPER_LEFT == UPPERLEFT);181PIPE_ASSERT(PIPE_SPRITE_COORD_LOWER_LEFT == LOWERLEFT);182#endif183#undef PIPE_ASSERT184}185186static unsigned187translate_prim_type(enum pipe_prim_type prim, uint8_t verts_per_patch)188{189static const unsigned map[] = {190[PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,191[PIPE_PRIM_LINES] = _3DPRIM_LINELIST,192[PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,193[PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,194[PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,195[PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,196[PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,197[PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,198[PIPE_PRIM_QUAD_STRIP] = 
_3DPRIM_QUADSTRIP,199[PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,200#if GFX_VER >= 6201[PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,202[PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,203[PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,204[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,205#endif206#if GFX_VER >= 7207[PIPE_PRIM_PATCHES] = _3DPRIM_PATCHLIST_1 - 1,208#endif209};210211return map[prim] + (prim == PIPE_PRIM_PATCHES ? verts_per_patch : 0);212}213214static unsigned215translate_compare_func(enum pipe_compare_func pipe_func)216{217static const unsigned map[] = {218[PIPE_FUNC_NEVER] = COMPAREFUNCTION_NEVER,219[PIPE_FUNC_LESS] = COMPAREFUNCTION_LESS,220[PIPE_FUNC_EQUAL] = COMPAREFUNCTION_EQUAL,221[PIPE_FUNC_LEQUAL] = COMPAREFUNCTION_LEQUAL,222[PIPE_FUNC_GREATER] = COMPAREFUNCTION_GREATER,223[PIPE_FUNC_NOTEQUAL] = COMPAREFUNCTION_NOTEQUAL,224[PIPE_FUNC_GEQUAL] = COMPAREFUNCTION_GEQUAL,225[PIPE_FUNC_ALWAYS] = COMPAREFUNCTION_ALWAYS,226};227return map[pipe_func];228}229230static unsigned231translate_shadow_func(enum pipe_compare_func pipe_func)232{233/* Gallium specifies the result of shadow comparisons as:234*235* 1 if ref <op> texel,236* 0 otherwise.237*238* The hardware does:239*240* 0 if texel <op> ref,241* 1 otherwise.242*243* So we need to flip the operator and also negate.244*/245static const unsigned map[] = {246[PIPE_FUNC_NEVER] = PREFILTEROP_ALWAYS,247[PIPE_FUNC_LESS] = PREFILTEROP_LEQUAL,248[PIPE_FUNC_EQUAL] = PREFILTEROP_NOTEQUAL,249[PIPE_FUNC_LEQUAL] = PREFILTEROP_LESS,250[PIPE_FUNC_GREATER] = PREFILTEROP_GEQUAL,251[PIPE_FUNC_NOTEQUAL] = PREFILTEROP_EQUAL,252[PIPE_FUNC_GEQUAL] = PREFILTEROP_GREATER,253[PIPE_FUNC_ALWAYS] = PREFILTEROP_NEVER,254};255return map[pipe_func];256}257258static unsigned259translate_cull_mode(unsigned pipe_face)260{261static const unsigned map[4] = {262[PIPE_FACE_NONE] = CULLMODE_NONE,263[PIPE_FACE_FRONT] = CULLMODE_FRONT,264[PIPE_FACE_BACK] = CULLMODE_BACK,265[PIPE_FACE_FRONT_AND_BACK] = 
CULLMODE_BOTH,266};267return map[pipe_face];268}269270#if GFX_VER >= 6271static unsigned272translate_fill_mode(unsigned pipe_polymode)273{274static const unsigned map[4] = {275[PIPE_POLYGON_MODE_FILL] = FILL_MODE_SOLID,276[PIPE_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME,277[PIPE_POLYGON_MODE_POINT] = FILL_MODE_POINT,278[PIPE_POLYGON_MODE_FILL_RECTANGLE] = FILL_MODE_SOLID,279};280return map[pipe_polymode];281}282#endif283284static unsigned285translate_mip_filter(enum pipe_tex_mipfilter pipe_mip)286{287static const unsigned map[] = {288[PIPE_TEX_MIPFILTER_NEAREST] = MIPFILTER_NEAREST,289[PIPE_TEX_MIPFILTER_LINEAR] = MIPFILTER_LINEAR,290[PIPE_TEX_MIPFILTER_NONE] = MIPFILTER_NONE,291};292return map[pipe_mip];293}294295static uint32_t296translate_wrap(unsigned pipe_wrap, bool either_nearest)297{298static const unsigned map[] = {299[PIPE_TEX_WRAP_REPEAT] = TCM_WRAP,300#if GFX_VER == 8301[PIPE_TEX_WRAP_CLAMP] = TCM_HALF_BORDER,302#else303[PIPE_TEX_WRAP_CLAMP] = TCM_CLAMP_BORDER,304#endif305[PIPE_TEX_WRAP_CLAMP_TO_EDGE] = TCM_CLAMP,306[PIPE_TEX_WRAP_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER,307[PIPE_TEX_WRAP_MIRROR_REPEAT] = TCM_MIRROR,308[PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE,309310/* These are unsupported. 
*/311[PIPE_TEX_WRAP_MIRROR_CLAMP] = -1,312[PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER] = -1,313};314#if GFX_VER < 8315if (pipe_wrap == PIPE_TEX_WRAP_CLAMP && either_nearest)316return TCM_CLAMP;317#endif318return map[pipe_wrap];319}320321/**322* Equiv if brw_state_batch323*/324static uint32_t *325stream_state(struct crocus_batch *batch,326unsigned size,327unsigned alignment,328uint32_t *out_offset)329{330uint32_t offset = ALIGN(batch->state.used, alignment);331332if (offset + size >= STATE_SZ && !batch->no_wrap) {333crocus_batch_flush(batch);334offset = ALIGN(batch->state.used, alignment);335} else if (offset + size >= batch->state.bo->size) {336const unsigned new_size =337MIN2(batch->state.bo->size + batch->state.bo->size / 2,338MAX_STATE_SIZE);339crocus_grow_buffer(batch, true, batch->state.used, new_size);340assert(offset + size < batch->state.bo->size);341}342343crocus_record_state_size(batch->state_sizes, offset, size);344345batch->state.used = offset + size;346*out_offset = offset;347348return (uint32_t *)batch->state.map + (offset >> 2);349}350351/**352* stream_state() + memcpy.353*/354static uint32_t355emit_state(struct crocus_batch *batch, const void *data, unsigned size,356unsigned alignment)357{358unsigned offset = 0;359uint32_t *map = stream_state(batch, size, alignment, &offset);360361if (map)362memcpy(map, data, size);363364return offset;365}366367#if GFX_VER <= 5368static void369upload_pipelined_state_pointers(struct crocus_batch *batch,370bool gs_active, uint32_t gs_offset,371uint32_t vs_offset, uint32_t sf_offset,372uint32_t clip_offset, uint32_t wm_offset, uint32_t cc_offset)373{374#if GFX_VER == 5375/* Need to flush before changing clip max threads for errata. 
*/376crocus_emit_cmd(batch, GENX(MI_FLUSH), foo);377#endif378379crocus_emit_cmd(batch, GENX(3DSTATE_PIPELINED_POINTERS), pp) {380pp.PointertoVSState = ro_bo(batch->state.bo, vs_offset);381pp.GSEnable = gs_active;382if (gs_active)383pp.PointertoGSState = ro_bo(batch->state.bo, gs_offset);384pp.ClipEnable = true;385pp.PointertoCLIPState = ro_bo(batch->state.bo, clip_offset);386pp.PointertoSFState = ro_bo(batch->state.bo, sf_offset);387pp.PointertoWMState = ro_bo(batch->state.bo, wm_offset);388pp.PointertoColorCalcState = ro_bo(batch->state.bo, cc_offset);389}390}391392#endif393/**394* Did field 'x' change between 'old_cso' and 'new_cso'?395*396* (If so, we may want to set some dirty flags.)397*/398#define cso_changed(x) (!old_cso || (old_cso->x != new_cso->x))399#define cso_changed_memcmp(x) \400(!old_cso || memcmp(old_cso->x, new_cso->x, sizeof(old_cso->x)) != 0)401402static void403flush_before_state_base_change(struct crocus_batch *batch)404{405#if GFX_VER >= 6406/* Flush before emitting STATE_BASE_ADDRESS.407*408* This isn't documented anywhere in the PRM. However, it seems to be409* necessary prior to changing the surface state base adress. We've410* seen issues in Vulkan where we get GPU hangs when using multi-level411* command buffers which clear depth, reset state base address, and then412* go render stuff.413*414* Normally, in GL, we would trust the kernel to do sufficient stalls415* and flushes prior to executing our batch. However, it doesn't seem416* as if the kernel's flushing is always sufficient and we don't want to417* rely on it.418*419* We make this an end-of-pipe sync instead of a normal flush because we420* do not know the current status of the GPU. On Haswell at least,421* having a fast-clear operation in flight at the same time as a normal422* rendering operation can cause hangs. 
Since the kernel's flushing is423* insufficient, we need to ensure that any rendering operations from424* other processes are definitely complete before we try to do our own425* rendering. It's a bit of a big hammer but it appears to work.426*/427const unsigned dc_flush =428batch->screen->devinfo.ver >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;429crocus_emit_end_of_pipe_sync(batch,430"change STATE_BASE_ADDRESS (flushes)",431PIPE_CONTROL_RENDER_TARGET_FLUSH |432dc_flush |433PIPE_CONTROL_DEPTH_CACHE_FLUSH);434#endif435}436437static void438flush_after_state_base_change(struct crocus_batch *batch)439{440/* After re-setting the surface state base address, we have to do some441* cache flusing so that the sampler engine will pick up the new442* SURFACE_STATE objects and binding tables. From the Broadwell PRM,443* Shared Function > 3D Sampler > State > State Caching (page 96):444*445* Coherency with system memory in the state cache, like the texture446* cache is handled partially by software. It is expected that the447* command stream or shader will issue Cache Flush operation or448* Cache_Flush sampler message to ensure that the L1 cache remains449* coherent with system memory.450*451* [...]452*453* Whenever the value of the Dynamic_State_Base_Addr,454* Surface_State_Base_Addr are altered, the L1 state cache must be455* invalidated to ensure the new surface or sampler state is fetched456* from system memory.457*458* The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit459* which, according the PIPE_CONTROL instruction documentation in the460* Broadwell PRM:461*462* Setting this bit is independent of any other bit in this packet.463* This bit controls the invalidation of the L1 and L2 state caches464* at the top of the pipe i.e. at the parsing time.465*466* Unfortunately, experimentation seems to indicate that state cache467* invalidation through a PIPE_CONTROL does nothing whatsoever in468* regards to surface state and binding tables. 
In stead, it seems that469* invalidating the texture cache is what is actually needed.470*471* XXX: As far as we have been able to determine through472* experimentation, shows that flush the texture cache appears to be473* sufficient. The theory here is that all of the sampling/rendering474* units cache the binding table in the texture cache. However, we have475* yet to be able to actually confirm this.476*/477#if GFX_VER >= 6478crocus_emit_end_of_pipe_sync(batch,479"change STATE_BASE_ADDRESS (invalidates)",480PIPE_CONTROL_INSTRUCTION_INVALIDATE |481PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |482PIPE_CONTROL_CONST_CACHE_INVALIDATE |483PIPE_CONTROL_STATE_CACHE_INVALIDATE);484#endif485}486487#if GFX_VER >= 6488static void489crocus_store_register_mem32(struct crocus_batch *batch, uint32_t reg,490struct crocus_bo *bo, uint32_t offset,491bool predicated)492{493crocus_emit_cmd(batch, GENX(MI_STORE_REGISTER_MEM), srm) {494srm.RegisterAddress = reg;495srm.MemoryAddress = ggtt_bo(bo, offset);496#if GFX_VERx10 >= 75497srm.PredicateEnable = predicated;498#else499if (predicated)500unreachable("unsupported predication");501#endif502}503}504505static void506crocus_store_register_mem64(struct crocus_batch *batch, uint32_t reg,507struct crocus_bo *bo, uint32_t offset,508bool predicated)509{510crocus_store_register_mem32(batch, reg + 0, bo, offset + 0, predicated);511crocus_store_register_mem32(batch, reg + 4, bo, offset + 4, predicated);512}513#endif514515#if GFX_VER >= 7516static void517_crocus_emit_lri(struct crocus_batch *batch, uint32_t reg, uint32_t val)518{519crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_IMM), lri) {520lri.RegisterOffset = reg;521lri.DataDWord = val;522}523}524#define crocus_emit_lri(b, r, v) _crocus_emit_lri(b, GENX(r##_num), v)525526#if GFX_VERx10 >= 75527static void528_crocus_emit_lrr(struct crocus_batch *batch, uint32_t dst, uint32_t src)529{530crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_REG), lrr) {531lrr.SourceRegisterAddress = 
src;532lrr.DestinationRegisterAddress = dst;533}534}535536static void537crocus_load_register_reg32(struct crocus_batch *batch, uint32_t dst,538uint32_t src)539{540_crocus_emit_lrr(batch, dst, src);541}542543static void544crocus_load_register_reg64(struct crocus_batch *batch, uint32_t dst,545uint32_t src)546{547_crocus_emit_lrr(batch, dst, src);548_crocus_emit_lrr(batch, dst + 4, src + 4);549}550#endif551552static void553crocus_load_register_imm32(struct crocus_batch *batch, uint32_t reg,554uint32_t val)555{556_crocus_emit_lri(batch, reg, val);557}558559static void560crocus_load_register_imm64(struct crocus_batch *batch, uint32_t reg,561uint64_t val)562{563_crocus_emit_lri(batch, reg + 0, val & 0xffffffff);564_crocus_emit_lri(batch, reg + 4, val >> 32);565}566567/**568* Emit MI_LOAD_REGISTER_MEM to load a 32-bit MMIO register from a buffer.569*/570static void571crocus_load_register_mem32(struct crocus_batch *batch, uint32_t reg,572struct crocus_bo *bo, uint32_t offset)573{574crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {575lrm.RegisterAddress = reg;576lrm.MemoryAddress = ro_bo(bo, offset);577}578}579580/**581* Load a 64-bit value from a buffer into a MMIO register via582* two MI_LOAD_REGISTER_MEM commands.583*/584static void585crocus_load_register_mem64(struct crocus_batch *batch, uint32_t reg,586struct crocus_bo *bo, uint32_t offset)587{588crocus_load_register_mem32(batch, reg + 0, bo, offset + 0);589crocus_load_register_mem32(batch, reg + 4, bo, offset + 4);590}591592#if GFX_VERx10 >= 75593static void594crocus_store_data_imm32(struct crocus_batch *batch,595struct crocus_bo *bo, uint32_t offset,596uint32_t imm)597{598crocus_emit_cmd(batch, GENX(MI_STORE_DATA_IMM), sdi) {599sdi.Address = rw_bo(bo, offset);600#if GFX_VER >= 6601sdi.ImmediateData = imm;602#endif603}604}605606static void607crocus_store_data_imm64(struct crocus_batch *batch,608struct crocus_bo *bo, uint32_t offset,609uint64_t imm)610{611/* Can't use crocus_emit_cmd because MI_STORE_DATA_IMM 
has a length of612* 2 in genxml but it's actually variable length and we need 5 DWords.613*/614void *map = crocus_get_command_space(batch, 4 * 5);615_crocus_pack_command(batch, GENX(MI_STORE_DATA_IMM), map, sdi) {616sdi.DWordLength = 5 - 2;617sdi.Address = rw_bo(bo, offset);618#if GFX_VER >= 6619sdi.ImmediateData = imm;620#endif621}622}623#endif624625static void626crocus_copy_mem_mem(struct crocus_batch *batch,627struct crocus_bo *dst_bo, uint32_t dst_offset,628struct crocus_bo *src_bo, uint32_t src_offset,629unsigned bytes)630{631assert(bytes % 4 == 0);632assert(dst_offset % 4 == 0);633assert(src_offset % 4 == 0);634635#define CROCUS_TEMP_REG 0x2440 /* GEN7_3DPRIM_BASE_VERTEX */636for (unsigned i = 0; i < bytes; i += 4) {637crocus_load_register_mem32(batch, CROCUS_TEMP_REG,638src_bo, src_offset + i);639crocus_store_register_mem32(batch, CROCUS_TEMP_REG,640dst_bo, dst_offset + i, false);641}642}643#endif644645/**646* Gallium CSO for rasterizer state.647*/648struct crocus_rasterizer_state {649struct pipe_rasterizer_state cso;650#if GFX_VER >= 6651uint32_t sf[GENX(3DSTATE_SF_length)];652uint32_t clip[GENX(3DSTATE_CLIP_length)];653#endif654#if GFX_VER >= 8655uint32_t raster[GENX(3DSTATE_RASTER_length)];656#endif657uint32_t line_stipple[GENX(3DSTATE_LINE_STIPPLE_length)];658659uint8_t num_clip_plane_consts;660bool fill_mode_point_or_line;661};662663#if GFX_VER <= 5664#define URB_VS 0665#define URB_GS 1666#define URB_CLP 2667#define URB_SF 3668#define URB_CS 4669670static const struct {671uint32_t min_nr_entries;672uint32_t preferred_nr_entries;673uint32_t min_entry_size;674uint32_t max_entry_size;675} limits[URB_CS+1] = {676{ 16, 32, 1, 5 }, /* vs */677{ 4, 8, 1, 5 }, /* gs */678{ 5, 10, 1, 5 }, /* clp */679{ 1, 8, 1, 12 }, /* sf */680{ 1, 4, 1, 32 } /* cs */681};682683static bool check_urb_layout(struct crocus_context *ice)684{685ice->urb.vs_start = 0;686ice->urb.gs_start = ice->urb.nr_vs_entries * ice->urb.vsize;687ice->urb.clip_start = ice->urb.gs_start + 
ice->urb.nr_gs_entries * ice->urb.vsize;688ice->urb.sf_start = ice->urb.clip_start + ice->urb.nr_clip_entries * ice->urb.vsize;689ice->urb.cs_start = ice->urb.sf_start + ice->urb.nr_sf_entries * ice->urb.sfsize;690691return ice->urb.cs_start + ice->urb.nr_cs_entries *692ice->urb.csize <= ice->urb.size;693}694695696static bool697crocus_calculate_urb_fence(struct crocus_batch *batch, unsigned csize,698unsigned vsize, unsigned sfsize)699{700const struct intel_device_info *devinfo = &batch->screen->devinfo;701struct crocus_context *ice = batch->ice;702if (csize < limits[URB_CS].min_entry_size)703csize = limits[URB_CS].min_entry_size;704705if (vsize < limits[URB_VS].min_entry_size)706vsize = limits[URB_VS].min_entry_size;707708if (sfsize < limits[URB_SF].min_entry_size)709sfsize = limits[URB_SF].min_entry_size;710711if (ice->urb.vsize < vsize ||712ice->urb.sfsize < sfsize ||713ice->urb.csize < csize ||714(ice->urb.constrained && (ice->urb.vsize > vsize ||715ice->urb.sfsize > sfsize ||716ice->urb.csize > csize))) {717718719ice->urb.csize = csize;720ice->urb.sfsize = sfsize;721ice->urb.vsize = vsize;722723ice->urb.nr_vs_entries = limits[URB_VS].preferred_nr_entries;724ice->urb.nr_gs_entries = limits[URB_GS].preferred_nr_entries;725ice->urb.nr_clip_entries = limits[URB_CLP].preferred_nr_entries;726ice->urb.nr_sf_entries = limits[URB_SF].preferred_nr_entries;727ice->urb.nr_cs_entries = limits[URB_CS].preferred_nr_entries;728729ice->urb.constrained = 0;730731if (devinfo->ver == 5) {732ice->urb.nr_vs_entries = 128;733ice->urb.nr_sf_entries = 48;734if (check_urb_layout(ice)) {735goto done;736} else {737ice->urb.constrained = 1;738ice->urb.nr_vs_entries = limits[URB_VS].preferred_nr_entries;739ice->urb.nr_sf_entries = limits[URB_SF].preferred_nr_entries;740}741} else if (devinfo->is_g4x) {742ice->urb.nr_vs_entries = 64;743if (check_urb_layout(ice)) {744goto done;745} else {746ice->urb.constrained = 1;747ice->urb.nr_vs_entries = 
limits[URB_VS].preferred_nr_entries;748}749}750751if (!check_urb_layout(ice)) {752ice->urb.nr_vs_entries = limits[URB_VS].min_nr_entries;753ice->urb.nr_gs_entries = limits[URB_GS].min_nr_entries;754ice->urb.nr_clip_entries = limits[URB_CLP].min_nr_entries;755ice->urb.nr_sf_entries = limits[URB_SF].min_nr_entries;756ice->urb.nr_cs_entries = limits[URB_CS].min_nr_entries;757758/* Mark us as operating with constrained nr_entries, so that next759* time we recalculate we'll resize the fences in the hope of760* escaping constrained mode and getting back to normal performance.761*/762ice->urb.constrained = 1;763764if (!check_urb_layout(ice)) {765/* This is impossible, given the maximal sizes of urb766* entries and the values for minimum nr of entries767* provided above.768*/769fprintf(stderr, "couldn't calculate URB layout!\n");770exit(1);771}772773if (unlikely(INTEL_DEBUG & (DEBUG_URB|DEBUG_PERF)))774fprintf(stderr, "URB CONSTRAINED\n");775}776777done:778if (unlikely(INTEL_DEBUG & DEBUG_URB))779fprintf(stderr,780"URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. 
%d\n",781ice->urb.vs_start,782ice->urb.gs_start,783ice->urb.clip_start,784ice->urb.sf_start,785ice->urb.cs_start,786ice->urb.size);787return true;788}789return false;790}791792static void793crocus_upload_urb_fence(struct crocus_batch *batch)794{795uint32_t urb_fence[3];796_crocus_pack_command(batch, GENX(URB_FENCE), urb_fence, urb) {797urb.VSUnitURBReallocationRequest = 1;798urb.GSUnitURBReallocationRequest = 1;799urb.CLIPUnitURBReallocationRequest = 1;800urb.SFUnitURBReallocationRequest = 1;801urb.VFEUnitURBReallocationRequest = 1;802urb.CSUnitURBReallocationRequest = 1;803804urb.VSFence = batch->ice->urb.gs_start;805urb.GSFence = batch->ice->urb.clip_start;806urb.CLIPFence = batch->ice->urb.sf_start;807urb.SFFence = batch->ice->urb.cs_start;808urb.CSFence = batch->ice->urb.size;809}810811/* erratum: URB_FENCE must not cross a 64byte cacheline */812if ((crocus_batch_bytes_used(batch) & 15) > 12) {813int pad = 16 - (crocus_batch_bytes_used(batch) & 15);814do {815*(uint32_t *)batch->command.map_next = 0;816batch->command.map_next += sizeof(uint32_t);817} while (--pad);818}819820crocus_batch_emit(batch, urb_fence, sizeof(uint32_t) * 3);821}822823static bool824calculate_curbe_offsets(struct crocus_batch *batch)825{826struct crocus_context *ice = batch->ice;827828unsigned nr_fp_regs, nr_vp_regs, nr_clip_regs = 0;829unsigned total_regs;830831nr_fp_regs = 0;832for (int i = 0; i < 4; i++) {833const struct brw_ubo_range *range = &ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data->ubo_ranges[i];834if (range->length == 0)835continue;836837/* ubo range tracks at 256-bit, we need 512-bit */838nr_fp_regs += (range->length + 1) / 2;839}840841if (ice->state.cso_rast->cso.clip_plane_enable) {842unsigned nr_planes = 6 + util_bitcount(ice->state.cso_rast->cso.clip_plane_enable);843nr_clip_regs = (nr_planes * 4 + 15) / 16;844}845846nr_vp_regs = 0;847for (int i = 0; i < 4; i++) {848const struct brw_ubo_range *range = 
&ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data->ubo_ranges[i];849if (range->length == 0)850continue;851852/* ubo range tracks at 256-bit, we need 512-bit */853nr_vp_regs += (range->length + 1) / 2;854}855if (nr_vp_regs == 0) {856/* The pre-gen6 VS requires that some push constants get loaded no857* matter what, or the GPU would hang.858*/859nr_vp_regs = 1;860}861total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;862863/* The CURBE allocation size is limited to 32 512-bit units (128 EU864* registers, or 1024 floats). See CS_URB_STATE in the gen4 or gen5865* (volume 1, part 1) PRMs.866*867* Note that in brw_fs.cpp we're only loading up to 16 EU registers of868* values as push constants before spilling to pull constants, and in869* brw_vec4.cpp we're loading up to 32 registers of push constants. An EU870* register is 1/2 of one of these URB entry units, so that leaves us 16 EU871* regs for clip.872*/873assert(total_regs <= 32);874875/* Lazy resize:876*/877if (nr_fp_regs > ice->curbe.wm_size ||878nr_vp_regs > ice->curbe.vs_size ||879nr_clip_regs != ice->curbe.clip_size ||880(total_regs < ice->curbe.total_size / 4 &&881ice->curbe.total_size > 16)) {882883GLuint reg = 0;884885/* Calculate a new layout:886*/887reg = 0;888ice->curbe.wm_start = reg;889ice->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;890ice->curbe.clip_start = reg;891ice->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;892ice->curbe.vs_start = reg;893ice->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;894ice->curbe.total_size = reg;895896if (0)897fprintf(stderr, "curbe wm %d+%d clip %d+%d vs %d+%d\n",898ice->curbe.wm_start,899ice->curbe.wm_size,900ice->curbe.clip_start,901ice->curbe.clip_size,902ice->curbe.vs_start,903ice->curbe.vs_size );904return true;905}906return false;907}908909static void910upload_shader_consts(struct crocus_context *ice,911gl_shader_stage stage,912uint32_t *map,913unsigned start)914{915struct crocus_compiled_shader *shader = ice->shaders.prog[stage];916struct 
brw_stage_prog_data *prog_data = (void *) shader->prog_data;917uint32_t *cmap;918bool found = false;919unsigned offset = start * 16;920int total = 0;921for (int i = 0; i < 4; i++) {922const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];923924if (range->length == 0)925continue;926927unsigned block_index = crocus_bti_to_group_index(928&shader->bt, CROCUS_SURFACE_GROUP_UBO, range->block);929unsigned len = range->length * 8 * sizeof(float);930unsigned start = range->start * 8 * sizeof(float);931struct pipe_transfer *transfer;932933cmap = pipe_buffer_map_range(&ice->ctx, ice->state.shaders[stage].constbufs[block_index].buffer,934ice->state.shaders[stage].constbufs[block_index].buffer_offset + start, len,935PIPE_MAP_READ | PIPE_MAP_UNSYNCHRONIZED, &transfer);936if (cmap)937memcpy(&map[offset + (total * 8)], cmap, len);938pipe_buffer_unmap(&ice->ctx, transfer);939total += range->length;940found = true;941}942943if (stage == MESA_SHADER_VERTEX && !found) {944/* The pre-gen6 VS requires that some push constants get loaded no945* matter what, or the GPU would hang.946*/947unsigned len = 16;948memset(&map[offset], 0, len);949}950}951952static const float fixed_plane[6][4] = {953{ 0, 0, -1, 1 },954{ 0, 0, 1, 1 },955{ 0, -1, 0, 1 },956{ 0, 1, 0, 1 },957{-1, 0, 0, 1 },958{ 1, 0, 0, 1 }959};960961static void962gen4_upload_curbe(struct crocus_batch *batch)963{964struct crocus_context *ice = batch->ice;965const unsigned sz = ice->curbe.total_size;966const unsigned buf_sz = sz * 16 * sizeof(float);967968if (sz == 0)969goto emit;970971uint32_t *map;972u_upload_alloc(ice->ctx.const_uploader, 0, buf_sz, 64,973&ice->curbe.curbe_offset, (struct pipe_resource **)&ice->curbe.curbe_res, (void **) &map);974975/* fragment shader constants */976if (ice->curbe.wm_size) {977upload_shader_consts(ice, MESA_SHADER_FRAGMENT, map, ice->curbe.wm_start);978}979980/* clipper constants */981if (ice->curbe.clip_size) {982unsigned offset = ice->curbe.clip_start * 16;983float *fmap = (float 
*)map;984unsigned i;985/* If any planes are going this way, send them all this way:986*/987for (i = 0; i < 6; i++) {988fmap[offset + i * 4 + 0] = fixed_plane[i][0];989fmap[offset + i * 4 + 1] = fixed_plane[i][1];990fmap[offset + i * 4 + 2] = fixed_plane[i][2];991fmap[offset + i * 4 + 3] = fixed_plane[i][3];992}993994unsigned mask = ice->state.cso_rast->cso.clip_plane_enable;995struct pipe_clip_state *cp = &ice->state.clip_planes;996while (mask) {997const int j = u_bit_scan(&mask);998fmap[offset + i * 4 + 0] = cp->ucp[j][0];999fmap[offset + i * 4 + 1] = cp->ucp[j][1];1000fmap[offset + i * 4 + 2] = cp->ucp[j][2];1001fmap[offset + i * 4 + 3] = cp->ucp[j][3];1002i++;1003}1004}10051006/* vertex shader constants */1007if (ice->curbe.vs_size) {1008upload_shader_consts(ice, MESA_SHADER_VERTEX, map, ice->curbe.vs_start);1009}1010if (0) {1011for (int i = 0; i < sz*16; i+=4) {1012float *f = (float *)map;1013fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4,1014f[i+0], f[i+1], f[i+2], f[i+3]);1015}1016}10171018emit:1019crocus_emit_cmd(batch, GENX(CONSTANT_BUFFER), cb) {1020if (ice->curbe.curbe_res) {1021cb.BufferLength = ice->curbe.total_size - 1;1022cb.Valid = 1;1023cb.BufferStartingAddress = ro_bo(ice->curbe.curbe_res->bo, ice->curbe.curbe_offset);1024}1025}10261027#if GFX_VER == 4 && GFX_VERx10 != 451028/* Work around a Broadwater/Crestline depth interpolator bug. The1029* following sequence will cause GPU hangs:1030*1031* 1. Change state so that all depth related fields in CC_STATE are1032* disabled, and in WM_STATE, only "PS Use Source Depth" is enabled.1033* 2. Emit a CONSTANT_BUFFER packet.1034* 3. Draw via 3DPRIMITIVE.1035*1036* The recommended workaround is to emit a non-pipelined state change after1037* emitting CONSTANT_BUFFER, in order to drain the windowizer pipeline.1038*1039* We arbitrarily choose 3DSTATE_GLOBAL_DEPTH_CLAMP_OFFSET (as it's small),1040* and always emit it when "PS Use Source Depth" is set. 
We could be more1041* precise, but the additional complexity is probably not worth it.1042*1043*/1044const struct shader_info *fs_info =1045crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT);10461047if (BITSET_TEST(fs_info->system_values_read, SYSTEM_VALUE_FRAG_COORD)) {1048ice->state.global_depth_offset_clamp = 0;1049crocus_emit_cmd(batch, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp);1050}1051#endif1052}1053#endif10541055#if GFX_VER >= 710561057#define IVB_L3SQCREG1_SQGHPCI_DEFAULT 0x007300001058#define VLV_L3SQCREG1_SQGHPCI_DEFAULT 0x00d300001059#define HSW_L3SQCREG1_SQGHPCI_DEFAULT 0x0061000010601061static void1062setup_l3_config(struct crocus_batch *batch, const struct intel_l3_config *cfg)1063{1064#if GFX_VER == 71065const struct intel_device_info *devinfo = &batch->screen->devinfo;1066const bool has_dc = cfg->n[INTEL_L3P_DC] || cfg->n[INTEL_L3P_ALL];1067const bool has_is = cfg->n[INTEL_L3P_IS] || cfg->n[INTEL_L3P_RO] ||1068cfg->n[INTEL_L3P_ALL];1069const bool has_c = cfg->n[INTEL_L3P_C] || cfg->n[INTEL_L3P_RO] ||1070cfg->n[INTEL_L3P_ALL];1071const bool has_t = cfg->n[INTEL_L3P_T] || cfg->n[INTEL_L3P_RO] ||1072cfg->n[INTEL_L3P_ALL];1073const bool has_slm = cfg->n[INTEL_L3P_SLM];1074#endif10751076/* According to the hardware docs, the L3 partitioning can only be changed1077* while the pipeline is completely drained and the caches are flushed,1078* which involves a first PIPE_CONTROL flush which stalls the pipeline...1079*/1080crocus_emit_pipe_control_flush(batch, "l3_config",1081PIPE_CONTROL_DATA_CACHE_FLUSH |1082PIPE_CONTROL_CS_STALL);10831084/* ...followed by a second pipelined PIPE_CONTROL that initiates1085* invalidation of the relevant caches. Note that because RO invalidation1086* happens at the top of the pipeline (i.e. 
right away as the PIPE_CONTROL1087* command is processed by the CS) we cannot combine it with the previous1088* stalling flush as the hardware documentation suggests, because that1089* would cause the CS to stall on previous rendering *after* RO1090* invalidation and wouldn't prevent the RO caches from being polluted by1091* concurrent rendering before the stall completes. This intentionally1092* doesn't implement the SKL+ hardware workaround suggesting to enable CS1093* stall on PIPE_CONTROLs with the texture cache invalidation bit set for1094* GPGPU workloads because the previous and subsequent PIPE_CONTROLs1095* already guarantee that there is no concurrent GPGPU kernel execution1096* (see SKL HSD 2132585).1097*/1098crocus_emit_pipe_control_flush(batch, "l3 config",1099PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |1100PIPE_CONTROL_CONST_CACHE_INVALIDATE |1101PIPE_CONTROL_INSTRUCTION_INVALIDATE |1102PIPE_CONTROL_STATE_CACHE_INVALIDATE);11031104/* Now send a third stalling flush to make sure that invalidation is1105* complete when the L3 configuration registers are modified.1106*/1107crocus_emit_pipe_control_flush(batch, "l3 config",1108PIPE_CONTROL_DATA_CACHE_FLUSH |1109PIPE_CONTROL_CS_STALL);11101111#if GFX_VER == 81112assert(!cfg->n[INTEL_L3P_IS] && !cfg->n[INTEL_L3P_C] && !cfg->n[INTEL_L3P_T]);1113crocus_emit_reg(batch, GENX(L3CNTLREG), reg) {1114reg.SLMEnable = cfg->n[INTEL_L3P_SLM] > 0;1115reg.URBAllocation = cfg->n[INTEL_L3P_URB];1116reg.ROAllocation = cfg->n[INTEL_L3P_RO];1117reg.DCAllocation = cfg->n[INTEL_L3P_DC];1118reg.AllAllocation = cfg->n[INTEL_L3P_ALL];1119}1120#else1121assert(!cfg->n[INTEL_L3P_ALL]);11221123/* When enabled SLM only uses a portion of the L3 on half of the banks,1124* the matching space on the remaining banks has to be allocated to a1125* client (URB for all validated configurations) set to the1126* lower-bandwidth 2-bank address hashing mode.1127*/1128const bool urb_low_bw = has_slm && !devinfo->is_baytrail;1129assert(!urb_low_bw || 
cfg->n[INTEL_L3P_URB] == cfg->n[INTEL_L3P_SLM]);11301131/* Minimum number of ways that can be allocated to the URB. */1132const unsigned n0_urb = (devinfo->is_baytrail ? 32 : 0);1133assert(cfg->n[INTEL_L3P_URB] >= n0_urb);11341135uint32_t l3sqcr1, l3cr2, l3cr3;11361137crocus_pack_state(GENX(L3SQCREG1), &l3sqcr1, reg) {1138reg.ConvertDC_UC = !has_dc;1139reg.ConvertIS_UC = !has_is;1140reg.ConvertC_UC = !has_c;1141reg.ConvertT_UC = !has_t;1142#if GFX_VERx10 == 751143reg.L3SQGeneralPriorityCreditInitialization = SQGPCI_DEFAULT;1144#else1145reg.L3SQGeneralPriorityCreditInitialization =1146devinfo->is_baytrail ? BYT_SQGPCI_DEFAULT : SQGPCI_DEFAULT;1147#endif1148reg.L3SQHighPriorityCreditInitialization = SQHPCI_DEFAULT;1149};11501151crocus_pack_state(GENX(L3CNTLREG2), &l3cr2, reg) {1152reg.SLMEnable = has_slm;1153reg.URBLowBandwidth = urb_low_bw;1154reg.URBAllocation = cfg->n[INTEL_L3P_URB] - n0_urb;1155#if !(GFX_VERx10 == 75)1156reg.ALLAllocation = cfg->n[INTEL_L3P_ALL];1157#endif1158reg.ROAllocation = cfg->n[INTEL_L3P_RO];1159reg.DCAllocation = cfg->n[INTEL_L3P_DC];1160};11611162crocus_pack_state(GENX(L3CNTLREG3), &l3cr3, reg) {1163reg.ISAllocation = cfg->n[INTEL_L3P_IS];1164reg.ISLowBandwidth = 0;1165reg.CAllocation = cfg->n[INTEL_L3P_C];1166reg.CLowBandwidth = 0;1167reg.TAllocation = cfg->n[INTEL_L3P_T];1168reg.TLowBandwidth = 0;1169};11701171/* Set up the L3 partitioning. 
*/1172crocus_emit_lri(batch, L3SQCREG1, l3sqcr1);1173crocus_emit_lri(batch, L3CNTLREG2, l3cr2);1174crocus_emit_lri(batch, L3CNTLREG3, l3cr3);11751176#if GFX_VERSIONx10 == 751177/* TODO: Fail screen creation if command parser version < 4 */1178uint32_t scratch1, chicken3;1179crocus_pack_state(GENX(SCRATCH1), &scratch1, reg) {1180reg.L3AtomicDisable = !has_dc;1181}1182crocus_pack_state(GENX(CHICKEN3), &chicken3, reg) {1183reg.L3AtomicDisableMask = true;1184reg.L3AtomicDisable = !has_dc;1185}1186crocus_emit_lri(batch, SCRATCH1, scratch1);1187crocus_emit_lri(batch, CHICKEN3, chicken3);1188#endif1189#endif1190}11911192static void1193emit_l3_state(struct crocus_batch *batch, bool compute)1194{1195const struct intel_l3_config *const cfg =1196compute ? batch->screen->l3_config_cs : batch->screen->l3_config_3d;11971198setup_l3_config(batch, cfg);1199if (unlikely(INTEL_DEBUG & DEBUG_L3)) {1200intel_dump_l3_config(cfg, stderr);1201}1202}12031204/**1205* Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set.1206*/1207static void1208gen7_emit_cs_stall_flush(struct crocus_batch *batch)1209{1210crocus_emit_pipe_control_write(batch,1211"workaround",1212PIPE_CONTROL_CS_STALL1213| PIPE_CONTROL_WRITE_IMMEDIATE,1214batch->ice->workaround_bo,1215batch->ice->workaround_offset, 0);1216}1217#endif12181219static void1220emit_pipeline_select(struct crocus_batch *batch, uint32_t pipeline)1221{1222#if GFX_VER == 81223/* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT:1224*1225* Software must clear the COLOR_CALC_STATE Valid field in1226* 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT1227* with Pipeline Select set to GPGPU.1228*1229* The internal hardware docs recommend the same workaround for Gfx91230* hardware too.1231*/1232if (pipeline == GPGPU)1233crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), t);1234#endif12351236#if GFX_VER >= 61237/* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]1238* PIPELINE_SELECT [DevBWR+]":1239*1240* 
"Project: DEVSNB+1241*1242* Software must ensure all the write caches are flushed through a1243* stalling PIPE_CONTROL command followed by another PIPE_CONTROL1244* command to invalidate read only caches prior to programming1245* MI_PIPELINE_SELECT command to change the Pipeline Select Mode."1246*/1247const unsigned dc_flush =1248batch->screen->devinfo.ver >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;1249crocus_emit_pipe_control_flush(batch,1250"workaround: PIPELINE_SELECT flushes (1/2)",1251PIPE_CONTROL_RENDER_TARGET_FLUSH |1252PIPE_CONTROL_DEPTH_CACHE_FLUSH |1253dc_flush |1254PIPE_CONTROL_CS_STALL);12551256crocus_emit_pipe_control_flush(batch,1257"workaround: PIPELINE_SELECT flushes (2/2)",1258PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |1259PIPE_CONTROL_CONST_CACHE_INVALIDATE |1260PIPE_CONTROL_STATE_CACHE_INVALIDATE |1261PIPE_CONTROL_INSTRUCTION_INVALIDATE);1262#else1263/* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]1264* PIPELINE_SELECT [DevBWR+]":1265*1266* Project: PRE-DEVSNB1267*1268* Software must ensure the current pipeline is flushed via an1269* MI_FLUSH or PIPE_CONTROL prior to the execution of PIPELINE_SELECT.1270*/1271crocus_emit_cmd(batch, GENX(MI_FLUSH), foo);1272#endif12731274crocus_emit_cmd(batch, GENX(PIPELINE_SELECT), sel) {1275sel.PipelineSelection = pipeline;1276}12771278#if GFX_VER == 7 && !(GFX_VERx10 == 75)1279if (pipeline == _3D) {1280gen7_emit_cs_stall_flush(batch);12811282crocus_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {1283prim.PrimitiveTopologyType = _3DPRIM_POINTLIST;1284};1285}1286#endif1287}12881289/**1290* The following diagram shows how we partition the URB:1291*1292* 16kB or 32kB Rest of the URB space1293* __________-__________ _________________-_________________1294* / \ / \1295* +-------------------------------------------------------------+1296* | VS/HS/DS/GS/FS Push | VS/HS/DS/GS URB |1297* | Constants | Entries |1298* +-------------------------------------------------------------+1299*1300* Notably, push constants must be 
stored at the beginning of the URB1301* space, while entries can be stored anywhere. Ivybridge and Haswell1302* GT1/GT2 have a maximum constant buffer size of 16kB, while Haswell GT31303* doubles this (32kB).1304*1305* Ivybridge and Haswell GT1/GT2 allow push constants to be located (and1306* sized) in increments of 1kB. Haswell GT3 requires them to be located and1307* sized in increments of 2kB.1308*1309* Currently we split the constant buffer space evenly among whatever stages1310* are active. This is probably not ideal, but simple.1311*1312* Ivybridge GT1 and Haswell GT1 have 128kB of URB space.1313* Ivybridge GT2 and Haswell GT2 have 256kB of URB space.1314* Haswell GT3 has 512kB of URB space.1315*1316* See "Volume 2a: 3D Pipeline," section 1.8, "Volume 1b: Configurations",1317* and the documentation for 3DSTATE_PUSH_CONSTANT_ALLOC_xS.1318*/1319#if GFX_VER >= 71320static void1321crocus_alloc_push_constants(struct crocus_batch *batch)1322{1323#if GFX_VERx10 == 751324const unsigned push_constant_kb = batch->screen->devinfo.gt == 3 ? 32 : 16;1325#elif GFX_VER == 81326const unsigned push_constant_kb = 32;1327#else1328const unsigned push_constant_kb = 16;1329#endif1330unsigned size_per_stage = push_constant_kb / 5;13311332/* For now, we set a static partitioning of the push constant area,1333* assuming that all stages could be in use.1334*1335* TODO: Try lazily allocating the HS/DS/GS sections as needed, and1336* see if that improves performance by offering more space to1337* the VS/FS when those aren't in use. Also, try dynamically1338* enabling/disabling it like i965 does. This would be more1339* stalls and may not actually help; we don't know yet.1340*/1341for (int i = 0; i <= MESA_SHADER_FRAGMENT; i++) {1342crocus_emit_cmd(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc) {1343alloc._3DCommandSubOpcode = 18 + i;1344alloc.ConstantBufferOffset = size_per_stage * i;1345alloc.ConstantBufferSize = i == MESA_SHADER_FRAGMENT ? 
(push_constant_kb - 4 * size_per_stage) : size_per_stage;1346}1347}13481349/* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS):1350*1351* A PIPE_CONTROL command with the CS Stall bit set must be programmed1352* in the ring after this instruction.1353*1354* No such restriction exists for Haswell or Baytrail.1355*/1356if (!(GFX_VERx10 == 75) && !batch->screen->devinfo.is_baytrail)1357gen7_emit_cs_stall_flush(batch);1358}1359#endif13601361/**1362* Upload the initial GPU state for a render context.1363*1364* This sets some invariant state that needs to be programmed a particular1365* way, but we never actually change.1366*/1367static void1368crocus_init_render_context(struct crocus_batch *batch)1369{1370UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo;13711372emit_pipeline_select(batch, _3D);13731374crocus_emit_cmd(batch, GENX(STATE_SIP), foo);13751376#if GFX_VER >= 71377emit_l3_state(batch, false);1378#endif1379#if (GFX_VERx10 == 70 || GFX_VERx10 == 80)1380crocus_emit_reg(batch, GENX(INSTPM), reg) {1381reg.CONSTANT_BUFFERAddressOffsetDisable = true;1382reg.CONSTANT_BUFFERAddressOffsetDisableMask = true;1383}1384#endif1385#if GFX_VER >= 5 || GFX_VERx10 == 451386/* Use the legacy AA line coverage computation. */1387crocus_emit_cmd(batch, GENX(3DSTATE_AA_LINE_PARAMETERS), foo);1388#endif13891390/* No polygon stippling offsets are necessary. */1391/* TODO: may need to set an offset for origin-UL framebuffers */1392crocus_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_OFFSET), foo);13931394#if GFX_VER >= 71395crocus_alloc_push_constants(batch);1396#endif13971398#if GFX_VER == 81399/* Set the initial MSAA sample positions. 
*/1400crocus_emit_cmd(batch, GENX(3DSTATE_SAMPLE_PATTERN), pat) {1401INTEL_SAMPLE_POS_1X(pat._1xSample);1402INTEL_SAMPLE_POS_2X(pat._2xSample);1403INTEL_SAMPLE_POS_4X(pat._4xSample);1404INTEL_SAMPLE_POS_8X(pat._8xSample);1405}14061407/* Disable chromakeying (it's for media) */1408crocus_emit_cmd(batch, GENX(3DSTATE_WM_CHROMAKEY), foo);14091410/* We want regular rendering, not special HiZ operations. */1411crocus_emit_cmd(batch, GENX(3DSTATE_WM_HZ_OP), foo);1412#endif1413}14141415#if GFX_VER >= 71416static void1417crocus_init_compute_context(struct crocus_batch *batch)1418{1419UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo;14201421emit_pipeline_select(batch, GPGPU);14221423#if GFX_VER >= 71424emit_l3_state(batch, true);1425#endif1426}1427#endif14281429/**1430* Generation-specific context state (ice->state.genx->...).1431*1432* Most state can go in crocus_context directly, but these encode hardware1433* packets which vary by generation.1434*/1435struct crocus_genx_state {1436struct {1437#if GFX_VER >= 71438struct brw_image_param image_param[PIPE_MAX_SHADER_IMAGES];1439#endif1440} shaders[MESA_SHADER_STAGES];14411442#if GFX_VER == 81443bool pma_fix_enabled;1444#endif1445};14461447/**1448* The pipe->set_blend_color() driver hook.1449*1450* This corresponds to our COLOR_CALC_STATE.1451*/1452static void1453crocus_set_blend_color(struct pipe_context *ctx,1454const struct pipe_blend_color *state)1455{1456struct crocus_context *ice = (struct crocus_context *) ctx;14571458/* Our COLOR_CALC_STATE is exactly pipe_blend_color, so just memcpy */1459memcpy(&ice->state.blend_color, state, sizeof(struct pipe_blend_color));1460#if GFX_VER <= 51461ice->state.dirty |= CROCUS_DIRTY_GEN4_CONSTANT_COLOR;1462#else1463ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE;1464#endif1465}14661467/**1468* Gallium CSO for blend state (see pipe_blend_state).1469*/1470struct crocus_blend_state {1471#if GFX_VER == 81472/** Partial 3DSTATE_PS_BLEND */1473uint32_t 
ps_blend[GENX(3DSTATE_PS_BLEND_length)];1474#endif14751476/** copy of BLEND_STATE */1477struct pipe_blend_state cso;14781479/** Bitfield of whether blending is enabled for RT[i] - for aux resolves */1480uint8_t blend_enables;14811482/** Bitfield of whether color writes are enabled for RT[i] */1483uint8_t color_write_enables;14841485/** Does RT[0] use dual color blending? */1486bool dual_color_blending;1487};14881489static enum pipe_blendfactor1490fix_blendfactor(enum pipe_blendfactor f, bool alpha_to_one)1491{1492if (alpha_to_one) {1493if (f == PIPE_BLENDFACTOR_SRC1_ALPHA)1494return PIPE_BLENDFACTOR_ONE;14951496if (f == PIPE_BLENDFACTOR_INV_SRC1_ALPHA)1497return PIPE_BLENDFACTOR_ZERO;1498}14991500return f;1501}15021503#if GFX_VER >= 61504typedef struct GENX(BLEND_STATE_ENTRY) BLEND_ENTRY_GENXML;1505#else1506typedef struct GENX(COLOR_CALC_STATE) BLEND_ENTRY_GENXML;1507#endif15081509static bool1510can_emit_logic_op(struct crocus_context *ice)1511{1512/* all pre gen8 have logicop restricted to unorm */1513enum pipe_format pformat = PIPE_FORMAT_NONE;1514for (unsigned i = 0; i < ice->state.framebuffer.nr_cbufs; i++) {1515if (ice->state.framebuffer.cbufs[i]) {1516pformat = ice->state.framebuffer.cbufs[i]->format;1517break;1518}1519}1520return (pformat == PIPE_FORMAT_NONE || util_format_is_unorm(pformat));1521}15221523static bool1524set_blend_entry_bits(struct crocus_batch *batch, BLEND_ENTRY_GENXML *entry,1525struct crocus_blend_state *cso_blend,1526int idx)1527{1528struct crocus_context *ice = batch->ice;1529bool independent_alpha_blend = false;1530const struct pipe_rt_blend_state *rt =1531&cso_blend->cso.rt[cso_blend->cso.independent_blend_enable ? 
idx : 0];1532const unsigned blend_enabled = rt->blend_enable;15331534enum pipe_blendfactor src_rgb =1535fix_blendfactor(rt->rgb_src_factor, cso_blend->cso.alpha_to_one);1536enum pipe_blendfactor src_alpha =1537fix_blendfactor(rt->alpha_src_factor, cso_blend->cso.alpha_to_one);1538enum pipe_blendfactor dst_rgb =1539fix_blendfactor(rt->rgb_dst_factor, cso_blend->cso.alpha_to_one);1540enum pipe_blendfactor dst_alpha =1541fix_blendfactor(rt->alpha_dst_factor, cso_blend->cso.alpha_to_one);15421543if (rt->rgb_func != rt->alpha_func ||1544src_rgb != src_alpha || dst_rgb != dst_alpha)1545independent_alpha_blend = true;1546if (cso_blend->cso.logicop_enable) {1547if (GFX_VER >= 8 || can_emit_logic_op(ice)) {1548entry->LogicOpEnable = cso_blend->cso.logicop_enable;1549entry->LogicOpFunction = cso_blend->cso.logicop_func;1550}1551} else if (blend_enabled) {1552if (idx == 0) {1553struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT];1554struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;1555entry->ColorBufferBlendEnable =1556(!cso_blend->dual_color_blending || wm_prog_data->dual_src_blend);1557} else1558entry->ColorBufferBlendEnable = 1;15591560entry->ColorBlendFunction = rt->rgb_func;1561entry->AlphaBlendFunction = rt->alpha_func;1562entry->SourceBlendFactor = (int) src_rgb;1563entry->SourceAlphaBlendFactor = (int) src_alpha;1564entry->DestinationBlendFactor = (int) dst_rgb;1565entry->DestinationAlphaBlendFactor = (int) dst_alpha;1566}1567#if GFX_VER <= 51568/*1569* Gen4/GM45/ILK can't handle have ColorBufferBlendEnable == 01570* when a dual src blend shader is in use. 
Setup dummy blending.1571*/1572struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT];1573struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;1574if (idx == 0 && !blend_enabled && wm_prog_data->dual_src_blend) {1575entry->ColorBufferBlendEnable = 1;1576entry->ColorBlendFunction = PIPE_BLEND_ADD;1577entry->AlphaBlendFunction = PIPE_BLEND_ADD;1578entry->SourceBlendFactor = PIPE_BLENDFACTOR_ONE;1579entry->SourceAlphaBlendFactor = PIPE_BLENDFACTOR_ONE;1580entry->DestinationBlendFactor = PIPE_BLENDFACTOR_ZERO;1581entry->DestinationAlphaBlendFactor = PIPE_BLENDFACTOR_ZERO;1582}1583#endif1584return independent_alpha_blend;1585}15861587/**1588* The pipe->create_blend_state() driver hook.1589*1590* Translates a pipe_blend_state into crocus_blend_state.1591*/1592static void *1593crocus_create_blend_state(struct pipe_context *ctx,1594const struct pipe_blend_state *state)1595{1596struct crocus_blend_state *cso = malloc(sizeof(struct crocus_blend_state));15971598cso->blend_enables = 0;1599cso->color_write_enables = 0;1600STATIC_ASSERT(BRW_MAX_DRAW_BUFFERS <= 8);16011602cso->cso = *state;1603cso->dual_color_blending = util_blend_state_is_dual(state, 0);16041605#if GFX_VER == 81606bool indep_alpha_blend = false;1607#endif1608for (int i = 0; i < BRW_MAX_DRAW_BUFFERS; i++) {1609const struct pipe_rt_blend_state *rt =1610&state->rt[state->independent_blend_enable ? 
i : 0];1611if (rt->blend_enable)1612cso->blend_enables |= 1u << i;1613if (rt->colormask)1614cso->color_write_enables |= 1u << i;1615#if GFX_VER == 81616enum pipe_blendfactor src_rgb =1617fix_blendfactor(rt->rgb_src_factor, state->alpha_to_one);1618enum pipe_blendfactor src_alpha =1619fix_blendfactor(rt->alpha_src_factor, state->alpha_to_one);1620enum pipe_blendfactor dst_rgb =1621fix_blendfactor(rt->rgb_dst_factor, state->alpha_to_one);1622enum pipe_blendfactor dst_alpha =1623fix_blendfactor(rt->alpha_dst_factor, state->alpha_to_one);16241625if (rt->rgb_func != rt->alpha_func ||1626src_rgb != src_alpha || dst_rgb != dst_alpha)1627indep_alpha_blend = true;1628#endif1629}16301631#if GFX_VER == 81632crocus_pack_command(GENX(3DSTATE_PS_BLEND), cso->ps_blend, pb) {1633/* pb.HasWriteableRT is filled in at draw time.1634* pb.AlphaTestEnable is filled in at draw time.1635*1636* pb.ColorBufferBlendEnable is filled in at draw time so we can avoid1637* setting it when dual color blending without an appropriate shader.1638*/16391640pb.AlphaToCoverageEnable = state->alpha_to_coverage;1641pb.IndependentAlphaBlendEnable = indep_alpha_blend;16421643/* The casts prevent warnings about implicit enum type conversions. 
*/1644pb.SourceBlendFactor =1645(int) fix_blendfactor(state->rt[0].rgb_src_factor, state->alpha_to_one);1646pb.SourceAlphaBlendFactor =1647(int) fix_blendfactor(state->rt[0].alpha_src_factor, state->alpha_to_one);1648pb.DestinationBlendFactor =1649(int) fix_blendfactor(state->rt[0].rgb_dst_factor, state->alpha_to_one);1650pb.DestinationAlphaBlendFactor =1651(int) fix_blendfactor(state->rt[0].alpha_dst_factor, state->alpha_to_one);1652}1653#endif1654return cso;1655}16561657/**1658* The pipe->bind_blend_state() driver hook.1659*1660* Bind a blending CSO and flag related dirty bits.1661*/1662static void1663crocus_bind_blend_state(struct pipe_context *ctx, void *state)1664{1665struct crocus_context *ice = (struct crocus_context *) ctx;1666struct crocus_blend_state *cso = state;16671668ice->state.cso_blend = cso;1669ice->state.blend_enables = cso ? cso->blend_enables : 0;16701671ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_FS;1672ice->state.dirty |= CROCUS_DIRTY_WM;1673#if GFX_VER >= 61674ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE;1675#endif1676#if GFX_VER >= 71677ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_FS;1678#endif1679#if GFX_VER == 81680ice->state.dirty |= CROCUS_DIRTY_GEN8_PMA_FIX;1681ice->state.dirty |= CROCUS_DIRTY_GEN8_PS_BLEND;1682#endif1683ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE;1684ice->state.dirty |= CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;1685ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_BLEND];1686}16871688/**1689* Return true if the FS writes to any color outputs which are not disabled1690* via color masking.1691*/1692static bool1693has_writeable_rt(const struct crocus_blend_state *cso_blend,1694const struct shader_info *fs_info)1695{1696if (!fs_info)1697return false;16981699unsigned rt_outputs = fs_info->outputs_written >> FRAG_RESULT_DATA0;17001701if (fs_info->outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR))1702rt_outputs = (1 << BRW_MAX_DRAW_BUFFERS) - 1;17031704return 
cso_blend->color_write_enables & rt_outputs;1705}17061707/**1708* Gallium CSO for depth, stencil, and alpha testing state.1709*/1710struct crocus_depth_stencil_alpha_state {1711struct pipe_depth_stencil_alpha_state cso;17121713bool depth_writes_enabled;1714bool stencil_writes_enabled;1715};17161717/**1718* The pipe->create_depth_stencil_alpha_state() driver hook.1719*1720* We encode most of 3DSTATE_WM_DEPTH_STENCIL, and just save off the alpha1721* testing state since we need pieces of it in a variety of places.1722*/1723static void *1724crocus_create_zsa_state(struct pipe_context *ctx,1725const struct pipe_depth_stencil_alpha_state *state)1726{1727struct crocus_depth_stencil_alpha_state *cso =1728malloc(sizeof(struct crocus_depth_stencil_alpha_state));17291730bool two_sided_stencil = state->stencil[1].enabled;1731cso->cso = *state;17321733cso->depth_writes_enabled = state->depth_writemask;1734cso->stencil_writes_enabled =1735state->stencil[0].writemask != 0 ||1736(two_sided_stencil && state->stencil[1].writemask != 0);17371738/* The state tracker needs to optimize away EQUAL writes for us. 
*/1739assert(!(state->depth_func == PIPE_FUNC_EQUAL && state->depth_writemask));17401741return cso;1742}17431744/**1745* The pipe->bind_depth_stencil_alpha_state() driver hook.1746*1747* Bind a depth/stencil/alpha CSO and flag related dirty bits.1748*/1749static void1750crocus_bind_zsa_state(struct pipe_context *ctx, void *state)1751{1752struct crocus_context *ice = (struct crocus_context *) ctx;1753struct crocus_depth_stencil_alpha_state *old_cso = ice->state.cso_zsa;1754struct crocus_depth_stencil_alpha_state *new_cso = state;17551756if (new_cso) {1757if (cso_changed(cso.alpha_ref_value))1758ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE;17591760if (cso_changed(cso.alpha_enabled))1761ice->state.dirty |= CROCUS_DIRTY_WM;1762#if GFX_VER >= 61763if (cso_changed(cso.alpha_enabled))1764ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE;17651766if (cso_changed(cso.alpha_func))1767ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE;1768#endif1769#if GFX_VER == 81770if (cso_changed(cso.alpha_enabled))1771ice->state.dirty |= CROCUS_DIRTY_GEN8_PS_BLEND;1772#endif17731774if (cso_changed(depth_writes_enabled))1775ice->state.dirty |= CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;17761777ice->state.depth_writes_enabled = new_cso->depth_writes_enabled;1778ice->state.stencil_writes_enabled = new_cso->stencil_writes_enabled;17791780#if GFX_VER <= 51781ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE;1782#endif1783}17841785ice->state.cso_zsa = new_cso;1786ice->state.dirty |= CROCUS_DIRTY_CC_VIEWPORT;1787#if GFX_VER >= 61788ice->state.dirty |= CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL;1789#endif1790#if GFX_VER == 81791ice->state.dirty |= CROCUS_DIRTY_GEN8_PMA_FIX;1792#endif1793ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_DEPTH_STENCIL_ALPHA];1794}17951796#if GFX_VER == 81797static bool1798want_pma_fix(struct crocus_context *ice)1799{1800UNUSED struct crocus_screen *screen = (void *) ice->ctx.screen;1801UNUSED const struct intel_device_info *devinfo = 
&screen->devinfo;1802const struct brw_wm_prog_data *wm_prog_data = (void *)1803ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;1804const struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;1805const struct crocus_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;1806const struct crocus_blend_state *cso_blend = ice->state.cso_blend;18071808/* In very specific combinations of state, we can instruct Gfx8-9 hardware1809* to avoid stalling at the pixel mask array. The state equations are1810* documented in these places:1811*1812* - Gfx8 Depth PMA Fix: CACHE_MODE_1::NP_PMA_FIX_ENABLE1813* - Gfx9 Stencil PMA Fix: CACHE_MODE_0::STC PMA Optimization Enable1814*1815* Both equations share some common elements:1816*1817* no_hiz_op =1818* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||1819* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||1820* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||1821* 3DSTATE_WM_HZ_OP::StencilBufferClear) &&1822*1823* killpixels =1824* 3DSTATE_WM::ForceKillPix != ForceOff &&1825* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||1826* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||1827* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||1828* 3DSTATE_PS_BLEND::AlphaTestEnable ||1829* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)1830*1831* (Technically the stencil PMA treats ForceKillPix differently,1832* but I think this is a documentation oversight, and we don't1833* ever use it in this way, so it doesn't matter).1834*1835* common_pma_fix =1836* 3DSTATE_WM::ForceThreadDispatch != 1 &&1837* 3DSTATE_RASTER::ForceSampleCount == NUMRASTSAMPLES_0 &&1838* 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&1839* 3DSTATE_DEPTH_BUFFER::HIZ Enable &&1840* 3DSTATE_WM::EDSC_Mode != EDSC_PREPS &&1841* 3DSTATE_PS_EXTRA::PixelShaderValid &&1842* no_hiz_op1843*1844* These are always true:1845*1846* 3DSTATE_RASTER::ForceSampleCount == NUMRASTSAMPLES_01847* 3DSTATE_PS_EXTRA::PixelShaderValid1848*1849* Also, we never use the normal drawing path for HiZ ops; these are 
true:1850*1851* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||1852* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||1853* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||1854* 3DSTATE_WM_HZ_OP::StencilBufferClear)1855*1856* This happens sometimes:1857*1858* 3DSTATE_WM::ForceThreadDispatch != 11859*1860* However, we choose to ignore it as it either agrees with the signal1861* (dispatch was already enabled, so nothing out of the ordinary), or1862* there are no framebuffer attachments (so no depth or HiZ anyway,1863* meaning the PMA signal will already be disabled).1864*/18651866if (!cso_fb->zsbuf)1867return false;18681869struct crocus_resource *zres, *sres;1870crocus_get_depth_stencil_resources(devinfo,1871cso_fb->zsbuf->texture, &zres, &sres);18721873/* 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&1874* 3DSTATE_DEPTH_BUFFER::HIZ Enable &&1875*/1876if (!zres || !crocus_resource_level_has_hiz(zres, cso_fb->zsbuf->u.tex.level))1877return false;18781879/* 3DSTATE_WM::EDSC_Mode != EDSC_PREPS */1880if (wm_prog_data->early_fragment_tests)1881return false;18821883/* 3DSTATE_WM::ForceKillPix != ForceOff &&1884* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||1885* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||1886* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||1887* 3DSTATE_PS_BLEND::AlphaTestEnable ||1888* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)1889*/1890bool killpixels = wm_prog_data->uses_kill || wm_prog_data->uses_omask ||1891cso_blend->cso.alpha_to_coverage || cso_zsa->cso.alpha_enabled;18921893/* The Gfx8 depth PMA equation becomes:1894*1895* depth_writes =1896* 3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&1897* 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE1898*1899* stencil_writes =1900* 3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&1901* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&1902* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE1903*1904* Z_PMA_OPT =1905* common_pma_fix &&1906* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable &&1907* ((killpixels && (depth_writes || 
stencil_writes)) ||1908* 3DSTATE_PS_EXTRA::PixelShaderComputedDepthMode != PSCDEPTH_OFF)1909*1910*/1911if (!cso_zsa->cso.depth_enabled)1912return false;19131914return wm_prog_data->computed_depth_mode != PSCDEPTH_OFF ||1915(killpixels && (cso_zsa->depth_writes_enabled ||1916(sres && cso_zsa->stencil_writes_enabled)));1917}1918#endif1919void1920genX(crocus_update_pma_fix)(struct crocus_context *ice,1921struct crocus_batch *batch,1922bool enable)1923{1924#if GFX_VER == 81925struct crocus_genx_state *genx = ice->state.genx;19261927if (genx->pma_fix_enabled == enable)1928return;19291930genx->pma_fix_enabled = enable;19311932/* According to the Broadwell PIPE_CONTROL documentation, software should1933* emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set1934* prior to the LRI. If stencil buffer writes are enabled, then a Render * Cache Flush is also necessary.1935*1936* The Gfx9 docs say to use a depth stall rather than a command streamer1937* stall. However, the hardware seems to violently disagree. A full1938* command streamer stall seems to be needed in both cases.1939*/1940crocus_emit_pipe_control_flush(batch, "PMA fix change (1/2)",1941PIPE_CONTROL_CS_STALL |1942PIPE_CONTROL_DEPTH_CACHE_FLUSH |1943PIPE_CONTROL_RENDER_TARGET_FLUSH);19441945crocus_emit_reg(batch, GENX(CACHE_MODE_1), reg) {1946reg.NPPMAFixEnable = enable;1947reg.NPEarlyZFailsDisable = enable;1948reg.NPPMAFixEnableMask = true;1949reg.NPEarlyZFailsDisableMask = true;1950}19511952/* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache1953* Flush bits is often necessary. 
We do it regardless because it's easier.1954* The render cache flush is also necessary if stencil writes are enabled.1955*1956* Again, the Gfx9 docs give a different set of flushes but the Broadwell1957* flushes seem to work just as well.1958*/1959crocus_emit_pipe_control_flush(batch, "PMA fix change (1/2)",1960PIPE_CONTROL_DEPTH_STALL |1961PIPE_CONTROL_DEPTH_CACHE_FLUSH |1962PIPE_CONTROL_RENDER_TARGET_FLUSH);1963#endif1964}19651966static float1967get_line_width(const struct pipe_rasterizer_state *state)1968{1969float line_width = state->line_width;19701971/* From the OpenGL 4.4 spec:1972*1973* "The actual width of non-antialiased lines is determined by rounding1974* the supplied width to the nearest integer, then clamping it to the1975* implementation-dependent maximum non-antialiased line width."1976*/1977if (!state->multisample && !state->line_smooth)1978line_width = roundf(state->line_width);19791980if (!state->multisample && state->line_smooth && line_width < 1.5f) {1981/* For 1 pixel line thickness or less, the general anti-aliasing1982* algorithm gives up, and a garbage line is generated. 
Setting a1983* Line Width of 0.0 specifies the rasterization of the "thinnest"1984* (one-pixel-wide), non-antialiased lines.1985*1986* Lines rendered with zero Line Width are rasterized using the1987* "Grid Intersection Quantization" rules as specified by the1988* "Zero-Width (Cosmetic) Line Rasterization" section of the docs.1989*/1990line_width = 0.0f;1991}19921993return line_width;1994}19951996/**1997* The pipe->create_rasterizer_state() driver hook.1998*/1999static void *2000crocus_create_rasterizer_state(struct pipe_context *ctx,2001const struct pipe_rasterizer_state *state)2002{2003struct crocus_rasterizer_state *cso =2004malloc(sizeof(struct crocus_rasterizer_state));20052006cso->fill_mode_point_or_line =2007state->fill_front == PIPE_POLYGON_MODE_LINE ||2008state->fill_front == PIPE_POLYGON_MODE_POINT ||2009state->fill_back == PIPE_POLYGON_MODE_LINE ||2010state->fill_back == PIPE_POLYGON_MODE_POINT;20112012if (state->clip_plane_enable != 0)2013cso->num_clip_plane_consts = util_logbase2(state->clip_plane_enable) + 1;2014else2015cso->num_clip_plane_consts = 0;20162017cso->cso = *state;20182019#if GFX_VER >= 62020float line_width = get_line_width(state);20212022crocus_pack_command(GENX(3DSTATE_SF), cso->sf, sf) {2023sf.StatisticsEnable = true;2024sf.AALineDistanceMode = AALINEDISTANCE_TRUE;2025sf.LineEndCapAntialiasingRegionWidth =2026state->line_smooth ? _10pixels : _05pixels;2027sf.LastPixelEnable = state->line_last_pixel;2028#if GFX_VER == 82029struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;2030if (screen->devinfo.is_cherryview)2031sf.CHVLineWidth = line_width;2032else2033sf.LineWidth = line_width;2034#else2035sf.LineWidth = line_width;2036#endif2037sf.PointWidthSource = state->point_size_per_vertex ? 
Vertex : State;2038sf.PointWidth = state->point_size;20392040if (state->flatshade_first) {2041sf.TriangleFanProvokingVertexSelect = 1;2042} else {2043sf.TriangleStripListProvokingVertexSelect = 2;2044sf.TriangleFanProvokingVertexSelect = 2;2045sf.LineStripListProvokingVertexSelect = 1;2046}20472048#if GFX_VER == 62049sf.AttributeSwizzleEnable = true;2050if (state->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)2051sf.PointSpriteTextureCoordinateOrigin = LOWERLEFT;2052else2053sf.PointSpriteTextureCoordinateOrigin = UPPERLEFT;2054#endif20552056#if GFX_VER <= 72057sf.FrontWinding = state->front_ccw ? 1 : 0; // Or the other way...20582059#if GFX_VER >= 62060sf.GlobalDepthOffsetEnableSolid = state->offset_tri;2061sf.GlobalDepthOffsetEnableWireframe = state->offset_line;2062sf.GlobalDepthOffsetEnablePoint = state->offset_point;2063sf.GlobalDepthOffsetConstant = state->offset_units * 2;2064sf.GlobalDepthOffsetScale = state->offset_scale;2065sf.GlobalDepthOffsetClamp = state->offset_clamp;20662067sf.FrontFaceFillMode = translate_fill_mode(state->fill_front);2068sf.BackFaceFillMode = translate_fill_mode(state->fill_back);2069#endif20702071sf.CullMode = translate_cull_mode(state->cull_face);2072sf.ScissorRectangleEnable = true;20732074#if GFX_VERx10 == 752075sf.LineStippleEnable = state->line_stipple_enable;2076#endif2077#endif2078}2079#endif20802081#if GFX_VER == 82082crocus_pack_command(GENX(3DSTATE_RASTER), cso->raster, rr) {2083rr.FrontWinding = state->front_ccw ? 
CounterClockwise : Clockwise;2084rr.CullMode = translate_cull_mode(state->cull_face);2085rr.FrontFaceFillMode = translate_fill_mode(state->fill_front);2086rr.BackFaceFillMode = translate_fill_mode(state->fill_back);2087rr.DXMultisampleRasterizationEnable = state->multisample;2088rr.GlobalDepthOffsetEnableSolid = state->offset_tri;2089rr.GlobalDepthOffsetEnableWireframe = state->offset_line;2090rr.GlobalDepthOffsetEnablePoint = state->offset_point;2091rr.GlobalDepthOffsetConstant = state->offset_units * 2;2092rr.GlobalDepthOffsetScale = state->offset_scale;2093rr.GlobalDepthOffsetClamp = state->offset_clamp;2094rr.SmoothPointEnable = state->point_smooth;2095rr.AntialiasingEnable = state->line_smooth;2096rr.ScissorRectangleEnable = state->scissor;2097rr.ViewportZClipTestEnable = (state->depth_clip_near || state->depth_clip_far);2098}2099#endif21002101#if GFX_VER >= 62102crocus_pack_command(GENX(3DSTATE_CLIP), cso->clip, cl) {2103/* cl.NonPerspectiveBarycentricEnable is filled in at draw time from2104* the FS program; cl.ForceZeroRTAIndexEnable is filled in from the FB.2105*/2106#if GFX_VER >= 72107cl.EarlyCullEnable = true;2108#endif21092110#if GFX_VER == 72111cl.FrontWinding = state->front_ccw ? 1 : 0;2112cl.CullMode = translate_cull_mode(state->cull_face);2113#endif2114cl.UserClipDistanceClipTestEnableBitmask = state->clip_plane_enable;2115#if GFX_VER < 82116cl.ViewportZClipTestEnable = (state->depth_clip_near || state->depth_clip_far);2117#endif2118cl.APIMode = state->clip_halfz ? 
APIMODE_D3D : APIMODE_OGL;2119cl.GuardbandClipTestEnable = true;2120cl.ClipEnable = true;2121cl.MinimumPointWidth = 0.125;2122cl.MaximumPointWidth = 255.875;21232124#if GFX_VER == 82125cl.ForceUserClipDistanceClipTestEnableBitmask = true;2126#endif21272128if (state->flatshade_first) {2129cl.TriangleFanProvokingVertexSelect = 1;2130} else {2131cl.TriangleStripListProvokingVertexSelect = 2;2132cl.TriangleFanProvokingVertexSelect = 2;2133cl.LineStripListProvokingVertexSelect = 1;2134}2135}2136#endif21372138/* Remap from 0..255 back to 1..256 */2139const unsigned line_stipple_factor = state->line_stipple_factor + 1;21402141crocus_pack_command(GENX(3DSTATE_LINE_STIPPLE), cso->line_stipple, line) {2142if (state->line_stipple_enable) {2143line.LineStipplePattern = state->line_stipple_pattern;2144line.LineStippleInverseRepeatCount = 1.0f / line_stipple_factor;2145line.LineStippleRepeatCount = line_stipple_factor;2146}2147}21482149return cso;2150}21512152/**2153* The pipe->bind_rasterizer_state() driver hook.2154*2155* Bind a rasterizer CSO and flag related dirty bits.2156*/2157static void2158crocus_bind_rasterizer_state(struct pipe_context *ctx, void *state)2159{2160struct crocus_context *ice = (struct crocus_context *) ctx;2161struct crocus_rasterizer_state *old_cso = ice->state.cso_rast;2162struct crocus_rasterizer_state *new_cso = state;21632164if (new_cso) {2165/* Try to avoid re-emitting 3DSTATE_LINE_STIPPLE, it's non-pipelined */2166if (cso_changed_memcmp(line_stipple))2167ice->state.dirty |= CROCUS_DIRTY_LINE_STIPPLE;2168#if GFX_VER >= 62169if (cso_changed(cso.half_pixel_center))2170ice->state.dirty |= CROCUS_DIRTY_GEN6_MULTISAMPLE;2171if (cso_changed(cso.scissor))2172ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT;2173if (cso_changed(cso.multisample))2174ice->state.dirty |= CROCUS_DIRTY_WM;2175#else2176if (cso_changed(cso.scissor))2177ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT;2178#endif21792180if (cso_changed(cso.line_stipple_enable) || 
cso_changed(cso.poly_stipple_enable))2181ice->state.dirty |= CROCUS_DIRTY_WM;21822183#if GFX_VER >= 62184if (cso_changed(cso.rasterizer_discard))2185ice->state.dirty |= CROCUS_DIRTY_STREAMOUT | CROCUS_DIRTY_CLIP;21862187if (cso_changed(cso.flatshade_first))2188ice->state.dirty |= CROCUS_DIRTY_STREAMOUT;2189#endif21902191if (cso_changed(cso.depth_clip_near) || cso_changed(cso.depth_clip_far) ||2192cso_changed(cso.clip_halfz))2193ice->state.dirty |= CROCUS_DIRTY_CC_VIEWPORT;21942195#if GFX_VER >= 72196if (cso_changed(cso.sprite_coord_enable) ||2197cso_changed(cso.sprite_coord_mode) ||2198cso_changed(cso.light_twoside))2199ice->state.dirty |= CROCUS_DIRTY_GEN7_SBE;2200#endif2201#if GFX_VER <= 52202if (cso_changed(cso.clip_plane_enable))2203ice->state.dirty |= CROCUS_DIRTY_GEN4_CURBE;2204#endif2205}22062207ice->state.cso_rast = new_cso;2208ice->state.dirty |= CROCUS_DIRTY_RASTER;2209ice->state.dirty |= CROCUS_DIRTY_CLIP;2210#if GFX_VER <= 52211ice->state.dirty |= CROCUS_DIRTY_GEN4_CLIP_PROG | CROCUS_DIRTY_GEN4_SF_PROG;2212ice->state.dirty |= CROCUS_DIRTY_WM;2213#endif2214#if GFX_VER <= 62215ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG;2216#endif2217ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_RASTERIZER];2218}22192220/**2221* Return true if the given wrap mode requires the border color to exist.2222*2223* (We can skip uploading it if the sampler isn't going to use it.)2224*/2225static bool2226wrap_mode_needs_border_color(unsigned wrap_mode)2227{2228#if GFX_VER == 82229return wrap_mode == TCM_CLAMP_BORDER || wrap_mode == TCM_HALF_BORDER;2230#else2231return wrap_mode == TCM_CLAMP_BORDER;2232#endif2233}22342235/**2236* Gallium CSO for sampler state.2237*/2238struct crocus_sampler_state {2239struct pipe_sampler_state pstate;2240union pipe_color_union border_color;2241bool needs_border_color;2242unsigned wrap_s;2243unsigned wrap_t;2244unsigned wrap_r;2245unsigned mag_img_filter;2246float min_lod;2247};22482249/**2250* The 
pipe->create_sampler_state() driver hook.2251*2252* We fill out SAMPLER_STATE (except for the border color pointer), and2253* store that on the CPU. It doesn't make sense to upload it to a GPU2254* buffer object yet, because 3DSTATE_SAMPLER_STATE_POINTERS requires2255* all bound sampler states to be in contiguous memor.2256*/2257static void *2258crocus_create_sampler_state(struct pipe_context *ctx,2259const struct pipe_sampler_state *state)2260{2261struct crocus_sampler_state *cso = CALLOC_STRUCT(crocus_sampler_state);22622263if (!cso)2264return NULL;22652266STATIC_ASSERT(PIPE_TEX_FILTER_NEAREST == MAPFILTER_NEAREST);2267STATIC_ASSERT(PIPE_TEX_FILTER_LINEAR == MAPFILTER_LINEAR);22682269bool either_nearest = state->min_img_filter == PIPE_TEX_FILTER_NEAREST ||2270state->mag_img_filter == PIPE_TEX_FILTER_NEAREST;2271cso->wrap_s = translate_wrap(state->wrap_s, either_nearest);2272cso->wrap_t = translate_wrap(state->wrap_t, either_nearest);2273cso->wrap_r = translate_wrap(state->wrap_r, either_nearest);22742275cso->pstate = *state;22762277memcpy(&cso->border_color, &state->border_color, sizeof(cso->border_color));22782279cso->needs_border_color = wrap_mode_needs_border_color(cso->wrap_s) ||2280wrap_mode_needs_border_color(cso->wrap_t) ||2281wrap_mode_needs_border_color(cso->wrap_r);22822283cso->min_lod = state->min_lod;2284cso->mag_img_filter = state->mag_img_filter;22852286// XXX: explain this code ported from ilo...I don't get it at all...2287if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&2288state->min_lod > 0.0f) {2289cso->min_lod = 0.0f;2290cso->mag_img_filter = state->min_img_filter;2291}22922293return cso;2294}22952296/**2297* The pipe->bind_sampler_states() driver hook.2298*/2299static void2300crocus_bind_sampler_states(struct pipe_context *ctx,2301enum pipe_shader_type p_stage,2302unsigned start, unsigned count,2303void **states)2304{2305struct crocus_context *ice = (struct crocus_context *) ctx;2306gl_shader_stage stage = 
stage_from_pipe(p_stage);2307struct crocus_shader_state *shs = &ice->state.shaders[stage];23082309assert(start + count <= CROCUS_MAX_TEXTURE_SAMPLERS);23102311bool dirty = false;23122313for (int i = 0; i < count; i++) {2314if (shs->samplers[start + i] != states[i]) {2315shs->samplers[start + i] = states[i];2316dirty = true;2317}2318}23192320if (dirty) {2321#if GFX_VER <= 52322if (p_stage == PIPE_SHADER_FRAGMENT)2323ice->state.dirty |= CROCUS_DIRTY_WM;2324else if (p_stage == PIPE_SHADER_VERTEX)2325ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_VS;2326#endif2327ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;2328ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_TEXTURES];2329}2330}23312332enum samp_workaround {2333SAMP_NORMAL,2334SAMP_CUBE_CLAMP,2335SAMP_CUBE_CUBE,2336SAMP_T_WRAP,2337};23382339static void2340crocus_upload_sampler_state(struct crocus_batch *batch,2341struct crocus_sampler_state *cso,2342uint32_t border_color_offset,2343enum samp_workaround samp_workaround,2344uint32_t first_level,2345void *map)2346{2347struct pipe_sampler_state *state = &cso->pstate;2348uint32_t wrap_s, wrap_t, wrap_r;23492350wrap_s = cso->wrap_s;2351wrap_t = cso->wrap_t;2352wrap_r = cso->wrap_r;23532354switch (samp_workaround) {2355case SAMP_CUBE_CLAMP:2356wrap_s = TCM_CLAMP;2357wrap_t = TCM_CLAMP;2358wrap_r = TCM_CLAMP;2359break;2360case SAMP_CUBE_CUBE:2361wrap_s = TCM_CUBE;2362wrap_t = TCM_CUBE;2363wrap_r = TCM_CUBE;2364break;2365case SAMP_T_WRAP:2366wrap_t = TCM_WRAP;2367break;2368default:2369break;2370}23712372_crocus_pack_state(batch, GENX(SAMPLER_STATE), map, samp) {2373samp.TCXAddressControlMode = wrap_s;2374samp.TCYAddressControlMode = wrap_t;2375samp.TCZAddressControlMode = wrap_r;23762377#if GFX_VER >= 62378samp.NonnormalizedCoordinateEnable = !state->normalized_coords;2379#endif2380samp.MinModeFilter = state->min_img_filter;2381samp.MagModeFilter = cso->mag_img_filter;2382samp.MipModeFilter = 
translate_mip_filter(state->min_mip_filter);2383samp.MaximumAnisotropy = RATIO21;23842385if (state->max_anisotropy >= 2) {2386if (state->min_img_filter == PIPE_TEX_FILTER_LINEAR) {2387samp.MinModeFilter = MAPFILTER_ANISOTROPIC;2388#if GFX_VER >= 72389samp.AnisotropicAlgorithm = EWAApproximation;2390#endif2391}23922393if (state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)2394samp.MagModeFilter = MAPFILTER_ANISOTROPIC;23952396samp.MaximumAnisotropy =2397MIN2((state->max_anisotropy - 2) / 2, RATIO161);2398}23992400/* Set address rounding bits if not using nearest filtering. */2401if (state->min_img_filter != PIPE_TEX_FILTER_NEAREST) {2402samp.UAddressMinFilterRoundingEnable = true;2403samp.VAddressMinFilterRoundingEnable = true;2404samp.RAddressMinFilterRoundingEnable = true;2405}24062407if (state->mag_img_filter != PIPE_TEX_FILTER_NEAREST) {2408samp.UAddressMagFilterRoundingEnable = true;2409samp.VAddressMagFilterRoundingEnable = true;2410samp.RAddressMagFilterRoundingEnable = true;2411}24122413if (state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)2414samp.ShadowFunction = translate_shadow_func(state->compare_func);24152416const float hw_max_lod = GFX_VER >= 7 ? 
14 : 13;24172418#if GFX_VER == 82419samp.LODPreClampMode = CLAMP_MODE_OGL;2420#else2421samp.LODPreClampEnable = true;2422#endif2423samp.MinLOD = CLAMP(cso->min_lod, 0, hw_max_lod);2424samp.MaxLOD = CLAMP(state->max_lod, 0, hw_max_lod);2425samp.TextureLODBias = CLAMP(state->lod_bias, -16, 15);24262427#if GFX_VER == 62428samp.BaseMipLevel = CLAMP(first_level, 0, hw_max_lod);2429samp.MinandMagStateNotEqual = samp.MinModeFilter != samp.MagModeFilter;2430#endif24312432#if GFX_VER < 62433samp.BorderColorPointer =2434ro_bo(batch->state.bo, border_color_offset);2435#else2436samp.BorderColorPointer = border_color_offset;2437#endif2438}2439}24402441static void2442crocus_upload_border_color(struct crocus_batch *batch,2443struct crocus_sampler_state *cso,2444struct crocus_sampler_view *tex,2445uint32_t *bc_offset)2446{2447/* We may need to swizzle the border color for format faking.2448* A/LA formats are faked as R/RG with 000R or R00G swizzles.2449* This means we need to move the border color's A channel into2450* the R or G channels so that those read swizzles will move it2451* back into A.2452*/2453enum pipe_format internal_format = PIPE_FORMAT_NONE;2454union pipe_color_union *color = &cso->border_color;2455union pipe_color_union tmp;2456if (tex) {2457internal_format = tex->res->internal_format;24582459if (util_format_is_alpha(internal_format)) {2460unsigned char swz[4] = {2461PIPE_SWIZZLE_0, PIPE_SWIZZLE_0,2462PIPE_SWIZZLE_0, PIPE_SWIZZLE_W,2463};2464util_format_apply_color_swizzle(&tmp, color, swz, true);2465color = &tmp;2466} else if (util_format_is_luminance_alpha(internal_format) &&2467internal_format != PIPE_FORMAT_L8A8_SRGB) {2468unsigned char swz[4] = {2469PIPE_SWIZZLE_X, PIPE_SWIZZLE_X,2470PIPE_SWIZZLE_X, PIPE_SWIZZLE_W2471};2472util_format_apply_color_swizzle(&tmp, color, swz, true);2473color = &tmp;2474}2475}2476bool is_integer_format = util_format_is_pure_integer(internal_format);2477unsigned sbc_size = GENX(SAMPLER_BORDER_COLOR_STATE_length) * 4;2478const int 
sbc_align = (GFX_VER == 8 ? 64 : ((GFX_VERx10 == 75 && is_integer_format) ? 512 : 32));2479uint32_t *sbc = stream_state(batch, sbc_size, sbc_align, bc_offset);24802481struct GENX(SAMPLER_BORDER_COLOR_STATE) state = { 0 };24822483#define ASSIGN(dst, src) \2484do { \2485dst = src; \2486} while (0)24872488#define ASSIGNu16(dst, src) \2489do { \2490dst = (uint16_t)src; \2491} while (0)24922493#define ASSIGNu8(dst, src) \2494do { \2495dst = (uint8_t)src; \2496} while (0)24972498#define BORDER_COLOR_ATTR(macro, _color_type, src) \2499macro(state.BorderColor ## _color_type ## Red, src[0]); \2500macro(state.BorderColor ## _color_type ## Green, src[1]); \2501macro(state.BorderColor ## _color_type ## Blue, src[2]); \2502macro(state.BorderColor ## _color_type ## Alpha, src[3]);25032504#if GFX_VER >= 82505/* On Broadwell, the border color is represented as four 32-bit floats,2506* integers, or unsigned values, interpreted according to the surface2507* format. This matches the sampler->BorderColor union exactly; just2508* memcpy the values.2509*/2510BORDER_COLOR_ATTR(ASSIGN, 32bit, color->ui);2511#elif GFX_VERx10 == 752512if (is_integer_format) {2513const struct util_format_description *format_desc =2514util_format_description(internal_format);25152516/* From the Haswell PRM, "Command Reference: Structures", Page 36:2517* "If any color channel is missing from the surface format,2518* corresponding border color should be programmed as zero and if2519* alpha channel is missing, corresponding Alpha border color should2520* be programmed as 1."2521*/2522unsigned c[4] = { 0, 0, 0, 1 };2523for (int i = 0; i < 4; i++) {2524if (format_desc->channel[i].size)2525c[i] = color->ui[i];2526}25272528switch (format_desc->channel[0].size) {2529case 8:2530/* Copy RGBA in order. */2531BORDER_COLOR_ATTR(ASSIGNu8, 8bit, c);2532break;2533case 10:2534/* R10G10B10A2_UINT is treated like a 16-bit format. 
*/2535case 16:2536BORDER_COLOR_ATTR(ASSIGNu16, 16bit, c);2537break;2538case 32:2539if (format_desc->channel[1].size && !format_desc->channel[2].size) {2540/* Careful inspection of the tables reveals that for RG32 formats,2541* the green channel needs to go where blue normally belongs.2542*/2543state.BorderColor32bitRed = c[0];2544state.BorderColor32bitBlue = c[1];2545state.BorderColor32bitAlpha = 1;2546} else {2547/* Copy RGBA in order. */2548BORDER_COLOR_ATTR(ASSIGN, 32bit, c);2549}2550break;2551default:2552assert(!"Invalid number of bits per channel in integer format.");2553break;2554}2555} else {2556BORDER_COLOR_ATTR(ASSIGN, Float, color->f);2557}2558#elif GFX_VER == 5 || GFX_VER == 62559BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_UBYTE, Unorm, color->f);2560BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_USHORT, Unorm16, color->f);2561BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_SHORT, Snorm16, color->f);25622563#define MESA_FLOAT_TO_HALF(dst, src) \2564dst = _mesa_float_to_half(src);25652566BORDER_COLOR_ATTR(MESA_FLOAT_TO_HALF, Float16, color->f);25672568#undef MESA_FLOAT_TO_HALF25692570state.BorderColorSnorm8Red = state.BorderColorSnorm16Red >> 8;2571state.BorderColorSnorm8Green = state.BorderColorSnorm16Green >> 8;2572state.BorderColorSnorm8Blue = state.BorderColorSnorm16Blue >> 8;2573state.BorderColorSnorm8Alpha = state.BorderColorSnorm16Alpha >> 8;25742575BORDER_COLOR_ATTR(ASSIGN, Float, color->f);25762577#elif GFX_VER == 42578BORDER_COLOR_ATTR(ASSIGN, , color->f);2579#else2580BORDER_COLOR_ATTR(ASSIGN, Float, color->f);2581#endif25822583#undef ASSIGN2584#undef BORDER_COLOR_ATTR25852586GENX(SAMPLER_BORDER_COLOR_STATE_pack)(batch, sbc, &state);2587}25882589/**2590* Upload the sampler states into a contiguous area of GPU memory, for2591* for 3DSTATE_SAMPLER_STATE_POINTERS_*.2592*2593* Also fill out the border color state pointers.2594*/2595static void2596crocus_upload_sampler_states(struct crocus_context *ice,2597struct crocus_batch *batch, gl_shader_stage stage)2598{2599struct 
crocus_shader_state *shs = &ice->state.shaders[stage];2600const struct shader_info *info = crocus_get_shader_info(ice, stage);26012602/* We assume the state tracker will call pipe->bind_sampler_states()2603* if the program's number of textures changes.2604*/2605unsigned count = info ? BITSET_LAST_BIT(info->textures_used) : 0;26062607if (!count)2608return;26092610/* Assemble the SAMPLER_STATEs into a contiguous table that lives2611* in the dynamic state memory zone, so we can point to it via the2612* 3DSTATE_SAMPLER_STATE_POINTERS_* commands.2613*/2614unsigned size = count * 4 * GENX(SAMPLER_STATE_length);2615uint32_t *map = stream_state(batch, size, 32, &shs->sampler_offset);26162617if (unlikely(!map))2618return;26192620for (int i = 0; i < count; i++) {2621struct crocus_sampler_state *state = shs->samplers[i];2622struct crocus_sampler_view *tex = shs->textures[i];26232624if (!state || !tex) {2625memset(map, 0, 4 * GENX(SAMPLER_STATE_length));2626} else {2627unsigned border_color_offset = 0;2628if (state->needs_border_color) {2629crocus_upload_border_color(batch, state, tex, &border_color_offset);2630}26312632enum samp_workaround wa = SAMP_NORMAL;2633/* There's a bug in 1D texture sampling - it actually pays2634* attention to the wrap_t value, though it should not.2635* Override the wrap_t value here to GL_REPEAT to keep2636* any nonexistent border pixels from floating in.2637*/2638if (tex->base.target == PIPE_TEXTURE_1D)2639wa = SAMP_T_WRAP;2640else if (tex->base.target == PIPE_TEXTURE_CUBE ||2641tex->base.target == PIPE_TEXTURE_CUBE_ARRAY) {2642/* Cube maps must use the same wrap mode for all three coordinate2643* dimensions. Prior to Haswell, only CUBE and CLAMP are valid.2644*2645* Ivybridge and Baytrail seem to have problems with CUBE mode and2646* integer formats. 
Fall back to CLAMP for now.2647*/2648if (state->pstate.seamless_cube_map &&2649!(GFX_VERx10 == 70 && util_format_is_pure_integer(tex->base.format)))2650wa = SAMP_CUBE_CUBE;2651else2652wa = SAMP_CUBE_CLAMP;2653}26542655uint32_t first_level = 0;2656if (tex->base.target != PIPE_BUFFER)2657first_level = tex->base.u.tex.first_level;26582659crocus_upload_sampler_state(batch, state, border_color_offset, wa, first_level, map);2660}26612662map += GENX(SAMPLER_STATE_length);2663}2664}26652666/**2667* The pipe->create_sampler_view() driver hook.2668*/2669static struct pipe_sampler_view *2670crocus_create_sampler_view(struct pipe_context *ctx,2671struct pipe_resource *tex,2672const struct pipe_sampler_view *tmpl)2673{2674struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;2675const struct intel_device_info *devinfo = &screen->devinfo;2676struct crocus_sampler_view *isv = calloc(1, sizeof(struct crocus_sampler_view));26772678if (!isv)2679return NULL;26802681/* initialize base object */2682isv->base = *tmpl;2683isv->base.context = ctx;2684isv->base.texture = NULL;2685pipe_reference_init(&isv->base.reference, 1);2686pipe_resource_reference(&isv->base.texture, tex);26872688if (util_format_is_depth_or_stencil(tmpl->format)) {2689struct crocus_resource *zres, *sres;2690const struct util_format_description *desc =2691util_format_description(tmpl->format);26922693crocus_get_depth_stencil_resources(devinfo, tex, &zres, &sres);26942695tex = util_format_has_depth(desc) ? 
&zres->base.b : &sres->base.b;26962697if (tex->format == PIPE_FORMAT_S8_UINT)2698if (devinfo->ver == 7 && sres->shadow)2699tex = &sres->shadow->base.b;2700}27012702isv->res = (struct crocus_resource *) tex;27032704isl_surf_usage_flags_t usage = ISL_SURF_USAGE_TEXTURE_BIT;27052706if (isv->base.target == PIPE_TEXTURE_CUBE ||2707isv->base.target == PIPE_TEXTURE_CUBE_ARRAY)2708usage |= ISL_SURF_USAGE_CUBE_BIT;27092710const struct crocus_format_info fmt =2711crocus_format_for_usage(devinfo, tmpl->format, usage);27122713enum pipe_swizzle vswz[4] = { tmpl->swizzle_r, tmpl->swizzle_g, tmpl->swizzle_b, tmpl->swizzle_a };2714crocus_combine_swizzle(isv->swizzle, fmt.swizzles, vswz);27152716/* hardcode stencil swizzles - hw returns 0G01, we want GGGG */2717if (devinfo->ver < 6 &&2718(tmpl->format == PIPE_FORMAT_X32_S8X24_UINT ||2719tmpl->format == PIPE_FORMAT_X24S8_UINT)) {2720isv->swizzle[0] = tmpl->swizzle_g;2721isv->swizzle[1] = tmpl->swizzle_g;2722isv->swizzle[2] = tmpl->swizzle_g;2723isv->swizzle[3] = tmpl->swizzle_g;2724}27252726isv->clear_color = isv->res->aux.clear_color;27272728isv->view = (struct isl_view) {2729.format = fmt.fmt,2730#if GFX_VERx10 >= 752731.swizzle = (struct isl_swizzle) {2732.r = pipe_to_isl_swizzle(isv->swizzle[0], false),2733.g = pipe_to_isl_swizzle(isv->swizzle[1], false),2734.b = pipe_to_isl_swizzle(isv->swizzle[2], false),2735.a = pipe_to_isl_swizzle(isv->swizzle[3], false),2736},2737#else2738/* swizzling handled in shader code */2739.swizzle = ISL_SWIZZLE_IDENTITY,2740#endif2741.usage = usage,2742};27432744/* Fill out SURFACE_STATE for this view. 
*/2745if (tmpl->target != PIPE_BUFFER) {2746isv->view.base_level = tmpl->u.tex.first_level;2747isv->view.levels = tmpl->u.tex.last_level - tmpl->u.tex.first_level + 1;2748// XXX: do I need to port f9fd0cf4790cb2a530e75d1a2206dbb9d8af7cb2?2749isv->view.base_array_layer = tmpl->u.tex.first_layer;2750isv->view.array_len =2751tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1;2752}2753#if GFX_VER >= 62754/* just create a second view struct for texture gather just in case */2755isv->gather_view = isv->view;27562757#if GFX_VER == 72758if (fmt.fmt == ISL_FORMAT_R32G32_FLOAT ||2759fmt.fmt == ISL_FORMAT_R32G32_SINT ||2760fmt.fmt == ISL_FORMAT_R32G32_UINT) {2761isv->gather_view.format = ISL_FORMAT_R32G32_FLOAT_LD;2762#if GFX_VERx10 >= 752763isv->gather_view.swizzle = (struct isl_swizzle) {2764.r = pipe_to_isl_swizzle(isv->swizzle[0], GFX_VERx10 == 75),2765.g = pipe_to_isl_swizzle(isv->swizzle[1], GFX_VERx10 == 75),2766.b = pipe_to_isl_swizzle(isv->swizzle[2], GFX_VERx10 == 75),2767.a = pipe_to_isl_swizzle(isv->swizzle[3], GFX_VERx10 == 75),2768};2769#endif2770}2771#endif2772#if GFX_VER == 62773/* Sandybridge's gather4 message is broken for integer formats.2774* To work around this, we pretend the surface is UNORM for2775* 8 or 16-bit formats, and emit shader instructions to recover2776* the real INT/UINT value. For 32-bit formats, we pretend2777* the surface is FLOAT, and simply reinterpret the resulting2778* bits.2779*/2780switch (fmt.fmt) {2781case ISL_FORMAT_R8_SINT:2782case ISL_FORMAT_R8_UINT:2783isv->gather_view.format = ISL_FORMAT_R8_UNORM;2784break;27852786case ISL_FORMAT_R16_SINT:2787case ISL_FORMAT_R16_UINT:2788isv->gather_view.format = ISL_FORMAT_R16_UNORM;2789break;27902791case ISL_FORMAT_R32_SINT:2792case ISL_FORMAT_R32_UINT:2793isv->gather_view.format = ISL_FORMAT_R32_FLOAT;2794break;27952796default:2797break;2798}2799#endif2800#endif2801/* Fill out SURFACE_STATE for this view. 
*/2802if (tmpl->target != PIPE_BUFFER) {2803if (crocus_resource_unfinished_aux_import(isv->res))2804crocus_resource_finish_aux_import(&screen->base, isv->res);28052806}28072808return &isv->base;2809}28102811static void2812crocus_sampler_view_destroy(struct pipe_context *ctx,2813struct pipe_sampler_view *state)2814{2815struct crocus_sampler_view *isv = (void *) state;2816pipe_resource_reference(&state->texture, NULL);2817free(isv);2818}28192820/**2821* The pipe->create_surface() driver hook.2822*2823* In Gallium nomenclature, "surfaces" are a view of a resource that2824* can be bound as a render target or depth/stencil buffer.2825*/2826static struct pipe_surface *2827crocus_create_surface(struct pipe_context *ctx,2828struct pipe_resource *tex,2829const struct pipe_surface *tmpl)2830{2831struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;2832const struct intel_device_info *devinfo = &screen->devinfo;28332834isl_surf_usage_flags_t usage = 0;2835if (tmpl->writable)2836usage = ISL_SURF_USAGE_STORAGE_BIT;2837else if (util_format_is_depth_or_stencil(tmpl->format))2838usage = ISL_SURF_USAGE_DEPTH_BIT;2839else2840usage = ISL_SURF_USAGE_RENDER_TARGET_BIT;28412842const struct crocus_format_info fmt =2843crocus_format_for_usage(devinfo, tmpl->format, usage);28442845if ((usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) &&2846!isl_format_supports_rendering(devinfo, fmt.fmt)) {2847/* Framebuffer validation will reject this invalid case, but it2848* hasn't had the opportunity yet. 
In the meantime, we need to2849* avoid hitting ISL asserts about unsupported formats below.2850*/2851return NULL;2852}28532854struct crocus_surface *surf = calloc(1, sizeof(struct crocus_surface));2855struct pipe_surface *psurf = &surf->base;2856struct crocus_resource *res = (struct crocus_resource *) tex;28572858if (!surf)2859return NULL;28602861pipe_reference_init(&psurf->reference, 1);2862pipe_resource_reference(&psurf->texture, tex);2863psurf->context = ctx;2864psurf->format = tmpl->format;2865psurf->width = tex->width0;2866psurf->height = tex->height0;2867psurf->texture = tex;2868psurf->u.tex.first_layer = tmpl->u.tex.first_layer;2869psurf->u.tex.last_layer = tmpl->u.tex.last_layer;2870psurf->u.tex.level = tmpl->u.tex.level;28712872uint32_t array_len = tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1;28732874struct isl_view *view = &surf->view;2875*view = (struct isl_view) {2876.format = fmt.fmt,2877.base_level = tmpl->u.tex.level,2878.levels = 1,2879.base_array_layer = tmpl->u.tex.first_layer,2880.array_len = array_len,2881.swizzle = ISL_SWIZZLE_IDENTITY,2882.usage = usage,2883};28842885#if GFX_VER >= 62886struct isl_view *read_view = &surf->read_view;2887*read_view = (struct isl_view) {2888.format = fmt.fmt,2889.base_level = tmpl->u.tex.level,2890.levels = 1,2891.base_array_layer = tmpl->u.tex.first_layer,2892.array_len = array_len,2893.swizzle = ISL_SWIZZLE_IDENTITY,2894.usage = ISL_SURF_USAGE_TEXTURE_BIT,2895};2896#endif28972898surf->clear_color = res->aux.clear_color;28992900/* Bail early for depth/stencil - we don't want SURFACE_STATE for them. 
*/2901if (res->surf.usage & (ISL_SURF_USAGE_DEPTH_BIT |2902ISL_SURF_USAGE_STENCIL_BIT))2903return psurf;29042905if (!isl_format_is_compressed(res->surf.format)) {2906if (crocus_resource_unfinished_aux_import(res))2907crocus_resource_finish_aux_import(&screen->base, res);29082909memcpy(&surf->surf, &res->surf, sizeof(surf->surf));2910uint32_t temp_offset, temp_x, temp_y;29112912isl_surf_get_image_offset_B_tile_sa(&res->surf, tmpl->u.tex.level,2913res->base.b.target == PIPE_TEXTURE_3D ? 0 : tmpl->u.tex.first_layer,2914res->base.b.target == PIPE_TEXTURE_3D ? tmpl->u.tex.first_layer : 0,2915&temp_offset, &temp_x, &temp_y);2916if (!devinfo->has_surface_tile_offset &&2917(temp_x || temp_y)) {2918/* Original gfx4 hardware couldn't draw to a non-tile-aligned2919* destination.2920*/2921/* move to temp */2922struct pipe_resource wa_templ = (struct pipe_resource) {2923.width0 = u_minify(res->base.b.width0, tmpl->u.tex.level),2924.height0 = u_minify(res->base.b.height0, tmpl->u.tex.level),2925.depth0 = 1,2926.array_size = 1,2927.format = res->base.b.format,2928.target = PIPE_TEXTURE_2D,2929.bind = (usage & ISL_SURF_USAGE_DEPTH_BIT ? PIPE_BIND_DEPTH_STENCIL : PIPE_BIND_RENDER_TARGET) | PIPE_BIND_SAMPLER_VIEW,2930};2931surf->align_res = screen->base.resource_create(&screen->base, &wa_templ);2932view->base_level = 0;2933view->base_array_layer = 0;2934view->array_len = 1;2935struct crocus_resource *align_res = (struct crocus_resource *)surf->align_res;2936memcpy(&surf->surf, &align_res->surf, sizeof(surf->surf));2937}2938return psurf;2939}29402941/* The resource has a compressed format, which is not renderable, but we2942* have a renderable view format. We must be attempting to upload blocks2943* of compressed data via an uncompressed view.2944*2945* In this case, we can assume there are no auxiliary buffers, a single2946* miplevel, and that the resource is single-sampled. 
Gallium may try2947* and create an uncompressed view with multiple layers, however.2948*/2949assert(!isl_format_is_compressed(fmt.fmt));2950assert(res->surf.samples == 1);2951assert(view->levels == 1);29522953/* TODO: compressed pbo uploads aren't working here */2954return NULL;29552956uint32_t offset_B = 0, tile_x_sa = 0, tile_y_sa = 0;29572958if (view->base_level > 0) {2959/* We can't rely on the hardware's miplevel selection with such2960* a substantial lie about the format, so we select a single image2961* using the Tile X/Y Offset fields. In this case, we can't handle2962* multiple array slices.2963*2964* On Broadwell, HALIGN and VALIGN are specified in pixels and are2965* hard-coded to align to exactly the block size of the compressed2966* texture. This means that, when reinterpreted as a non-compressed2967* texture, the tile offsets may be anything and we can't rely on2968* X/Y Offset.2969*2970* Return NULL to force the state tracker to take fallback paths.2971*/2972// TODO: check if the gen7 check is right, originally gen82973if (view->array_len > 1 || GFX_VER == 7)2974return NULL;29752976const bool is_3d = res->surf.dim == ISL_SURF_DIM_3D;2977isl_surf_get_image_surf(&screen->isl_dev, &res->surf,2978view->base_level,2979is_3d ? 0 : view->base_array_layer,2980is_3d ? view->base_array_layer : 0,2981&surf->surf,2982&offset_B, &tile_x_sa, &tile_y_sa);29832984/* We use address and tile offsets to access a single level/layer2985* as a subimage, so reset level/layer so it doesn't offset again.2986*/2987view->base_array_layer = 0;2988view->base_level = 0;2989} else {2990/* Level 0 doesn't require tile offsets, and the hardware can find2991* array slices using QPitch even with the format override, so we2992* can allow layers in this case. Copy the original ISL surface.2993*/2994memcpy(&surf->surf, &res->surf, sizeof(surf->surf));2995}29962997/* Scale down the image dimensions by the block size. 
*/2998const struct isl_format_layout *fmtl =2999isl_format_get_layout(res->surf.format);3000surf->surf.format = fmt.fmt;3001surf->surf.logical_level0_px = isl_surf_get_logical_level0_el(&surf->surf);3002surf->surf.phys_level0_sa = isl_surf_get_phys_level0_el(&surf->surf);3003tile_x_sa /= fmtl->bw;3004tile_y_sa /= fmtl->bh;30053006psurf->width = surf->surf.logical_level0_px.width;3007psurf->height = surf->surf.logical_level0_px.height;30083009return psurf;3010}30113012#if GFX_VER >= 73013static void3014fill_default_image_param(struct brw_image_param *param)3015{3016memset(param, 0, sizeof(*param));3017/* Set the swizzling shifts to all-ones to effectively disable swizzling --3018* See emit_address_calculation() in brw_fs_surface_builder.cpp for a more3019* detailed explanation of these parameters.3020*/3021param->swizzling[0] = 0xff;3022param->swizzling[1] = 0xff;3023}30243025static void3026fill_buffer_image_param(struct brw_image_param *param,3027enum pipe_format pfmt,3028unsigned size)3029{3030const unsigned cpp = util_format_get_blocksize(pfmt);30313032fill_default_image_param(param);3033param->size[0] = size / cpp;3034param->stride[0] = cpp;3035}30363037#endif30383039/**3040* The pipe->set_shader_images() driver hook.3041*/3042static void3043crocus_set_shader_images(struct pipe_context *ctx,3044enum pipe_shader_type p_stage,3045unsigned start_slot, unsigned count,3046unsigned unbind_num_trailing_slots,3047const struct pipe_image_view *p_images)3048{3049#if GFX_VER >= 73050struct crocus_context *ice = (struct crocus_context *) ctx;3051struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;3052const struct intel_device_info *devinfo = &screen->devinfo;3053gl_shader_stage stage = stage_from_pipe(p_stage);3054struct crocus_shader_state *shs = &ice->state.shaders[stage];3055struct crocus_genx_state *genx = ice->state.genx;3056struct brw_image_param *image_params = genx->shaders[stage].image_param;30573058shs->bound_image_views &= 
~u_bit_consecutive(start_slot, count);30593060for (unsigned i = 0; i < count; i++) {3061struct crocus_image_view *iv = &shs->image[start_slot + i];30623063if (p_images && p_images[i].resource) {3064const struct pipe_image_view *img = &p_images[i];3065struct crocus_resource *res = (void *) img->resource;30663067util_copy_image_view(&iv->base, img);30683069shs->bound_image_views |= 1 << (start_slot + i);30703071res->bind_history |= PIPE_BIND_SHADER_IMAGE;3072res->bind_stages |= 1 << stage;30733074isl_surf_usage_flags_t usage = ISL_SURF_USAGE_STORAGE_BIT;3075struct crocus_format_info fmt =3076crocus_format_for_usage(devinfo, img->format, usage);30773078struct isl_swizzle swiz = pipe_to_isl_swizzles(fmt.swizzles);3079if (img->shader_access & PIPE_IMAGE_ACCESS_READ) {3080/* On Gen8, try to use typed surfaces reads (which support a3081* limited number of formats), and if not possible, fall back3082* to untyped reads.3083*/3084if (!isl_has_matching_typed_storage_image_format(devinfo, fmt.fmt))3085fmt.fmt = ISL_FORMAT_RAW;3086else3087fmt.fmt = isl_lower_storage_image_format(devinfo, fmt.fmt);3088}30893090if (res->base.b.target != PIPE_BUFFER) {3091struct isl_view view = {3092.format = fmt.fmt,3093.base_level = img->u.tex.level,3094.levels = 1,3095.base_array_layer = img->u.tex.first_layer,3096.array_len = img->u.tex.last_layer - img->u.tex.first_layer + 1,3097.swizzle = swiz,3098.usage = usage,3099};31003101iv->view = view;31023103isl_surf_fill_image_param(&screen->isl_dev,3104&image_params[start_slot + i],3105&res->surf, &view);3106} else {3107struct isl_view view = {3108.format = fmt.fmt,3109.swizzle = swiz,3110.usage = usage,3111};3112iv->view = view;31133114util_range_add(&res->base.b, &res->valid_buffer_range, img->u.buf.offset,3115img->u.buf.offset + img->u.buf.size);3116fill_buffer_image_param(&image_params[start_slot + i],3117img->format, img->u.buf.size);3118}3119} else {3120pipe_resource_reference(&iv->base.resource, 
NULL);3121fill_default_image_param(&image_params[start_slot + i]);3122}3123}31243125ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_VS << stage;3126ice->state.dirty |=3127stage == MESA_SHADER_COMPUTE ? CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES3128: CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;31293130/* Broadwell also needs brw_image_params re-uploaded */3131ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS << stage;3132shs->sysvals_need_upload = true;3133#endif3134}313531363137/**3138* The pipe->set_sampler_views() driver hook.3139*/3140static void3141crocus_set_sampler_views(struct pipe_context *ctx,3142enum pipe_shader_type p_stage,3143unsigned start, unsigned count,3144unsigned unbind_num_trailing_slots,3145struct pipe_sampler_view **views)3146{3147struct crocus_context *ice = (struct crocus_context *) ctx;3148gl_shader_stage stage = stage_from_pipe(p_stage);3149struct crocus_shader_state *shs = &ice->state.shaders[stage];31503151shs->bound_sampler_views &= ~u_bit_consecutive(start, count);31523153for (unsigned i = 0; i < count; i++) {3154struct pipe_sampler_view *pview = views ? views[i] : NULL;3155pipe_sampler_view_reference((struct pipe_sampler_view **)3156&shs->textures[start + i], pview);3157struct crocus_sampler_view *view = (void *) pview;3158if (view) {3159view->res->bind_history |= PIPE_BIND_SAMPLER_VIEW;3160view->res->bind_stages |= 1 << stage;31613162shs->bound_sampler_views |= 1 << (start + i);3163}3164}3165#if GFX_VER == 63166/* first level parameters to crocus_upload_sampler_state is gfx6 only */3167ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;3168#endif3169ice->state.stage_dirty |= (CROCUS_STAGE_DIRTY_BINDINGS_VS << stage);3170ice->state.dirty |=3171stage == MESA_SHADER_COMPUTE ? 
CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES3172: CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;3173ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_TEXTURES];3174}31753176/**3177* The pipe->set_tess_state() driver hook.3178*/3179static void3180crocus_set_tess_state(struct pipe_context *ctx,3181const float default_outer_level[4],3182const float default_inner_level[2])3183{3184struct crocus_context *ice = (struct crocus_context *) ctx;3185struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];31863187memcpy(&ice->state.default_outer_level[0], &default_outer_level[0], 4 * sizeof(float));3188memcpy(&ice->state.default_inner_level[0], &default_inner_level[0], 2 * sizeof(float));31893190ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_TCS;3191shs->sysvals_need_upload = true;3192}31933194static void3195crocus_surface_destroy(struct pipe_context *ctx, struct pipe_surface *p_surf)3196{3197struct crocus_surface *surf = (void *) p_surf;3198pipe_resource_reference(&p_surf->texture, NULL);31993200pipe_resource_reference(&surf->align_res, NULL);3201free(surf);3202}32033204static void3205crocus_set_clip_state(struct pipe_context *ctx,3206const struct pipe_clip_state *state)3207{3208struct crocus_context *ice = (struct crocus_context *) ctx;3209struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];3210struct crocus_shader_state *gshs = &ice->state.shaders[MESA_SHADER_GEOMETRY];3211struct crocus_shader_state *tshs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];32123213memcpy(&ice->state.clip_planes, state, sizeof(*state));32143215#if GFX_VER <= 53216ice->state.dirty |= CROCUS_DIRTY_GEN4_CURBE;3217#endif3218ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS | CROCUS_STAGE_DIRTY_CONSTANTS_GS |3219CROCUS_STAGE_DIRTY_CONSTANTS_TES;3220shs->sysvals_need_upload = true;3221gshs->sysvals_need_upload = true;3222tshs->sysvals_need_upload = true;3223}32243225/**3226* The pipe->set_polygon_stipple() driver hook.3227*/3228static 
void3229crocus_set_polygon_stipple(struct pipe_context *ctx,3230const struct pipe_poly_stipple *state)3231{3232struct crocus_context *ice = (struct crocus_context *) ctx;3233memcpy(&ice->state.poly_stipple, state, sizeof(*state));3234ice->state.dirty |= CROCUS_DIRTY_POLYGON_STIPPLE;3235}32363237/**3238* The pipe->set_sample_mask() driver hook.3239*/3240static void3241crocus_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)3242{3243struct crocus_context *ice = (struct crocus_context *) ctx;32443245/* We only support 16x MSAA, so we have 16 bits of sample maks.3246* st/mesa may pass us 0xffffffff though, meaning "enable all samples".3247*/3248ice->state.sample_mask = sample_mask & 0xff;3249ice->state.dirty |= CROCUS_DIRTY_GEN6_SAMPLE_MASK;3250}32513252static void3253crocus_fill_scissor_rect(struct crocus_context *ice,3254int idx,3255struct pipe_scissor_state *ss)3256{3257struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;3258struct pipe_rasterizer_state *cso_state = &ice->state.cso_rast->cso;3259const struct pipe_viewport_state *vp = &ice->state.viewports[idx];3260struct pipe_scissor_state scissor = (struct pipe_scissor_state) {3261.minx = MAX2(-fabsf(vp->scale[0]) + vp->translate[0], 0),3262.maxx = MIN2( fabsf(vp->scale[0]) + vp->translate[0], cso_fb->width) - 1,3263.miny = MAX2(-fabsf(vp->scale[1]) + vp->translate[1], 0),3264.maxy = MIN2( fabsf(vp->scale[1]) + vp->translate[1], cso_fb->height) - 1,3265};3266if (cso_state->scissor) {3267struct pipe_scissor_state *s = &ice->state.scissors[idx];3268scissor.minx = MAX2(scissor.minx, s->minx);3269scissor.miny = MAX2(scissor.miny, s->miny);3270scissor.maxx = MIN2(scissor.maxx, s->maxx);3271scissor.maxy = MIN2(scissor.maxy, s->maxy);3272}3273*ss = scissor;3274}32753276/**3277* The pipe->set_scissor_states() driver hook.3278*3279* This corresponds to our SCISSOR_RECT state structures. 
It's an3280* exact match, so we just store them, and memcpy them out later.3281*/3282static void3283crocus_set_scissor_states(struct pipe_context *ctx,3284unsigned start_slot,3285unsigned num_scissors,3286const struct pipe_scissor_state *rects)3287{3288struct crocus_context *ice = (struct crocus_context *) ctx;32893290for (unsigned i = 0; i < num_scissors; i++) {3291if (rects[i].minx == rects[i].maxx || rects[i].miny == rects[i].maxy) {3292/* If the scissor was out of bounds and got clamped to 0 width/height3293* at the bounds, the subtraction of 1 from maximums could produce a3294* negative number and thus not clip anything. Instead, just provide3295* a min > max scissor inside the bounds, which produces the expected3296* no rendering.3297*/3298ice->state.scissors[start_slot + i] = (struct pipe_scissor_state) {3299.minx = 1, .maxx = 0, .miny = 1, .maxy = 0,3300};3301} else {3302ice->state.scissors[start_slot + i] = (struct pipe_scissor_state) {3303.minx = rects[i].minx, .miny = rects[i].miny,3304.maxx = rects[i].maxx - 1, .maxy = rects[i].maxy - 1,3305};3306}3307}33083309#if GFX_VER < 63310ice->state.dirty |= CROCUS_DIRTY_RASTER; /* SF state */3311#else3312ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT;3313#endif3314ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT;33153316}33173318/**3319* The pipe->set_stencil_ref() driver hook.3320*3321* This is added to 3DSTATE_WM_DEPTH_STENCIL dynamically at draw time.3322*/3323static void3324crocus_set_stencil_ref(struct pipe_context *ctx,3325const struct pipe_stencil_ref ref)3326{3327struct crocus_context *ice = (struct crocus_context *) ctx;3328ice->state.stencil_ref = ref;3329ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE;3330}33313332#if GFX_VER == 83333static float3334viewport_extent(const struct pipe_viewport_state *state, int axis, float sign)3335{3336return copysignf(state->scale[axis], sign) + state->translate[axis];3337}3338#endif33393340/**3341* The pipe->set_viewport_states() driver hook.3342*3343* This 
corresponds to our SF_CLIP_VIEWPORT states. We can't calculate3344* the guardband yet, as we need the framebuffer dimensions, but we can3345* at least fill out the rest.3346*/3347static void3348crocus_set_viewport_states(struct pipe_context *ctx,3349unsigned start_slot,3350unsigned count,3351const struct pipe_viewport_state *states)3352{3353struct crocus_context *ice = (struct crocus_context *) ctx;33543355memcpy(&ice->state.viewports[start_slot], states, sizeof(*states) * count);33563357ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT;3358ice->state.dirty |= CROCUS_DIRTY_RASTER;3359#if GFX_VER >= 63360ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT;3361#endif33623363if (ice->state.cso_rast && (!ice->state.cso_rast->cso.depth_clip_near ||3364!ice->state.cso_rast->cso.depth_clip_far))3365ice->state.dirty |= CROCUS_DIRTY_CC_VIEWPORT;3366}33673368/**3369* The pipe->set_framebuffer_state() driver hook.3370*3371* Sets the current draw FBO, including color render targets, depth,3372* and stencil buffers.3373*/3374static void3375crocus_set_framebuffer_state(struct pipe_context *ctx,3376const struct pipe_framebuffer_state *state)3377{3378struct crocus_context *ice = (struct crocus_context *) ctx;3379struct pipe_framebuffer_state *cso = &ice->state.framebuffer;3380struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;3381const struct intel_device_info *devinfo = &screen->devinfo;3382#if 03383struct isl_device *isl_dev = &screen->isl_dev;3384struct crocus_resource *zres;3385struct crocus_resource *stencil_res;3386#endif33873388unsigned samples = util_framebuffer_get_num_samples(state);3389unsigned layers = util_framebuffer_get_num_layers(state);33903391#if GFX_VER >= 63392if (cso->samples != samples) {3393ice->state.dirty |= CROCUS_DIRTY_GEN6_MULTISAMPLE;3394ice->state.dirty |= CROCUS_DIRTY_GEN6_SAMPLE_MASK;3395ice->state.dirty |= CROCUS_DIRTY_RASTER;3396#if GFX_VERx10 == 753397ice->state.stage_dirty |= 
CROCUS_STAGE_DIRTY_FS;3398#endif3399}3400#endif34013402#if GFX_VER >= 6 && GFX_VER < 83403ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE;3404#endif34053406if ((cso->layers == 0) != (layers == 0)) {3407ice->state.dirty |= CROCUS_DIRTY_CLIP;3408}34093410if (cso->width != state->width || cso->height != state->height) {3411ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT;3412ice->state.dirty |= CROCUS_DIRTY_RASTER;3413ice->state.dirty |= CROCUS_DIRTY_DRAWING_RECTANGLE;3414#if GFX_VER >= 63415ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT;3416#endif3417}34183419if (cso->zsbuf || state->zsbuf) {3420ice->state.dirty |= CROCUS_DIRTY_DEPTH_BUFFER;34213422/* update SF's depth buffer format */3423if (GFX_VER == 7 && cso->zsbuf)3424ice->state.dirty |= CROCUS_DIRTY_RASTER;3425}34263427/* wm thread dispatch enable */3428ice->state.dirty |= CROCUS_DIRTY_WM;3429util_copy_framebuffer_state(cso, state);3430cso->samples = samples;3431cso->layers = layers;34323433if (cso->zsbuf) {3434struct crocus_resource *zres;3435struct crocus_resource *stencil_res;3436enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;3437crocus_get_depth_stencil_resources(devinfo, cso->zsbuf->texture, &zres,3438&stencil_res);3439if (zres && crocus_resource_level_has_hiz(zres, cso->zsbuf->u.tex.level)) {3440aux_usage = zres->aux.usage;3441}3442ice->state.hiz_usage = aux_usage;3443}34443445/* Render target change */3446ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_FS;34473448ice->state.dirty |= CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;34493450ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_FRAMEBUFFER];3451}34523453/**3454* The pipe->set_constant_buffer() driver hook.3455*3456* This uploads any constant data in user buffers, and references3457* any UBO resources containing constant data.3458*/3459static void3460crocus_set_constant_buffer(struct pipe_context *ctx,3461enum pipe_shader_type p_stage, unsigned index,3462bool take_ownership,3463const struct pipe_constant_buffer 
*input)3464{3465struct crocus_context *ice = (struct crocus_context *) ctx;3466gl_shader_stage stage = stage_from_pipe(p_stage);3467struct crocus_shader_state *shs = &ice->state.shaders[stage];3468struct pipe_constant_buffer *cbuf = &shs->constbufs[index];34693470util_copy_constant_buffer(&shs->constbufs[index], input, take_ownership);34713472if (input && input->buffer_size && (input->buffer || input->user_buffer)) {3473shs->bound_cbufs |= 1u << index;34743475if (input->user_buffer) {3476void *map = NULL;3477pipe_resource_reference(&cbuf->buffer, NULL);3478u_upload_alloc(ice->ctx.const_uploader, 0, input->buffer_size, 64,3479&cbuf->buffer_offset, &cbuf->buffer, (void **) &map);34803481if (!cbuf->buffer) {3482/* Allocation was unsuccessful - just unbind */3483crocus_set_constant_buffer(ctx, p_stage, index, false, NULL);3484return;3485}34863487assert(map);3488memcpy(map, input->user_buffer, input->buffer_size);3489}3490cbuf->buffer_size =3491MIN2(input->buffer_size,3492crocus_resource_bo(cbuf->buffer)->size - cbuf->buffer_offset);34933494struct crocus_resource *res = (void *) cbuf->buffer;3495res->bind_history |= PIPE_BIND_CONSTANT_BUFFER;3496res->bind_stages |= 1 << stage;3497} else {3498shs->bound_cbufs &= ~(1u << index);3499}35003501ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS << stage;3502}35033504static void3505upload_sysvals(struct crocus_context *ice,3506gl_shader_stage stage)3507{3508UNUSED struct crocus_genx_state *genx = ice->state.genx;3509struct crocus_shader_state *shs = &ice->state.shaders[stage];35103511struct crocus_compiled_shader *shader = ice->shaders.prog[stage];3512if (!shader || shader->num_system_values == 0)3513return;35143515assert(shader->num_cbufs > 0);35163517unsigned sysval_cbuf_index = shader->num_cbufs - 1;3518struct pipe_constant_buffer *cbuf = &shs->constbufs[sysval_cbuf_index];3519unsigned upload_size = shader->num_system_values * sizeof(uint32_t);3520uint32_t *map = NULL;35213522assert(sysval_cbuf_index < 
PIPE_MAX_CONSTANT_BUFFERS);3523u_upload_alloc(ice->ctx.const_uploader, 0, upload_size, 64,3524&cbuf->buffer_offset, &cbuf->buffer, (void **) &map);35253526for (int i = 0; i < shader->num_system_values; i++) {3527uint32_t sysval = shader->system_values[i];3528uint32_t value = 0;35293530if (BRW_PARAM_DOMAIN(sysval) == BRW_PARAM_DOMAIN_IMAGE) {3531#if GFX_VER >= 73532unsigned img = BRW_PARAM_IMAGE_IDX(sysval);3533unsigned offset = BRW_PARAM_IMAGE_OFFSET(sysval);3534struct brw_image_param *param =3535&genx->shaders[stage].image_param[img];35363537assert(offset < sizeof(struct brw_image_param));3538value = ((uint32_t *) param)[offset];3539#endif3540} else if (sysval == BRW_PARAM_BUILTIN_ZERO) {3541value = 0;3542} else if (BRW_PARAM_BUILTIN_IS_CLIP_PLANE(sysval)) {3543int plane = BRW_PARAM_BUILTIN_CLIP_PLANE_IDX(sysval);3544int comp = BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(sysval);3545value = fui(ice->state.clip_planes.ucp[plane][comp]);3546} else if (sysval == BRW_PARAM_BUILTIN_PATCH_VERTICES_IN) {3547if (stage == MESA_SHADER_TESS_CTRL) {3548value = ice->state.vertices_per_patch;3549} else {3550assert(stage == MESA_SHADER_TESS_EVAL);3551const struct shader_info *tcs_info =3552crocus_get_shader_info(ice, MESA_SHADER_TESS_CTRL);3553if (tcs_info)3554value = tcs_info->tess.tcs_vertices_out;3555else3556value = ice->state.vertices_per_patch;3557}3558} else if (sysval >= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X &&3559sysval <= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W) {3560unsigned i = sysval - BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;3561value = fui(ice->state.default_outer_level[i]);3562} else if (sysval == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X) {3563value = fui(ice->state.default_inner_level[0]);3564} else if (sysval == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y) {3565value = fui(ice->state.default_inner_level[1]);3566} else if (sysval >= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X &&3567sysval <= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_Z) {3568unsigned i = sysval - BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X;3569value 
= ice->state.last_block[i];3570} else {3571assert(!"unhandled system value");3572}35733574*map++ = value;3575}35763577cbuf->buffer_size = upload_size;3578shs->sysvals_need_upload = false;3579}35803581/**3582* The pipe->set_shader_buffers() driver hook.3583*3584* This binds SSBOs and ABOs. Unfortunately, we need to stream out3585* SURFACE_STATE here, as the buffer offset may change each time.3586*/3587static void3588crocus_set_shader_buffers(struct pipe_context *ctx,3589enum pipe_shader_type p_stage,3590unsigned start_slot, unsigned count,3591const struct pipe_shader_buffer *buffers,3592unsigned writable_bitmask)3593{3594struct crocus_context *ice = (struct crocus_context *) ctx;3595gl_shader_stage stage = stage_from_pipe(p_stage);3596struct crocus_shader_state *shs = &ice->state.shaders[stage];35973598unsigned modified_bits = u_bit_consecutive(start_slot, count);35993600shs->bound_ssbos &= ~modified_bits;3601shs->writable_ssbos &= ~modified_bits;3602shs->writable_ssbos |= writable_bitmask << start_slot;36033604for (unsigned i = 0; i < count; i++) {3605if (buffers && buffers[i].buffer) {3606struct crocus_resource *res = (void *) buffers[i].buffer;3607struct pipe_shader_buffer *ssbo = &shs->ssbo[start_slot + i];3608pipe_resource_reference(&ssbo->buffer, &res->base.b);3609ssbo->buffer_offset = buffers[i].buffer_offset;3610ssbo->buffer_size =3611MIN2(buffers[i].buffer_size, res->bo->size - ssbo->buffer_offset);36123613shs->bound_ssbos |= 1 << (start_slot + i);36143615res->bind_history |= PIPE_BIND_SHADER_BUFFER;3616res->bind_stages |= 1 << stage;36173618util_range_add(&res->base.b, &res->valid_buffer_range, ssbo->buffer_offset,3619ssbo->buffer_offset + ssbo->buffer_size);3620} else {3621pipe_resource_reference(&shs->ssbo[start_slot + i].buffer, NULL);3622}3623}36243625ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_VS << stage;3626}36273628static void3629crocus_delete_state(struct pipe_context *ctx, void *state)3630{3631free(state);3632}36333634/**3635* The 
pipe->set_vertex_buffers() driver hook.3636*3637* This translates pipe_vertex_buffer to our 3DSTATE_VERTEX_BUFFERS packet.3638*/3639static void3640crocus_set_vertex_buffers(struct pipe_context *ctx,3641unsigned start_slot, unsigned count,3642unsigned unbind_num_trailing_slots,3643bool take_ownership,3644const struct pipe_vertex_buffer *buffers)3645{3646struct crocus_context *ice = (struct crocus_context *) ctx;3647struct crocus_screen *screen = (struct crocus_screen *) ctx->screen;3648const unsigned padding =3649(GFX_VERx10 < 75 && !screen->devinfo.is_baytrail) * 2;3650ice->state.bound_vertex_buffers &=3651~u_bit_consecutive64(start_slot, count + unbind_num_trailing_slots);36523653util_set_vertex_buffers_mask(ice->state.vertex_buffers, &ice->state.bound_vertex_buffers,3654buffers, start_slot, count, unbind_num_trailing_slots,3655take_ownership);36563657for (unsigned i = 0; i < count; i++) {3658struct pipe_vertex_buffer *state =3659&ice->state.vertex_buffers[start_slot + i];36603661if (!state->is_user_buffer && state->buffer.resource) {3662struct crocus_resource *res = (void *)state->buffer.resource;3663res->bind_history |= PIPE_BIND_VERTEX_BUFFER;3664}36653666uint32_t end = 0;3667if (state->buffer.resource)3668end = state->buffer.resource->width0 + padding;3669ice->state.vb_end[start_slot + i] = end;3670}3671ice->state.dirty |= CROCUS_DIRTY_VERTEX_BUFFERS;3672}36733674#if GFX_VERx10 < 753675static uint8_t get_wa_flags(enum isl_format format)3676{3677uint8_t wa_flags = 0;36783679switch (format) {3680case ISL_FORMAT_R10G10B10A2_USCALED:3681wa_flags = BRW_ATTRIB_WA_SCALE;3682break;3683case ISL_FORMAT_R10G10B10A2_SSCALED:3684wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_SCALE;3685break;3686case ISL_FORMAT_R10G10B10A2_UNORM:3687wa_flags = BRW_ATTRIB_WA_NORMALIZE;3688break;3689case ISL_FORMAT_R10G10B10A2_SNORM:3690wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_NORMALIZE;3691break;3692case ISL_FORMAT_R10G10B10A2_SINT:3693wa_flags = BRW_ATTRIB_WA_SIGN;3694break;3695case 
ISL_FORMAT_B10G10R10A2_USCALED:3696wa_flags = BRW_ATTRIB_WA_SCALE | BRW_ATTRIB_WA_BGRA;3697break;3698case ISL_FORMAT_B10G10R10A2_SSCALED:3699wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_SCALE | BRW_ATTRIB_WA_BGRA;3700break;3701case ISL_FORMAT_B10G10R10A2_UNORM:3702wa_flags = BRW_ATTRIB_WA_NORMALIZE | BRW_ATTRIB_WA_BGRA;3703break;3704case ISL_FORMAT_B10G10R10A2_SNORM:3705wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_NORMALIZE | BRW_ATTRIB_WA_BGRA;3706break;3707case ISL_FORMAT_B10G10R10A2_SINT:3708wa_flags = BRW_ATTRIB_WA_SIGN | BRW_ATTRIB_WA_BGRA;3709break;3710case ISL_FORMAT_B10G10R10A2_UINT:3711wa_flags = BRW_ATTRIB_WA_BGRA;3712break;3713default:3714break;3715}3716return wa_flags;3717}3718#endif37193720/**3721* Gallium CSO for vertex elements.3722*/3723struct crocus_vertex_element_state {3724uint32_t vertex_elements[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)];3725#if GFX_VER == 83726uint32_t vf_instancing[33 * GENX(3DSTATE_VF_INSTANCING_length)];3727#endif3728uint32_t edgeflag_ve[GENX(VERTEX_ELEMENT_STATE_length)];3729#if GFX_VER == 83730uint32_t edgeflag_vfi[GENX(3DSTATE_VF_INSTANCING_length)];3731#endif3732uint32_t step_rate[16];3733uint8_t wa_flags[33];3734unsigned count;3735};37363737/**3738* The pipe->create_vertex_elements() driver hook.3739*3740* This translates pipe_vertex_element to our 3DSTATE_VERTEX_ELEMENTS3741* and 3DSTATE_VF_INSTANCING commands. The vertex_elements and vf_instancing3742* arrays are ready to be emitted at draw time if no EdgeFlag or SGVs are3743* needed. 
In these cases we will need information available at draw time.3744* We setup edgeflag_ve and edgeflag_vfi as alternatives last3745* 3DSTATE_VERTEX_ELEMENT and 3DSTATE_VF_INSTANCING that can be used at3746* draw time if we detect that EdgeFlag is needed by the Vertex Shader.3747*/3748static void *3749crocus_create_vertex_elements(struct pipe_context *ctx,3750unsigned count,3751const struct pipe_vertex_element *state)3752{3753struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;3754const struct intel_device_info *devinfo = &screen->devinfo;3755struct crocus_vertex_element_state *cso =3756malloc(sizeof(struct crocus_vertex_element_state));37573758cso->count = count;37593760crocus_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), cso->vertex_elements, ve) {3761ve.DWordLength =37621 + GENX(VERTEX_ELEMENT_STATE_length) * MAX2(count, 1) - 2;3763}37643765uint32_t *ve_pack_dest = &cso->vertex_elements[1];3766#if GFX_VER == 83767uint32_t *vfi_pack_dest = cso->vf_instancing;3768#endif37693770if (count == 0) {3771crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {3772ve.Valid = true;3773ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;3774ve.Component0Control = VFCOMP_STORE_0;3775ve.Component1Control = VFCOMP_STORE_0;3776ve.Component2Control = VFCOMP_STORE_0;3777ve.Component3Control = VFCOMP_STORE_1_FP;3778}3779#if GFX_VER == 83780crocus_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) {3781}3782#endif3783}37843785for (int i = 0; i < count; i++) {3786const struct crocus_format_info fmt =3787crocus_format_for_usage(devinfo, state[i].src_format, 0);3788unsigned comp[4] = { VFCOMP_STORE_SRC, VFCOMP_STORE_SRC,3789VFCOMP_STORE_SRC, VFCOMP_STORE_SRC };3790enum isl_format actual_fmt = fmt.fmt;37913792#if GFX_VERx10 < 753793cso->wa_flags[i] = get_wa_flags(fmt.fmt);37943795if (fmt.fmt == ISL_FORMAT_R10G10B10A2_USCALED ||3796fmt.fmt == ISL_FORMAT_R10G10B10A2_SSCALED ||3797fmt.fmt == ISL_FORMAT_R10G10B10A2_UNORM ||3798fmt.fmt == 
ISL_FORMAT_R10G10B10A2_SNORM ||3799fmt.fmt == ISL_FORMAT_R10G10B10A2_SINT ||3800fmt.fmt == ISL_FORMAT_B10G10R10A2_USCALED ||3801fmt.fmt == ISL_FORMAT_B10G10R10A2_SSCALED ||3802fmt.fmt == ISL_FORMAT_B10G10R10A2_UNORM ||3803fmt.fmt == ISL_FORMAT_B10G10R10A2_SNORM ||3804fmt.fmt == ISL_FORMAT_B10G10R10A2_UINT ||3805fmt.fmt == ISL_FORMAT_B10G10R10A2_SINT)3806actual_fmt = ISL_FORMAT_R10G10B10A2_UINT;3807if (fmt.fmt == ISL_FORMAT_R8G8B8_SINT)3808actual_fmt = ISL_FORMAT_R8G8B8A8_SINT;3809if (fmt.fmt == ISL_FORMAT_R8G8B8_UINT)3810actual_fmt = ISL_FORMAT_R8G8B8A8_UINT;3811if (fmt.fmt == ISL_FORMAT_R16G16B16_SINT)3812actual_fmt = ISL_FORMAT_R16G16B16A16_SINT;3813if (fmt.fmt == ISL_FORMAT_R16G16B16_UINT)3814actual_fmt = ISL_FORMAT_R16G16B16A16_UINT;3815#endif38163817cso->step_rate[state[i].vertex_buffer_index] = state[i].instance_divisor;38183819switch (isl_format_get_num_channels(fmt.fmt)) {3820case 0: comp[0] = VFCOMP_STORE_0; FALLTHROUGH;3821case 1: comp[1] = VFCOMP_STORE_0; FALLTHROUGH;3822case 2: comp[2] = VFCOMP_STORE_0; FALLTHROUGH;3823case 3:3824comp[3] = isl_format_has_int_channel(fmt.fmt) ? 
VFCOMP_STORE_1_INT3825: VFCOMP_STORE_1_FP;3826break;3827}3828crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {3829#if GFX_VER >= 63830ve.EdgeFlagEnable = false;3831#endif3832ve.VertexBufferIndex = state[i].vertex_buffer_index;3833ve.Valid = true;3834ve.SourceElementOffset = state[i].src_offset;3835ve.SourceElementFormat = actual_fmt;3836ve.Component0Control = comp[0];3837ve.Component1Control = comp[1];3838ve.Component2Control = comp[2];3839ve.Component3Control = comp[3];3840#if GFX_VER < 53841ve.DestinationElementOffset = i * 4;3842#endif3843}38443845#if GFX_VER == 83846crocus_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) {3847vi.VertexElementIndex = i;3848vi.InstancingEnable = state[i].instance_divisor > 0;3849vi.InstanceDataStepRate = state[i].instance_divisor;3850}3851#endif3852ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);3853#if GFX_VER == 83854vfi_pack_dest += GENX(3DSTATE_VF_INSTANCING_length);3855#endif3856}38573858/* An alternative version of the last VE and VFI is stored so it3859* can be used at draw time in case Vertex Shader uses EdgeFlag3860*/3861if (count) {3862const unsigned edgeflag_index = count - 1;3863const struct crocus_format_info fmt =3864crocus_format_for_usage(devinfo, state[edgeflag_index].src_format, 0);3865crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), cso->edgeflag_ve, ve) {3866#if GFX_VER >= 63867ve.EdgeFlagEnable = true;3868#endif3869ve.VertexBufferIndex = state[edgeflag_index].vertex_buffer_index;3870ve.Valid = true;3871ve.SourceElementOffset = state[edgeflag_index].src_offset;3872ve.SourceElementFormat = fmt.fmt;3873ve.Component0Control = VFCOMP_STORE_SRC;3874ve.Component1Control = VFCOMP_STORE_0;3875ve.Component2Control = VFCOMP_STORE_0;3876ve.Component3Control = VFCOMP_STORE_0;3877}3878#if GFX_VER == 83879crocus_pack_command(GENX(3DSTATE_VF_INSTANCING), cso->edgeflag_vfi, vi) {3880/* The vi.VertexElementIndex of the EdgeFlag Vertex Element is filled3881* at draw time, as it should change if SGVs 
are emitted.3882*/3883vi.InstancingEnable = state[edgeflag_index].instance_divisor > 0;3884vi.InstanceDataStepRate = state[edgeflag_index].instance_divisor;3885}3886#endif3887}38883889return cso;3890}38913892/**3893* The pipe->bind_vertex_elements_state() driver hook.3894*/3895static void3896crocus_bind_vertex_elements_state(struct pipe_context *ctx, void *state)3897{3898struct crocus_context *ice = (struct crocus_context *) ctx;3899#if GFX_VER == 83900struct crocus_vertex_element_state *old_cso = ice->state.cso_vertex_elements;3901struct crocus_vertex_element_state *new_cso = state;39023903if (new_cso && cso_changed(count))3904ice->state.dirty |= CROCUS_DIRTY_GEN8_VF_SGVS;3905#endif3906ice->state.cso_vertex_elements = state;3907ice->state.dirty |= CROCUS_DIRTY_VERTEX_ELEMENTS | CROCUS_DIRTY_VERTEX_BUFFERS;3908ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_VERTEX_ELEMENTS];3909}39103911#if GFX_VER >= 63912struct crocus_streamout_counter {3913uint32_t offset_start;3914uint32_t offset_end;39153916uint64_t accum;3917};39183919/**3920* Gallium CSO for stream output (transform feedback) targets.3921*/3922struct crocus_stream_output_target {3923struct pipe_stream_output_target base;39243925/** Stride (bytes-per-vertex) during this transform feedback operation */3926uint16_t stride;39273928/** Has 3DSTATE_SO_BUFFER actually been emitted, zeroing the offsets? */3929bool zeroed;39303931struct crocus_resource *offset_res;3932uint32_t offset_offset;39333934#if GFX_VER == 63935void *prim_map;3936struct crocus_streamout_counter prev_count;3937struct crocus_streamout_counter count;3938#endif3939#if GFX_VER == 83940/** Does the next 3DSTATE_SO_BUFFER need to zero the offsets? 
#if GFX_VER >= 7
/**
 * Return the number of vertices recorded for a stream output target.
 *
 * Maps the 4-byte slot at offset_offset inside the target's offset_res,
 * reads the value stored there and divides it by the target's stride
 * (bytes per vertex).
 *
 * Returns 0 when the slot cannot be mapped or when no stride has been
 * recorded yet (the CSO is zero-initialised at creation time).
 */
static uint32_t
crocus_get_so_offset(struct pipe_stream_output_target *so)
{
   struct crocus_stream_output_target *tgt = (void *)so;
   struct pipe_context *ctx = so->context;
   struct pipe_transfer *transfer;
   struct pipe_box box;

   u_box_1d(tgt->offset_offset, 4, &box);
   void *val = ctx->buffer_map(ctx, &tgt->offset_res->base.b,
                               0, PIPE_MAP_DIRECTLY,
                               &box, &transfer);
   assert(val);
   /* assert() vanishes in release builds; do not dereference a failed map. */
   if (!val)
      return 0;

   const uint32_t result = *(uint32_t *)val;
   ctx->buffer_unmap(ctx, transfer);

   /* Guard the division: stride stays 0 until something sets it. */
   return tgt->stride ? result / tgt->stride : 0;
}
#endif

#if GFX_VER == 6
static void
compute_vertices_written_so_far(struct crocus_context *ice,
                                struct crocus_stream_output_target *tgt,
                                struct crocus_streamout_counter *count,
                                uint64_t *svbi);

/**
 * Return the number of vertices recorded for a stream output target.
 *
 * Gen6 variant: delegates to compute_vertices_written_so_far() using the
 * target's prev_count counter and returns the result truncated to 32 bits.
 */
static uint32_t
crocus_get_so_offset(struct pipe_stream_output_target *so)
{
   struct crocus_stream_output_target *tgt = (void *)so;
   struct crocus_context *ice = (void *)so->context;

   uint64_t vert_written;
   compute_vertices_written_so_far(ice, tgt, &tgt->prev_count, &vert_written);
   return vert_written;
}
#endif
Those are handled in the set() hook.3991*/3992static struct pipe_stream_output_target *3993crocus_create_stream_output_target(struct pipe_context *ctx,3994struct pipe_resource *p_res,3995unsigned buffer_offset,3996unsigned buffer_size)3997{3998struct crocus_resource *res = (void *) p_res;3999struct crocus_stream_output_target *cso = calloc(1, sizeof(*cso));4000if (!cso)4001return NULL;40024003res->bind_history |= PIPE_BIND_STREAM_OUTPUT;40044005pipe_reference_init(&cso->base.reference, 1);4006pipe_resource_reference(&cso->base.buffer, p_res);4007cso->base.buffer_offset = buffer_offset;4008cso->base.buffer_size = buffer_size;4009cso->base.context = ctx;40104011util_range_add(&res->base.b, &res->valid_buffer_range, buffer_offset,4012buffer_offset + buffer_size);4013#if GFX_VER >= 74014struct crocus_context *ice = (struct crocus_context *) ctx;4015void *temp;4016u_upload_alloc(ice->ctx.stream_uploader, 0, sizeof(uint32_t), 4,4017&cso->offset_offset,4018(struct pipe_resource **)&cso->offset_res,4019&temp);4020#endif40214022return &cso->base;4023}40244025static void4026crocus_stream_output_target_destroy(struct pipe_context *ctx,4027struct pipe_stream_output_target *state)4028{4029struct crocus_stream_output_target *cso = (void *) state;40304031pipe_resource_reference((struct pipe_resource **)&cso->offset_res, NULL);4032pipe_resource_reference(&cso->base.buffer, NULL);40334034free(cso);4035}40364037#define GEN6_SO_NUM_PRIMS_WRITTEN 0x22884038#define GEN7_SO_WRITE_OFFSET(n) (0x5280 + (n) * 4)40394040#if GFX_VER == 64041static void4042aggregate_stream_counter(struct crocus_batch *batch, struct crocus_stream_output_target *tgt,4043struct crocus_streamout_counter *counter)4044{4045uint64_t *prim_counts = tgt->prim_map;40464047if (crocus_batch_references(batch, tgt->offset_res->bo)) {4048struct pipe_fence_handle *out_fence = NULL;4049batch->ice->ctx.flush(&batch->ice->ctx, &out_fence, 0);4050batch->screen->base.fence_finish(&batch->screen->base, &batch->ice->ctx, out_fence, 
UINT64_MAX);4051batch->screen->base.fence_reference(&batch->screen->base, &out_fence, NULL);4052}40534054for (unsigned i = counter->offset_start / sizeof(uint64_t); i < counter->offset_end / sizeof(uint64_t); i += 2) {4055counter->accum += prim_counts[i + 1] - prim_counts[i];4056}4057tgt->count.offset_start = tgt->count.offset_end = 0;4058}40594060static void4061crocus_stream_store_prims_written(struct crocus_batch *batch,4062struct crocus_stream_output_target *tgt)4063{4064if (!tgt->offset_res) {4065u_upload_alloc(batch->ice->ctx.stream_uploader, 0, 4096, 4,4066&tgt->offset_offset,4067(struct pipe_resource **)&tgt->offset_res,4068&tgt->prim_map);4069tgt->count.offset_start = tgt->count.offset_end = 0;4070}40714072if (tgt->count.offset_end + 16 >= 4096) {4073aggregate_stream_counter(batch, tgt, &tgt->prev_count);4074aggregate_stream_counter(batch, tgt, &tgt->count);4075}40764077crocus_emit_mi_flush(batch);4078crocus_store_register_mem64(batch, GEN6_SO_NUM_PRIMS_WRITTEN,4079tgt->offset_res->bo,4080tgt->count.offset_end + tgt->offset_offset, false);4081tgt->count.offset_end += 8;4082}40834084static void4085compute_vertices_written_so_far(struct crocus_context *ice,4086struct crocus_stream_output_target *tgt,4087struct crocus_streamout_counter *counter,4088uint64_t *svbi)4089{4090//TODO vertices per prim4091aggregate_stream_counter(&ice->batches[0], tgt, counter);40924093*svbi = counter->accum * ice->state.last_xfb_verts_per_prim;4094}4095#endif4096/**4097* The pipe->set_stream_output_targets() driver hook.4098*4099* At this point, we know which targets are bound to a particular index,4100* and also whether we want to append or start over. 
We can finish the4101* 3DSTATE_SO_BUFFER packets we started earlier.4102*/4103static void4104crocus_set_stream_output_targets(struct pipe_context *ctx,4105unsigned num_targets,4106struct pipe_stream_output_target **targets,4107const unsigned *offsets)4108{4109struct crocus_context *ice = (struct crocus_context *) ctx;4110struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];4111struct pipe_stream_output_target *old_tgt[4] = { NULL, NULL, NULL, NULL };4112const bool active = num_targets > 0;4113if (ice->state.streamout_active != active) {4114ice->state.streamout_active = active;4115#if GFX_VER >= 74116ice->state.dirty |= CROCUS_DIRTY_STREAMOUT;4117#else4118ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG;4119#endif41204121/* We only emit 3DSTATE_SO_DECL_LIST when streamout is active, because4122* it's a non-pipelined command. If we're switching streamout on, we4123* may have missed emitting it earlier, so do so now. (We're already4124* taking a stall to update 3DSTATE_SO_BUFFERS anyway...)4125*/4126if (active) {4127#if GFX_VER >= 74128ice->state.dirty |= CROCUS_DIRTY_SO_DECL_LIST;4129#endif4130} else {4131uint32_t flush = 0;4132for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {4133struct crocus_stream_output_target *tgt =4134(void *) ice->state.so_target[i];4135if (tgt) {4136struct crocus_resource *res = (void *) tgt->base.buffer;41374138flush |= crocus_flush_bits_for_history(res);4139crocus_dirty_for_history(ice, res);4140}4141}4142crocus_emit_pipe_control_flush(&ice->batches[CROCUS_BATCH_RENDER],4143"make streamout results visible", flush);4144}4145}41464147ice->state.so_targets = num_targets;4148for (int i = 0; i < 4; i++) {4149pipe_so_target_reference(&old_tgt[i], ice->state.so_target[i]);4150pipe_so_target_reference(&ice->state.so_target[i],4151i < num_targets ? 
targets[i] : NULL);4152}41534154#if GFX_VER == 64155bool stored_num_prims = false;4156for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {4157if (num_targets) {4158struct crocus_stream_output_target *tgt =4159(void *) ice->state.so_target[i];41604161if (!tgt)4162continue;4163if (offsets[i] == 0) {4164// This means that we're supposed to ignore anything written to4165// the buffer before. We can do this by just clearing out the4166// count of writes to the prim count buffer.4167tgt->count.offset_start = tgt->count.offset_end;4168tgt->count.accum = 0;4169ice->state.svbi = 0;4170} else {4171if (tgt->offset_res) {4172compute_vertices_written_so_far(ice, tgt, &tgt->count, &ice->state.svbi);4173tgt->count.offset_start = tgt->count.offset_end;4174}4175}41764177if (!stored_num_prims) {4178crocus_stream_store_prims_written(batch, tgt);4179stored_num_prims = true;4180}4181} else {4182struct crocus_stream_output_target *tgt =4183(void *) old_tgt[i];4184if (tgt) {4185if (!stored_num_prims) {4186crocus_stream_store_prims_written(batch, tgt);4187stored_num_prims = true;4188}41894190if (tgt->offset_res) {4191tgt->prev_count = tgt->count;4192}4193}4194}4195pipe_so_target_reference(&old_tgt[i], NULL);4196}4197ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_GS;4198#else4199for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {4200if (num_targets) {4201struct crocus_stream_output_target *tgt =4202(void *) ice->state.so_target[i];42034204if (offsets[i] == 0) {4205#if GFX_VER == 84206if (tgt)4207tgt->zero_offset = true;4208#endif4209crocus_load_register_imm32(batch, GEN7_SO_WRITE_OFFSET(i), 0);4210}4211else if (tgt)4212crocus_load_register_mem32(batch, GEN7_SO_WRITE_OFFSET(i),4213tgt->offset_res->bo,4214tgt->offset_offset);4215} else {4216struct crocus_stream_output_target *tgt =4217(void *) old_tgt[i];4218if (tgt)4219crocus_store_register_mem32(batch, GEN7_SO_WRITE_OFFSET(i),4220tgt->offset_res->bo,4221tgt->offset_offset, false);4222}4223pipe_so_target_reference(&old_tgt[i], 
NULL);4224}4225#endif4226/* No need to update 3DSTATE_SO_BUFFER unless SOL is active. */4227if (!active)4228return;4229#if GFX_VER >= 74230ice->state.dirty |= CROCUS_DIRTY_GEN7_SO_BUFFERS;4231#elif GFX_VER == 64232ice->state.dirty |= CROCUS_DIRTY_GEN6_SVBI;4233#endif4234}42354236#endif42374238#if GFX_VER >= 74239/**4240* An crocus-vtable helper for encoding the 3DSTATE_SO_DECL_LIST and4241* 3DSTATE_STREAMOUT packets.4242*4243* 3DSTATE_SO_DECL_LIST is a list of shader outputs we want the streamout4244* hardware to record. We can create it entirely based on the shader, with4245* no dynamic state dependencies.4246*4247* 3DSTATE_STREAMOUT is an annoying mix of shader-based information and4248* state-based settings. We capture the shader-related ones here, and merge4249* the rest in at draw time.4250*/4251static uint32_t *4252crocus_create_so_decl_list(const struct pipe_stream_output_info *info,4253const struct brw_vue_map *vue_map)4254{4255struct GENX(SO_DECL) so_decl[MAX_VERTEX_STREAMS][128];4256int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};4257int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};4258int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};4259int max_decls = 0;4260STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS);42614262memset(so_decl, 0, sizeof(so_decl));42634264/* Construct the list of SO_DECLs to be emitted. The formatting of the4265* command feels strange -- each dword pair contains a SO_DECL per stream.4266*/4267for (unsigned i = 0; i < info->num_outputs; i++) {4268const struct pipe_stream_output *output = &info->output[i];4269const int buffer = output->output_buffer;4270const int varying = output->register_index;4271const unsigned stream_id = output->stream;4272assert(stream_id < MAX_VERTEX_STREAMS);42734274buffer_mask[stream_id] |= 1 << buffer;42754276assert(vue_map->varying_to_slot[varying] >= 0);42774278/* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]4279* array. 
Instead, it simply increments DstOffset for the following4280* input by the number of components that should be skipped.4281*4282* Our hardware is unusual in that it requires us to program SO_DECLs4283* for fake "hole" components, rather than simply taking the offset4284* for each real varying. Each hole can have size 1, 2, 3, or 4; we4285* program as many size = 4 holes as we can, then a final hole to4286* accommodate the final 1, 2, or 3 remaining.4287*/4288int skip_components = output->dst_offset - next_offset[buffer];42894290while (skip_components > 0) {4291so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {4292.HoleFlag = 1,4293.OutputBufferSlot = output->output_buffer,4294.ComponentMask = (1 << MIN2(skip_components, 4)) - 1,4295};4296skip_components -= 4;4297}42984299next_offset[buffer] = output->dst_offset + output->num_components;43004301so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {4302.OutputBufferSlot = output->output_buffer,4303.RegisterIndex = vue_map->varying_to_slot[varying],4304.ComponentMask =4305((1 << output->num_components) - 1) << output->start_component,4306};43074308if (decls[stream_id] > max_decls)4309max_decls = decls[stream_id];4310}43114312unsigned dwords = GENX(3DSTATE_STREAMOUT_length) + (3 + 2 * max_decls);4313uint32_t *map = ralloc_size(NULL, sizeof(uint32_t) * dwords);4314uint32_t *so_decl_map = map + GENX(3DSTATE_STREAMOUT_length);43154316crocus_pack_command(GENX(3DSTATE_STREAMOUT), map, sol) {4317int urb_entry_read_offset = 0;4318int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -4319urb_entry_read_offset;43204321/* We always read the whole vertex. 
This could be reduced at some4322* point by reading less and offsetting the register index in the4323* SO_DECLs.4324*/4325sol.Stream0VertexReadOffset = urb_entry_read_offset;4326sol.Stream0VertexReadLength = urb_entry_read_length - 1;4327sol.Stream1VertexReadOffset = urb_entry_read_offset;4328sol.Stream1VertexReadLength = urb_entry_read_length - 1;4329sol.Stream2VertexReadOffset = urb_entry_read_offset;4330sol.Stream2VertexReadLength = urb_entry_read_length - 1;4331sol.Stream3VertexReadOffset = urb_entry_read_offset;4332sol.Stream3VertexReadLength = urb_entry_read_length - 1;43334334// TODO: Double-check that stride == 0 means no buffer. Probably this4335// needs to go elsewhere, where the buffer enable stuff is actually4336// known.4337#if GFX_VER < 84338sol.SOBufferEnable0 = !!info->stride[0];4339sol.SOBufferEnable1 = !!info->stride[1];4340sol.SOBufferEnable2 = !!info->stride[2];4341sol.SOBufferEnable3 = !!info->stride[3];4342#else4343/* Set buffer pitches; 0 means unbound. */4344sol.Buffer0SurfacePitch = 4 * info->stride[0];4345sol.Buffer1SurfacePitch = 4 * info->stride[1];4346sol.Buffer2SurfacePitch = 4 * info->stride[2];4347sol.Buffer3SurfacePitch = 4 * info->stride[3];4348#endif4349}43504351crocus_pack_command(GENX(3DSTATE_SO_DECL_LIST), so_decl_map, list) {4352list.DWordLength = 3 + 2 * max_decls - 2;4353list.StreamtoBufferSelects0 = buffer_mask[0];4354list.StreamtoBufferSelects1 = buffer_mask[1];4355list.StreamtoBufferSelects2 = buffer_mask[2];4356list.StreamtoBufferSelects3 = buffer_mask[3];4357list.NumEntries0 = decls[0];4358list.NumEntries1 = decls[1];4359list.NumEntries2 = decls[2];4360list.NumEntries3 = decls[3];4361}43624363for (int i = 0; i < max_decls; i++) {4364crocus_pack_state(GENX(SO_DECL_ENTRY), so_decl_map + 3 + i * 2, entry) {4365entry.Stream0Decl = so_decl[0][i];4366entry.Stream1Decl = so_decl[1][i];4367entry.Stream2Decl = so_decl[2][i];4368entry.Stream3Decl = so_decl[3][i];4369}4370}43714372return map;4373}4374#endif43754376#if GFX_VER == 
#if GFX_VER >= 6
/**
 * Report whether the current state rasterizes points.
 *
 * True when either polygon face is filled in point mode.  Otherwise the
 * answer comes from the last stage that determines the topology: the
 * geometry shader's output topology if one is bound, else the tessellation
 * evaluation shader's, else the primitive mode of the draw itself.
 */
static bool
crocus_is_drawing_points(const struct crocus_context *ice)
{
   const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;

   if (cso_rast->cso.fill_front == PIPE_POLYGON_MODE_POINT)
      return true;
   if (cso_rast->cso.fill_back == PIPE_POLYGON_MODE_POINT)
      return true;

   /* A bound geometry shader takes precedence over tessellation. */
   if (ice->shaders.prog[MESA_SHADER_GEOMETRY]) {
      const struct brw_gs_prog_data *gs_prog_data =
         (void *) ice->shaders.prog[MESA_SHADER_GEOMETRY]->prog_data;
      return gs_prog_data->output_topology == _3DPRIM_POINTLIST;
   }

   if (ice->shaders.prog[MESA_SHADER_TESS_EVAL]) {
      const struct brw_tes_prog_data *tes_data =
         (void *) ice->shaders.prog[MESA_SHADER_TESS_EVAL]->prog_data;
      return tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT;
   }

   return ice->state.prim_mode == PIPE_PRIM_POINTS;
}
#endif
*attr,4439const struct brw_vue_map *vue_map,4440int urb_entry_read_offset, int fs_attr,4441bool two_side_color, uint32_t *max_source_attr)4442{4443/* Find the VUE slot for this attribute. */4444int slot = vue_map->varying_to_slot[fs_attr];44454446/* Viewport and Layer are stored in the VUE header. We need to override4447* them to zero if earlier stages didn't write them, as GL requires that4448* they read back as zero when not explicitly set.4449*/4450if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) {4451attr->ComponentOverrideX = true;4452attr->ComponentOverrideW = true;4453attr->ConstantSource = CONST_0000;44544455if (!(vue_map->slots_valid & VARYING_BIT_LAYER))4456attr->ComponentOverrideY = true;4457if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT))4458attr->ComponentOverrideZ = true;44594460return;4461}44624463/* If there was only a back color written but not front, use back4464* as the color instead of undefined4465*/4466if (slot == -1 && fs_attr == VARYING_SLOT_COL0)4467slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0];4468if (slot == -1 && fs_attr == VARYING_SLOT_COL1)4469slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1];44704471if (slot == -1) {4472/* This attribute does not exist in the VUE--that means that the vertex4473* shader did not write to it. This means that either:4474*4475* (a) This attribute is a texture coordinate, and it is going to be4476* replaced with point coordinates (as a consequence of a call to4477* glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the4478* hardware will ignore whatever attribute override we supply.4479*4480* (b) This attribute is read by the fragment shader but not written by4481* the vertex shader, so its value is undefined. 
Therefore the4482* attribute override we supply doesn't matter.4483*4484* (c) This attribute is gl_PrimitiveID, and it wasn't written by the4485* previous shader stage.4486*4487* Note that we don't have to worry about the cases where the attribute4488* is gl_PointCoord or is undergoing point sprite coordinate4489* replacement, because in those cases, this function isn't called.4490*4491* In case (c), we need to program the attribute overrides so that the4492* primitive ID will be stored in this slot. In every other case, the4493* attribute override we supply doesn't matter. So just go ahead and4494* program primitive ID in every case.4495*/4496attr->ComponentOverrideW = true;4497attr->ComponentOverrideX = true;4498attr->ComponentOverrideY = true;4499attr->ComponentOverrideZ = true;4500attr->ConstantSource = PRIM_ID;4501return;4502}45034504/* Compute the location of the attribute relative to urb_entry_read_offset.4505* Each increment of urb_entry_read_offset represents a 256-bit value, so4506* it counts for two 128-bit VUE slots.4507*/4508int source_attr = slot - 2 * urb_entry_read_offset;4509assert(source_attr >= 0 && source_attr < 32);45104511/* If we are doing two-sided color, and the VUE slot following this one4512* represents a back-facing color, then we need to instruct the SF unit to4513* do back-facing swizzling.4514*/4515bool swizzling = two_side_color &&4516((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 &&4517vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) ||4518(vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 &&4519vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1));45204521/* Update max_source_attr. If swizzling, the SF will read this slot + 1. 
*/4522if (*max_source_attr < source_attr + swizzling)4523*max_source_attr = source_attr + swizzling;45244525attr->SourceAttribute = source_attr;4526if (swizzling)4527attr->SwizzleSelect = INPUTATTR_FACING;4528}45294530static void4531calculate_attr_overrides(4532const struct crocus_context *ice,4533struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr_overrides,4534uint32_t *point_sprite_enables,4535uint32_t *urb_entry_read_length,4536uint32_t *urb_entry_read_offset)4537{4538const struct brw_wm_prog_data *wm_prog_data = (void *)4539ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;4540const struct brw_vue_map *vue_map = ice->shaders.last_vue_map;4541const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;4542uint32_t max_source_attr = 0;4543const struct shader_info *fs_info =4544crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT);45454546int first_slot =4547brw_compute_first_urb_slot_required(fs_info->inputs_read, vue_map);45484549/* Each URB offset packs two varying slots */4550assert(first_slot % 2 == 0);4551*urb_entry_read_offset = first_slot / 2;4552*point_sprite_enables = 0;45534554for (int fs_attr = 0; fs_attr < VARYING_SLOT_MAX; fs_attr++) {4555const int input_index = wm_prog_data->urb_setup[fs_attr];45564557if (input_index < 0)4558continue;45594560bool point_sprite = false;4561if (crocus_is_drawing_points(ice)) {4562if (fs_attr >= VARYING_SLOT_TEX0 &&4563fs_attr <= VARYING_SLOT_TEX7 &&4564cso_rast->cso.sprite_coord_enable & (1 << (fs_attr - VARYING_SLOT_TEX0)))4565point_sprite = true;45664567if (fs_attr == VARYING_SLOT_PNTC)4568point_sprite = true;45694570if (point_sprite)4571*point_sprite_enables |= 1U << input_index;4572}45734574struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attribute = { 0 };4575if (!point_sprite) {4576get_attr_override(&attribute, vue_map, *urb_entry_read_offset, fs_attr,4577cso_rast->cso.light_twoside, &max_source_attr);4578}45794580/* The hardware can only do the overrides on 16 overrides at a4581* time, and the other up to 16 have to be 
lined up so that the4582* input index = the output index. We'll need to do some4583* tweaking to make sure that's the case.4584*/4585if (input_index < 16)4586attr_overrides[input_index] = attribute;4587else4588assert(attribute.SourceAttribute == input_index);4589}45904591/* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for4592* 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":4593*4594* "This field should be set to the minimum length required to read the4595* maximum source attribute. The maximum source attribute is indicated4596* by the maximum value of the enabled Attribute # Source Attribute if4597* Attribute Swizzle Enable is set, Number of Output Attributes-1 if4598* enable is not set.4599* read_length = ceiling((max_source_attr + 1) / 2)4600*4601* [errata] Corruption/Hang possible if length programmed larger than4602* recommended"4603*4604* Similar text exists for Ivy Bridge.4605*/4606*urb_entry_read_length = DIV_ROUND_UP(max_source_attr + 1, 2);4607}4608#endif46094610#if GFX_VER >= 74611static void4612crocus_emit_sbe(struct crocus_batch *batch, const struct crocus_context *ice)4613{4614const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;4615const struct brw_wm_prog_data *wm_prog_data = (void *)4616ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;4617#if GFX_VER >= 84618struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attr_overrides[16] = { { 0 } };4619#else4620#define attr_overrides sbe.Attribute4621#endif46224623uint32_t urb_entry_read_length;4624uint32_t urb_entry_read_offset;4625uint32_t point_sprite_enables;46264627crocus_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) {4628sbe.AttributeSwizzleEnable = true;4629sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;4630sbe.PointSpriteTextureCoordinateOrigin = cso_rast->cso.sprite_coord_mode;46314632calculate_attr_overrides(ice,4633attr_overrides,4634&point_sprite_enables,4635&urb_entry_read_length,4636&urb_entry_read_offset);4637sbe.VertexURBEntryReadOffset = 
urb_entry_read_offset;4638sbe.VertexURBEntryReadLength = urb_entry_read_length;4639sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs;4640sbe.PointSpriteTextureCoordinateEnable = point_sprite_enables;4641#if GFX_VER >= 84642sbe.ForceVertexURBEntryReadLength = true;4643sbe.ForceVertexURBEntryReadOffset = true;4644#endif4645}4646#if GFX_VER >= 84647crocus_emit_cmd(batch, GENX(3DSTATE_SBE_SWIZ), sbes) {4648for (int i = 0; i < 16; i++)4649sbes.Attribute[i] = attr_overrides[i];4650}4651#endif4652}4653#endif46544655/* ------------------------------------------------------------------- */46564657/**4658* Populate VS program key fields based on the current state.4659*/4660static void4661crocus_populate_vs_key(const struct crocus_context *ice,4662const struct shader_info *info,4663gl_shader_stage last_stage,4664struct brw_vs_prog_key *key)4665{4666const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;46674668if (info->clip_distance_array_size == 0 &&4669(info->outputs_written & (VARYING_BIT_POS | VARYING_BIT_CLIP_VERTEX)) &&4670last_stage == MESA_SHADER_VERTEX)4671key->nr_userclip_plane_consts = cso_rast->num_clip_plane_consts;46724673#if GFX_VER <= 54674key->copy_edgeflag = (cso_rast->cso.fill_back != PIPE_POLYGON_MODE_FILL ||4675cso_rast->cso.fill_front != PIPE_POLYGON_MODE_FILL);4676key->point_coord_replace = cso_rast->cso.sprite_coord_enable & 0xff;4677#endif46784679key->clamp_vertex_color = cso_rast->cso.clamp_vertex_color;46804681#if GFX_VERx10 < 754682uint64_t inputs_read = info->inputs_read;4683int ve_idx = 0;4684while (inputs_read) {4685int i = u_bit_scan64(&inputs_read);4686key->gl_attrib_wa_flags[i] = ice->state.cso_vertex_elements->wa_flags[ve_idx];4687ve_idx++;4688}4689#endif4690}46914692/**4693* Populate TCS program key fields based on the current state.4694*/4695static void4696crocus_populate_tcs_key(const struct crocus_context *ice,4697struct brw_tcs_prog_key *key)4698{4699}47004701/**4702* Populate TES program key fields based on the 
current state.4703*/4704static void4705crocus_populate_tes_key(const struct crocus_context *ice,4706const struct shader_info *info,4707gl_shader_stage last_stage,4708struct brw_tes_prog_key *key)4709{4710const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;47114712if (info->clip_distance_array_size == 0 &&4713(info->outputs_written & (VARYING_BIT_POS | VARYING_BIT_CLIP_VERTEX)) &&4714last_stage == MESA_SHADER_TESS_EVAL)4715key->nr_userclip_plane_consts = cso_rast->num_clip_plane_consts;4716}47174718/**4719* Populate GS program key fields based on the current state.4720*/4721static void4722crocus_populate_gs_key(const struct crocus_context *ice,4723const struct shader_info *info,4724gl_shader_stage last_stage,4725struct brw_gs_prog_key *key)4726{4727const struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;47284729if (info->clip_distance_array_size == 0 &&4730(info->outputs_written & (VARYING_BIT_POS | VARYING_BIT_CLIP_VERTEX)) &&4731last_stage == MESA_SHADER_GEOMETRY)4732key->nr_userclip_plane_consts = cso_rast->num_clip_plane_consts;4733}47344735/**4736* Populate FS program key fields based on the current state.4737*/4738static void4739crocus_populate_fs_key(const struct crocus_context *ice,4740const struct shader_info *info,4741struct brw_wm_prog_key *key)4742{4743struct crocus_screen *screen = (void *) ice->ctx.screen;4744const struct pipe_framebuffer_state *fb = &ice->state.framebuffer;4745const struct crocus_depth_stencil_alpha_state *zsa = ice->state.cso_zsa;4746const struct crocus_rasterizer_state *rast = ice->state.cso_rast;4747const struct crocus_blend_state *blend = ice->state.cso_blend;47484749#if GFX_VER < 64750uint32_t lookup = 0;47514752if (info->fs.uses_discard || zsa->cso.alpha_enabled)4753lookup |= BRW_WM_IZ_PS_KILL_ALPHATEST_BIT;47544755if (info->outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))4756lookup |= BRW_WM_IZ_PS_COMPUTES_DEPTH_BIT;47574758if (fb->zsbuf && zsa->cso.depth_enabled) {4759lookup |= 
BRW_WM_IZ_DEPTH_TEST_ENABLE_BIT;47604761if (zsa->cso.depth_writemask)4762lookup |= BRW_WM_IZ_DEPTH_WRITE_ENABLE_BIT;47634764}4765if (zsa->cso.stencil[0].enabled || zsa->cso.stencil[1].enabled) {4766lookup |= BRW_WM_IZ_STENCIL_TEST_ENABLE_BIT;4767if (zsa->cso.stencil[0].writemask || zsa->cso.stencil[1].writemask)4768lookup |= BRW_WM_IZ_STENCIL_WRITE_ENABLE_BIT;4769}4770key->iz_lookup = lookup;4771key->stats_wm = ice->state.stats_wm;4772#endif47734774uint32_t line_aa = BRW_WM_AA_NEVER;4775if (rast->cso.line_smooth) {4776int reduced_prim = u_reduced_prim(ice->state.prim_mode);4777if (reduced_prim == PIPE_PRIM_LINES)4778line_aa = BRW_WM_AA_ALWAYS;4779else if (reduced_prim == PIPE_PRIM_TRIANGLES) {4780if (rast->cso.fill_front == PIPE_POLYGON_MODE_LINE) {4781line_aa = BRW_WM_AA_SOMETIMES;47824783if (rast->cso.fill_back == PIPE_POLYGON_MODE_LINE ||4784rast->cso.cull_face == PIPE_FACE_BACK)4785line_aa = BRW_WM_AA_ALWAYS;4786} else if (rast->cso.fill_back == PIPE_POLYGON_MODE_LINE) {4787line_aa = BRW_WM_AA_SOMETIMES;47884789if (rast->cso.cull_face == PIPE_FACE_FRONT)4790line_aa = BRW_WM_AA_ALWAYS;4791}4792}4793}4794key->line_aa = line_aa;47954796key->nr_color_regions = fb->nr_cbufs;47974798key->clamp_fragment_color = rast->cso.clamp_fragment_color;47994800key->alpha_to_coverage = blend->cso.alpha_to_coverage;48014802key->alpha_test_replicate_alpha = fb->nr_cbufs > 1 && zsa->cso.alpha_enabled;48034804key->flat_shade = rast->cso.flatshade &&4805(info->inputs_read & (VARYING_BIT_COL0 | VARYING_BIT_COL1));48064807key->persample_interp = rast->cso.force_persample_interp;4808key->multisample_fbo = rast->cso.multisample && fb->samples > 1;48094810key->ignore_sample_mask_out = !key->multisample_fbo;4811key->coherent_fb_fetch = false; // TODO: needed?48124813key->force_dual_color_blend =4814screen->driconf.dual_color_blend_by_location &&4815(blend->blend_enables & 1) && blend->dual_color_blending;48164817/* TODO: Respect glHint for key->high_quality_derivatives */48184819#if GFX_VER 
/**
 * Kernel start pointer of a compiled shader.
 *
 * On Gen4 this expands to an ro_bo() address made of the shader cache BO
 * and the shader's offset inside it; on Gen5+ it is just that offset.
 *
 * The Gen4 form is a macro and deliberately carries no trailing semicolon,
 * so that it behaves as an ordinary expression exactly like the Gen5+
 * function does; every use supplies its own terminator.
 */
#if GFX_VER == 4
#define KSP(ice, shader) ro_bo((ice)->shaders.cache_bo, (shader)->offset)
#elif GFX_VER >= 5
static uint64_t
KSP(const struct crocus_context *ice, const struct crocus_compiled_shader *shader)
{
   return shader->offset;
}
#endif
/**
 * Emit a null surface with a 1x1x1 extent, zero levels and a zero minimum
 * array element.  out_offset is forwarded to emit_sized_null_surface().
 */
static void
emit_null_surface(struct crocus_batch *batch,
                  uint32_t *out_offset)
{
   const unsigned width = 1, height = 1, layers = 1;
   const unsigned levels = 0;
   const unsigned minimum_array_element = 0;

   emit_sized_null_surface(batch, width, height, layers, levels,
                           minimum_array_element, out_offset);
}
cso->layers : 1;4922level = 0;4923layer = 0;49244925if (cso->nr_cbufs == 0 && cso->zsbuf) {4926width = cso->zsbuf->width;4927height = cso->zsbuf->height;4928level = cso->zsbuf->u.tex.level;4929layer = cso->zsbuf->u.tex.first_layer;4930}4931emit_sized_null_surface(batch, width, height,4932layers, level, layer,4933out_offset);4934}49354936static void4937emit_surface_state(struct crocus_batch *batch,4938struct crocus_resource *res,4939const struct isl_surf *in_surf,4940bool adjust_surf,4941struct isl_view *view,4942bool writeable,4943enum isl_aux_usage aux_usage,4944bool blend_enable,4945uint32_t write_disables,4946uint32_t *surf_state,4947uint32_t addr_offset)4948{4949const struct intel_device_info *devinfo = &batch->screen->devinfo;4950struct isl_device *isl_dev = &batch->screen->isl_dev;4951uint32_t reloc = RELOC_32BIT;4952uint32_t offset = res->offset, tile_x_sa = 0, tile_y_sa = 0;49534954if (writeable)4955reloc |= RELOC_WRITE;49564957struct isl_surf surf = *in_surf;4958if (adjust_surf) {4959if (res->base.b.target == PIPE_TEXTURE_3D && view->array_len == 1) {4960isl_surf_get_image_surf(isl_dev, in_surf,4961view->base_level, 0,4962view->base_array_layer,4963&surf, &offset,4964&tile_x_sa, &tile_y_sa);4965view->base_array_layer = 0;4966view->base_level = 0;4967} else if (res->base.b.target == PIPE_TEXTURE_CUBE && devinfo->ver == 4) {4968isl_surf_get_image_surf(isl_dev, in_surf,4969view->base_level, view->base_array_layer,49700,4971&surf, &offset,4972&tile_x_sa, &tile_y_sa);4973view->base_array_layer = 0;4974view->base_level = 0;4975} else if (res->base.b.target == PIPE_TEXTURE_1D_ARRAY)4976surf.dim = ISL_SURF_DIM_2D;4977}49784979union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };4980struct crocus_bo *aux_bo = NULL;4981uint32_t aux_offset = 0;4982struct isl_surf *aux_surf = NULL;4983if (aux_usage != ISL_AUX_USAGE_NONE) {4984aux_surf = &res->aux.surf;4985aux_offset = res->aux.offset;4986aux_bo = res->aux.bo;49874988clear_color = 
crocus_resource_get_clear_color(res);4989}49904991isl_surf_fill_state(isl_dev, surf_state,4992.surf = &surf,4993.view = view,4994.address = crocus_state_reloc(batch,4995addr_offset + isl_dev->ss.addr_offset,4996res->bo, offset, reloc),4997.aux_surf = aux_surf,4998.aux_usage = aux_usage,4999.aux_address = aux_offset,5000.mocs = crocus_mocs(res->bo, isl_dev),5001.clear_color = clear_color,5002.use_clear_address = false,5003.clear_address = 0,5004.x_offset_sa = tile_x_sa,5005.y_offset_sa = tile_y_sa,5006#if GFX_VER <= 55007.blend_enable = blend_enable,5008.write_disables = write_disables,5009#endif5010);50115012if (aux_surf) {5013/* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the5014* upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits5015* contain other control information. Since buffer addresses are always5016* on 4k boundaries (and thus have their lower 12 bits zero), we can use5017* an ordinary reloc to do the necessary address translation.5018*5019* FIXME: move to the point of assignment.5020*/5021if (devinfo->ver == 8) {5022uint64_t *aux_addr = (uint64_t *)(surf_state + (isl_dev->ss.aux_addr_offset / 4));5023*aux_addr = crocus_state_reloc(batch,5024addr_offset + isl_dev->ss.aux_addr_offset,5025aux_bo, *aux_addr,5026reloc);5027} else {5028uint32_t *aux_addr = surf_state + (isl_dev->ss.aux_addr_offset / 4);5029*aux_addr = crocus_state_reloc(batch,5030addr_offset + isl_dev->ss.aux_addr_offset,5031aux_bo, *aux_addr,5032reloc);5033}5034}50355036}50375038static uint32_t5039emit_surface(struct crocus_batch *batch,5040struct crocus_surface *surf,5041enum isl_aux_usage aux_usage,5042bool blend_enable,5043uint32_t write_disables)5044{5045const struct intel_device_info *devinfo = &batch->screen->devinfo;5046struct isl_device *isl_dev = &batch->screen->isl_dev;5047struct crocus_resource *res = (struct crocus_resource *)surf->base.texture;5048struct isl_view *view = &surf->view;5049uint32_t offset = 0;5050enum pipe_texture_target 
target = res->base.b.target;5051bool adjust_surf = false;50525053if (devinfo->ver == 4 && target == PIPE_TEXTURE_CUBE)5054adjust_surf = true;50555056if (surf->align_res)5057res = (struct crocus_resource *)surf->align_res;50585059uint32_t *surf_state = stream_state(batch, isl_dev->ss.size, isl_dev->ss.align, &offset);50605061emit_surface_state(batch, res, &surf->surf, adjust_surf, view, true,5062aux_usage, blend_enable,5063write_disables,5064surf_state, offset);5065return offset;5066}50675068static uint32_t5069emit_rt_surface(struct crocus_batch *batch,5070struct crocus_surface *surf,5071enum isl_aux_usage aux_usage)5072{5073struct isl_device *isl_dev = &batch->screen->isl_dev;5074struct crocus_resource *res = (struct crocus_resource *)surf->base.texture;5075struct isl_view *view = &surf->read_view;5076uint32_t offset = 0;5077uint32_t *surf_state = stream_state(batch, isl_dev->ss.size, isl_dev->ss.align, &offset);50785079emit_surface_state(batch, res, &surf->surf, true, view, false,5080aux_usage, 0, false,5081surf_state, offset);5082return offset;5083}50845085static uint32_t5086emit_grid(struct crocus_context *ice,5087struct crocus_batch *batch)5088{5089UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;5090uint32_t offset = 0;5091struct crocus_state_ref *grid_ref = &ice->state.grid_size;5092uint32_t *surf_state = stream_state(batch, isl_dev->ss.size,5093isl_dev->ss.align, &offset);5094isl_buffer_fill_state(isl_dev, surf_state,5095.address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset,5096crocus_resource_bo(grid_ref->res),5097grid_ref->offset,5098RELOC_32BIT),5099.size_B = 12,5100.format = ISL_FORMAT_RAW,5101.stride_B = 1,5102.mocs = crocus_mocs(crocus_resource_bo(grid_ref->res), isl_dev));5103return offset;5104}51055106static uint32_t5107emit_ubo_buffer(struct crocus_context *ice,5108struct crocus_batch *batch,5109struct pipe_constant_buffer *buffer)5110{5111UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;5112uint32_t offset = 
0;51135114uint32_t *surf_state = stream_state(batch, isl_dev->ss.size,5115isl_dev->ss.align, &offset);5116isl_buffer_fill_state(isl_dev, surf_state,5117.address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset,5118crocus_resource_bo(buffer->buffer),5119buffer->buffer_offset,5120RELOC_32BIT),5121.size_B = buffer->buffer_size,5122.format = 0,5123.swizzle = ISL_SWIZZLE_IDENTITY,5124.stride_B = 1,5125.mocs = crocus_mocs(crocus_resource_bo(buffer->buffer), isl_dev));51265127return offset;5128}51295130static uint32_t5131emit_ssbo_buffer(struct crocus_context *ice,5132struct crocus_batch *batch,5133struct pipe_shader_buffer *buffer, bool writeable)5134{5135UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;5136uint32_t offset = 0;5137uint32_t reloc = RELOC_32BIT;51385139if (writeable)5140reloc |= RELOC_WRITE;5141uint32_t *surf_state = stream_state(batch, isl_dev->ss.size,5142isl_dev->ss.align, &offset);5143isl_buffer_fill_state(isl_dev, surf_state,5144.address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset,5145crocus_resource_bo(buffer->buffer),5146buffer->buffer_offset,5147reloc),5148.size_B = buffer->buffer_size,5149.format = ISL_FORMAT_RAW,5150.swizzle = ISL_SWIZZLE_IDENTITY,5151.stride_B = 1,5152.mocs = crocus_mocs(crocus_resource_bo(buffer->buffer), isl_dev));51535154return offset;5155}51565157static uint32_t5158emit_sampler_view(struct crocus_context *ice,5159struct crocus_batch *batch,5160bool for_gather,5161struct crocus_sampler_view *isv)5162{5163UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;5164uint32_t offset = 0;51655166uint32_t *surf_state = stream_state(batch, isl_dev->ss.size,5167isl_dev->ss.align, &offset);51685169if (isv->base.target == PIPE_BUFFER) {5170const struct isl_format_layout *fmtl = isl_format_get_layout(isv->view.format);5171const unsigned cpp = isv->view.format == ISL_FORMAT_RAW ? 
1 : fmtl->bpb / 8;5172unsigned final_size =5173MIN3(isv->base.u.buf.size, isv->res->bo->size - isv->res->offset,5174CROCUS_MAX_TEXTURE_BUFFER_SIZE * cpp);5175isl_buffer_fill_state(isl_dev, surf_state,5176.address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset,5177isv->res->bo,5178isv->res->offset + isv->base.u.buf.offset, RELOC_32BIT),5179.size_B = final_size,5180.format = isv->view.format,5181.swizzle = isv->view.swizzle,5182.stride_B = cpp,5183.mocs = crocus_mocs(isv->res->bo, isl_dev)5184);5185} else {5186enum isl_aux_usage aux_usage =5187crocus_resource_texture_aux_usage(isv->res);51885189emit_surface_state(batch, isv->res, &isv->res->surf, false,5190for_gather ? &isv->gather_view : &isv->view,5191false, aux_usage, false,51920, surf_state, offset);5193}5194return offset;5195}51965197static uint32_t5198emit_image_view(struct crocus_context *ice,5199struct crocus_batch *batch,5200struct crocus_image_view *iv)5201{5202UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;5203uint32_t offset = 0;52045205struct crocus_resource *res = (struct crocus_resource *)iv->base.resource;5206uint32_t *surf_state = stream_state(batch, isl_dev->ss.size,5207isl_dev->ss.align, &offset);5208bool write = iv->base.shader_access & PIPE_IMAGE_ACCESS_WRITE;5209uint32_t reloc = RELOC_32BIT | (write ? RELOC_WRITE : 0);5210if (res->base.b.target == PIPE_BUFFER) {5211const struct isl_format_layout *fmtl = isl_format_get_layout(iv->view.format);5212const unsigned cpp = iv->view.format == ISL_FORMAT_RAW ? 
#if GFX_VER == 6
/**
 * Stream a buffer surface for stream output number idx and return its
 * offset.  Returns 0 when idx is not a valid output or stream output is
 * not active.
 *
 * The surface starts at the output's destination dword inside the bound
 * stream-output target, uses the buffer's stride, and has an R32-family
 * float format with one channel per output component.
 */
static uint32_t
emit_sol_surface(struct crocus_batch *batch,
                 struct pipe_stream_output_info *so_info,
                 uint32_t idx)
{
   struct crocus_context *ice = batch->ice;

   if (!ice->state.streamout_active || idx >= so_info->num_outputs)
      return 0;

   const struct pipe_stream_output *output = &so_info->output[idx];
   const int buffer = output->output_buffer;
   assert(output->stream == 0);

   struct crocus_resource *buf = (struct crocus_resource *)ice->state.so_target[buffer]->buffer;
   const unsigned num_vector_components = output->num_components;
   const unsigned stride_dwords = so_info->stride[buffer];
   const unsigned offset_dwords = ice->state.so_target[buffer]->buffer_offset / 4 + output->dst_offset;
   const size_t size_dwords = (ice->state.so_target[buffer]->buffer_offset + ice->state.so_target[buffer]->buffer_size) / 4;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?  (No assertion on
    * (size_dwords - offset_dwords) / stride_dwords is made here.)
    */

   /* Always describe one stride's worth of dwords.  If there isn't even room
    * for a single transform feedback output, that is all we describe: the
    * binding table entry can't prevent output entirely, so we rely on the
    * geometry shader to detect overflow and merely minimize the damage in
    * case of a bug.  Otherwise add the dwords left for further outputs.
    */
   unsigned num_elements = stride_dwords;
   if (size_dwords > offset_dwords + num_vector_components)
      num_elements += size_dwords - offset_dwords - num_vector_components;

   uint32_t surface_format;
   switch (num_vector_components) {
   case 1: surface_format = ISL_FORMAT_R32_FLOAT;          break;
   case 2: surface_format = ISL_FORMAT_R32G32_FLOAT;       break;
   case 3: surface_format = ISL_FORMAT_R32G32B32_FLOAT;    break;
   case 4: surface_format = ISL_FORMAT_R32G32B32A32_FLOAT; break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;
   uint32_t offset = 0;
   uint32_t *surf_state = stream_state(batch, isl_dev->ss.size,
                                       isl_dev->ss.align, &offset);

   isl_buffer_fill_state(isl_dev, surf_state,
                         .address = crocus_state_reloc(batch, offset + isl_dev->ss.addr_offset,
                                                       crocus_resource_bo(&buf->base.b),
                                                       offset_dwords * 4, RELOC_32BIT|RELOC_WRITE),
                         .size_B = num_elements * 4,
                         .stride_B = stride_dwords * 4,
                         .swizzle = ISL_SWIZZLE_IDENTITY,
                         .format = surface_format);
   return offset;
}
#endif
bt->sizes[group]; index++) \5333if (crocus_group_index_to_bti(bt, group, index) != \5334CROCUS_SURFACE_NOT_USED)53355336static void5337crocus_populate_binding_table(struct crocus_context *ice,5338struct crocus_batch *batch,5339gl_shader_stage stage, bool ff_gs)5340{5341struct crocus_compiled_shader *shader = ff_gs ? ice->shaders.ff_gs_prog : ice->shaders.prog[stage];5342struct crocus_shader_state *shs = ff_gs ? NULL : &ice->state.shaders[stage];5343if (!shader)5344return;53455346struct crocus_binding_table *bt = &shader->bt;5347int s = 0;5348uint32_t *surf_offsets = shader->surf_offset;53495350#if GFX_VER < 85351const struct shader_info *info = crocus_get_shader_info(ice, stage);5352#endif53535354if (stage == MESA_SHADER_FRAGMENT) {5355struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;5356/* Note that cso_fb->nr_cbufs == fs_key->nr_color_regions. */5357if (cso_fb->nr_cbufs) {5358for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {5359uint32_t write_disables = 0;5360bool blend_enable = false;5361#if GFX_VER <= 55362const struct pipe_rt_blend_state *rt =5363&ice->state.cso_blend->cso.rt[ice->state.cso_blend->cso.independent_blend_enable ? i : 0];5364struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT];5365struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;5366write_disables |= (rt->colormask & PIPE_MASK_A) ? 0x0 : 0x8;5367write_disables |= (rt->colormask & PIPE_MASK_R) ? 0x0 : 0x4;5368write_disables |= (rt->colormask & PIPE_MASK_G) ? 0x0 : 0x2;5369write_disables |= (rt->colormask & PIPE_MASK_B) ? 0x0 : 0x1;5370/* Gen4/5 can't handle blending off when a dual src blend wm is enabled. 
*/5371blend_enable = rt->blend_enable || wm_prog_data->dual_src_blend;5372#endif5373if (cso_fb->cbufs[i]) {5374surf_offsets[s] = emit_surface(batch,5375(struct crocus_surface *)cso_fb->cbufs[i],5376ice->state.draw_aux_usage[i],5377blend_enable,5378write_disables);5379} else {5380emit_null_fb_surface(batch, ice, &surf_offsets[s]);5381}5382s++;5383}5384} else {5385emit_null_fb_surface(batch, ice, &surf_offsets[s]);5386s++;5387}53885389foreach_surface_used(i, CROCUS_SURFACE_GROUP_RENDER_TARGET_READ) {5390struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;5391if (cso_fb->cbufs[i]) {5392surf_offsets[s++] = emit_rt_surface(batch,5393(struct crocus_surface *)cso_fb->cbufs[i],5394ice->state.draw_aux_usage[i]);5395}5396}5397}53985399if (stage == MESA_SHADER_COMPUTE) {5400foreach_surface_used(i, CROCUS_SURFACE_GROUP_CS_WORK_GROUPS) {5401surf_offsets[s] = emit_grid(ice, batch);5402s++;5403}5404}54055406#if GFX_VER == 65407if (stage == MESA_SHADER_GEOMETRY) {5408struct pipe_stream_output_info *so_info;5409if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])5410so_info = &ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]->stream_output;5411else5412so_info = &ice->shaders.uncompiled[MESA_SHADER_VERTEX]->stream_output;54135414foreach_surface_used(i, CROCUS_SURFACE_GROUP_SOL) {5415surf_offsets[s] = emit_sol_surface(batch, so_info, i);5416s++;5417}5418}5419#endif54205421foreach_surface_used(i, CROCUS_SURFACE_GROUP_TEXTURE) {5422struct crocus_sampler_view *view = shs->textures[i];5423if (view)5424surf_offsets[s] = emit_sampler_view(ice, batch, false, view);5425else5426emit_null_surface(batch, &surf_offsets[s]);5427s++;5428}54295430#if GFX_VER < 85431if (info && info->uses_texture_gather) {5432foreach_surface_used(i, CROCUS_SURFACE_GROUP_TEXTURE_GATHER) {5433struct crocus_sampler_view *view = shs->textures[i];5434if (view)5435surf_offsets[s] = emit_sampler_view(ice, batch, true, view);5436else5437emit_null_surface(batch, 
&surf_offsets[s]);5438s++;5439}5440}5441#endif54425443foreach_surface_used(i, CROCUS_SURFACE_GROUP_IMAGE) {5444struct crocus_image_view *view = &shs->image[i];5445if (view->base.resource)5446surf_offsets[s] = emit_image_view(ice, batch, view);5447else5448emit_null_surface(batch, &surf_offsets[s]);5449s++;5450}5451foreach_surface_used(i, CROCUS_SURFACE_GROUP_UBO) {5452if (shs->constbufs[i].buffer)5453surf_offsets[s] = emit_ubo_buffer(ice, batch, &shs->constbufs[i]);5454else5455emit_null_surface(batch, &surf_offsets[s]);5456s++;5457}5458foreach_surface_used(i, CROCUS_SURFACE_GROUP_SSBO) {5459if (shs->ssbo[i].buffer)5460surf_offsets[s] = emit_ssbo_buffer(ice, batch, &shs->ssbo[i],5461!!(shs->writable_ssbos & (1 << i)));5462else5463emit_null_surface(batch, &surf_offsets[s]);5464s++;5465}54665467}5468/* ------------------------------------------------------------------- */5469static uint32_t5470crocus_upload_binding_table(struct crocus_context *ice,5471struct crocus_batch *batch,5472uint32_t *table,5473uint32_t size)54745475{5476if (size == 0)5477return 0;5478return emit_state(batch, table, size, 32);5479}54805481/**5482* Possibly emit STATE_BASE_ADDRESS to update Surface State Base Address.5483*/54845485static void5486crocus_update_surface_base_address(struct crocus_batch *batch)5487{5488if (batch->state_base_address_emitted)5489return;5490#if GFX_VER >= 65491uint32_t mocs = batch->screen->isl_dev.mocs.internal;5492#endif5493flush_before_state_base_change(batch);54945495crocus_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) {54965497sba.SurfaceStateBaseAddressModifyEnable = true;5498sba.SurfaceStateBaseAddress = ro_bo(batch->state.bo, 0);54995500#if GFX_VER >= 55501sba.InstructionBaseAddress = ro_bo(batch->ice->shaders.cache_bo, 0); // TODO!5502#endif55035504sba.GeneralStateBaseAddressModifyEnable = true;5505sba.IndirectObjectBaseAddressModifyEnable = true;5506#if GFX_VER >= 55507sba.InstructionBaseAddressModifyEnable = true;5508#endif55095510#if GFX_VER < 
85511sba.GeneralStateAccessUpperBoundModifyEnable = true;5512#endif5513#if GFX_VER >= 5 && GFX_VER < 85514sba.IndirectObjectAccessUpperBoundModifyEnable = true;5515sba.InstructionAccessUpperBoundModifyEnable = true;5516#endif5517#if GFX_VER <= 55518sba.GeneralStateAccessUpperBound = ro_bo(NULL, 0xfffff000);5519#endif5520#if GFX_VER >= 65521/* The hardware appears to pay attention to the MOCS fields even5522* if you don't set the "Address Modify Enable" bit for the base.5523*/5524sba.GeneralStateMOCS = mocs;5525sba.StatelessDataPortAccessMOCS = mocs;5526#if GFX_VER == 85527sba.DynamicStateMOCS = mocs;5528sba.IndirectObjectMOCS = mocs;5529sba.InstructionMOCS = mocs;5530sba.SurfaceStateMOCS = mocs;5531sba.GeneralStateBufferSize = 0xfffff;5532sba.IndirectObjectBufferSize = 0xfffff;5533sba.InstructionBufferSize = 0xfffff;5534sba.DynamicStateBufferSize = MAX_STATE_SIZE;55355536sba.GeneralStateBufferSizeModifyEnable = true;5537sba.DynamicStateBufferSizeModifyEnable = true;5538sba.IndirectObjectBufferSizeModifyEnable = true;5539sba.InstructionBuffersizeModifyEnable = true;5540#endif55415542sba.DynamicStateBaseAddressModifyEnable = true;55435544sba.DynamicStateBaseAddress = ro_bo(batch->state.bo, 0);55455546/* Dynamic state upper bound. Although the documentation says that5547* programming it to zero will cause it to be ignored, that is a lie.5548* If this isn't programmed to a real bound, the sampler border color5549* pointer is rejected, causing border color to mysteriously fail.5550*/5551#if GFX_VER < 85552sba.DynamicStateAccessUpperBoundModifyEnable = true;5553sba.DynamicStateAccessUpperBound = ro_bo(NULL, 0xfffff000);5554#endif55555556#endif5557}55585559flush_after_state_base_change(batch);55605561/* According to section 3.6.1 of VOL1 of the 965 PRM,5562* STATE_BASE_ADDRESS updates require a reissue of:5563*5564* 3DSTATE_PIPELINE_POINTERS5565* 3DSTATE_BINDING_TABLE_POINTERS5566* MEDIA_STATE_POINTERS5567*5568* and this continues through Ironlake. 
The Sandy Bridge PRM, vol5569* 1 part 1 says that the folowing packets must be reissued:5570*5571* 3DSTATE_CC_POINTERS5572* 3DSTATE_BINDING_TABLE_POINTERS5573* 3DSTATE_SAMPLER_STATE_POINTERS5574* 3DSTATE_VIEWPORT_STATE_POINTERS5575* MEDIA_STATE_POINTERS5576*5577* Those are always reissued following SBA updates anyway (new5578* batch time), except in the case of the program cache BO5579* changing. Having a separate state flag makes the sequence more5580* obvious.5581*/5582#if GFX_VER <= 55583batch->ice->state.dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS | CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS;5584#elif GFX_VER == 65585batch->ice->state.dirty |= CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS | CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS;5586#endif5587batch->state_base_address_emitted = true;5588}55895590static inline void5591crocus_viewport_zmin_zmax(const struct pipe_viewport_state *vp, bool halfz,5592bool window_space_position, float *zmin, float *zmax)5593{5594if (window_space_position) {5595*zmin = 0.f;5596*zmax = 1.f;5597return;5598}5599util_viewport_zmin_zmax(vp, halfz, zmin, zmax);5600}56015602struct push_bos {5603struct {5604struct crocus_address addr;5605uint32_t length;5606} buffers[4];5607int buffer_count;5608uint32_t max_length;5609};56105611#if GFX_VER >= 65612static void5613setup_constant_buffers(struct crocus_context *ice,5614struct crocus_batch *batch,5615int stage,5616struct push_bos *push_bos)5617{5618struct crocus_shader_state *shs = &ice->state.shaders[stage];5619struct crocus_compiled_shader *shader = ice->shaders.prog[stage];5620struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;56215622uint32_t push_range_sum = 0;56235624int n = 0;5625for (int i = 0; i < 4; i++) {5626const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];56275628if (range->length == 0)5629continue;56305631push_range_sum += range->length;56325633if (range->length > push_bos->max_length)5634push_bos->max_length = range->length;56355636/* Range block is a binding 
table index, map back to UBO index. */5637unsigned block_index = crocus_bti_to_group_index(5638&shader->bt, CROCUS_SURFACE_GROUP_UBO, range->block);5639assert(block_index != CROCUS_SURFACE_NOT_USED);56405641struct pipe_constant_buffer *cbuf = &shs->constbufs[block_index];5642struct crocus_resource *res = (void *) cbuf->buffer;56435644assert(cbuf->buffer_offset % 32 == 0);56455646push_bos->buffers[n].length = range->length;5647push_bos->buffers[n].addr =5648res ? ro_bo(res->bo, range->start * 32 + cbuf->buffer_offset)5649: ro_bo(batch->ice->workaround_bo,5650batch->ice->workaround_offset);5651n++;5652}56535654/* From the 3DSTATE_CONSTANT_XS and 3DSTATE_CONSTANT_ALL programming notes:5655*5656* "The sum of all four read length fields must be less than or5657* equal to the size of 64."5658*/5659assert(push_range_sum <= 64);56605661push_bos->buffer_count = n;5662}56635664#if GFX_VER == 75665static void5666gen7_emit_vs_workaround_flush(struct crocus_batch *batch)5667{5668ASSERTED const struct intel_device_info *devinfo = &batch->screen->devinfo;56695670assert(devinfo->ver == 7);5671crocus_emit_pipe_control_write(batch,5672"vs workaround",5673PIPE_CONTROL_WRITE_IMMEDIATE5674| PIPE_CONTROL_DEPTH_STALL,5675batch->ice->workaround_bo,5676batch->ice->workaround_offset, 0);5677}5678#endif56795680static void5681emit_push_constant_packets(struct crocus_context *ice,5682struct crocus_batch *batch,5683int stage,5684const struct push_bos *push_bos)5685{5686struct crocus_compiled_shader *shader = ice->shaders.prog[stage];5687struct brw_stage_prog_data *prog_data = shader ? 
(void *) shader->prog_data : NULL;56885689#if GFX_VER == 75690if (stage == MESA_SHADER_VERTEX) {5691if (!(GFX_VERx10 == 75) && !batch->screen->devinfo.is_baytrail)5692gen7_emit_vs_workaround_flush(batch);5693}5694#endif5695crocus_emit_cmd(batch, GENX(3DSTATE_CONSTANT_VS), pkt) {5696pkt._3DCommandSubOpcode = push_constant_opcodes[stage];5697#if GFX_VER >= 75698if (prog_data) {5699/* The Skylake PRM contains the following restriction:5700*5701* "The driver must ensure The following case does not occur5702* without a flush to the 3D engine: 3DSTATE_CONSTANT_* with5703* buffer 3 read length equal to zero committed followed by a5704* 3DSTATE_CONSTANT_* with buffer 0 read length not equal to5705* zero committed."5706*5707* To avoid this, we program the buffers in the highest slots.5708* This way, slot 0 is only used if slot 3 is also used.5709*/5710int n = push_bos->buffer_count;5711assert(n <= 4);5712#if GFX_VERx10 >= 755713const unsigned shift = 4 - n;5714#else5715const unsigned shift = 0;5716#endif5717for (int i = 0; i < n; i++) {5718pkt.ConstantBody.ReadLength[i + shift] =5719push_bos->buffers[i].length;5720pkt.ConstantBody.Buffer[i + shift] = push_bos->buffers[i].addr;5721}5722}5723#else5724if (prog_data) {5725int n = push_bos->buffer_count;5726assert (n <= 1);5727if (n == 1) {5728pkt.Buffer0Valid = true;5729pkt.ConstantBody.PointertoConstantBuffer0 = push_bos->buffers[0].addr.offset;5730pkt.ConstantBody.ConstantBuffer0ReadLength = push_bos->buffers[0].length - 1;5731}5732}5733#endif5734}5735}57365737#endif57385739#if GFX_VER == 85740typedef struct GENX(3DSTATE_WM_DEPTH_STENCIL) DEPTH_STENCIL_GENXML;5741#elif GFX_VER >= 65742typedef struct GENX(DEPTH_STENCIL_STATE) DEPTH_STENCIL_GENXML;5743#else5744typedef struct GENX(COLOR_CALC_STATE) DEPTH_STENCIL_GENXML;5745#endif57465747static inline void5748set_depth_stencil_bits(struct crocus_context *ice, DEPTH_STENCIL_GENXML *ds)5749{5750struct crocus_depth_stencil_alpha_state *cso = 
ice->state.cso_zsa;5751ds->DepthTestEnable = cso->cso.depth_enabled;5752ds->DepthBufferWriteEnable = cso->cso.depth_writemask;5753ds->DepthTestFunction = translate_compare_func(cso->cso.depth_func);57545755ds->StencilFailOp = cso->cso.stencil[0].fail_op;5756ds->StencilPassDepthFailOp = cso->cso.stencil[0].zfail_op;5757ds->StencilPassDepthPassOp = cso->cso.stencil[0].zpass_op;5758ds->StencilTestFunction = translate_compare_func(cso->cso.stencil[0].func);57595760ds->StencilTestMask = cso->cso.stencil[0].valuemask;5761ds->StencilWriteMask = cso->cso.stencil[0].writemask;57625763ds->BackfaceStencilFailOp = cso->cso.stencil[1].fail_op;5764ds->BackfaceStencilPassDepthFailOp = cso->cso.stencil[1].zfail_op;5765ds->BackfaceStencilPassDepthPassOp = cso->cso.stencil[1].zpass_op;5766ds->BackfaceStencilTestFunction = translate_compare_func(cso->cso.stencil[1].func);57675768ds->BackfaceStencilTestMask = cso->cso.stencil[1].valuemask;5769ds->BackfaceStencilWriteMask = cso->cso.stencil[1].writemask;5770ds->DoubleSidedStencilEnable = cso->cso.stencil[1].enabled;5771ds->StencilTestEnable = cso->cso.stencil[0].enabled;5772ds->StencilBufferWriteEnable =5773cso->cso.stencil[0].writemask != 0 ||5774(cso->cso.stencil[1].enabled && cso->cso.stencil[1].writemask != 0);5775}57765777static void5778emit_vertex_buffer_state(struct crocus_batch *batch,5779unsigned buffer_id,5780struct crocus_bo *bo,5781unsigned start_offset,5782unsigned end_offset,5783unsigned stride,5784unsigned step_rate,5785uint32_t **map)5786{5787const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length);5788_crocus_pack_state(batch, GENX(VERTEX_BUFFER_STATE), *map, vb) {5789vb.BufferStartingAddress = ro_bo(bo, start_offset);5790#if GFX_VER >= 85791vb.BufferSize = end_offset - start_offset;5792#endif5793vb.VertexBufferIndex = buffer_id;5794vb.BufferPitch = stride;5795#if GFX_VER >= 75796vb.AddressModifyEnable = true;5797#endif5798#if GFX_VER >= 65799vb.MOCS = crocus_mocs(bo, &batch->screen->isl_dev);5800#endif5801#if 
#if GFX_VER >= 6
/**
 * Compute the sample mask to program for the bound framebuffer.
 *
 * Returns 1 when the framebuffer has at most one sample; otherwise the
 * context's sample mask restricted to the framebuffer's sample count.
 */
static uint32_t
determine_sample_mask(struct crocus_context *ice)
{
   const uint32_t num_samples = ice->state.framebuffer.samples;

   if (num_samples <= 1)
      return 1;

   /* Build the mask with an unsigned shift, and never shift by the full
    * width of the type: both would be undefined behaviour.
    */
   const uint32_t fb_mask =
      num_samples >= 32 ? UINT32_MAX : (UINT32_C(1) << num_samples) - 1;
   return ice->state.sample_mask & fb_mask;
}
#endif
case. */5868uint32_t *cc_vp_map =5869stream_state(batch,58704 * ice->state.num_viewports *5871GENX(CC_VIEWPORT_length), 32, &cc_vp_address);5872for (int i = 0; i < ice->state.num_viewports; i++) {5873float zmin, zmax;5874crocus_viewport_zmin_zmax(&ice->state.viewports[i], cso_rast->cso.clip_halfz,5875ice->state.window_space_position,5876&zmin, &zmax);5877if (cso_rast->cso.depth_clip_near)5878zmin = 0.0;5879if (cso_rast->cso.depth_clip_far)5880zmax = 1.0;58815882crocus_pack_state(GENX(CC_VIEWPORT), cc_vp_map, ccv) {5883ccv.MinimumDepth = zmin;5884ccv.MaximumDepth = zmax;5885}58865887cc_vp_map += GENX(CC_VIEWPORT_length);5888}58895890#if GFX_VER >= 75891crocus_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) {5892ptr.CCViewportPointer = cc_vp_address;5893}5894#elif GFX_VER == 65895crocus_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) {5896vp.CCViewportStateChange = 1;5897vp.PointertoCC_VIEWPORT = cc_vp_address;5898}5899#else5900ice->state.cc_vp_address = cc_vp_address;5901dirty |= CROCUS_DIRTY_COLOR_CALC_STATE;5902#endif5903}59045905if (dirty & CROCUS_DIRTY_SF_CL_VIEWPORT) {5906struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;5907#if GFX_VER >= 75908uint32_t sf_cl_vp_address;5909uint32_t *vp_map =5910stream_state(batch,59114 * ice->state.num_viewports *5912GENX(SF_CLIP_VIEWPORT_length), 64, &sf_cl_vp_address);5913#else5914uint32_t *vp_map =5915stream_state(batch,59164 * ice->state.num_viewports * GENX(SF_VIEWPORT_length),591732, &ice->state.sf_vp_address);5918uint32_t *clip_map =5919stream_state(batch,59204 * ice->state.num_viewports * GENX(CLIP_VIEWPORT_length),592132, &ice->state.clip_vp_address);5922#endif59235924for (unsigned i = 0; i < ice->state.num_viewports; i++) {5925const struct pipe_viewport_state *state = &ice->state.viewports[i];5926float gb_xmin, gb_xmax, gb_ymin, gb_ymax;59275928#if GFX_VER == 85929float vp_xmin = viewport_extent(state, 0, -1.0f);5930float vp_xmax = viewport_extent(state, 0, 1.0f);5931float 
vp_ymin = viewport_extent(state, 1, -1.0f);5932float vp_ymax = viewport_extent(state, 1, 1.0f);5933#endif5934intel_calculate_guardband_size(cso_fb->width, cso_fb->height,5935state->scale[0], state->scale[1],5936state->translate[0], state->translate[1],5937&gb_xmin, &gb_xmax, &gb_ymin, &gb_ymax);5938#if GFX_VER >= 75939crocus_pack_state(GENX(SF_CLIP_VIEWPORT), vp_map, vp)5940#else5941crocus_pack_state(GENX(SF_VIEWPORT), vp_map, vp)5942#endif5943{5944vp.ViewportMatrixElementm00 = state->scale[0];5945vp.ViewportMatrixElementm11 = state->scale[1];5946vp.ViewportMatrixElementm22 = state->scale[2];5947vp.ViewportMatrixElementm30 = state->translate[0];5948vp.ViewportMatrixElementm31 = state->translate[1];5949vp.ViewportMatrixElementm32 = state->translate[2];5950#if GFX_VER < 65951struct pipe_scissor_state scissor;5952crocus_fill_scissor_rect(ice, 0, &scissor);5953vp.ScissorRectangle.ScissorRectangleXMin = scissor.minx;5954vp.ScissorRectangle.ScissorRectangleXMax = scissor.maxx;5955vp.ScissorRectangle.ScissorRectangleYMin = scissor.miny;5956vp.ScissorRectangle.ScissorRectangleYMax = scissor.maxy;5957#endif59585959#if GFX_VER >= 75960vp.XMinClipGuardband = gb_xmin;5961vp.XMaxClipGuardband = gb_xmax;5962vp.YMinClipGuardband = gb_ymin;5963vp.YMaxClipGuardband = gb_ymax;5964#endif5965#if GFX_VER == 85966vp.XMinViewPort = MAX2(vp_xmin, 0);5967vp.XMaxViewPort = MIN2(vp_xmax, cso_fb->width) - 1;5968vp.YMinViewPort = MAX2(vp_ymin, 0);5969vp.YMaxViewPort = MIN2(vp_ymax, cso_fb->height) - 1;5970#endif5971}5972#if GFX_VER < 75973crocus_pack_state(GENX(CLIP_VIEWPORT), clip_map, clip) {5974clip.XMinClipGuardband = gb_xmin;5975clip.XMaxClipGuardband = gb_xmax;5976clip.YMinClipGuardband = gb_ymin;5977clip.YMaxClipGuardband = gb_ymax;5978}5979#endif5980#if GFX_VER >= 75981vp_map += GENX(SF_CLIP_VIEWPORT_length);5982#else5983vp_map += GENX(SF_VIEWPORT_length);5984clip_map += GENX(CLIP_VIEWPORT_length);5985#endif5986}5987#if GFX_VER >= 75988crocus_emit_cmd(batch, 
GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) {5989ptr.SFClipViewportPointer = sf_cl_vp_address;5990}5991#elif GFX_VER == 65992crocus_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) {5993vp.SFViewportStateChange = 1;5994vp.CLIPViewportStateChange = 1;5995vp.PointertoCLIP_VIEWPORT = ice->state.clip_vp_address;5996vp.PointertoSF_VIEWPORT = ice->state.sf_vp_address;5997}5998#endif5999}60006001#if GFX_VER >= 66002if (dirty & CROCUS_DIRTY_GEN6_URB) {6003#if GFX_VER == 66004bool gs_present = ice->shaders.prog[MESA_SHADER_GEOMETRY] != NULL6005|| ice->shaders.ff_gs_prog;60066007struct brw_vue_prog_data *vue_prog_data =6008(void *) ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data;6009const unsigned vs_size = vue_prog_data->urb_entry_size;6010unsigned gs_size = vs_size;6011if (ice->shaders.prog[MESA_SHADER_GEOMETRY]) {6012struct brw_vue_prog_data *gs_vue_prog_data =6013(void *) ice->shaders.prog[MESA_SHADER_GEOMETRY]->prog_data;6014gs_size = gs_vue_prog_data->urb_entry_size;6015}60166017genX(crocus_upload_urb)(batch, vs_size, gs_present, gs_size);6018#endif6019#if GFX_VER >= 76020const struct intel_device_info *devinfo = &batch->screen->devinfo;6021bool gs_present = ice->shaders.prog[MESA_SHADER_GEOMETRY] != NULL;6022bool tess_present = ice->shaders.prog[MESA_SHADER_TESS_EVAL] != NULL;6023unsigned entry_size[4];60246025for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {6026if (!ice->shaders.prog[i]) {6027entry_size[i] = 1;6028} else {6029struct brw_vue_prog_data *vue_prog_data =6030(void *) ice->shaders.prog[i]->prog_data;6031entry_size[i] = vue_prog_data->urb_entry_size;6032}6033assert(entry_size[i] != 0);6034}60356036/* If we're just switching between programs with the same URB requirements,6037* skip the rest of the logic.6038*/6039bool no_change = false;6040if (ice->urb.vsize == entry_size[MESA_SHADER_VERTEX] &&6041ice->urb.gs_present == gs_present &&6042ice->urb.gsize == entry_size[MESA_SHADER_GEOMETRY] &&6043ice->urb.tess_present == 
tess_present &&6044ice->urb.hsize == entry_size[MESA_SHADER_TESS_CTRL] &&6045ice->urb.dsize == entry_size[MESA_SHADER_TESS_EVAL]) {6046no_change = true;6047}60486049if (!no_change) {6050ice->urb.vsize = entry_size[MESA_SHADER_VERTEX];6051ice->urb.gs_present = gs_present;6052ice->urb.gsize = entry_size[MESA_SHADER_GEOMETRY];6053ice->urb.tess_present = tess_present;6054ice->urb.hsize = entry_size[MESA_SHADER_TESS_CTRL];6055ice->urb.dsize = entry_size[MESA_SHADER_TESS_EVAL];60566057unsigned entries[4];6058unsigned start[4];6059bool constrained;6060intel_get_urb_config(devinfo,6061batch->screen->l3_config_3d,6062tess_present,6063gs_present,6064entry_size,6065entries, start, NULL, &constrained);60666067#if GFX_VER == 76068if (GFX_VERx10 < 75 && !devinfo->is_baytrail)6069gen7_emit_vs_workaround_flush(batch);6070#endif6071for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {6072crocus_emit_cmd(batch, GENX(3DSTATE_URB_VS), urb) {6073urb._3DCommandSubOpcode += i;6074urb.VSURBStartingAddress = start[i];6075urb.VSURBEntryAllocationSize = entry_size[i] - 1;6076urb.VSNumberofURBEntries = entries[i];6077}6078}6079}6080#endif6081}60826083if (dirty & CROCUS_DIRTY_GEN6_BLEND_STATE) {6084struct crocus_blend_state *cso_blend = ice->state.cso_blend;6085struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;6086struct crocus_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;60876088STATIC_ASSERT(GENX(BLEND_STATE_ENTRY_length) == 2);6089int rt_dwords =6090MAX2(cso_fb->nr_cbufs, 1) * GENX(BLEND_STATE_ENTRY_length);6091#if GFX_VER >= 86092rt_dwords += GENX(BLEND_STATE_length);6093#endif6094uint32_t blend_offset;6095uint32_t *blend_map =6096stream_state(batch,60974 * rt_dwords, 64, &blend_offset);60986099#if GFX_VER >= 86100struct GENX(BLEND_STATE) be = { 0 };6101{6102#else6103for (int i = 0; i < BRW_MAX_DRAW_BUFFERS; i++) {6104struct GENX(BLEND_STATE_ENTRY) entry = { 0 };6105#define be entry6106#endif61076108be.AlphaTestEnable = 
cso_zsa->cso.alpha_enabled;6109be.AlphaTestFunction = translate_compare_func(cso_zsa->cso.alpha_func);6110be.AlphaToCoverageEnable = cso_blend->cso.alpha_to_coverage;6111be.AlphaToOneEnable = cso_blend->cso.alpha_to_one;6112be.AlphaToCoverageDitherEnable = GFX_VER >= 7 && cso_blend->cso.alpha_to_coverage;6113be.ColorDitherEnable = cso_blend->cso.dither;61146115#if GFX_VER >= 86116for (int i = 0; i < BRW_MAX_DRAW_BUFFERS; i++) {6117struct GENX(BLEND_STATE_ENTRY) entry = { 0 };6118#else6119{6120#endif6121const struct pipe_rt_blend_state *rt =6122&cso_blend->cso.rt[cso_blend->cso.independent_blend_enable ? i : 0];61236124be.IndependentAlphaBlendEnable = set_blend_entry_bits(batch, &entry, cso_blend, i) ||6125be.IndependentAlphaBlendEnable;61266127if (GFX_VER >= 8 || can_emit_logic_op(ice)) {6128entry.LogicOpEnable = cso_blend->cso.logicop_enable;6129entry.LogicOpFunction = cso_blend->cso.logicop_func;6130}61316132entry.ColorClampRange = COLORCLAMP_RTFORMAT;6133entry.PreBlendColorClampEnable = true;6134entry.PostBlendColorClampEnable = true;61356136entry.WriteDisableRed = !(rt->colormask & PIPE_MASK_R);6137entry.WriteDisableGreen = !(rt->colormask & PIPE_MASK_G);6138entry.WriteDisableBlue = !(rt->colormask & PIPE_MASK_B);6139entry.WriteDisableAlpha = !(rt->colormask & PIPE_MASK_A);61406141#if GFX_VER >= 86142GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[1 + i * 2], &entry);6143#else6144GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[i * 2], &entry);6145#endif6146}6147}6148#if GFX_VER >= 86149GENX(BLEND_STATE_pack)(NULL, blend_map, &be);6150#endif6151#if GFX_VER < 76152crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {6153ptr.PointertoBLEND_STATE = blend_offset;6154ptr.BLEND_STATEChange = true;6155}6156#else6157crocus_emit_cmd(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), ptr) {6158ptr.BlendStatePointer = blend_offset;6159#if GFX_VER >= 86160ptr.BlendStatePointerValid = true;6161#endif6162}6163#endif6164}6165#endif61666167if (dirty & 
CROCUS_DIRTY_COLOR_CALC_STATE) {6168struct crocus_depth_stencil_alpha_state *cso = ice->state.cso_zsa;6169UNUSED struct crocus_blend_state *cso_blend = ice->state.cso_blend;6170struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref;6171uint32_t cc_offset;6172void *cc_map =6173stream_state(batch,6174sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length),617564, &cc_offset);6176#if GFX_VER <= 56177dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS;6178#endif6179_crocus_pack_state(batch, GENX(COLOR_CALC_STATE), cc_map, cc) {6180cc.AlphaTestFormat = ALPHATEST_FLOAT32;6181cc.AlphaReferenceValueAsFLOAT32 = cso->cso.alpha_ref_value;61826183#if GFX_VER <= 561846185set_depth_stencil_bits(ice, &cc);61866187if (cso_blend->cso.logicop_enable) {6188if (can_emit_logic_op(ice)) {6189cc.LogicOpEnable = cso_blend->cso.logicop_enable;6190cc.LogicOpFunction = cso_blend->cso.logicop_func;6191}6192}6193cc.ColorDitherEnable = cso_blend->cso.dither;61946195cc.IndependentAlphaBlendEnable = set_blend_entry_bits(batch, &cc, cso_blend, 0);61966197if (cso->cso.alpha_enabled && ice->state.framebuffer.nr_cbufs <= 1) {6198cc.AlphaTestEnable = cso->cso.alpha_enabled;6199cc.AlphaTestFunction = translate_compare_func(cso->cso.alpha_func);6200}6201cc.StatisticsEnable = ice->state.stats_wm ? 
1 : 0;6202cc.CCViewportStatePointer = ro_bo(batch->state.bo, ice->state.cc_vp_address);6203#else6204cc.AlphaTestFormat = ALPHATEST_FLOAT32;6205cc.AlphaReferenceValueAsFLOAT32 = cso->cso.alpha_ref_value;62066207cc.BlendConstantColorRed = ice->state.blend_color.color[0];6208cc.BlendConstantColorGreen = ice->state.blend_color.color[1];6209cc.BlendConstantColorBlue = ice->state.blend_color.color[2];6210cc.BlendConstantColorAlpha = ice->state.blend_color.color[3];6211#endif6212cc.StencilReferenceValue = p_stencil_refs->ref_value[0];6213cc.BackfaceStencilReferenceValue = p_stencil_refs->ref_value[1];6214}6215ice->shaders.cc_offset = cc_offset;6216#if GFX_VER >= 66217crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {6218ptr.ColorCalcStatePointer = cc_offset;6219#if GFX_VER != 76220ptr.ColorCalcStatePointerValid = true;6221#endif6222}6223#endif6224}6225#if GFX_VER <= 56226if (dirty & CROCUS_DIRTY_GEN4_CONSTANT_COLOR) {6227crocus_emit_cmd(batch, GENX(3DSTATE_CONSTANT_COLOR), blend_cc) {6228blend_cc.BlendConstantColorRed = ice->state.blend_color.color[0];6229blend_cc.BlendConstantColorGreen = ice->state.blend_color.color[1];6230blend_cc.BlendConstantColorBlue = ice->state.blend_color.color[2];6231blend_cc.BlendConstantColorAlpha = ice->state.blend_color.color[3];6232}6233}6234#endif6235for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {6236if (!(stage_dirty & (CROCUS_STAGE_DIRTY_CONSTANTS_VS << stage)))6237continue;62386239struct crocus_shader_state *shs = &ice->state.shaders[stage];6240struct crocus_compiled_shader *shader = ice->shaders.prog[stage];62416242if (!shader)6243continue;62446245if (shs->sysvals_need_upload)6246upload_sysvals(ice, stage);62476248#if GFX_VER <= 56249dirty |= CROCUS_DIRTY_GEN4_CURBE;6250#endif6251#if GFX_VER >= 76252struct push_bos push_bos = {};6253setup_constant_buffers(ice, batch, stage, &push_bos);62546255emit_push_constant_packets(ice, batch, stage, &push_bos);6256#endif6257}62586259for (int stage = 0; stage <= 
MESA_SHADER_FRAGMENT; stage++) {6260if (stage_dirty & (CROCUS_STAGE_DIRTY_BINDINGS_VS << stage)) {6261if (ice->shaders.prog[stage]) {6262#if GFX_VER <= 66263dirty |= CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS;6264#endif6265crocus_populate_binding_table(ice, batch, stage, false);6266ice->shaders.prog[stage]->bind_bo_offset =6267crocus_upload_binding_table(ice, batch,6268ice->shaders.prog[stage]->surf_offset,6269ice->shaders.prog[stage]->bt.size_bytes);62706271#if GFX_VER >= 76272crocus_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ptr) {6273ptr._3DCommandSubOpcode = 38 + stage;6274ptr.PointertoVSBindingTable = ice->shaders.prog[stage]->bind_bo_offset;6275}6276#endif6277#if GFX_VER == 66278} else if (stage == MESA_SHADER_GEOMETRY && ice->shaders.ff_gs_prog) {6279dirty |= CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS;6280crocus_populate_binding_table(ice, batch, stage, true);6281ice->shaders.ff_gs_prog->bind_bo_offset =6282crocus_upload_binding_table(ice, batch,6283ice->shaders.ff_gs_prog->surf_offset,6284ice->shaders.ff_gs_prog->bt.size_bytes);6285#endif6286}6287}6288}6289#if GFX_VER <= 66290if (dirty & CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS) {6291struct crocus_compiled_shader *gs = ice->shaders.prog[MESA_SHADER_GEOMETRY];6292if (gs == NULL)6293gs = ice->shaders.ff_gs_prog;6294crocus_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS), ptr) {6295ptr.PointertoVSBindingTable = ice->shaders.prog[MESA_SHADER_VERTEX]->bind_bo_offset;6296ptr.PointertoPSBindingTable = ice->shaders.prog[MESA_SHADER_FRAGMENT]->bind_bo_offset;6297#if GFX_VER == 66298ptr.VSBindingTableChange = true;6299ptr.PSBindingTableChange = true;6300ptr.GSBindingTableChange = gs ? true : false;6301ptr.PointertoGSBindingTable = gs ? 
gs->bind_bo_offset : 0;6302#endif6303}6304}6305#endif63066307bool sampler_updates = dirty & CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS;6308for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {6309if (!(stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << stage)) ||6310!ice->shaders.prog[stage])6311continue;63126313crocus_upload_sampler_states(ice, batch, stage);63146315sampler_updates = true;63166317#if GFX_VER >= 76318struct crocus_shader_state *shs = &ice->state.shaders[stage];63196320crocus_emit_cmd(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) {6321ptr._3DCommandSubOpcode = 43 + stage;6322ptr.PointertoVSSamplerState = shs->sampler_offset;6323}6324#endif6325}63266327if (sampler_updates) {6328#if GFX_VER == 66329struct crocus_shader_state *shs_vs = &ice->state.shaders[MESA_SHADER_VERTEX];6330struct crocus_shader_state *shs_gs = &ice->state.shaders[MESA_SHADER_GEOMETRY];6331struct crocus_shader_state *shs_fs = &ice->state.shaders[MESA_SHADER_FRAGMENT];6332crocus_emit_cmd(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ptr) {6333if (ice->shaders.prog[MESA_SHADER_VERTEX] &&6334(dirty & CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS ||6335stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << MESA_SHADER_VERTEX))) {6336ptr.VSSamplerStateChange = true;6337ptr.PointertoVSSamplerState = shs_vs->sampler_offset;6338}6339if (ice->shaders.prog[MESA_SHADER_GEOMETRY] &&6340(dirty & CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS ||6341stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << MESA_SHADER_GEOMETRY))) {6342ptr.GSSamplerStateChange = true;6343ptr.PointertoGSSamplerState = shs_gs->sampler_offset;6344}6345if (ice->shaders.prog[MESA_SHADER_FRAGMENT] &&6346(dirty & CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS ||6347stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << MESA_SHADER_FRAGMENT))) {6348ptr.PSSamplerStateChange = true;6349ptr.PointertoPSSamplerState = shs_fs->sampler_offset;6350}6351}6352#endif6353}63546355#if GFX_VER >= 66356if (dirty & 
CROCUS_DIRTY_GEN6_MULTISAMPLE) {6357crocus_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms) {6358ms.PixelLocation =6359ice->state.cso_rast->cso.half_pixel_center ? CENTER : UL_CORNER;6360if (ice->state.framebuffer.samples > 0)6361ms.NumberofMultisamples = ffs(ice->state.framebuffer.samples) - 1;6362#if GFX_VER == 66363INTEL_SAMPLE_POS_4X(ms.Sample);6364#elif GFX_VER == 76365switch (ice->state.framebuffer.samples) {6366case 1:6367INTEL_SAMPLE_POS_1X(ms.Sample);6368break;6369case 2:6370INTEL_SAMPLE_POS_2X(ms.Sample);6371break;6372case 4:6373INTEL_SAMPLE_POS_4X(ms.Sample);6374break;6375case 8:6376INTEL_SAMPLE_POS_8X(ms.Sample);6377break;6378default:6379break;6380}6381#endif6382}6383}63846385if (dirty & CROCUS_DIRTY_GEN6_SAMPLE_MASK) {6386crocus_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), ms) {6387ms.SampleMask = determine_sample_mask(ice);6388}6389}6390#endif63916392#if GFX_VER >= 76393struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT];6394if ((stage_dirty & CROCUS_STAGE_DIRTY_FS) && shader) {6395struct brw_stage_prog_data *prog_data = shader->prog_data;6396struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;63976398crocus_emit_cmd(batch, GENX(3DSTATE_PS), ps) {63996400/* Initialize the execution mask with VMask. Otherwise, derivatives are6401* incorrect for subspans where some of the pixels are unlit. 
We believe6402* the bit just didn't take effect in previous generations.6403*/6404ps.VectorMaskEnable = GFX_VER >= 8;64056406ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;6407ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;6408ps._32PixelDispatchEnable = wm_prog_data->dispatch_32;64096410ps.DispatchGRFStartRegisterForConstantSetupData0 =6411brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);6412ps.DispatchGRFStartRegisterForConstantSetupData1 =6413brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);6414ps.DispatchGRFStartRegisterForConstantSetupData2 =6415brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);64166417ps.KernelStartPointer0 = KSP(ice, shader) +6418brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);6419ps.KernelStartPointer1 = KSP(ice, shader) +6420brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);6421ps.KernelStartPointer2 = KSP(ice, shader) +6422brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);64236424#if GFX_VERx10 == 756425ps.SampleMask = determine_sample_mask(ice);6426#endif6427// XXX: WABTPPrefetchDisable, see above, drop at C06428ps.BindingTableEntryCount = shader->bt.size_bytes / 4;6429ps.FloatingPointMode = prog_data->use_alt_mode;6430#if GFX_VER >= 86431ps.MaximumNumberofThreadsPerPSD = 64 - 2;6432#else6433ps.MaximumNumberofThreads = batch->screen->devinfo.max_wm_threads - 1;6434#endif64356436ps.PushConstantEnable = prog_data->ubo_ranges[0].length > 0;64376438#if GFX_VER < 86439ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;6440ps.DualSourceBlendEnable = wm_prog_data->dual_src_blend && ice->state.cso_blend->dual_color_blending;6441ps.AttributeEnable = (wm_prog_data->num_varying_inputs != 0);6442#endif6443/* From the documentation for this packet:6444* "If the PS kernel does not need the Position XY Offsets to6445* compute a Position Value, then this field should be programmed6446* to POSOFFSET_NONE."6447*6448* "SW Recommendation: If the PS kernel needs the Position Offsets6449* to compute a 
Position XY value, this field should match Position6450* ZW Interpolation Mode to ensure a consistent position.xyzw6451* computation."6452*6453* We only require XY sample offsets. So, this recommendation doesn't6454* look useful at the moment. We might need this in future.6455*/6456ps.PositionXYOffsetSelect =6457wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE;64586459if (wm_prog_data->base.total_scratch) {6460struct crocus_bo *bo = crocus_get_scratch_space(ice, wm_prog_data->base.total_scratch, MESA_SHADER_FRAGMENT);6461ps.PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch) - 11;6462ps.ScratchSpaceBasePointer = rw_bo(bo, 0);6463}6464}6465#if GFX_VER == 86466const struct shader_info *fs_info =6467crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT);6468crocus_emit_cmd(batch, GENX(3DSTATE_PS_EXTRA), psx) {6469psx.PixelShaderValid = true;6470psx.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;6471psx.PixelShaderKillsPixel = wm_prog_data->uses_kill;6472psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0;6473psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;6474psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;6475psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;64766477/* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */6478if (wm_prog_data->uses_sample_mask)6479psx.PixelShaderUsesInputCoverageMask = true;64806481psx.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;64826483/* The stricter cross-primitive coherency guarantees that the hardware6484* gives us with the "Accesses UAV" bit set for at least one shader stage6485* and the "UAV coherency required" bit set on the 3DPRIMITIVE command6486* are redundant within the current image, atomic counter and SSBO GL6487* APIs, which all have very loose ordering and coherency requirements6488* and generally rely on the application to insert explicit barriers when6489* a shader invocation is expected to see the memory writes performed 
by6490* the invocations of some previous primitive. Regardless of the value6491* of "UAV coherency required", the "Accesses UAV" bits will implicitly6492* cause an in most cases useless DC flush when the lowermost stage with6493* the bit set finishes execution.6494*6495* It would be nice to disable it, but in some cases we can't because on6496* Gfx8+ it also has an influence on rasterization via the PS UAV-only6497* signal (which could be set independently from the coherency mechanism6498* in the 3DSTATE_WM command on Gfx7), and because in some cases it will6499* determine whether the hardware skips execution of the fragment shader6500* or not via the ThreadDispatchEnable signal. However if we know that6501* GFX8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and6502* GFX8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any6503* difference so we may just disable it here.6504*6505* Gfx8 hardware tries to compute ThreadDispatchEnable for us but doesn't6506* take into account KillPixels when no depth or stencil writes are6507* enabled. 
In order for occlusion queries to work correctly with no6508* attachments, we need to force-enable here.6509*6510*/6511if ((wm_prog_data->has_side_effects || wm_prog_data->uses_kill) &&6512!(has_writeable_rt(ice->state.cso_blend, fs_info)))6513psx.PixelShaderHasUAV = true;6514}6515#endif6516}6517#endif65186519#if GFX_VER >= 76520if (ice->state.streamout_active) {6521if (dirty & CROCUS_DIRTY_GEN7_SO_BUFFERS) {6522for (int i = 0; i < 4; i++) {6523struct crocus_stream_output_target *tgt =6524(void *) ice->state.so_target[i];65256526if (!tgt) {6527crocus_emit_cmd(batch, GENX(3DSTATE_SO_BUFFER), sob) {6528sob.SOBufferIndex = i;6529}6530continue;6531}6532struct crocus_resource *res = (void *) tgt->base.buffer;6533uint32_t start = tgt->base.buffer_offset;6534#if GFX_VER < 86535uint32_t end = ALIGN(start + tgt->base.buffer_size, 4);6536#endif6537crocus_emit_cmd(batch, GENX(3DSTATE_SO_BUFFER), sob) {6538sob.SOBufferIndex = i;65396540sob.SurfaceBaseAddress = rw_bo(res->bo, start);6541#if GFX_VER < 86542sob.SurfacePitch = tgt->stride;6543sob.SurfaceEndAddress = rw_bo(res->bo, end);6544#else6545sob.SOBufferEnable = true;6546sob.StreamOffsetWriteEnable = true;6547sob.StreamOutputBufferOffsetAddressEnable = true;6548sob.MOCS = crocus_mocs(res->bo, &batch->screen->isl_dev);65496550sob.SurfaceSize = MAX2(tgt->base.buffer_size / 4, 1) - 1;6551sob.StreamOutputBufferOffsetAddress =6552rw_bo(crocus_resource_bo(&tgt->offset_res->base.b), tgt->offset_offset);6553if (tgt->zero_offset) {6554sob.StreamOffset = 0;6555tgt->zero_offset = false;6556} else6557sob.StreamOffset = 0xFFFFFFFF; /* not offset, see above */6558#endif6559}6560}6561}65626563if ((dirty & CROCUS_DIRTY_SO_DECL_LIST) && ice->state.streamout) {6564uint32_t *decl_list =6565ice->state.streamout + GENX(3DSTATE_STREAMOUT_length);6566crocus_batch_emit(batch, decl_list, 4 * ((decl_list[0] & 0xff) + 2));6567}65686569if (dirty & CROCUS_DIRTY_STREAMOUT) {6570const struct crocus_rasterizer_state *cso_rast = 
ice->state.cso_rast;65716572uint32_t dynamic_sol[GENX(3DSTATE_STREAMOUT_length)];6573crocus_pack_command(GENX(3DSTATE_STREAMOUT), dynamic_sol, sol) {6574sol.SOFunctionEnable = true;6575sol.SOStatisticsEnable = true;65766577sol.RenderingDisable = cso_rast->cso.rasterizer_discard &&6578!ice->state.prims_generated_query_active;6579sol.ReorderMode = cso_rast->cso.flatshade_first ? LEADING : TRAILING;6580}65816582assert(ice->state.streamout);65836584crocus_emit_merge(batch, ice->state.streamout, dynamic_sol,6585GENX(3DSTATE_STREAMOUT_length));6586}6587} else {6588if (dirty & CROCUS_DIRTY_STREAMOUT) {6589crocus_emit_cmd(batch, GENX(3DSTATE_STREAMOUT), sol);6590}6591}6592#endif6593#if GFX_VER == 66594if (ice->state.streamout_active) {6595if (dirty & CROCUS_DIRTY_GEN6_SVBI) {6596crocus_emit_so_svbi(ice);6597}6598}6599#endif66006601if (dirty & CROCUS_DIRTY_CLIP) {6602#if GFX_VER < 66603const struct brw_clip_prog_data *clip_prog_data = (struct brw_clip_prog_data *)ice->shaders.clip_prog->prog_data;6604struct pipe_rasterizer_state *cso_state = &ice->state.cso_rast->cso;66056606uint32_t *clip_ptr = stream_state(batch, GENX(CLIP_STATE_length) * 4, 32, &ice->shaders.clip_offset);6607dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS;6608_crocus_pack_state(batch, GENX(CLIP_STATE), clip_ptr, clip) {6609clip.KernelStartPointer = KSP(ice, ice->shaders.clip_prog);6610clip.FloatingPointMode = FLOATING_POINT_MODE_Alternate;6611clip.SingleProgramFlow = true;6612clip.GRFRegisterCount = DIV_ROUND_UP(clip_prog_data->total_grf, 16) - 1;66136614clip.VertexURBEntryReadLength = clip_prog_data->urb_read_length;6615clip.ConstantURBEntryReadLength = clip_prog_data->curb_read_length;66166617clip.DispatchGRFStartRegisterForURBData = 1;6618clip.VertexURBEntryReadOffset = 0;6619clip.ConstantURBEntryReadOffset = ice->curbe.clip_start * 2;66206621clip.NumberofURBEntries = batch->ice->urb.nr_clip_entries;6622clip.URBEntryAllocationSize = batch->ice->urb.vsize - 1;66236624if (batch->ice->urb.nr_clip_entries 
>= 10) {6625/* Half of the URB entries go to each thread, and it has to be an6626* even number.6627*/6628assert(batch->ice->urb.nr_clip_entries % 2 == 0);66296630/* Although up to 16 concurrent Clip threads are allowed on Ironlake,6631* only 2 threads can output VUEs at a time.6632*/6633clip.MaximumNumberofThreads = (GFX_VER == 5 ? 16 : 2) - 1;6634} else {6635assert(batch->ice->urb.nr_clip_entries >= 5);6636clip.MaximumNumberofThreads = 1 - 1;6637}6638clip.VertexPositionSpace = VPOS_NDCSPACE;6639clip.UserClipFlagsMustClipEnable = true;6640clip.GuardbandClipTestEnable = true;66416642clip.ClipperViewportStatePointer = ro_bo(batch->state.bo, ice->state.clip_vp_address);6643clip.ScreenSpaceViewportXMin = -1.0;6644clip.ScreenSpaceViewportXMax = 1.0;6645clip.ScreenSpaceViewportYMin = -1.0;6646clip.ScreenSpaceViewportYMax = 1.0;6647clip.ViewportXYClipTestEnable = true;6648clip.ViewportZClipTestEnable = (cso_state->depth_clip_near || cso_state->depth_clip_far);66496650#if GFX_VER == 5 || GFX_VERx10 == 456651clip.UserClipDistanceClipTestEnableBitmask = cso_state->clip_plane_enable;6652#else6653/* Up to 6 actual clip flags, plus the 7th for the negative RHW6654* workaround.6655*/6656clip.UserClipDistanceClipTestEnableBitmask = (cso_state->clip_plane_enable & 0x3f) | 0x40;6657#endif66586659clip.APIMode = cso_state->clip_halfz ? 
APIMODE_D3D : APIMODE_OGL;6660clip.GuardbandClipTestEnable = true;66616662clip.ClipMode = clip_prog_data->clip_mode;6663#if GFX_VERx10 == 456664clip.NegativeWClipTestEnable = true;6665#endif6666}66676668#else //if GFX_VER >= 66669struct crocus_rasterizer_state *cso_rast = ice->state.cso_rast;6670const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data );6671struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;6672bool gs_or_tes = ice->shaders.prog[MESA_SHADER_GEOMETRY] ||6673ice->shaders.prog[MESA_SHADER_TESS_EVAL];6674bool points_or_lines = cso_rast->fill_mode_point_or_line ||6675(gs_or_tes ? ice->shaders.output_topology_is_points_or_lines6676: ice->state.prim_is_points_or_lines);6677uint32_t dynamic_clip[GENX(3DSTATE_CLIP_length)];6678crocus_pack_command(GENX(3DSTATE_CLIP), &dynamic_clip, cl) {6679cl.StatisticsEnable = ice->state.statistics_counters_enabled;6680if (cso_rast->cso.rasterizer_discard)6681cl.ClipMode = CLIPMODE_REJECT_ALL;6682else if (ice->state.window_space_position)6683cl.ClipMode = CLIPMODE_ACCEPT_ALL;6684else6685cl.ClipMode = CLIPMODE_NORMAL;66866687cl.PerspectiveDivideDisable = ice->state.window_space_position;6688cl.ViewportXYClipTestEnable = !points_or_lines;66896690cl.UserClipDistanceCullTestEnableBitmask =6691brw_vue_prog_data(ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data)->cull_distance_mask;66926693if (wm_prog_data->barycentric_interp_modes &6694BRW_BARYCENTRIC_NONPERSPECTIVE_BITS)6695cl.NonPerspectiveBarycentricEnable = true;66966697cl.ForceZeroRTAIndexEnable = cso_fb->layers <= 1;6698cl.MaximumVPIndex = ice->state.num_viewports - 1;6699}6700crocus_emit_merge(batch, cso_rast->clip, dynamic_clip,6701ARRAY_SIZE(cso_rast->clip));6702#endif6703}67046705if (stage_dirty & CROCUS_STAGE_DIRTY_VS) {6706struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_VERTEX];6707const struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(shader->prog_data);6708const 
struct brw_stage_prog_data *prog_data = &vue_prog_data->base;6709#if GFX_VER == 76710if (batch->screen->devinfo.is_ivybridge)6711gen7_emit_vs_workaround_flush(batch);6712#endif671367146715#if GFX_VER == 66716struct push_bos push_bos = {};6717setup_constant_buffers(ice, batch, MESA_SHADER_VERTEX, &push_bos);67186719emit_push_constant_packets(ice, batch, MESA_SHADER_VERTEX, &push_bos);6720#endif6721#if GFX_VER >= 66722crocus_emit_cmd(batch, GENX(3DSTATE_VS), vs)6723#else6724uint32_t *vs_ptr = stream_state(batch,6725GENX(VS_STATE_length) * 4, 32, &ice->shaders.vs_offset);6726dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS;6727_crocus_pack_state(batch, GENX(VS_STATE), vs_ptr, vs)6728#endif6729{6730INIT_THREAD_DISPATCH_FIELDS(vs, Vertex, MESA_SHADER_VERTEX);67316732vs.MaximumNumberofThreads = batch->screen->devinfo.max_vs_threads - 1;67336734#if GFX_VER < 66735vs.GRFRegisterCount = DIV_ROUND_UP(vue_prog_data->total_grf, 16) - 1;6736vs.ConstantURBEntryReadLength = vue_prog_data->base.curb_read_length;6737vs.ConstantURBEntryReadOffset = ice->curbe.vs_start * 2;67386739vs.NumberofURBEntries = batch->ice->urb.nr_vs_entries >> (GFX_VER == 5 ? 2 : 0);6740vs.URBEntryAllocationSize = batch->ice->urb.vsize - 1;67416742vs.MaximumNumberofThreads =6743CLAMP(batch->ice->urb.nr_vs_entries / 2, 1, batch->screen->devinfo.max_vs_threads) - 1;6744vs.StatisticsEnable = false;6745vs.SamplerStatePointer = ro_bo(batch->state.bo, ice->state.shaders[MESA_SHADER_VERTEX].sampler_offset);6746#endif6747#if GFX_VER == 56748/* Force single program flow on Ironlake. We cannot reliably get6749* all applications working without it. 
See:6750* https://bugs.freedesktop.org/show_bug.cgi?id=291726751*6752* The most notable and reliably failing application is the Humus6753* demo "CelShading"6754*/6755vs.SingleProgramFlow = true;6756vs.SamplerCount = 0; /* hardware requirement */67576758#endif6759#if GFX_VER >= 86760vs.SIMD8DispatchEnable =6761vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8;67626763vs.UserClipDistanceCullTestEnableBitmask =6764vue_prog_data->cull_distance_mask;6765#endif6766}67676768#if GFX_VER == 66769crocus_emit_pipe_control_flush(batch,6770"post VS const",6771PIPE_CONTROL_DEPTH_STALL |6772PIPE_CONTROL_INSTRUCTION_INVALIDATE |6773PIPE_CONTROL_STATE_CACHE_INVALIDATE);6774#endif6775}67766777if (stage_dirty & CROCUS_STAGE_DIRTY_GS) {6778struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_GEOMETRY];6779bool active = GFX_VER >= 6 && shader;6780#if GFX_VER == 66781struct push_bos push_bos = {};6782if (shader)6783setup_constant_buffers(ice, batch, MESA_SHADER_GEOMETRY, &push_bos);67846785emit_push_constant_packets(ice, batch, MESA_SHADER_GEOMETRY, &push_bos);6786#endif6787#if GFX_VER >= 66788crocus_emit_cmd(batch, GENX(3DSTATE_GS), gs)6789#else6790uint32_t *gs_ptr = stream_state(batch,6791GENX(GS_STATE_length) * 4, 32, &ice->shaders.gs_offset);6792dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS;6793_crocus_pack_state(batch, GENX(GS_STATE), gs_ptr, gs)6794#endif6795{6796#if GFX_VER >= 66797if (active) {6798const struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(shader->prog_data);6799const struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(shader->prog_data);6800const struct brw_stage_prog_data *prog_data = &gs_prog_data->base.base;68016802INIT_THREAD_DISPATCH_FIELDS(gs, Vertex, MESA_SHADER_GEOMETRY);6803#if GFX_VER >= 76804gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;6805gs.OutputTopology = gs_prog_data->output_topology;6806gs.ControlDataHeaderSize =6807gs_prog_data->control_data_header_size_hwords;68086809gs.InstanceControl = 
gs_prog_data->invocations - 1;6810gs.DispatchMode = vue_prog_data->dispatch_mode;68116812gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;68136814gs.ControlDataFormat = gs_prog_data->control_data_format;6815#endif68166817/* Note: the meaning of the GEN7_GS_REORDER_TRAILING bit changes between6818* Ivy Bridge and Haswell.6819*6820* On Ivy Bridge, setting this bit causes the vertices of a triangle6821* strip to be delivered to the geometry shader in an order that does6822* not strictly follow the OpenGL spec, but preserves triangle6823* orientation. For example, if the vertices are (1, 2, 3, 4, 5), then6824* the geometry shader sees triangles:6825*6826* (1, 2, 3), (2, 4, 3), (3, 4, 5)6827*6828* (Clearing the bit is even worse, because it fails to preserve6829* orientation).6830*6831* Triangle strips with adjacency always ordered in a way that preserves6832* triangle orientation but does not strictly follow the OpenGL spec,6833* regardless of the setting of this bit.6834*6835* On Haswell, both triangle strips and triangle strips with adjacency6836* are always ordered in a way that preserves triangle orientation.6837* Setting this bit causes the ordering to strictly follow the OpenGL6838* spec.6839*6840* So in either case we want to set the bit. Unfortunately on Ivy6841* Bridge this will get the order close to correct but not perfect.6842*/6843gs.ReorderMode = TRAILING;6844gs.MaximumNumberofThreads =6845GFX_VER == 8 ? 
(batch->screen->devinfo.max_gs_threads / 2 - 1) :6846(batch->screen->devinfo.max_gs_threads - 1);6847#if GFX_VER < 76848gs.SOStatisticsEnable = true;6849if (gs_prog_data->num_transform_feedback_bindings)6850gs.SVBIPayloadEnable = ice->state.streamout_active;68516852/* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it6853* was previously done for gen6.6854*6855* TODO: test with both disabled to see if the HW is behaving6856* as expected, like in gen7.6857*/6858gs.SingleProgramFlow = true;6859gs.VectorMaskEnable = true;6860#endif6861#if GFX_VER >= 86862gs.ExpectedVertexCount = gs_prog_data->vertices_in;68636864if (gs_prog_data->static_vertex_count != -1) {6865gs.StaticOutput = true;6866gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count;6867}6868gs.IncludeVertexHandles = vue_prog_data->include_vue_handles;68696870gs.UserClipDistanceCullTestEnableBitmask =6871vue_prog_data->cull_distance_mask;68726873const int urb_entry_write_offset = 1;6874const uint32_t urb_entry_output_length =6875DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) -6876urb_entry_write_offset;68776878gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset;6879gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1);6880#endif6881}6882#endif6883#if GFX_VER <= 66884if (!active && ice->shaders.ff_gs_prog) {6885const struct brw_ff_gs_prog_data *gs_prog_data = (struct brw_ff_gs_prog_data *)ice->shaders.ff_gs_prog->prog_data;6886/* In gen6, transform feedback for the VS stage is done with an6887* ad-hoc GS program. This function provides the needed 3DSTATE_GS6888* for this.6889*/6890gs.KernelStartPointer = KSP(ice, ice->shaders.ff_gs_prog);6891gs.SingleProgramFlow = true;6892gs.DispatchGRFStartRegisterForURBData = GFX_VER == 6 ? 
2 : 1;6893gs.VertexURBEntryReadLength = gs_prog_data->urb_read_length;68946895#if GFX_VER <= 56896gs.GRFRegisterCount =6897DIV_ROUND_UP(gs_prog_data->total_grf, 16) - 1;6898/* BRW_NEW_URB_FENCE */6899gs.NumberofURBEntries = batch->ice->urb.nr_gs_entries;6900gs.URBEntryAllocationSize = batch->ice->urb.vsize - 1;6901gs.MaximumNumberofThreads = batch->ice->urb.nr_gs_entries >= 8 ? 1 : 0;6902gs.FloatingPointMode = FLOATING_POINT_MODE_Alternate;6903#else6904gs.Enable = true;6905gs.VectorMaskEnable = true;6906gs.SVBIPayloadEnable = true;6907gs.SVBIPostIncrementEnable = true;6908gs.SVBIPostIncrementValue = gs_prog_data->svbi_postincrement_value;6909gs.SOStatisticsEnable = true;6910gs.MaximumNumberofThreads = batch->screen->devinfo.max_gs_threads - 1;6911#endif6912}6913#endif6914if (!active && !ice->shaders.ff_gs_prog) {6915#if GFX_VER < 86916gs.DispatchGRFStartRegisterForURBData = 1;6917#if GFX_VER >= 76918gs.IncludeVertexHandles = true;6919#endif6920#endif6921}6922#if GFX_VER >= 66923gs.StatisticsEnable = true;6924#endif6925#if GFX_VER == 5 || GFX_VER == 66926gs.RenderingEnabled = true;6927#endif6928#if GFX_VER <= 56929gs.MaximumVPIndex = ice->state.num_viewports - 1;6930#endif6931}6932}69336934#if GFX_VER >= 76935if (stage_dirty & CROCUS_STAGE_DIRTY_TCS) {6936struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_TESS_CTRL];69376938if (shader) {6939const struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(shader->prog_data);6940const struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(shader->prog_data);6941const struct brw_stage_prog_data *prog_data = &tcs_prog_data->base.base;69426943crocus_emit_cmd(batch, GENX(3DSTATE_HS), hs) {6944INIT_THREAD_DISPATCH_FIELDS(hs, Vertex, MESA_SHADER_TESS_CTRL);6945hs.InstanceCount = tcs_prog_data->instances - 1;6946hs.IncludeVertexHandles = true;6947hs.MaximumNumberofThreads = batch->screen->devinfo.max_tcs_threads - 1;6948}6949} else {6950crocus_emit_cmd(batch, GENX(3DSTATE_HS), 
hs);6951}69526953}69546955if (stage_dirty & CROCUS_STAGE_DIRTY_TES) {6956struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_TESS_EVAL];6957if (shader) {6958const struct brw_tes_prog_data *tes_prog_data = brw_tes_prog_data(shader->prog_data);6959const struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(shader->prog_data);6960const struct brw_stage_prog_data *prog_data = &tes_prog_data->base.base;69616962crocus_emit_cmd(batch, GENX(3DSTATE_TE), te) {6963te.Partitioning = tes_prog_data->partitioning;6964te.OutputTopology = tes_prog_data->output_topology;6965te.TEDomain = tes_prog_data->domain;6966te.TEEnable = true;6967te.MaximumTessellationFactorOdd = 63.0;6968te.MaximumTessellationFactorNotOdd = 64.0;6969};6970crocus_emit_cmd(batch, GENX(3DSTATE_DS), ds) {6971INIT_THREAD_DISPATCH_FIELDS(ds, Patch, MESA_SHADER_TESS_EVAL);69726973ds.MaximumNumberofThreads = batch->screen->devinfo.max_tes_threads - 1;6974ds.ComputeWCoordinateEnable =6975tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;69766977#if GFX_VER >= 86978if (vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8)6979ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;6980ds.UserClipDistanceCullTestEnableBitmask =6981vue_prog_data->cull_distance_mask;6982#endif6983};6984} else {6985crocus_emit_cmd(batch, GENX(3DSTATE_TE), te);6986crocus_emit_cmd(batch, GENX(3DSTATE_DS), ds);6987}6988}6989#endif6990if (dirty & CROCUS_DIRTY_RASTER) {69916992#if GFX_VER < 66993const struct brw_sf_prog_data *sf_prog_data = (struct brw_sf_prog_data *)ice->shaders.sf_prog->prog_data;6994struct pipe_rasterizer_state *cso_state = &ice->state.cso_rast->cso;6995uint32_t *sf_ptr = stream_state(batch,6996GENX(SF_STATE_length) * 4, 32, &ice->shaders.sf_offset);6997dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS;6998_crocus_pack_state(batch, GENX(SF_STATE), sf_ptr, sf) {6999sf.KernelStartPointer = KSP(ice, ice->shaders.sf_prog);7000sf.FloatingPointMode = FLOATING_POINT_MODE_Alternate;7001sf.GRFRegisterCount = 
DIV_ROUND_UP(sf_prog_data->total_grf, 16) - 1;7002sf.DispatchGRFStartRegisterForURBData = 3;7003sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;7004sf.VertexURBEntryReadLength = sf_prog_data->urb_read_length;7005sf.URBEntryAllocationSize = batch->ice->urb.sfsize - 1;7006sf.NumberofURBEntries = batch->ice->urb.nr_sf_entries;7007sf.PointRasterizationRule = RASTRULE_UPPER_RIGHT;70087009sf.SetupViewportStateOffset = ro_bo(batch->state.bo, ice->state.sf_vp_address);70107011sf.MaximumNumberofThreads =7012MIN2(GFX_VER == 5 ? 48 : 24, batch->ice->urb.nr_sf_entries) - 1;70137014sf.SpritePointEnable = cso_state->point_quad_rasterization;7015sf.DestinationOriginHorizontalBias = 0.5;7016sf.DestinationOriginVerticalBias = 0.5;70177018sf.LastPixelEnable = cso_state->line_last_pixel;7019sf.LineWidth = get_line_width(cso_state);7020sf.PointWidth = cso_state->point_size;7021sf.PointWidthSource = cso_state->point_size_per_vertex ? Vertex : State;7022#if GFX_VERx10 == 45 || GFX_VER >= 57023sf.AALineDistanceMode = AALINEDISTANCE_TRUE;7024#endif7025sf.ViewportTransformEnable = true;7026sf.FrontWinding = cso_state->front_ccw ? 
1 : 0;7027sf.ScissorRectangleEnable = true;7028sf.CullMode = translate_cull_mode(cso_state->cull_face);70297030if (cso_state->flatshade_first) {7031sf.TriangleFanProvokingVertexSelect = 1;7032} else {7033sf.TriangleStripListProvokingVertexSelect = 2;7034sf.TriangleFanProvokingVertexSelect = 2;7035sf.LineStripListProvokingVertexSelect = 1;7036}7037}7038#else7039struct crocus_rasterizer_state *cso = ice->state.cso_rast;7040uint32_t dynamic_sf[GENX(3DSTATE_SF_length)];7041crocus_pack_command(GENX(3DSTATE_SF), &dynamic_sf, sf) {7042sf.ViewportTransformEnable = !ice->state.window_space_position;70437044#if GFX_VER == 67045const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data);7046uint32_t urb_entry_read_length;7047uint32_t urb_entry_read_offset;7048uint32_t point_sprite_enables;7049calculate_attr_overrides(ice, sf.Attribute, &point_sprite_enables,7050&urb_entry_read_length,7051&urb_entry_read_offset);7052sf.VertexURBEntryReadLength = urb_entry_read_length;7053sf.VertexURBEntryReadOffset = urb_entry_read_offset;7054sf.PointSpriteTextureCoordinateEnable = point_sprite_enables;7055sf.ConstantInterpolationEnable = wm_prog_data->flat_inputs;7056sf.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;7057#endif70587059#if GFX_VER >= 6 && GFX_VER < 87060if (ice->state.framebuffer.samples > 1 && ice->state.cso_rast->cso.multisample)7061sf.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;7062#endif7063#if GFX_VER == 77064if (ice->state.framebuffer.zsbuf) {7065struct crocus_resource *zres, *sres;7066crocus_get_depth_stencil_resources(&batch->screen->devinfo,7067ice->state.framebuffer.zsbuf->texture,7068&zres, &sres);7069/* ANV thinks that the stencil-ness doesn't matter, this is just7070* about handling polygon offset scaling.7071*/7072sf.DepthBufferSurfaceFormat = zres ? 
isl_format_get_depth_format(zres->surf.format, false) : D16_UNORM;7073}7074#endif7075}7076crocus_emit_merge(batch, cso->sf, dynamic_sf,7077ARRAY_SIZE(dynamic_sf));7078#if GFX_VER == 87079crocus_batch_emit(batch, cso->raster, sizeof(cso->raster));7080#endif7081#endif7082}70837084if (dirty & CROCUS_DIRTY_WM) {7085struct crocus_rasterizer_state *cso = ice->state.cso_rast;7086const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data);7087UNUSED bool writes_depth = wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF;7088UNUSED const struct shader_info *fs_info =7089crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT);70907091#if GFX_VER == 67092struct push_bos push_bos = {};7093setup_constant_buffers(ice, batch, MESA_SHADER_FRAGMENT, &push_bos);70947095emit_push_constant_packets(ice, batch, MESA_SHADER_FRAGMENT, &push_bos);7096#endif7097#if GFX_VER >= 67098crocus_emit_cmd(batch, GENX(3DSTATE_WM), wm)7099#else7100uint32_t *wm_ptr = stream_state(batch,7101GENX(WM_STATE_length) * 4, 32, &ice->shaders.wm_offset);71027103dirty |= CROCUS_DIRTY_GEN5_PIPELINED_POINTERS;71047105_crocus_pack_state(batch, GENX(WM_STATE), wm_ptr, wm)7106#endif7107{7108#if GFX_VER <= 67109wm._8PixelDispatchEnable = wm_prog_data->dispatch_8;7110wm._16PixelDispatchEnable = wm_prog_data->dispatch_16;7111wm._32PixelDispatchEnable = wm_prog_data->dispatch_32;7112#endif7113#if GFX_VER == 47114/* On gen4, we only have one shader kernel */7115if (brw_wm_state_has_ksp(wm, 0)) {7116wm.KernelStartPointer0 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]);7117wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0);7118wm.DispatchGRFStartRegisterForConstantSetupData0 =7119wm_prog_data->base.dispatch_grf_start_reg;7120}7121#elif GFX_VER == 57122wm.KernelStartPointer0 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) +7123brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0);7124wm.KernelStartPointer1 = KSP(ice, 
ice->shaders.prog[MESA_SHADER_FRAGMENT]) +7125brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1);7126wm.KernelStartPointer2 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) +7127brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2);71287129wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0);7130wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 1);7131wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 2);71327133wm.DispatchGRFStartRegisterForConstantSetupData0 =7134wm_prog_data->base.dispatch_grf_start_reg;7135#elif GFX_VER == 67136wm.KernelStartPointer0 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) +7137brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0);7138wm.KernelStartPointer1 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) +7139brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1);7140wm.KernelStartPointer2 = KSP(ice, ice->shaders.prog[MESA_SHADER_FRAGMENT]) +7141brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2);71427143wm.DispatchGRFStartRegisterForConstantSetupData0 =7144brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 0);7145wm.DispatchGRFStartRegisterForConstantSetupData1 =7146brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 1);7147wm.DispatchGRFStartRegisterForConstantSetupData2 =7148brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2);7149#endif7150#if GFX_VER <= 57151wm.ConstantURBEntryReadLength = wm_prog_data->base.curb_read_length;7152wm.ConstantURBEntryReadOffset = ice->curbe.wm_start * 2;7153wm.SetupURBEntryReadLength = wm_prog_data->num_varying_inputs * 2;7154wm.SetupURBEntryReadOffset = 0;7155wm.EarlyDepthTestEnable = true;7156wm.LineAntialiasingRegionWidth = _05pixels;7157wm.LineEndCapAntialiasingRegionWidth = _10pixels;7158wm.DepthCoefficientURBReadOffset = 1;71597160if (cso->cso.offset_tri) {7161wm.GlobalDepthOffsetEnable = true;71627163/* Something weird going on with legacy_global_depth_bias,7164* offset_constant, scaling and MRD. 
This value passes glean7165* but gives some odd results elsewere (eg. the7166* quad-offset-units test).7167*/7168wm.GlobalDepthOffsetConstant = cso->cso.offset_units * 2;7169wm.GlobalDepthOffsetScale = cso->cso.offset_scale;7170}7171wm.SamplerStatePointer = ro_bo(batch->state.bo,7172ice->state.shaders[MESA_SHADER_FRAGMENT].sampler_offset);7173#endif71747175wm.StatisticsEnable = (GFX_VER >= 6 || ice->state.stats_wm) ?7176ice->state.statistics_counters_enabled : 0;71777178#if GFX_VER >= 67179wm.LineAntialiasingRegionWidth = _10pixels;7180wm.LineEndCapAntialiasingRegionWidth = _05pixels;71817182wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;7183wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;7184#endif7185#if GFX_VER == 67186wm.DualSourceBlendEnable = wm_prog_data->dual_src_blend &&7187ice->state.cso_blend->dual_color_blending;7188wm.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;7189wm.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;71907191/* From the SNB PRM, volume 2 part 1, page 281:7192* "If the PS kernel does not need the Position XY Offsets7193* to compute a Position XY value, then this field should be7194* programmed to POSOFFSET_NONE."7195*7196* "SW Recommendation: If the PS kernel needs the Position Offsets7197* to compute a Position XY value, this field should match Position7198* ZW Interpolation Mode to ensure a consistent position.xyzw7199* computation."7200* We only require XY sample offsets. So, this recommendation doesn't7201* look useful at the moment. 
We might need this in future.7202*/7203if (wm_prog_data->uses_pos_offset)7204wm.PositionXYOffsetSelect = POSOFFSET_SAMPLE;7205else7206wm.PositionXYOffsetSelect = POSOFFSET_NONE;7207#endif7208wm.LineStippleEnable = cso->cso.line_stipple_enable;7209wm.PolygonStippleEnable = cso->cso.poly_stipple_enable;72107211#if GFX_VER < 77212if (wm_prog_data->base.use_alt_mode)7213wm.FloatingPointMode = FLOATING_POINT_MODE_Alternate;7214wm.BindingTableEntryCount = ice->shaders.prog[MESA_SHADER_FRAGMENT]->bt.size_bytes / 4;7215wm.MaximumNumberofThreads = batch->screen->devinfo.max_wm_threads - 1;7216#endif72177218#if GFX_VER < 87219#if GFX_VER >= 67220wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;72217222struct pipe_framebuffer_state *fb = &ice->state.framebuffer;7223if (fb->samples > 1) {7224if (cso->cso.multisample)7225wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;7226else7227wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;72287229if (wm_prog_data->persample_dispatch)7230wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;7231else7232wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;7233} else {7234wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;7235wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;7236}7237#endif72387239wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;72407241if (wm_prog_data->uses_kill ||7242ice->state.cso_zsa->cso.alpha_enabled ||7243ice->state.cso_blend->cso.alpha_to_coverage ||7244(GFX_VER >= 6 && wm_prog_data->uses_omask))7245wm.PixelShaderKillsPixel = true;72467247if (has_writeable_rt(ice->state.cso_blend, fs_info) ||7248writes_depth || wm.PixelShaderKillsPixel ||7249(GFX_VER >= 6 && wm_prog_data->has_side_effects))7250wm.ThreadDispatchEnable = true;72517252#if GFX_VER >= 77253wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;7254wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;7255#else7256if (wm_prog_data->base.total_scratch) {7257struct crocus_bo *bo = 
crocus_get_scratch_space(ice, wm_prog_data->base.total_scratch,7258MESA_SHADER_FRAGMENT);7259wm.PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch) - 11;7260wm.ScratchSpaceBasePointer = rw_bo(bo, 0);7261}72627263wm.PixelShaderComputedDepth = writes_depth;72647265#endif7266/* The "UAV access enable" bits are unnecessary on HSW because they only7267* seem to have an effect on the HW-assisted coherency mechanism which we7268* don't need, and the rasterization-related UAV_ONLY flag and the7269* DISPATCH_ENABLE bit can be set independently from it.7270* C.f. gen8_upload_ps_extra().7271*7272* BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS |7273* _NEW_COLOR7274*/7275#if GFX_VERx10 == 757276if (!(has_writeable_rt(ice->state.cso_blend, fs_info) || writes_depth) &&7277wm_prog_data->has_side_effects)7278wm.PSUAVonly = ON;7279#endif7280#endif7281#if GFX_VER >= 77282/* BRW_NEW_FS_PROG_DATA */7283if (wm_prog_data->early_fragment_tests)7284wm.EarlyDepthStencilControl = EDSC_PREPS;7285else if (wm_prog_data->has_side_effects)7286wm.EarlyDepthStencilControl = EDSC_PSEXEC;7287#endif7288#if GFX_VER == 87289/* We could skip this bit if color writes are enabled. 
*/7290if (wm_prog_data->has_side_effects || wm_prog_data->uses_kill)7291wm.ForceThreadDispatchEnable = ForceON;7292#endif7293};72947295#if GFX_VER <= 57296if (ice->state.global_depth_offset_clamp != cso->cso.offset_clamp) {7297crocus_emit_cmd(batch, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp) {7298clamp.GlobalDepthOffsetClamp = cso->cso.offset_clamp;7299}7300ice->state.global_depth_offset_clamp = cso->cso.offset_clamp;7301}7302#endif7303}73047305#if GFX_VER >= 77306if (dirty & CROCUS_DIRTY_GEN7_SBE) {7307crocus_emit_sbe(batch, ice);7308}7309#endif73107311#if GFX_VER >= 87312if (dirty & CROCUS_DIRTY_GEN8_PS_BLEND) {7313struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_FRAGMENT];7314struct crocus_blend_state *cso_blend = ice->state.cso_blend;7315struct crocus_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;7316struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;7317const struct shader_info *fs_info =7318crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT);7319uint32_t dynamic_pb[GENX(3DSTATE_PS_BLEND_length)];7320crocus_pack_command(GENX(3DSTATE_PS_BLEND), &dynamic_pb, pb) {7321pb.HasWriteableRT = has_writeable_rt(cso_blend, fs_info);7322pb.AlphaTestEnable = cso_zsa->cso.alpha_enabled;7323pb.ColorBufferBlendEnable = (cso_blend->blend_enables & 1) &&7324(!cso_blend->dual_color_blending || wm_prog_data->dual_src_blend);7325}7326crocus_emit_merge(batch, cso_blend->ps_blend, dynamic_pb,7327ARRAY_SIZE(cso_blend->ps_blend));7328}7329#endif73307331#if GFX_VER >= 67332if (dirty & CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL) {73337334#if GFX_VER >= 87335crocus_emit_cmd(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), wmds) {7336set_depth_stencil_bits(ice, &wmds);7337}7338#else7339uint32_t ds_offset;7340void *ds_map = stream_state(batch,7341sizeof(uint32_t) * GENX(DEPTH_STENCIL_STATE_length),734264, &ds_offset);7343_crocus_pack_state(batch, GENX(DEPTH_STENCIL_STATE), ds_map, ds) {7344set_depth_stencil_bits(ice, &ds);7345}73467347#if GFX_VER == 
67348crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {7349ptr.PointertoDEPTH_STENCIL_STATE = ds_offset;7350ptr.DEPTH_STENCIL_STATEChange = true;7351}7352#else7353crocus_emit_cmd(batch, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), ptr) {7354ptr.PointertoDEPTH_STENCIL_STATE = ds_offset;7355}7356#endif7357#endif7358}73597360if (dirty & CROCUS_DIRTY_GEN6_SCISSOR_RECT) {7361/* Align to 64-byte boundary as per anv. */7362uint32_t scissor_offset;7363struct pipe_scissor_state *scissor_map = (void *)7364stream_state(batch, sizeof(struct pipe_scissor_state) * ice->state.num_viewports,736564, &scissor_offset);7366for (int i = 0; i < ice->state.num_viewports; i++) {7367struct pipe_scissor_state scissor;7368crocus_fill_scissor_rect(ice, i, &scissor);7369scissor_map[i] = scissor;7370}73717372crocus_emit_cmd(batch, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) {7373ptr.ScissorRectPointer = scissor_offset;7374}7375}7376#endif73777378if (dirty & CROCUS_DIRTY_DEPTH_BUFFER) {7379struct isl_device *isl_dev = &batch->screen->isl_dev;7380#if GFX_VER >= 67381crocus_emit_depth_stall_flushes(batch);7382#endif7383void *batch_ptr;7384struct crocus_resource *zres, *sres;7385struct pipe_framebuffer_state *cso = &ice->state.framebuffer;7386batch_ptr = crocus_get_command_space(batch, isl_dev->ds.size);73877388struct isl_view view = {7389.base_level = 0,7390.levels = 1,7391.base_array_layer = 0,7392.array_len = 1,7393.swizzle = ISL_SWIZZLE_IDENTITY,7394};7395struct isl_depth_stencil_hiz_emit_info info = { .view = &view };73967397if (cso->zsbuf) {7398crocus_get_depth_stencil_resources(&batch->screen->devinfo, cso->zsbuf->texture, &zres, &sres);7399struct crocus_surface *zsbuf = (struct crocus_surface *)cso->zsbuf;7400if (zsbuf->align_res) {7401zres = (struct crocus_resource *)zsbuf->align_res;7402}7403view.base_level = cso->zsbuf->u.tex.level;7404view.base_array_layer = cso->zsbuf->u.tex.first_layer;7405view.array_len = cso->zsbuf->u.tex.last_layer - cso->zsbuf->u.tex.first_layer + 
1;74067407if (zres) {7408view.usage |= ISL_SURF_USAGE_DEPTH_BIT;74097410info.depth_surf = &zres->surf;7411info.depth_address = crocus_command_reloc(batch,7412(batch_ptr - batch->command.map) + isl_dev->ds.depth_offset,7413zres->bo, 0, RELOC_32BIT);74147415info.mocs = crocus_mocs(zres->bo, isl_dev);7416view.format = zres->surf.format;74177418if (crocus_resource_level_has_hiz(zres, view.base_level)) {7419info.hiz_usage = zres->aux.usage;7420info.hiz_surf = &zres->aux.surf;7421uint32_t hiz_offset = 0;74227423#if GFX_VER == 67424/* HiZ surfaces on Sandy Bridge technically don't support7425* mip-mapping. However, we can fake it by offsetting to the7426* first slice of LOD0 in the HiZ surface.7427*/7428isl_surf_get_image_offset_B_tile_sa(&zres->aux.surf,7429view.base_level, 0, 0,7430&hiz_offset, NULL, NULL);7431#endif7432info.hiz_address = crocus_command_reloc(batch,7433(batch_ptr - batch->command.map) + isl_dev->ds.hiz_offset,7434zres->aux.bo, zres->aux.offset + hiz_offset,7435RELOC_32BIT);7436info.depth_clear_value = crocus_resource_get_clear_color(zres).f32[0];7437}7438}74397440#if GFX_VER >= 67441if (sres) {7442view.usage |= ISL_SURF_USAGE_STENCIL_BIT;7443info.stencil_aux_usage = sres->aux.usage;7444info.stencil_surf = &sres->surf;74457446uint32_t stencil_offset = 0;7447#if GFX_VER == 67448/* Stencil surfaces on Sandy Bridge technically don't support7449* mip-mapping. 
However, we can fake it by offsetting to the7450* first slice of LOD0 in the stencil surface.7451*/7452isl_surf_get_image_offset_B_tile_sa(&sres->surf,7453view.base_level, 0, 0,7454&stencil_offset, NULL, NULL);7455#endif74567457info.stencil_address = crocus_command_reloc(batch,7458(batch_ptr - batch->command.map) + isl_dev->ds.stencil_offset,7459sres->bo, stencil_offset, RELOC_32BIT);7460if (!zres) {7461view.format = sres->surf.format;7462info.mocs = crocus_mocs(sres->bo, isl_dev);7463}7464}7465#endif7466}7467isl_emit_depth_stencil_hiz_s(isl_dev, batch_ptr, &info);7468}74697470/* TODO: Disable emitting this until something uses a stipple. */7471if (dirty & CROCUS_DIRTY_POLYGON_STIPPLE) {7472crocus_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) {7473for (int i = 0; i < 32; i++) {7474poly.PatternRow[i] = ice->state.poly_stipple.stipple[i];7475}7476}7477}74787479if (dirty & CROCUS_DIRTY_LINE_STIPPLE) {7480struct crocus_rasterizer_state *cso = ice->state.cso_rast;7481crocus_batch_emit(batch, cso->line_stipple, sizeof(cso->line_stipple));7482}74837484#if GFX_VER >= 87485if (dirty & CROCUS_DIRTY_GEN8_VF_TOPOLOGY) {7486crocus_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {7487topo.PrimitiveTopologyType =7488translate_prim_type(draw->mode, draw->vertices_per_patch);7489}7490}7491#endif74927493#if GFX_VER <= 57494if (dirty & CROCUS_DIRTY_GEN5_PIPELINED_POINTERS) {7495upload_pipelined_state_pointers(batch, ice->shaders.ff_gs_prog ? 
true : false, ice->shaders.gs_offset,7496ice->shaders.vs_offset, ice->shaders.sf_offset,7497ice->shaders.clip_offset, ice->shaders.wm_offset, ice->shaders.cc_offset);7498crocus_upload_urb_fence(batch);74997500crocus_emit_cmd(batch, GENX(CS_URB_STATE), cs) {7501cs.NumberofURBEntries = ice->urb.nr_cs_entries;7502cs.URBEntryAllocationSize = ice->urb.csize - 1;7503}7504dirty |= CROCUS_DIRTY_GEN4_CURBE;7505}7506#endif7507if (dirty & CROCUS_DIRTY_DRAWING_RECTANGLE) {7508struct pipe_framebuffer_state *fb = &ice->state.framebuffer;7509if (fb->width && fb->height) {7510crocus_emit_cmd(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {7511rect.ClippedDrawingRectangleXMax = fb->width - 1;7512rect.ClippedDrawingRectangleYMax = fb->height - 1;7513}7514}7515}75167517if (dirty & CROCUS_DIRTY_VERTEX_BUFFERS) {7518const uint32_t user_count = util_bitcount(ice->state.bound_vertex_buffers);7519const uint32_t count = user_count +7520ice->state.vs_uses_draw_params + ice->state.vs_uses_derived_draw_params;7521uint32_t dynamic_bound = ice->state.bound_vertex_buffers;75227523if (count) {7524const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length);75257526uint32_t *map =7527crocus_get_command_space(batch, 4 * (1 + vb_dwords * count));7528_crocus_pack_command(batch, GENX(3DSTATE_VERTEX_BUFFERS), map, vb) {7529vb.DWordLength = (vb_dwords * count + 1) - 2;7530}7531map += 1;75327533uint32_t bound = dynamic_bound;7534int i;7535while (bound) {7536i = u_bit_scan(&bound);7537struct pipe_vertex_buffer *buf = &ice->state.vertex_buffers[i];7538struct crocus_bo *bo = crocus_resource_bo(buf->buffer.resource);7539uint32_t step_rate = ice->state.cso_vertex_elements->step_rate[i];75407541emit_vertex_buffer_state(batch, i, bo,7542buf->buffer_offset,7543ice->state.vb_end[i],7544buf->stride,7545step_rate,7546&map);7547}7548i = user_count;7549if (ice->state.vs_uses_draw_params) {7550struct crocus_resource *res = (struct crocus_resource *)ice->draw.draw_params.res;7551emit_vertex_buffer_state(batch, 
i++,7552res->bo,7553ice->draw.draw_params.offset,7554ice->draw.draw_params.res->width0,75550, 0, &map);7556}7557if (ice->state.vs_uses_derived_draw_params) {7558struct crocus_resource *res = (struct crocus_resource *)ice->draw.derived_draw_params.res;7559emit_vertex_buffer_state(batch, i++,7560res->bo,7561ice->draw.derived_draw_params.offset,7562ice->draw.derived_draw_params.res->width0,75630, 0, &map);7564}7565}7566}75677568if (dirty & CROCUS_DIRTY_VERTEX_ELEMENTS) {7569struct crocus_vertex_element_state *cso = ice->state.cso_vertex_elements;7570const unsigned entries = MAX2(cso->count, 1);7571if (!(ice->state.vs_needs_sgvs_element ||7572ice->state.vs_uses_derived_draw_params ||7573ice->state.vs_needs_edge_flag)) {7574crocus_batch_emit(batch, cso->vertex_elements, sizeof(uint32_t) *7575(1 + entries * GENX(VERTEX_ELEMENT_STATE_length)));7576} else {7577uint32_t dynamic_ves[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)];7578const unsigned dyn_count = cso->count +7579ice->state.vs_needs_sgvs_element +7580ice->state.vs_uses_derived_draw_params;75817582crocus_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS),7583&dynamic_ves, ve) {7584ve.DWordLength =75851 + GENX(VERTEX_ELEMENT_STATE_length) * dyn_count - 2;7586}7587memcpy(&dynamic_ves[1], &cso->vertex_elements[1],7588(cso->count - ice->state.vs_needs_edge_flag) *7589GENX(VERTEX_ELEMENT_STATE_length) * sizeof(uint32_t));7590uint32_t *ve_pack_dest =7591&dynamic_ves[1 + (cso->count - ice->state.vs_needs_edge_flag) *7592GENX(VERTEX_ELEMENT_STATE_length)];75937594if (ice->state.vs_needs_sgvs_element) {7595uint32_t base_ctrl = ice->state.vs_uses_draw_params ?7596VFCOMP_STORE_SRC : VFCOMP_STORE_0;7597crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {7598ve.Valid = true;7599ve.VertexBufferIndex =7600util_bitcount64(ice->state.bound_vertex_buffers);7601ve.SourceElementFormat = ISL_FORMAT_R32G32_UINT;7602ve.Component0Control = base_ctrl;7603ve.Component1Control = base_ctrl;7604#if GFX_VER < 87605ve.Component2Control = 
ice->state.vs_uses_vertexid ? VFCOMP_STORE_VID : VFCOMP_STORE_0;7606ve.Component3Control = ice->state.vs_uses_instanceid ? VFCOMP_STORE_IID : VFCOMP_STORE_0;7607#else7608ve.Component2Control = VFCOMP_STORE_0;7609ve.Component3Control = VFCOMP_STORE_0;7610#endif7611#if GFX_VER < 57612ve.DestinationElementOffset = cso->count * 4;7613#endif7614}7615ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);7616}7617if (ice->state.vs_uses_derived_draw_params) {7618crocus_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {7619ve.Valid = true;7620ve.VertexBufferIndex =7621util_bitcount64(ice->state.bound_vertex_buffers) +7622ice->state.vs_uses_draw_params;7623ve.SourceElementFormat = ISL_FORMAT_R32G32_UINT;7624ve.Component0Control = VFCOMP_STORE_SRC;7625ve.Component1Control = VFCOMP_STORE_SRC;7626ve.Component2Control = VFCOMP_STORE_0;7627ve.Component3Control = VFCOMP_STORE_0;7628#if GFX_VER < 57629ve.DestinationElementOffset = (cso->count + ice->state.vs_needs_sgvs_element) * 4;7630#endif7631}7632ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);7633}7634if (ice->state.vs_needs_edge_flag) {7635for (int i = 0; i < GENX(VERTEX_ELEMENT_STATE_length); i++)7636ve_pack_dest[i] = cso->edgeflag_ve[i];7637}76387639crocus_batch_emit(batch, &dynamic_ves, sizeof(uint32_t) *7640(1 + dyn_count * GENX(VERTEX_ELEMENT_STATE_length)));7641}76427643#if GFX_VER == 87644if (!ice->state.vs_needs_edge_flag) {7645crocus_batch_emit(batch, cso->vf_instancing, sizeof(uint32_t) *7646entries * GENX(3DSTATE_VF_INSTANCING_length));7647} else {7648assert(cso->count > 0);7649const unsigned edgeflag_index = cso->count - 1;7650uint32_t dynamic_vfi[33 * GENX(3DSTATE_VF_INSTANCING_length)];7651memcpy(&dynamic_vfi[0], cso->vf_instancing, edgeflag_index *7652GENX(3DSTATE_VF_INSTANCING_length) * sizeof(uint32_t));76537654uint32_t *vfi_pack_dest = &dynamic_vfi[0] +7655edgeflag_index * GENX(3DSTATE_VF_INSTANCING_length);7656crocus_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) 
{7657vi.VertexElementIndex = edgeflag_index +7658ice->state.vs_needs_sgvs_element +7659ice->state.vs_uses_derived_draw_params;7660}7661for (int i = 0; i < GENX(3DSTATE_VF_INSTANCING_length); i++)7662vfi_pack_dest[i] |= cso->edgeflag_vfi[i];76637664crocus_batch_emit(batch, &dynamic_vfi[0], sizeof(uint32_t) *7665entries * GENX(3DSTATE_VF_INSTANCING_length));7666}7667#endif7668}76697670#if GFX_VER == 87671if (dirty & CROCUS_DIRTY_GEN8_VF_SGVS) {7672const struct brw_vs_prog_data *vs_prog_data = (void *)7673ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data;7674struct crocus_vertex_element_state *cso = ice->state.cso_vertex_elements;76757676crocus_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgv) {7677if (vs_prog_data->uses_vertexid) {7678sgv.VertexIDEnable = true;7679sgv.VertexIDComponentNumber = 2;7680sgv.VertexIDElementOffset =7681cso->count - ice->state.vs_needs_edge_flag;7682}76837684if (vs_prog_data->uses_instanceid) {7685sgv.InstanceIDEnable = true;7686sgv.InstanceIDComponentNumber = 3;7687sgv.InstanceIDElementOffset =7688cso->count - ice->state.vs_needs_edge_flag;7689}7690}7691}7692#endif7693#if GFX_VERx10 >= 757694if (dirty & CROCUS_DIRTY_GEN75_VF) {7695crocus_emit_cmd(batch, GENX(3DSTATE_VF), vf) {7696if (draw->primitive_restart) {7697vf.IndexedDrawCutIndexEnable = true;7698vf.CutIndex = draw->restart_index;7699}7700}7701}7702#endif77037704#if GFX_VER == 87705if (dirty & CROCUS_DIRTY_GEN8_PMA_FIX) {7706bool enable = want_pma_fix(ice);7707genX(crocus_update_pma_fix)(ice, batch, enable);7708}7709#endif77107711#if GFX_VER <= 57712if (dirty & CROCUS_DIRTY_GEN4_CURBE) {7713gen4_upload_curbe(batch);7714}7715#endif7716}77177718static void7719crocus_upload_render_state(struct crocus_context *ice,7720struct crocus_batch *batch,7721const struct pipe_draw_info *draw,7722unsigned drawid_offset,7723const struct pipe_draw_indirect_info *indirect,7724const struct pipe_draw_start_count_bias *sc)7725{7726#if GFX_VER >= 77727bool use_predicate = ice->state.predicate == 
CROCUS_PREDICATE_STATE_USE_BIT;7728#endif77297730batch->no_wrap = true;7731batch->contains_draw = true;77327733crocus_update_surface_base_address(batch);77347735crocus_upload_dirty_render_state(ice, batch, draw);77367737batch->no_wrap = false;7738if (draw->index_size > 0) {7739unsigned offset;7740unsigned size;7741bool emit_index = false;77427743if (draw->has_user_indices) {7744unsigned start_offset = draw->index_size * sc->start;7745u_upload_data(ice->ctx.stream_uploader, 0,7746sc->count * draw->index_size, 4,7747(char *)draw->index.user + start_offset,7748&offset, &ice->state.index_buffer.res);7749offset -= start_offset;7750size = start_offset + sc->count * draw->index_size;7751emit_index = true;7752} else {7753struct crocus_resource *res = (void *) draw->index.resource;77547755if (ice->state.index_buffer.res != draw->index.resource) {7756res->bind_history |= PIPE_BIND_INDEX_BUFFER;7757pipe_resource_reference(&ice->state.index_buffer.res,7758draw->index.resource);7759emit_index = true;7760}7761offset = 0;7762size = draw->index.resource->width0;7763}77647765if (!emit_index &&7766(ice->state.index_buffer.size != size ||7767ice->state.index_buffer.index_size != draw->index_size7768#if GFX_VERx10 < 757769|| ice->state.index_buffer.prim_restart != draw->primitive_restart7770#endif7771)7772)7773emit_index = true;77747775if (emit_index) {7776struct crocus_bo *bo = crocus_resource_bo(ice->state.index_buffer.res);77777778crocus_emit_cmd(batch, GENX(3DSTATE_INDEX_BUFFER), ib) {7779#if GFX_VERx10 < 757780ib.CutIndexEnable = draw->primitive_restart;7781#endif7782ib.IndexFormat = draw->index_size >> 1;7783ib.BufferStartingAddress = ro_bo(bo, offset);7784#if GFX_VER >= 87785ib.MOCS = crocus_mocs(bo, &batch->screen->isl_dev);7786ib.BufferSize = bo->size - offset;7787#else7788ib.BufferEndingAddress = ro_bo(bo, offset + size - 1);7789#endif7790}7791ice->state.index_buffer.size = size;7792ice->state.index_buffer.offset = offset;7793ice->state.index_buffer.index_size = 
draw->index_size;7794#if GFX_VERx10 < 757795ice->state.index_buffer.prim_restart = draw->primitive_restart;7796#endif7797}7798}77997800#define _3DPRIM_END_OFFSET 0x24207801#define _3DPRIM_START_VERTEX 0x24307802#define _3DPRIM_VERTEX_COUNT 0x24347803#define _3DPRIM_INSTANCE_COUNT 0x24387804#define _3DPRIM_START_INSTANCE 0x243C7805#define _3DPRIM_BASE_VERTEX 0x244078067807#if GFX_VER >= 77808if (indirect && !indirect->count_from_stream_output) {7809if (indirect->indirect_draw_count) {7810use_predicate = true;78117812struct crocus_bo *draw_count_bo =7813crocus_resource_bo(indirect->indirect_draw_count);7814unsigned draw_count_offset =7815indirect->indirect_draw_count_offset;78167817crocus_emit_pipe_control_flush(batch,7818"ensure indirect draw buffer is flushed",7819PIPE_CONTROL_FLUSH_ENABLE);7820if (ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT) {7821#if GFX_VERx10 >= 757822struct mi_builder b;7823mi_builder_init(&b, &batch->screen->devinfo, batch);78247825/* comparison = draw id < draw count */7826struct mi_value comparison =7827mi_ult(&b, mi_imm(drawid_offset),7828mi_mem32(ro_bo(draw_count_bo,7829draw_count_offset)));7830#if GFX_VER == 87831/* predicate = comparison & conditional rendering predicate */7832mi_store(&b, mi_reg32(MI_PREDICATE_RESULT),7833mi_iand(&b, comparison, mi_reg32(CS_GPR(15))));7834#else7835/* predicate = comparison & conditional rendering predicate */7836struct mi_value pred = mi_iand(&b, comparison,7837mi_reg32(CS_GPR(15)));78387839mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), pred);7840mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0));78417842unsigned mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |7843MI_PREDICATE_COMBINEOP_SET |7844MI_PREDICATE_COMPAREOP_SRCS_EQUAL;78457846crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t));7847#endif7848#endif7849} else {7850uint32_t mi_predicate;78517852/* Upload the id of the current primitive to MI_PREDICATE_SRC1. 
*/7853crocus_load_register_imm64(batch, MI_PREDICATE_SRC1, drawid_offset);7854/* Upload the current draw count from the draw parameters buffer7855* to MI_PREDICATE_SRC0.7856*/7857crocus_load_register_mem32(batch, MI_PREDICATE_SRC0,7858draw_count_bo, draw_count_offset);7859/* Zero the top 32-bits of MI_PREDICATE_SRC0 */7860crocus_load_register_imm32(batch, MI_PREDICATE_SRC0 + 4, 0);78617862if (drawid_offset == 0) {7863mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |7864MI_PREDICATE_COMBINEOP_SET |7865MI_PREDICATE_COMPAREOP_SRCS_EQUAL;7866} else {7867/* While draw_index < draw_count the predicate's result will be7868* (draw_index == draw_count) ^ TRUE = TRUE7869* When draw_index == draw_count the result is7870* (TRUE) ^ TRUE = FALSE7871* After this all results will be:7872* (FALSE) ^ FALSE = FALSE7873*/7874mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOAD |7875MI_PREDICATE_COMBINEOP_XOR |7876MI_PREDICATE_COMPAREOP_SRCS_EQUAL;7877}7878crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t));7879}7880}78817882#if GFX_VER >= 77883struct crocus_bo *bo = crocus_resource_bo(indirect->buffer);7884assert(bo);78857886crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {7887lrm.RegisterAddress = _3DPRIM_VERTEX_COUNT;7888lrm.MemoryAddress = ro_bo(bo, indirect->offset + 0);7889}7890crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {7891lrm.RegisterAddress = _3DPRIM_INSTANCE_COUNT;7892lrm.MemoryAddress = ro_bo(bo, indirect->offset + 4);7893}7894crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {7895lrm.RegisterAddress = _3DPRIM_START_VERTEX;7896lrm.MemoryAddress = ro_bo(bo, indirect->offset + 8);7897}7898if (draw->index_size) {7899crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {7900lrm.RegisterAddress = _3DPRIM_BASE_VERTEX;7901lrm.MemoryAddress = ro_bo(bo, indirect->offset + 12);7902}7903crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {7904lrm.RegisterAddress = _3DPRIM_START_INSTANCE;7905lrm.MemoryAddress = ro_bo(bo, 
indirect->offset + 16);7906}7907} else {7908crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {7909lrm.RegisterAddress = _3DPRIM_START_INSTANCE;7910lrm.MemoryAddress = ro_bo(bo, indirect->offset + 12);7911}7912crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_IMM), lri) {7913lri.RegisterOffset = _3DPRIM_BASE_VERTEX;7914lri.DataDWord = 0;7915}7916}7917#endif7918} else if (indirect && indirect->count_from_stream_output) {7919#if GFX_VERx10 >= 757920struct crocus_stream_output_target *so =7921(void *) indirect->count_from_stream_output;79227923/* XXX: Replace with actual cache tracking */7924crocus_emit_pipe_control_flush(batch,7925"draw count from stream output stall",7926PIPE_CONTROL_CS_STALL);79277928struct mi_builder b;7929mi_builder_init(&b, &batch->screen->devinfo, batch);79307931struct crocus_address addr =7932ro_bo(crocus_resource_bo(&so->offset_res->base.b), so->offset_offset);7933struct mi_value offset =7934mi_iadd_imm(&b, mi_mem32(addr), -so->base.buffer_offset);79357936mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT),7937mi_udiv32_imm(&b, offset, so->stride));79387939_crocus_emit_lri(batch, _3DPRIM_START_VERTEX, 0);7940_crocus_emit_lri(batch, _3DPRIM_BASE_VERTEX, 0);7941_crocus_emit_lri(batch, _3DPRIM_START_INSTANCE, 0);7942_crocus_emit_lri(batch, _3DPRIM_INSTANCE_COUNT, draw->instance_count);7943#endif7944}7945#else7946assert(!indirect);7947#endif79487949crocus_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {7950prim.VertexAccessType = draw->index_size > 0 ? 
RANDOM : SEQUENTIAL;7951#if GFX_VER >= 77952prim.PredicateEnable = use_predicate;7953#endif79547955prim.PrimitiveTopologyType = translate_prim_type(ice->state.prim_mode, draw->vertices_per_patch);7956if (indirect) {7957// XXX Probably have to do something for gen6 here?7958#if GFX_VER >= 77959prim.IndirectParameterEnable = true;7960#endif7961} else {7962#if GFX_VER >= 57963prim.StartInstanceLocation = draw->start_instance;7964#endif7965prim.InstanceCount = draw->instance_count;7966prim.VertexCountPerInstance = sc->count;79677968prim.StartVertexLocation = sc->start;79697970if (draw->index_size) {7971prim.BaseVertexLocation += sc->index_bias;7972}7973}7974}7975}79767977#if GFX_VER >= 779787979static void7980crocus_upload_compute_state(struct crocus_context *ice,7981struct crocus_batch *batch,7982const struct pipe_grid_info *grid)7983{7984const uint64_t stage_dirty = ice->state.stage_dirty;7985struct crocus_screen *screen = batch->screen;7986const struct intel_device_info *devinfo = &screen->devinfo;7987struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];7988struct crocus_compiled_shader *shader =7989ice->shaders.prog[MESA_SHADER_COMPUTE];7990struct brw_stage_prog_data *prog_data = shader->prog_data;7991struct brw_cs_prog_data *cs_prog_data = (void *) prog_data;7992const struct brw_cs_dispatch_info dispatch =7993brw_cs_get_dispatch_info(devinfo, cs_prog_data, grid->block);79947995crocus_update_surface_base_address(batch);7996if ((stage_dirty & CROCUS_STAGE_DIRTY_CONSTANTS_CS) && shs->sysvals_need_upload)7997upload_sysvals(ice, MESA_SHADER_COMPUTE);79987999if (stage_dirty & CROCUS_STAGE_DIRTY_BINDINGS_CS) {8000crocus_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false);8001ice->shaders.prog[MESA_SHADER_COMPUTE]->bind_bo_offset =8002crocus_upload_binding_table(ice, batch,8003ice->shaders.prog[MESA_SHADER_COMPUTE]->surf_offset,8004ice->shaders.prog[MESA_SHADER_COMPUTE]->bt.size_bytes);8005}80068007if (stage_dirty & 
CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS)8008crocus_upload_sampler_states(ice, batch, MESA_SHADER_COMPUTE);80098010if ((stage_dirty & CROCUS_STAGE_DIRTY_CS) ||8011cs_prog_data->local_size[0] == 0 /* Variable local group size */) {8012/* The MEDIA_VFE_STATE documentation for Gen8+ says:8013*8014* "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless8015* the only bits that are changed are scoreboard related: Scoreboard8016* Enable, Scoreboard Type, Scoreboard Mask, Scoreboard Delta. For8017* these scoreboard related states, a MEDIA_STATE_FLUSH is8018* sufficient."8019*/8020crocus_emit_pipe_control_flush(batch,8021"workaround: stall before MEDIA_VFE_STATE",8022PIPE_CONTROL_CS_STALL);80238024crocus_emit_cmd(batch, GENX(MEDIA_VFE_STATE), vfe) {8025if (prog_data->total_scratch) {8026struct crocus_bo *bo =8027crocus_get_scratch_space(ice, prog_data->total_scratch,8028MESA_SHADER_COMPUTE);8029#if GFX_VER == 88030/* Broadwell's Per Thread Scratch Space is in the range [0, 11]8031* where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.8032*/8033vfe.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11;8034#elif GFX_VERx10 == 758035/* Haswell's Per Thread Scratch Space is in the range [0, 10]8036* where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.8037*/8038vfe.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 12;8039#else8040/* Earlier platforms use the range [0, 11] to mean [1kB, 12kB]8041* where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.8042*/8043vfe.PerThreadScratchSpace = prog_data->total_scratch / 1024 - 1;8044#endif8045vfe.ScratchSpaceBasePointer = rw_bo(bo, 0);8046}80478048vfe.MaximumNumberofThreads =8049devinfo->max_cs_threads * screen->subslice_total - 1;8050vfe.ResetGatewayTimer =8051Resettingrelativetimerandlatchingtheglobaltimestamp;8052vfe.BypassGatewayControl = true;8053#if GFX_VER == 78054vfe.GPGPUMode = 1;8055#endif8056#if GFX_VER == 88057vfe.BypassGatewayControl = true;8058#endif8059vfe.NumberofURBEntries = GFX_VER == 8 ? 
2 : 0;8060vfe.URBEntryAllocationSize = GFX_VER == 8 ? 2 : 0;80618062vfe.CURBEAllocationSize =8063ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +8064cs_prog_data->push.cross_thread.regs, 2);8065}8066}80678068/* TODO: Combine subgroup-id with cbuf0 so we can push regular uniforms */8069if ((stage_dirty & CROCUS_STAGE_DIRTY_CS) ||8070cs_prog_data->local_size[0] == 0 /* Variable local group size */) {8071uint32_t curbe_data_offset = 0;8072assert(cs_prog_data->push.cross_thread.dwords == 0 &&8073cs_prog_data->push.per_thread.dwords == 1 &&8074cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);8075const unsigned push_const_size =8076brw_cs_push_const_total_size(cs_prog_data, dispatch.threads);8077uint32_t *curbe_data_map =8078stream_state(batch,8079ALIGN(push_const_size, 64), 64,8080&curbe_data_offset);8081assert(curbe_data_map);8082memset(curbe_data_map, 0x5a, ALIGN(push_const_size, 64));8083crocus_fill_cs_push_const_buffer(cs_prog_data, dispatch.threads,8084curbe_data_map);80858086crocus_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {8087curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);8088curbe.CURBEDataStartAddress = curbe_data_offset;8089}8090}80918092if (stage_dirty & (CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS |8093CROCUS_STAGE_DIRTY_BINDINGS_CS |8094CROCUS_STAGE_DIRTY_CONSTANTS_CS |8095CROCUS_STAGE_DIRTY_CS)) {8096uint32_t desc[GENX(INTERFACE_DESCRIPTOR_DATA_length)];8097const uint64_t ksp = KSP(ice,shader) + brw_cs_prog_data_prog_offset(cs_prog_data, dispatch.simd_size);8098crocus_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), desc, idd) {8099idd.KernelStartPointer = ksp;8100idd.SamplerStatePointer = shs->sampler_offset;8101idd.BindingTablePointer = ice->shaders.prog[MESA_SHADER_COMPUTE]->bind_bo_offset;8102idd.BindingTableEntryCount = MIN2(shader->bt.size_bytes / 4, 31);8103idd.NumberofThreadsinGPGPUThreadGroup = dispatch.threads;8104idd.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs;8105idd.BarrierEnable = 
cs_prog_data->uses_barrier;8106idd.SharedLocalMemorySize = encode_slm_size(GFX_VER,8107prog_data->total_shared);8108#if GFX_VERx10 >= 758109idd.CrossThreadConstantDataReadLength = cs_prog_data->push.cross_thread.regs;8110#endif8111}81128113crocus_emit_cmd(batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) {8114load.InterfaceDescriptorTotalLength =8115GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);8116load.InterfaceDescriptorDataStartAddress =8117emit_state(batch, desc, sizeof(desc), 64);8118}8119}81208121#define GPGPU_DISPATCHDIMX 0x25008122#define GPGPU_DISPATCHDIMY 0x25048123#define GPGPU_DISPATCHDIMZ 0x250881248125if (grid->indirect) {8126struct crocus_state_ref *grid_size = &ice->state.grid_size;8127struct crocus_bo *bo = crocus_resource_bo(grid_size->res);8128crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {8129lrm.RegisterAddress = GPGPU_DISPATCHDIMX;8130lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 0);8131}8132crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {8133lrm.RegisterAddress = GPGPU_DISPATCHDIMY;8134lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 4);8135}8136crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {8137lrm.RegisterAddress = GPGPU_DISPATCHDIMZ;8138lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 8);8139}81408141#if GFX_VER == 78142/* Clear upper 32-bits of SRC0 and all 64-bits of SRC1 */8143_crocus_emit_lri(batch, MI_PREDICATE_SRC0 + 4, 0);8144crocus_load_register_imm64(batch, MI_PREDICATE_SRC1, 0);81458146/* Load compute_dispatch_indirect_x_size into SRC0 */8147crocus_load_register_mem32(batch, MI_PREDICATE_SRC0, bo, grid_size->offset + 0);81488149/* predicate = (compute_dispatch_indirect_x_size == 0); */8150crocus_emit_cmd(batch, GENX(MI_PREDICATE), mip) {8151mip.LoadOperation = LOAD_LOAD;8152mip.CombineOperation = COMBINE_SET;8153mip.CompareOperation = COMPARE_SRCS_EQUAL;8154};81558156/* Load compute_dispatch_indirect_y_size into SRC0 */8157crocus_load_register_mem32(batch, MI_PREDICATE_SRC0, bo, 
grid_size->offset + 4);81588159/* predicate = (compute_dispatch_indirect_y_size == 0); */8160crocus_emit_cmd(batch, GENX(MI_PREDICATE), mip) {8161mip.LoadOperation = LOAD_LOAD;8162mip.CombineOperation = COMBINE_OR;8163mip.CompareOperation = COMPARE_SRCS_EQUAL;8164};81658166/* Load compute_dispatch_indirect_z_size into SRC0 */8167crocus_load_register_mem32(batch, MI_PREDICATE_SRC0, bo, grid_size->offset + 8);81688169/* predicate = (compute_dispatch_indirect_z_size == 0); */8170crocus_emit_cmd(batch, GENX(MI_PREDICATE), mip) {8171mip.LoadOperation = LOAD_LOAD;8172mip.CombineOperation = COMBINE_OR;8173mip.CompareOperation = COMPARE_SRCS_EQUAL;8174};81758176/* predicate = !predicate; */8177#define COMPARE_FALSE 18178crocus_emit_cmd(batch, GENX(MI_PREDICATE), mip) {8179mip.LoadOperation = LOAD_LOADINV;8180mip.CombineOperation = COMBINE_OR;8181mip.CompareOperation = COMPARE_FALSE;8182}8183#endif8184}81858186crocus_emit_cmd(batch, GENX(GPGPU_WALKER), ggw) {8187ggw.IndirectParameterEnable = grid->indirect != NULL;8188ggw.PredicateEnable = GFX_VER <= 7 && grid->indirect != NULL;8189ggw.SIMDSize = dispatch.simd_size / 16;8190ggw.ThreadDepthCounterMaximum = 0;8191ggw.ThreadHeightCounterMaximum = 0;8192ggw.ThreadWidthCounterMaximum = dispatch.threads - 1;8193ggw.ThreadGroupIDXDimension = grid->grid[0];8194ggw.ThreadGroupIDYDimension = grid->grid[1];8195ggw.ThreadGroupIDZDimension = grid->grid[2];8196ggw.RightExecutionMask = dispatch.right_mask;8197ggw.BottomExecutionMask = 0xffffffff;8198}81998200crocus_emit_cmd(batch, GENX(MEDIA_STATE_FLUSH), msf);82018202batch->contains_draw = true;8203}82048205#endif /* GFX_VER >= 7 */82068207/**8208* State module teardown.8209*/8210static void8211crocus_destroy_state(struct crocus_context *ice)8212{8213pipe_resource_reference(&ice->draw.draw_params.res, NULL);8214pipe_resource_reference(&ice->draw.derived_draw_params.res, NULL);82158216free(ice->state.genx);82178218for (int i = 0; i < 4; i++) 
{8219pipe_so_target_reference(&ice->state.so_target[i], NULL);8220}82218222for (unsigned i = 0; i < ice->state.framebuffer.nr_cbufs; i++) {8223pipe_surface_reference(&ice->state.framebuffer.cbufs[i], NULL);8224}8225pipe_surface_reference(&ice->state.framebuffer.zsbuf, NULL);82268227for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) {8228struct crocus_shader_state *shs = &ice->state.shaders[stage];8229for (int i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {8230pipe_resource_reference(&shs->constbufs[i].buffer, NULL);8231}8232for (int i = 0; i < PIPE_MAX_SHADER_IMAGES; i++) {8233pipe_resource_reference(&shs->image[i].base.resource, NULL);8234}8235for (int i = 0; i < PIPE_MAX_SHADER_BUFFERS; i++) {8236pipe_resource_reference(&shs->ssbo[i].buffer, NULL);8237}8238for (int i = 0; i < CROCUS_MAX_TEXTURE_SAMPLERS; i++) {8239pipe_sampler_view_reference((struct pipe_sampler_view **)8240&shs->textures[i], NULL);8241}8242}82438244for (int i = 0; i < 16; i++)8245pipe_resource_reference(&ice->state.vertex_buffers[i].buffer.resource, NULL);8246pipe_resource_reference(&ice->state.grid_size.res, NULL);82478248pipe_resource_reference(&ice->state.index_buffer.res, NULL);8249}82508251/* ------------------------------------------------------------------- */82528253static void8254crocus_rebind_buffer(struct crocus_context *ice,8255struct crocus_resource *res)8256{8257struct pipe_context *ctx = &ice->ctx;82588259assert(res->base.b.target == PIPE_BUFFER);82608261/* Buffers can't be framebuffer attachments, nor display related,8262* and we don't have upstream Clover support.8263*/8264assert(!(res->bind_history & (PIPE_BIND_DEPTH_STENCIL |8265PIPE_BIND_RENDER_TARGET |8266PIPE_BIND_BLENDABLE |8267PIPE_BIND_DISPLAY_TARGET |8268PIPE_BIND_CURSOR |8269PIPE_BIND_COMPUTE_RESOURCE |8270PIPE_BIND_GLOBAL)));82718272if (res->bind_history & PIPE_BIND_VERTEX_BUFFER) {8273uint64_t bound_vbs = ice->state.bound_vertex_buffers;8274while (bound_vbs) {8275const int i = u_bit_scan64(&bound_vbs);8276struct 
pipe_vertex_buffer *buffer = &ice->state.vertex_buffers[i];82778278if (!buffer->is_user_buffer && &res->base.b == buffer->buffer.resource)8279ice->state.dirty |= CROCUS_DIRTY_VERTEX_BUFFERS;8280}8281}82828283if ((res->bind_history & PIPE_BIND_INDEX_BUFFER) &&8284ice->state.index_buffer.res) {8285if (res->bo == crocus_resource_bo(ice->state.index_buffer.res))8286pipe_resource_reference(&ice->state.index_buffer.res, NULL);8287}8288/* There is no need to handle these:8289* - PIPE_BIND_COMMAND_ARGS_BUFFER (emitted for every indirect draw)8290* - PIPE_BIND_QUERY_BUFFER (no persistent state references)8291*/82928293if (res->bind_history & PIPE_BIND_STREAM_OUTPUT) {8294/* XXX: be careful about resetting vs appending... */8295for (int i = 0; i < 4; i++) {8296if (ice->state.so_target[i] &&8297(ice->state.so_target[i]->buffer == &res->base.b)) {8298#if GFX_VER == 68299ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_GS;8300#else8301ice->state.dirty |= CROCUS_DIRTY_GEN7_SO_BUFFERS;8302#endif8303}8304}8305}83068307for (int s = MESA_SHADER_VERTEX; s < MESA_SHADER_STAGES; s++) {8308struct crocus_shader_state *shs = &ice->state.shaders[s];8309enum pipe_shader_type p_stage = stage_to_pipe(s);83108311if (!(res->bind_stages & (1 << s)))8312continue;83138314if (res->bind_history & PIPE_BIND_CONSTANT_BUFFER) {8315/* Skip constant buffer 0, it's for regular uniforms, not UBOs */8316uint32_t bound_cbufs = shs->bound_cbufs & ~1u;8317while (bound_cbufs) {8318const int i = u_bit_scan(&bound_cbufs);8319struct pipe_constant_buffer *cbuf = &shs->constbufs[i];83208321if (res->bo == crocus_resource_bo(cbuf->buffer)) {8322ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS << s;8323}8324}8325}83268327if (res->bind_history & PIPE_BIND_SHADER_BUFFER) {8328uint32_t bound_ssbos = shs->bound_ssbos;8329while (bound_ssbos) {8330const int i = u_bit_scan(&bound_ssbos);8331struct pipe_shader_buffer *ssbo = &shs->ssbo[i];83328333if (res->bo == crocus_resource_bo(ssbo->buffer)) {8334struct 
pipe_shader_buffer buf = {8335.buffer = &res->base.b,8336.buffer_offset = ssbo->buffer_offset,8337.buffer_size = ssbo->buffer_size,8338};8339crocus_set_shader_buffers(ctx, p_stage, i, 1, &buf,8340(shs->writable_ssbos >> i) & 1);8341}8342}8343}83448345if (res->bind_history & PIPE_BIND_SAMPLER_VIEW) {8346uint32_t bound_sampler_views = shs->bound_sampler_views;8347while (bound_sampler_views) {8348const int i = u_bit_scan(&bound_sampler_views);8349struct crocus_sampler_view *isv = shs->textures[i];8350struct crocus_bo *bo = isv->res->bo;83518352if (res->bo == bo) {8353ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_VS << s;8354}8355}8356}83578358if (res->bind_history & PIPE_BIND_SHADER_IMAGE) {8359uint32_t bound_image_views = shs->bound_image_views;8360while (bound_image_views) {8361const int i = u_bit_scan(&bound_image_views);8362struct crocus_image_view *iv = &shs->image[i];8363struct crocus_bo *bo = crocus_resource_bo(iv->base.resource);83648365if (res->bo == bo)8366ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_VS << s;8367}8368}8369}8370}83718372/* ------------------------------------------------------------------- */83738374static unsigned8375flags_to_post_sync_op(uint32_t flags)8376{8377if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)8378return WriteImmediateData;83798380if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)8381return WritePSDepthCount;83828383if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)8384return WriteTimestamp;83858386return 0;8387}83888389/*8390* Do the given flags have a Post Sync or LRI Post Sync operation?8391*/8392static enum pipe_control_flags8393get_post_sync_flags(enum pipe_control_flags flags)8394{8395flags &= PIPE_CONTROL_WRITE_IMMEDIATE |8396PIPE_CONTROL_WRITE_DEPTH_COUNT |8397PIPE_CONTROL_WRITE_TIMESTAMP |8398PIPE_CONTROL_LRI_POST_SYNC_OP;83998400/* Only one "Post Sync Op" is allowed, and it's mutually exclusive with8401* "LRI Post Sync Operation". 
So more than one bit set would be illegal.8402*/8403assert(util_bitcount(flags) <= 1);84048405return flags;8406}84078408#define IS_COMPUTE_PIPELINE(batch) (batch->name == CROCUS_BATCH_COMPUTE)84098410/**8411* Emit a series of PIPE_CONTROL commands, taking into account any8412* workarounds necessary to actually accomplish the caller's request.8413*8414* Unless otherwise noted, spec quotations in this function come from:8415*8416* Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming8417* Restrictions for PIPE_CONTROL.8418*8419* You should not use this function directly. Use the helpers in8420* crocus_pipe_control.c instead, which may split the pipe control further.8421*/8422static void8423crocus_emit_raw_pipe_control(struct crocus_batch *batch,8424const char *reason,8425uint32_t flags,8426struct crocus_bo *bo,8427uint32_t offset,8428uint64_t imm)8429{8430UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo;8431enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags);8432UNUSED enum pipe_control_flags non_lri_post_sync_flags =8433post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP;84348435/* Recursive PIPE_CONTROL workarounds --------------------------------8436* (http://knowyourmeme.com/memes/xzibit-yo-dawg)8437*8438* We do these first because we want to look at the original operation,8439* rather than any workarounds we set.8440*/84418442/* "Flush Types" workarounds ---------------------------------------------8443* We do these now because they may add post-sync operations or CS stalls.8444*/84458446if (GFX_VER == 6 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {8447/* Hardware workaround: SNB B-Spec says:8448*8449* "[Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush8450* Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is8451* required."8452*/8453crocus_emit_post_sync_nonzero_flush(batch);8454}84558456#if GFX_VER == 88457if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {8458/* Project: BDW, SKL+ 
(stopping at CNL) / Argument: VF Invalidate8459*8460* "'Post Sync Operation' must be enabled to 'Write Immediate Data' or8461* 'Write PS Depth Count' or 'Write Timestamp'."8462*/8463if (!bo) {8464flags |= PIPE_CONTROL_WRITE_IMMEDIATE;8465post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;8466non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;8467bo = batch->ice->workaround_bo;8468offset = batch->ice->workaround_offset;8469}8470}8471#endif84728473#if GFX_VERx10 < 758474if (flags & PIPE_CONTROL_DEPTH_STALL) {8475/* Project: PRE-HSW / Argument: Depth Stall8476*8477* "The following bits must be clear:8478* - Render Target Cache Flush Enable ([12] of DW1)8479* - Depth Cache Flush Enable ([0] of DW1)"8480*/8481assert(!(flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |8482PIPE_CONTROL_DEPTH_CACHE_FLUSH)));8483}8484#endif8485if (GFX_VER >= 6 && (flags & PIPE_CONTROL_DEPTH_STALL)) {8486/* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable):8487*8488* "This bit must be DISABLED for operations other than writing8489* PS_DEPTH_COUNT."8490*8491* This seems like nonsense. An Ivybridge workaround requires us to8492* emit a PIPE_CONTROL with a depth stall and write immediate post-sync8493* operation. Gen8+ requires us to emit depth stalls and depth cache8494* flushes together. 
So, it's hard to imagine this means anything other8495* than "we originally intended this to be used for PS_DEPTH_COUNT".8496*8497* We ignore the supposed restriction and do nothing.8498*/8499}85008501if (GFX_VERx10 < 75 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) {8502/* Project: PRE-HSW / Argument: Depth Cache Flush8503*8504* "Depth Stall must be clear ([13] of DW1)."8505*/8506assert(!(flags & PIPE_CONTROL_DEPTH_STALL));8507}85088509if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |8510PIPE_CONTROL_STALL_AT_SCOREBOARD)) {8511/* From the PIPE_CONTROL instruction table, bit 12 and bit 1:8512*8513* "This bit must be DISABLED for End-of-pipe (Read) fences,8514* PS_DEPTH_COUNT or TIMESTAMP queries."8515*8516* TODO: Implement end-of-pipe checking.8517*/8518assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT |8519PIPE_CONTROL_WRITE_TIMESTAMP)));8520}85218522if (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD) {8523/* From the PIPE_CONTROL instruction table, bit 1:8524*8525* "This bit is ignored if Depth Stall Enable is set.8526* Further, the render cache is not flushed even if Write Cache8527* Flush Enable bit is set."8528*8529* We assert that the caller doesn't do this combination, to try and8530* prevent mistakes. 
It shouldn't hurt the GPU, though.8531*8532* We skip this check on Gen11+ as the "Stall at Pixel Scoreboard"8533* and "Render Target Flush" combo is explicitly required for BTI8534* update workarounds.8535*/8536assert(!(flags & (PIPE_CONTROL_DEPTH_STALL |8537PIPE_CONTROL_RENDER_TARGET_FLUSH)));8538}85398540/* PIPE_CONTROL page workarounds ------------------------------------- */85418542if (GFX_VER >= 7 && (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) {8543/* From the PIPE_CONTROL page itself:8544*8545* "IVB, HSW, BDW8546* Restriction: Pipe_control with CS-stall bit set must be issued8547* before a pipe-control command that has the State Cache8548* Invalidate bit set."8549*/8550flags |= PIPE_CONTROL_CS_STALL;8551}85528553if ((GFX_VERx10 == 75)) {8554/* From the PIPE_CONTROL page itself:8555*8556* "HSW - Programming Note: PIPECONTROL with RO Cache Invalidation:8557* Prior to programming a PIPECONTROL command with any of the RO8558* cache invalidation bit set, program a PIPECONTROL flush command8559* with “CS stall” bit and “HDC Flush” bit set."8560*8561* TODO: Actually implement this. What's an HDC Flush?8562*/8563}85648565if (flags & PIPE_CONTROL_FLUSH_LLC) {8566/* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC):8567*8568* "Project: ALL8569* SW must always program Post-Sync Operation to "Write Immediate8570* Data" when Flush LLC is set."8571*8572* For now, we just require the caller to do it.8573*/8574assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE);8575}85768577/* "Post-Sync Operation" workarounds -------------------------------- */85788579/* Project: All / Argument: Global Snapshot Count Reset [19]8580*8581* "This bit must not be exercised on any product.8582* Requires stall bit ([20] of DW1) set."8583*8584* We don't use this, so we just assert that it isn't used. 
The8585* PIPE_CONTROL instruction page indicates that they intended this8586* as a debug feature and don't think it is useful in production,8587* but it may actually be usable, should we ever want to.8588*/8589assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0);85908591if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR |8592PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) {8593/* Project: All / Arguments:8594*8595* - Generic Media State Clear [16]8596* - Indirect State Pointers Disable [16]8597*8598* "Requires stall bit ([20] of DW1) set."8599*8600* Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media8601* State Clear) says:8602*8603* "PIPECONTROL command with “Command Streamer Stall Enable” must be8604* programmed prior to programming a PIPECONTROL command with "Media8605* State Clear" set in GPGPU mode of operation"8606*8607* This is a subset of the earlier rule, so there's nothing to do.8608*/8609flags |= PIPE_CONTROL_CS_STALL;8610}86118612if (flags & PIPE_CONTROL_STORE_DATA_INDEX) {8613/* Project: All / Argument: Store Data Index8614*8615* "Post-Sync Operation ([15:14] of DW1) must be set to something other8616* than '0'."8617*8618* For now, we just assert that the caller does this. 
We might want to8619* automatically add a write to the workaround BO...8620*/8621assert(non_lri_post_sync_flags != 0);8622}86238624if (flags & PIPE_CONTROL_SYNC_GFDT) {8625/* Project: All / Argument: Sync GFDT8626*8627* "Post-Sync Operation ([15:14] of DW1) must be set to something other8628* than '0' or 0x2520[13] must be set."8629*8630* For now, we just assert that the caller does this.8631*/8632assert(non_lri_post_sync_flags != 0);8633}86348635if (GFX_VER >= 6 && GFX_VER < 8 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {8636/* Project: SNB, IVB, HSW / Argument: TLB inv8637*8638* "{All SKUs}{All Steppings}: Post-Sync Operation ([15:14] of DW1)8639* must be set to something other than '0'."8640*8641* For now, we just assert that the caller does this.8642*/8643assert(non_lri_post_sync_flags != 0);8644}86458646if (GFX_VER >= 7 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) {8647/* Project: IVB+ / Argument: TLB inv8648*8649* "Requires stall bit ([20] of DW1) set."8650*8651* Also, from the PIPE_CONTROL instruction table:8652*8653* "Project: SKL+8654* Post Sync Operation or CS stall must be set to ensure a TLB8655* invalidation occurs. 
Otherwise no cycle will occur to the TLB8656* cache to invalidate."8657*8658* This is not a subset of the earlier rule, so there's nothing to do.8659*/8660flags |= PIPE_CONTROL_CS_STALL;8661}8662#if GFX_VER == 88663if (IS_COMPUTE_PIPELINE(batch)) {8664if (post_sync_flags ||8665(flags & (PIPE_CONTROL_NOTIFY_ENABLE |8666PIPE_CONTROL_DEPTH_STALL |8667PIPE_CONTROL_RENDER_TARGET_FLUSH |8668PIPE_CONTROL_DEPTH_CACHE_FLUSH |8669PIPE_CONTROL_DATA_CACHE_FLUSH))) {8670/* Project: BDW / Arguments:8671*8672* - LRI Post Sync Operation [23]8673* - Post Sync Op [15:14]8674* - Notify En [8]8675* - Depth Stall [13]8676* - Render Target Cache Flush [12]8677* - Depth Cache Flush [0]8678* - DC Flush Enable [5]8679*8680* "Requires stall bit ([20] of DW) set for all GPGPU and Media8681* Workloads."8682*8683* (The docs have separate table rows for each bit, with essentially8684* the same workaround text. We've combined them here.)8685*/8686flags |= PIPE_CONTROL_CS_STALL;86878688/* Also, from the PIPE_CONTROL instruction table, bit 20:8689*8690* "Project: BDW8691* This bit must be always set when PIPE_CONTROL command is8692* programmed by GPGPU and MEDIA workloads, except for the cases8693* when only Read Only Cache Invalidation bits are set (State8694* Cache Invalidation Enable, Instruction cache Invalidation8695* Enable, Texture Cache Invalidation Enable, Constant Cache8696* Invalidation Enable). This is to WA FFDOP CG issue, this WA8697* need not implemented when FF_DOP_CG is disable via "Fixed8698* Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register."8699*8700* It sounds like we could avoid CS stalls in some cases, but we8701* don't currently bother. 
This list isn't exactly the list above,8702* either...8703*/8704}8705}8706#endif8707/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:8708*8709* "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with8710* only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."8711*8712* Note that the kernel does CS stalls between batches, so we only need8713* to count them within a batch. We currently naively count every 4, and8714* don't skip the ones with only read-cache-invalidate bits set. This8715* may or may not be a problem...8716*/8717if (GFX_VER == 7 && !(GFX_VERx10 == 75)) {8718if (flags & PIPE_CONTROL_CS_STALL) {8719/* If we're doing a CS stall, reset the counter and carry on. */8720batch->pipe_controls_since_last_cs_stall = 0;8721}87228723/* If this is the fourth pipe control without a CS stall, do one now. */8724if (++batch->pipe_controls_since_last_cs_stall == 4) {8725batch->pipe_controls_since_last_cs_stall = 0;8726flags |= PIPE_CONTROL_CS_STALL;8727}8728}87298730/* "Stall" workarounds ----------------------------------------------8731* These have to come after the earlier ones because we may have added8732* some additional CS stalls above.8733*/87348735if (flags & PIPE_CONTROL_CS_STALL) {8736/* Project: PRE-SKL, VLV, CHV8737*8738* "[All Stepping][All SKUs]:8739*8740* One of the following must also be set:8741*8742* - Render Target Cache Flush Enable ([12] of DW1)8743* - Depth Cache Flush Enable ([0] of DW1)8744* - Stall at Pixel Scoreboard ([1] of DW1)8745* - Depth Stall ([13] of DW1)8746* - Post-Sync Operation ([13] of DW1)8747* - DC Flush Enable ([5] of DW1)"8748*8749* If we don't already have one of those bits set, we choose to add8750* "Stall at Pixel Scoreboard". Some of the other bits require a8751* CS stall as a workaround (see above), which would send us into8752* an infinite recursion of PIPE_CONTROLs. 
"Stall at Pixel Scoreboard"8753* appears to be safe, so we choose that.8754*/8755const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |8756PIPE_CONTROL_DEPTH_CACHE_FLUSH |8757PIPE_CONTROL_WRITE_IMMEDIATE |8758PIPE_CONTROL_WRITE_DEPTH_COUNT |8759PIPE_CONTROL_WRITE_TIMESTAMP |8760PIPE_CONTROL_STALL_AT_SCOREBOARD |8761PIPE_CONTROL_DEPTH_STALL |8762PIPE_CONTROL_DATA_CACHE_FLUSH;8763if (!(flags & wa_bits))8764flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;8765}87668767/* Emit --------------------------------------------------------------- */87688769if (INTEL_DEBUG & DEBUG_PIPE_CONTROL) {8770fprintf(stderr,8771" PC [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%"PRIx64"]: %s\n",8772(flags & PIPE_CONTROL_FLUSH_ENABLE) ? "PipeCon " : "",8773(flags & PIPE_CONTROL_CS_STALL) ? "CS " : "",8774(flags & PIPE_CONTROL_STALL_AT_SCOREBOARD) ? "Scoreboard " : "",8775(flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) ? "VF " : "",8776(flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) ? "RT " : "",8777(flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE) ? "Const " : "",8778(flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE) ? "TC " : "",8779(flags & PIPE_CONTROL_DATA_CACHE_FLUSH) ? "DC " : "",8780(flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH) ? "ZFlush " : "",8781(flags & PIPE_CONTROL_DEPTH_STALL) ? "ZStall " : "",8782(flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE) ? "State " : "",8783(flags & PIPE_CONTROL_TLB_INVALIDATE) ? "TLB " : "",8784(flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE) ? "Inst " : "",8785(flags & PIPE_CONTROL_MEDIA_STATE_CLEAR) ? "MediaClear " : "",8786(flags & PIPE_CONTROL_NOTIFY_ENABLE) ? "Notify " : "",8787(flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) ?8788"SnapRes" : "",8789(flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE) ?8790"ISPDis" : "",8791(flags & PIPE_CONTROL_WRITE_IMMEDIATE) ? "WriteImm " : "",8792(flags & PIPE_CONTROL_WRITE_DEPTH_COUNT) ? "WriteZCount " : "",8793(flags & PIPE_CONTROL_WRITE_TIMESTAMP) ? 
"WriteTimestamp " : "",8794imm, reason);8795}87968797crocus_emit_cmd(batch, GENX(PIPE_CONTROL), pc) {8798#if GFX_VER >= 78799pc.LRIPostSyncOperation = NoLRIOperation;8800pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;8801pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;8802#endif8803#if GFX_VER >= 68804pc.StoreDataIndex = 0;8805pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;8806pc.GlobalSnapshotCountReset =8807flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;8808pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;8809pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;8810pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;8811pc.RenderTargetCacheFlushEnable =8812flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;8813pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;8814pc.StateCacheInvalidationEnable =8815flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;8816pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;8817pc.ConstantCacheInvalidationEnable =8818flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;8819#else8820pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;8821#endif8822pc.PostSyncOperation = flags_to_post_sync_op(flags);8823pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;8824pc.InstructionCacheInvalidateEnable =8825flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;8826pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;8827#if GFX_VER >= 5 || GFX_VERx10 == 458828pc.IndirectStatePointersDisable =8829flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;8830#endif8831#if GFX_VER >= 68832pc.TextureCacheInvalidationEnable =8833flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;8834#elif GFX_VER == 5 || GFX_VERx10 == 458835pc.TextureCacheFlushEnable =8836flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;8837#endif8838pc.Address = ggtt_bo(bo, offset);8839if (GFX_VER < 7 && bo)8840pc.DestinationAddressType = DAT_GGTT;8841pc.ImmediateData = imm;8842}8843}88448845#if GFX_VER 
== 68846void8847genX(crocus_upload_urb)(struct crocus_batch *batch,8848unsigned vs_size,8849bool gs_present,8850unsigned gs_size)8851{8852struct crocus_context *ice = batch->ice;8853int nr_vs_entries, nr_gs_entries;8854int total_urb_size = ice->urb.size * 1024; /* in bytes */8855const struct intel_device_info *devinfo = &batch->screen->devinfo;88568857/* Calculate how many entries fit in each stage's section of the URB */8858if (gs_present) {8859nr_vs_entries = (total_urb_size/2) / (vs_size * 128);8860nr_gs_entries = (total_urb_size/2) / (gs_size * 128);8861} else {8862nr_vs_entries = total_urb_size / (vs_size * 128);8863nr_gs_entries = 0;8864}88658866/* Then clamp to the maximum allowed by the hardware */8867if (nr_vs_entries > devinfo->urb.max_entries[MESA_SHADER_VERTEX])8868nr_vs_entries = devinfo->urb.max_entries[MESA_SHADER_VERTEX];88698870if (nr_gs_entries > devinfo->urb.max_entries[MESA_SHADER_GEOMETRY])8871nr_gs_entries = devinfo->urb.max_entries[MESA_SHADER_GEOMETRY];88728873/* Finally, both must be a multiple of 4 (see 3DSTATE_URB in the PRM). 
*/8874ice->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4);8875ice->urb.nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, 4);88768877assert(ice->urb.nr_vs_entries >=8878devinfo->urb.min_entries[MESA_SHADER_VERTEX]);8879assert(ice->urb.nr_vs_entries % 4 == 0);8880assert(ice->urb.nr_gs_entries % 4 == 0);8881assert(vs_size <= 5);8882assert(gs_size <= 5);88838884crocus_emit_cmd(batch, GENX(3DSTATE_URB), urb) {8885urb.VSNumberofURBEntries = ice->urb.nr_vs_entries;8886urb.VSURBEntryAllocationSize = vs_size - 1;88878888urb.GSNumberofURBEntries = ice->urb.nr_gs_entries;8889urb.GSURBEntryAllocationSize = gs_size - 1;8890};8891/* From the PRM Volume 2 part 1, section 1.4.7:8892*8893* Because of a urb corruption caused by allocating a previous gsunit’s8894* urb entry to vsunit software is required to send a "GS NULL8895* Fence"(Send URB fence with VS URB size == 1 and GS URB size == 0) plus8896* a dummy DRAW call before any case where VS will be taking over GS URB8897* space.8898*8899* It is not clear exactly what this means ("URB fence" is a command that8900* doesn't exist on Gen6). 
So for now we just do a full pipeline flush as8901* a workaround.8902*/8903if (ice->urb.gs_present && !gs_present)8904crocus_emit_mi_flush(batch);8905ice->urb.gs_present = gs_present;8906}8907#endif89088909static void8910crocus_lost_genx_state(struct crocus_context *ice, struct crocus_batch *batch)8911{8912}89138914static void8915crocus_emit_mi_report_perf_count(struct crocus_batch *batch,8916struct crocus_bo *bo,8917uint32_t offset_in_bytes,8918uint32_t report_id)8919{8920#if GFX_VER >= 78921crocus_emit_cmd(batch, GENX(MI_REPORT_PERF_COUNT), mi_rpc) {8922mi_rpc.MemoryAddress = rw_bo(bo, offset_in_bytes);8923mi_rpc.ReportID = report_id;8924}8925#endif8926}89278928/**8929* From the PRM, Volume 2a:8930*8931* "Indirect State Pointers Disable8932*8933* At the completion of the post-sync operation associated with this pipe8934* control packet, the indirect state pointers in the hardware are8935* considered invalid; the indirect pointers are not saved in the context.8936* If any new indirect state commands are executed in the command stream8937* while the pipe control is pending, the new indirect state commands are8938* preserved.8939*8940* [DevIVB+]: Using Invalidate State Pointer (ISP) only inhibits context8941* restoring of Push Constant (3DSTATE_CONSTANT_*) commands. Push Constant8942* commands are only considered as Indirect State Pointers. Once ISP is8943* issued in a context, SW must initialize by programming push constant8944* commands for all the shaders (at least to zero length) before attempting8945* any rendering operation for the same context."8946*8947* 3DSTATE_CONSTANT_* packets are restored during a context restore,8948* even though they point to a BO that has been already unreferenced at8949* the end of the previous batch buffer. This has been fine so far since8950* we are protected by these scratch page (every address not covered by8951* a BO should be pointing to the scratch page). 
But on CNL, it is8952* causing a GPU hang during context restore at the 3DSTATE_CONSTANT_*8953* instruction.8954*8955* The flag "Indirect State Pointers Disable" in PIPE_CONTROL tells the8956* hardware to ignore previous 3DSTATE_CONSTANT_* packets during a8957* context restore, so the mentioned hang doesn't happen. However,8958* software must program push constant commands for all stages prior to8959* rendering anything, so we flag them as dirty.8960*8961* Finally, we also make sure to stall at pixel scoreboard to make sure the8962* constants have been loaded into the EUs prior to disable the push constants8963* so that it doesn't hang a previous 3DPRIMITIVE.8964*/8965#if GFX_VER >= 78966static void8967gen7_emit_isp_disable(struct crocus_batch *batch)8968{8969crocus_emit_raw_pipe_control(batch, "isp disable",8970PIPE_CONTROL_STALL_AT_SCOREBOARD |8971PIPE_CONTROL_CS_STALL,8972NULL, 0, 0);8973crocus_emit_raw_pipe_control(batch, "isp disable",8974PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE |8975PIPE_CONTROL_CS_STALL,8976NULL, 0, 0);89778978struct crocus_context *ice = batch->ice;8979ice->state.stage_dirty |= (CROCUS_STAGE_DIRTY_CONSTANTS_VS |8980CROCUS_STAGE_DIRTY_CONSTANTS_TCS |8981CROCUS_STAGE_DIRTY_CONSTANTS_TES |8982CROCUS_STAGE_DIRTY_CONSTANTS_GS |8983CROCUS_STAGE_DIRTY_CONSTANTS_FS);8984}8985#endif89868987#if GFX_VER >= 78988static void8989crocus_state_finish_batch(struct crocus_batch *batch)8990{8991#if GFX_VERx10 == 758992if (batch->name == CROCUS_BATCH_RENDER) {8993crocus_emit_mi_flush(batch);8994crocus_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {8995ptr.ColorCalcStatePointer = batch->ice->shaders.cc_offset;8996}89978998crocus_emit_pipe_control_flush(batch, "hsw wa", PIPE_CONTROL_RENDER_TARGET_FLUSH |8999PIPE_CONTROL_CS_STALL);9000}9001#endif9002gen7_emit_isp_disable(batch);9003}9004#endif90059006static void9007crocus_batch_reset_dirty(struct crocus_batch *batch)9008{9009/* unreference any index buffer so it get reemitted. 
*/9010pipe_resource_reference(&batch->ice->state.index_buffer.res, NULL);90119012/* for GEN4/5 need to reemit anything that ends up in the state batch that points to anything in the state batch9013* as the old state batch won't still be available.9014*/9015batch->ice->state.dirty |= CROCUS_DIRTY_DEPTH_BUFFER |9016CROCUS_DIRTY_COLOR_CALC_STATE;90179018batch->ice->state.dirty |= CROCUS_DIRTY_VERTEX_ELEMENTS | CROCUS_DIRTY_VERTEX_BUFFERS;90199020batch->ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_BINDINGS;9021batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS;9022batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_TES;9023batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_TCS;9024batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_GS;9025batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_PS;9026batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS;90279028batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_VS;9029batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_TES;9030batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_TCS;9031batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_GS;9032batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_FS;9033batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_CS;90349035batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_VS;9036batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_GS;9037batch->ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CS;9038batch->ice->state.dirty |= CROCUS_DIRTY_CC_VIEWPORT | CROCUS_DIRTY_SF_CL_VIEWPORT;90399040#if GFX_VER >= 69041/* SCISSOR_STATE */9042batch->ice->state.dirty |= CROCUS_DIRTY_GEN6_BLEND_STATE;9043batch->ice->state.dirty |= CROCUS_DIRTY_GEN6_SCISSOR_RECT;9044batch->ice->state.dirty |= CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL;90459046#endif9047#if GFX_VER <= 59048/* dirty the SF state on gen4/5 */9049batch->ice->state.dirty |= 
CROCUS_DIRTY_RASTER;9050batch->ice->state.dirty |= CROCUS_DIRTY_GEN4_CURBE;9051batch->ice->state.dirty |= CROCUS_DIRTY_CLIP;9052batch->ice->state.dirty |= CROCUS_DIRTY_WM;9053#endif9054#if GFX_VER >= 79055/* Streamout dirty */9056batch->ice->state.dirty |= CROCUS_DIRTY_STREAMOUT;9057batch->ice->state.dirty |= CROCUS_DIRTY_SO_DECL_LIST;9058batch->ice->state.dirty |= CROCUS_DIRTY_GEN7_SO_BUFFERS;9059#endif9060}90619062#if GFX_VERx10 == 759063struct pipe_rasterizer_state *crocus_get_rast_state(struct crocus_context *ice)9064{9065return &ice->state.cso_rast->cso;9066}9067#endif90689069#if GFX_VER >= 69070static void update_so_strides(struct crocus_context *ice,9071uint16_t *strides)9072{9073for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {9074struct crocus_stream_output_target *so = (void *)ice->state.so_target[i];9075if (so)9076so->stride = strides[i] * sizeof(uint32_t);9077}9078}9079#endif90809081static void crocus_fill_clamp_mask(const struct crocus_sampler_state *samp,9082int s,9083uint32_t *clamp_mask)9084{9085#if GFX_VER < 89086if (samp->pstate.min_img_filter != PIPE_TEX_FILTER_NEAREST &&9087samp->pstate.mag_img_filter != PIPE_TEX_FILTER_NEAREST) {9088if (samp->pstate.wrap_s == PIPE_TEX_WRAP_CLAMP)9089clamp_mask[0] |= (1 << s);9090if (samp->pstate.wrap_t == PIPE_TEX_WRAP_CLAMP)9091clamp_mask[1] |= (1 << s);9092if (samp->pstate.wrap_r == PIPE_TEX_WRAP_CLAMP)9093clamp_mask[2] |= (1 << s);9094}9095#endif9096}90979098static void9099crocus_set_frontend_noop(struct pipe_context *ctx, bool enable)9100{9101struct crocus_context *ice = (struct crocus_context *) ctx;91029103if (crocus_batch_prepare_noop(&ice->batches[CROCUS_BATCH_RENDER], enable)) {9104ice->state.dirty |= CROCUS_ALL_DIRTY_FOR_RENDER;9105ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_FOR_RENDER;9106}91079108if (ice->batch_count == 1)9109return;91109111if (crocus_batch_prepare_noop(&ice->batches[CROCUS_BATCH_COMPUTE], enable)) {9112ice->state.dirty |= CROCUS_ALL_DIRTY_FOR_COMPUTE;9113ice->state.stage_dirty 
|= CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE;9114}9115}91169117void9118genX(crocus_init_screen_state)(struct crocus_screen *screen)9119{9120assert(screen->devinfo.verx10 == GFX_VERx10);9121screen->vtbl.destroy_state = crocus_destroy_state;9122screen->vtbl.init_render_context = crocus_init_render_context;9123screen->vtbl.upload_render_state = crocus_upload_render_state;9124#if GFX_VER >= 79125screen->vtbl.init_compute_context = crocus_init_compute_context;9126screen->vtbl.upload_compute_state = crocus_upload_compute_state;9127#endif9128screen->vtbl.emit_raw_pipe_control = crocus_emit_raw_pipe_control;9129screen->vtbl.emit_mi_report_perf_count = crocus_emit_mi_report_perf_count;9130screen->vtbl.rebind_buffer = crocus_rebind_buffer;9131#if GFX_VERx10 >= 759132screen->vtbl.load_register_reg32 = crocus_load_register_reg32;9133screen->vtbl.load_register_reg64 = crocus_load_register_reg64;9134screen->vtbl.load_register_imm32 = crocus_load_register_imm32;9135screen->vtbl.load_register_imm64 = crocus_load_register_imm64;9136screen->vtbl.store_data_imm32 = crocus_store_data_imm32;9137screen->vtbl.store_data_imm64 = crocus_store_data_imm64;9138#endif9139#if GFX_VER >= 79140screen->vtbl.load_register_mem32 = crocus_load_register_mem32;9141screen->vtbl.load_register_mem64 = crocus_load_register_mem64;9142screen->vtbl.copy_mem_mem = crocus_copy_mem_mem;9143screen->vtbl.create_so_decl_list = crocus_create_so_decl_list;9144#endif9145screen->vtbl.update_surface_base_address = crocus_update_surface_base_address;9146#if GFX_VER >= 69147screen->vtbl.store_register_mem32 = crocus_store_register_mem32;9148screen->vtbl.store_register_mem64 = crocus_store_register_mem64;9149#endif9150screen->vtbl.populate_vs_key = crocus_populate_vs_key;9151screen->vtbl.populate_tcs_key = crocus_populate_tcs_key;9152screen->vtbl.populate_tes_key = crocus_populate_tes_key;9153screen->vtbl.populate_gs_key = crocus_populate_gs_key;9154screen->vtbl.populate_fs_key = 
crocus_populate_fs_key;9155screen->vtbl.populate_cs_key = crocus_populate_cs_key;9156screen->vtbl.lost_genx_state = crocus_lost_genx_state;9157#if GFX_VER >= 79158screen->vtbl.finish_batch = crocus_state_finish_batch;9159#endif9160#if GFX_VER <= 59161screen->vtbl.upload_urb_fence = crocus_upload_urb_fence;9162screen->vtbl.calculate_urb_fence = crocus_calculate_urb_fence;9163#endif9164screen->vtbl.fill_clamp_mask = crocus_fill_clamp_mask;9165screen->vtbl.batch_reset_dirty = crocus_batch_reset_dirty;9166screen->vtbl.translate_prim_type = translate_prim_type;9167#if GFX_VER >= 69168screen->vtbl.update_so_strides = update_so_strides;9169screen->vtbl.get_so_offset = crocus_get_so_offset;9170#endif91719172genX(crocus_init_blt)(screen);9173}91749175void9176genX(crocus_init_state)(struct crocus_context *ice)9177{9178struct pipe_context *ctx = &ice->ctx;91799180ctx->create_blend_state = crocus_create_blend_state;9181ctx->create_depth_stencil_alpha_state = crocus_create_zsa_state;9182ctx->create_rasterizer_state = crocus_create_rasterizer_state;9183ctx->create_sampler_state = crocus_create_sampler_state;9184ctx->create_sampler_view = crocus_create_sampler_view;9185ctx->create_surface = crocus_create_surface;9186ctx->create_vertex_elements_state = crocus_create_vertex_elements;9187ctx->bind_blend_state = crocus_bind_blend_state;9188ctx->bind_depth_stencil_alpha_state = crocus_bind_zsa_state;9189ctx->bind_sampler_states = crocus_bind_sampler_states;9190ctx->bind_rasterizer_state = crocus_bind_rasterizer_state;9191ctx->bind_vertex_elements_state = crocus_bind_vertex_elements_state;9192ctx->delete_blend_state = crocus_delete_state;9193ctx->delete_depth_stencil_alpha_state = crocus_delete_state;9194ctx->delete_rasterizer_state = crocus_delete_state;9195ctx->delete_sampler_state = crocus_delete_state;9196ctx->delete_vertex_elements_state = crocus_delete_state;9197ctx->set_blend_color = crocus_set_blend_color;9198ctx->set_clip_state = 
crocus_set_clip_state;9199ctx->set_constant_buffer = crocus_set_constant_buffer;9200ctx->set_shader_buffers = crocus_set_shader_buffers;9201ctx->set_shader_images = crocus_set_shader_images;9202ctx->set_sampler_views = crocus_set_sampler_views;9203ctx->set_tess_state = crocus_set_tess_state;9204ctx->set_framebuffer_state = crocus_set_framebuffer_state;9205ctx->set_polygon_stipple = crocus_set_polygon_stipple;9206ctx->set_sample_mask = crocus_set_sample_mask;9207ctx->set_scissor_states = crocus_set_scissor_states;9208ctx->set_stencil_ref = crocus_set_stencil_ref;9209ctx->set_vertex_buffers = crocus_set_vertex_buffers;9210ctx->set_viewport_states = crocus_set_viewport_states;9211ctx->sampler_view_destroy = crocus_sampler_view_destroy;9212ctx->surface_destroy = crocus_surface_destroy;9213ctx->draw_vbo = crocus_draw_vbo;9214ctx->launch_grid = crocus_launch_grid;92159216ctx->set_frontend_noop = crocus_set_frontend_noop;92179218#if GFX_VER >= 69219ctx->create_stream_output_target = crocus_create_stream_output_target;9220ctx->stream_output_target_destroy = crocus_stream_output_target_destroy;9221ctx->set_stream_output_targets = crocus_set_stream_output_targets;9222#endif92239224ice->state.dirty = ~0ull;9225ice->state.stage_dirty = ~0ull;92269227ice->state.statistics_counters_enabled = true;92289229ice->state.sample_mask = 0xff;9230ice->state.num_viewports = 1;9231ice->state.prim_mode = PIPE_PRIM_MAX;9232ice->state.genx = calloc(1, sizeof(struct crocus_genx_state));9233ice->draw.derived_params.drawid = -1;92349235/* Default all scissor rectangles to be empty regions. */9236for (int i = 0; i < CROCUS_MAX_VIEWPORTS; i++) {9237ice->state.scissors[i] = (struct pipe_scissor_state) {9238.minx = 1, .maxx = 0, .miny = 1, .maxy = 0,9239};9240}9241}924292439244