Path: blob/21.2-virgl/src/gallium/drivers/crocus/crocus_program.c
4570 views
/*1* Copyright © 2017 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included11* in all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS14* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER17* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING18* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER19* DEALINGS IN THE SOFTWARE.20*/2122/**23* @file crocus_program.c24*25* This file contains the driver interface for compiling shaders.26*27* See crocus_program_cache.c for the in-memory program cache where the28* compiled shaders are stored.29*/3031#include <stdio.h>32#include <errno.h>33#include "pipe/p_defines.h"34#include "pipe/p_state.h"35#include "pipe/p_context.h"36#include "pipe/p_screen.h"37#include "util/u_atomic.h"38#include "util/u_upload_mgr.h"39#include "util/debug.h"40#include "util/u_prim.h"41#include "compiler/nir/nir.h"42#include "compiler/nir/nir_builder.h"43#include "compiler/nir/nir_serialize.h"44#include "intel/compiler/brw_compiler.h"45#include "intel/compiler/brw_nir.h"46#include "crocus_context.h"47#include "nir/tgsi_to_nir.h"4849#define KEY_INIT_NO_ID() \50.base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \51.base.tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688, \52.base.tex.compressed_multisample_layout_mask = ~053#define KEY_INIT() .base.program_string_id = ish->program_id, KEY_INIT_NO_ID()5455static void56crocus_sanitize_tex_key(struct brw_sampler_prog_key_data *key)57{58key->gather_channel_quirk_mask = 0;59for (unsigned s = 0; s < MAX_SAMPLERS; s++) {60key->swizzles[s] = SWIZZLE_NOOP;61key->gfx6_gather_wa[s] = 0;62}63}6465static uint32_t66crocus_get_texture_swizzle(const struct crocus_context *ice,67const struct crocus_sampler_view *t)68{69uint32_t swiz = 0;7071for (int i = 0; i < 4; i++) {72swiz |= t->swizzle[i] << (i * 3);73}74return swiz;75}7677static inline bool can_push_ubo(const struct intel_device_info *devinfo)78{79/* push works for everyone except SNB at the moment */80return devinfo->ver != 6;81}8283static uint8_t84gfx6_gather_workaround(enum pipe_format pformat)85{86switch (pformat) {87case PIPE_FORMAT_R8_SINT: return WA_SIGN | WA_8BIT;88case PIPE_FORMAT_R8_UINT: return WA_8BIT;89case PIPE_FORMAT_R16_SINT: return WA_SIGN | WA_16BIT;90case PIPE_FORMAT_R16_UINT: return WA_16BIT;91default:92/* Note that even though PIPE_FORMAT_R32_SINT and93* PIPE_FORMAT_R32_UINThave format overrides in94* the surface state, there is no shader w/a required.95*/96return 0;97}98}99100static const unsigned crocus_gfx6_swizzle_for_offset[4] = {101BRW_SWIZZLE4(0, 1, 2, 3),102BRW_SWIZZLE4(1, 2, 3, 3),103BRW_SWIZZLE4(2, 3, 3, 3),104BRW_SWIZZLE4(3, 3, 3, 3)105};106107static void108gfx6_gs_xfb_setup(const struct pipe_stream_output_info *so_info,109struct brw_gs_prog_data *gs_prog_data)110{111/* Make sure that the VUE slots won't overflow the unsigned chars in112* prog_data->transform_feedback_bindings[].113*/114STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256);115116/* Make sure that we don't need more binding table entries than we've117* set aside for use in transform feedback. (We shouldn't, since we118* set aside enough binding table entries to have one per component).119*/120assert(so_info->num_outputs <= BRW_MAX_SOL_BINDINGS);121122gs_prog_data->num_transform_feedback_bindings = so_info->num_outputs;123for (unsigned i = 0; i < so_info->num_outputs; i++) {124gs_prog_data->transform_feedback_bindings[i] =125so_info->output[i].register_index;126gs_prog_data->transform_feedback_swizzles[i] =127crocus_gfx6_swizzle_for_offset[so_info->output[i].start_component];128}129}130131static void132gfx6_ff_gs_xfb_setup(const struct pipe_stream_output_info *so_info,133struct brw_ff_gs_prog_key *key)134{135key->num_transform_feedback_bindings = so_info->num_outputs;136for (unsigned i = 0; i < so_info->num_outputs; i++) {137key->transform_feedback_bindings[i] =138so_info->output[i].register_index;139key->transform_feedback_swizzles[i] =140crocus_gfx6_swizzle_for_offset[so_info->output[i].start_component];141}142}143144static void145crocus_populate_sampler_prog_key_data(struct crocus_context *ice,146const struct intel_device_info *devinfo,147gl_shader_stage stage,148struct crocus_uncompiled_shader *ish,149bool uses_texture_gather,150struct brw_sampler_prog_key_data *key)151{152struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;153uint32_t mask = ish->nir->info.textures_used[0];154155while (mask) {156const int s = u_bit_scan(&mask);157158struct crocus_sampler_view *texture = ice->state.shaders[stage].textures[s];159key->swizzles[s] = SWIZZLE_NOOP;160key->scale_factors[s] = 0.0f;161162if (!texture)163continue;164if (texture->base.target == PIPE_BUFFER)165continue;166if (devinfo->verx10 < 75) {167key->swizzles[s] = crocus_get_texture_swizzle(ice, texture);168}169170screen->vtbl.fill_clamp_mask(ice->state.shaders[stage].samplers[s], s, key->gl_clamp_mask);171172/* gather4 for RG32* is broken in multiple ways on Gen7. */173if (devinfo->ver == 7 && uses_texture_gather) {174switch (texture->base.format) {175case PIPE_FORMAT_R32G32_UINT:176case PIPE_FORMAT_R32G32_SINT: {177/* We have to override the format to R32G32_FLOAT_LD.178* This means that SCS_ALPHA and SCS_ONE will return 0x3f8179* (1.0) rather than integer 1. This needs shader hacks.180*181* On Ivybridge, we whack W (alpha) to ONE in our key's182* swizzle. On Haswell, we look at the original texture183* swizzle, and use XYZW with channels overridden to ONE,184* leaving normal texture swizzling to SCS.185*/186unsigned src_swizzle = key->swizzles[s];187for (int i = 0; i < 4; i++) {188unsigned src_comp = GET_SWZ(src_swizzle, i);189if (src_comp == SWIZZLE_ONE || src_comp == SWIZZLE_W) {190key->swizzles[i] &= ~(0x7 << (3 * i));191key->swizzles[i] |= SWIZZLE_ONE << (3 * i);192}193}194}195FALLTHROUGH;196case PIPE_FORMAT_R32G32_FLOAT:197/* The channel select for green doesn't work - we have to198* request blue. Haswell can use SCS for this, but Ivybridge199* needs a shader workaround.200*/201if (devinfo->verx10 < 75)202key->gather_channel_quirk_mask |= 1 << s;203break;204default:205break;206}207}208if (devinfo->ver == 6 && uses_texture_gather) {209key->gfx6_gather_wa[s] = gfx6_gather_workaround(texture->base.format);210}211}212}213214static void215crocus_lower_swizzles(struct nir_shader *nir,216const struct brw_sampler_prog_key_data *key_tex)217{218struct nir_lower_tex_options tex_options = { 0 };219uint32_t mask = nir->info.textures_used[0];220221while (mask) {222const int s = u_bit_scan(&mask);223224if (key_tex->swizzles[s] == SWIZZLE_NOOP)225continue;226227tex_options.swizzle_result |= (1 << s);228for (unsigned c = 0; c < 4; c++)229tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c);230}231if (tex_options.swizzle_result)232nir_lower_tex(nir, &tex_options);233}234235static unsigned236get_new_program_id(struct crocus_screen *screen)237{238return p_atomic_inc_return(&screen->program_id);239}240241static nir_ssa_def *242get_aoa_deref_offset(nir_builder *b,243nir_deref_instr *deref,244unsigned elem_size)245{246unsigned array_size = elem_size;247nir_ssa_def *offset = nir_imm_int(b, 0);248249while (deref->deref_type != nir_deref_type_var) {250assert(deref->deref_type == nir_deref_type_array);251252/* This level's element size is the previous level's array size */253nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);254assert(deref->arr.index.ssa);255offset = nir_iadd(b, offset,256nir_imul(b, index, nir_imm_int(b, array_size)));257258deref = nir_deref_instr_parent(deref);259assert(glsl_type_is_array(deref->type));260array_size *= glsl_get_length(deref->type);261}262263/* Accessing an invalid surface index with the dataport can result in a264* hang. According to the spec "if the index used to select an individual265* element is negative or greater than or equal to the size of the array,266* the results of the operation are undefined but may not lead to267* termination" -- which is one of the possible outcomes of the hang.268* Clamp the index to prevent access outside of the array bounds.269*/270return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));271}272273static void274crocus_lower_storage_image_derefs(nir_shader *nir)275{276nir_function_impl *impl = nir_shader_get_entrypoint(nir);277278nir_builder b;279nir_builder_init(&b, impl);280281nir_foreach_block(block, impl) {282nir_foreach_instr_safe(instr, block) {283if (instr->type != nir_instr_type_intrinsic)284continue;285286nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);287switch (intrin->intrinsic) {288case nir_intrinsic_image_deref_load:289case nir_intrinsic_image_deref_store:290case nir_intrinsic_image_deref_atomic_add:291case nir_intrinsic_image_deref_atomic_imin:292case nir_intrinsic_image_deref_atomic_umin:293case nir_intrinsic_image_deref_atomic_imax:294case nir_intrinsic_image_deref_atomic_umax:295case nir_intrinsic_image_deref_atomic_and:296case nir_intrinsic_image_deref_atomic_or:297case nir_intrinsic_image_deref_atomic_xor:298case nir_intrinsic_image_deref_atomic_exchange:299case nir_intrinsic_image_deref_atomic_comp_swap:300case nir_intrinsic_image_deref_size:301case nir_intrinsic_image_deref_samples:302case nir_intrinsic_image_deref_load_raw_intel:303case nir_intrinsic_image_deref_store_raw_intel: {304nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);305nir_variable *var = nir_deref_instr_get_variable(deref);306307b.cursor = nir_before_instr(&intrin->instr);308nir_ssa_def *index =309nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),310get_aoa_deref_offset(&b, deref, 1));311nir_rewrite_image_intrinsic(intrin, index, false);312break;313}314315default:316break;317}318}319}320}321322// XXX: need unify_interfaces() at link time...323324/**325* Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.326*/327static bool328crocus_fix_edge_flags(nir_shader *nir)329{330if (nir->info.stage != MESA_SHADER_VERTEX) {331nir_shader_preserve_all_metadata(nir);332return false;333}334335nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out,336VARYING_SLOT_EDGE);337if (!var) {338nir_shader_preserve_all_metadata(nir);339return false;340}341342var->data.mode = nir_var_shader_temp;343nir->info.outputs_written &= ~VARYING_BIT_EDGE;344nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;345nir_fixup_deref_modes(nir);346347nir_foreach_function(f, nir) {348if (f->impl) {349nir_metadata_preserve(f->impl, nir_metadata_block_index |350nir_metadata_dominance |351nir_metadata_live_ssa_defs |352nir_metadata_loop_analysis);353} else {354nir_metadata_preserve(f->impl, nir_metadata_all);355}356}357358return true;359}360361/**362* Fix an uncompiled shader's stream output info.363*364* Core Gallium stores output->register_index as a "slot" number, where365* slots are assigned consecutively to all outputs in info->outputs_written.366* This naive packing of outputs doesn't work for us - we too have slots,367* but the layout is defined by the VUE map, which we won't have until we368* compile a specific shader variant. So, we remap these and simply store369* VARYING_SLOT_* in our copy's output->register_index fields.370*371* We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W372* components of our VUE header. See brw_vue_map.c for the layout.373*/374static void375update_so_info(struct pipe_stream_output_info *so_info,376uint64_t outputs_written)377{378uint8_t reverse_map[64] = {};379unsigned slot = 0;380while (outputs_written) {381reverse_map[slot++] = u_bit_scan64(&outputs_written);382}383384for (unsigned i = 0; i < so_info->num_outputs; i++) {385struct pipe_stream_output *output = &so_info->output[i];386387/* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */388output->register_index = reverse_map[output->register_index];389390/* The VUE header contains three scalar fields packed together:391* - gl_PointSize is stored in VARYING_SLOT_PSIZ.w392* - gl_Layer is stored in VARYING_SLOT_PSIZ.y393* - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z394*/395switch (output->register_index) {396case VARYING_SLOT_LAYER:397assert(output->num_components == 1);398output->register_index = VARYING_SLOT_PSIZ;399output->start_component = 1;400break;401case VARYING_SLOT_VIEWPORT:402assert(output->num_components == 1);403output->register_index = VARYING_SLOT_PSIZ;404output->start_component = 2;405break;406case VARYING_SLOT_PSIZ:407assert(output->num_components == 1);408output->start_component = 3;409break;410}411412//info->outputs_written |= 1ull << output->register_index;413}414}415416static void417setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,418unsigned offset, unsigned n)419{420assert(offset % sizeof(uint32_t) == 0);421422for (unsigned i = 0; i < n; ++i)423sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);424425for (unsigned i = n; i < 4; ++i)426sysvals[i] = BRW_PARAM_BUILTIN_ZERO;427}428429/**430* Associate NIR uniform variables with the prog_data->param[] mechanism431* used by the backend. Also, decide which UBOs we'd like to push in an432* ideal situation (though the backend can reduce this).433*/434static void435crocus_setup_uniforms(const struct brw_compiler *compiler,436void *mem_ctx,437nir_shader *nir,438struct brw_stage_prog_data *prog_data,439enum brw_param_builtin **out_system_values,440unsigned *out_num_system_values,441unsigned *out_num_cbufs)442{443UNUSED const struct intel_device_info *devinfo = compiler->devinfo;444445const unsigned CROCUS_MAX_SYSTEM_VALUES =446PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;447enum brw_param_builtin *system_values =448rzalloc_array(mem_ctx, enum brw_param_builtin, CROCUS_MAX_SYSTEM_VALUES);449unsigned num_system_values = 0;450451unsigned patch_vert_idx = -1;452unsigned ucp_idx[CROCUS_MAX_CLIP_PLANES];453unsigned img_idx[PIPE_MAX_SHADER_IMAGES];454unsigned variable_group_size_idx = -1;455memset(ucp_idx, -1, sizeof(ucp_idx));456memset(img_idx, -1, sizeof(img_idx));457458nir_function_impl *impl = nir_shader_get_entrypoint(nir);459460nir_builder b;461nir_builder_init(&b, impl);462463b.cursor = nir_before_block(nir_start_block(impl));464nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);465nir_ssa_def *temp_const_ubo_name = NULL;466467/* Turn system value intrinsics into uniforms */468nir_foreach_block(block, impl) {469nir_foreach_instr_safe(instr, block) {470if (instr->type != nir_instr_type_intrinsic)471continue;472473nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);474nir_ssa_def *offset;475476switch (intrin->intrinsic) {477case nir_intrinsic_load_constant: {478/* This one is special because it reads from the shader constant479* data and not cbuf0 which gallium uploads for us.480*/481b.cursor = nir_before_instr(instr);482nir_ssa_def *offset =483nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1),484nir_intrinsic_base(intrin));485486if (temp_const_ubo_name == NULL)487temp_const_ubo_name = nir_imm_int(&b, 0);488489nir_intrinsic_instr *load_ubo =490nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);491load_ubo->num_components = intrin->num_components;492load_ubo->src[0] = nir_src_for_ssa(temp_const_ubo_name);493load_ubo->src[1] = nir_src_for_ssa(offset);494nir_intrinsic_set_align(load_ubo, 4, 0);495nir_intrinsic_set_range_base(load_ubo, 0);496nir_intrinsic_set_range(load_ubo, ~0);497nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,498intrin->dest.ssa.num_components,499intrin->dest.ssa.bit_size,500NULL);501nir_builder_instr_insert(&b, &load_ubo->instr);502503nir_ssa_def_rewrite_uses(&intrin->dest.ssa,504&load_ubo->dest.ssa);505nir_instr_remove(&intrin->instr);506continue;507}508case nir_intrinsic_load_user_clip_plane: {509unsigned ucp = nir_intrinsic_ucp_id(intrin);510511if (ucp_idx[ucp] == -1) {512ucp_idx[ucp] = num_system_values;513num_system_values += 4;514}515516for (int i = 0; i < 4; i++) {517system_values[ucp_idx[ucp] + i] =518BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);519}520521b.cursor = nir_before_instr(instr);522offset = nir_imm_int(&b, ucp_idx[ucp] * sizeof(uint32_t));523break;524}525case nir_intrinsic_load_patch_vertices_in:526if (patch_vert_idx == -1)527patch_vert_idx = num_system_values++;528529system_values[patch_vert_idx] =530BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;531532b.cursor = nir_before_instr(instr);533offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t));534break;535case nir_intrinsic_image_deref_load_param_intel: {536assert(devinfo->ver < 9);537nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);538nir_variable *var = nir_deref_instr_get_variable(deref);539540if (img_idx[var->data.binding] == -1) {541/* GL only allows arrays of arrays of images. */542assert(glsl_type_is_image(glsl_without_array(var->type)));543unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));544545for (int i = 0; i < num_images; i++) {546const unsigned img = var->data.binding + i;547548img_idx[img] = num_system_values;549num_system_values += BRW_IMAGE_PARAM_SIZE;550551uint32_t *img_sv = &system_values[img_idx[img]];552553setup_vec4_image_sysval(554img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,555offsetof(struct brw_image_param, offset), 2);556setup_vec4_image_sysval(557img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,558offsetof(struct brw_image_param, size), 3);559setup_vec4_image_sysval(560img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,561offsetof(struct brw_image_param, stride), 4);562setup_vec4_image_sysval(563img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,564offsetof(struct brw_image_param, tiling), 3);565setup_vec4_image_sysval(566img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,567offsetof(struct brw_image_param, swizzling), 2);568}569}570571b.cursor = nir_before_instr(instr);572offset = nir_iadd(&b,573get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),574nir_imm_int(&b, img_idx[var->data.binding] * 4 +575nir_intrinsic_base(intrin) * 16));576break;577}578case nir_intrinsic_load_workgroup_size: {579assert(nir->info.workgroup_size_variable);580if (variable_group_size_idx == -1) {581variable_group_size_idx = num_system_values;582num_system_values += 3;583for (int i = 0; i < 3; i++) {584system_values[variable_group_size_idx + i] =585BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i;586}587}588589b.cursor = nir_before_instr(instr);590offset = nir_imm_int(&b,591variable_group_size_idx * sizeof(uint32_t));592break;593}594default:595continue;596}597598unsigned comps = nir_intrinsic_dest_components(intrin);599600nir_intrinsic_instr *load =601nir_intrinsic_instr_create(nir, nir_intrinsic_load_ubo);602load->num_components = comps;603load->src[0] = nir_src_for_ssa(temp_ubo_name);604load->src[1] = nir_src_for_ssa(offset);605nir_intrinsic_set_align(load, 4, 0);606nir_intrinsic_set_range_base(load, 0);607nir_intrinsic_set_range(load, ~0);608nir_ssa_dest_init(&load->instr, &load->dest, comps, 32, NULL);609nir_builder_instr_insert(&b, &load->instr);610nir_ssa_def_rewrite_uses(&intrin->dest.ssa,611&load->dest.ssa);612nir_instr_remove(instr);613}614}615616nir_validate_shader(nir, "before remapping");617618/* Uniforms are stored in constant buffer 0, the619* user-facing UBOs are indexed by one. So if any constant buffer is620* needed, the constant buffer 0 will be needed, so account for it.621*/622unsigned num_cbufs = nir->info.num_ubos;623if (num_cbufs || nir->num_uniforms)624num_cbufs++;625626/* Place the new params in a new cbuf. */627if (num_system_values > 0) {628unsigned sysval_cbuf_index = num_cbufs;629num_cbufs++;630631system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,632num_system_values);633634nir_foreach_block(block, impl) {635nir_foreach_instr_safe(instr, block) {636if (instr->type != nir_instr_type_intrinsic)637continue;638639nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);640641if (load->intrinsic != nir_intrinsic_load_ubo)642continue;643644b.cursor = nir_before_instr(instr);645646assert(load->src[0].is_ssa);647648if (load->src[0].ssa == temp_ubo_name) {649nir_ssa_def *imm = nir_imm_int(&b, sysval_cbuf_index);650nir_instr_rewrite_src(instr, &load->src[0],651nir_src_for_ssa(imm));652}653}654}655656/* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */657nir_opt_constant_folding(nir);658} else {659ralloc_free(system_values);660system_values = NULL;661}662663assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);664nir_validate_shader(nir, "after remap");665666/* We don't use params[] but gallium leaves num_uniforms set. We use this667* to detect when cbuf0 exists but we don't need it anymore when we get668* here. Instead, zero it out so that the back-end doesn't get confused669* when nr_params * 4 != num_uniforms != nr_params * 4.670*/671nir->num_uniforms = 0;672673/* Constant loads (if any) need to go at the end of the constant buffers so674* we need to know num_cbufs before we can lower to them.675*/676if (temp_const_ubo_name != NULL) {677nir_load_const_instr *const_ubo_index =678nir_instr_as_load_const(temp_const_ubo_name->parent_instr);679assert(const_ubo_index->def.bit_size == 32);680const_ubo_index->value[0].u32 = num_cbufs;681}682683*out_system_values = system_values;684*out_num_system_values = num_system_values;685*out_num_cbufs = num_cbufs;686}687688static const char *surface_group_names[] = {689[CROCUS_SURFACE_GROUP_RENDER_TARGET] = "render target",690[CROCUS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",691[CROCUS_SURFACE_GROUP_SOL] = "streamout",692[CROCUS_SURFACE_GROUP_CS_WORK_GROUPS] = "CS work groups",693[CROCUS_SURFACE_GROUP_TEXTURE] = "texture",694[CROCUS_SURFACE_GROUP_TEXTURE_GATHER] = "texture gather",695[CROCUS_SURFACE_GROUP_UBO] = "ubo",696[CROCUS_SURFACE_GROUP_SSBO] = "ssbo",697[CROCUS_SURFACE_GROUP_IMAGE] = "image",698};699700static void701crocus_print_binding_table(FILE *fp, const char *name,702const struct crocus_binding_table *bt)703{704STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == CROCUS_SURFACE_GROUP_COUNT);705706uint32_t total = 0;707uint32_t compacted = 0;708709for (int i = 0; i < CROCUS_SURFACE_GROUP_COUNT; i++) {710uint32_t size = bt->sizes[i];711total += size;712if (size)713compacted += util_bitcount64(bt->used_mask[i]);714}715716if (total == 0) {717fprintf(fp, "Binding table for %s is empty\n\n", name);718return;719}720721if (total != compacted) {722fprintf(fp, "Binding table for %s "723"(compacted to %u entries from %u entries)\n",724name, compacted, total);725} else {726fprintf(fp, "Binding table for %s (%u entries)\n", name, total);727}728729uint32_t entry = 0;730for (int i = 0; i < CROCUS_SURFACE_GROUP_COUNT; i++) {731uint64_t mask = bt->used_mask[i];732while (mask) {733int index = u_bit_scan64(&mask);734fprintf(fp, " [%u] %s #%d\n", entry++, surface_group_names[i], index);735}736}737fprintf(fp, "\n");738}739740enum {741/* Max elements in a surface group. */742SURFACE_GROUP_MAX_ELEMENTS = 64,743};744745static void746rewrite_src_with_bti(nir_builder *b, struct crocus_binding_table *bt,747nir_instr *instr, nir_src *src,748enum crocus_surface_group group)749{750assert(bt->sizes[group] > 0);751752b->cursor = nir_before_instr(instr);753nir_ssa_def *bti;754if (nir_src_is_const(*src)) {755uint32_t index = nir_src_as_uint(*src);756bti = nir_imm_intN_t(b, crocus_group_index_to_bti(bt, group, index),757src->ssa->bit_size);758} else {759/* Indirect usage makes all the surfaces of the group to be available,760* so we can just add the base.761*/762assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));763bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);764}765nir_instr_rewrite_src(instr, src, nir_src_for_ssa(bti));766}767768static void769mark_used_with_src(struct crocus_binding_table *bt, nir_src *src,770enum crocus_surface_group group)771{772assert(bt->sizes[group] > 0);773774if (nir_src_is_const(*src)) {775uint64_t index = nir_src_as_uint(*src);776assert(index < bt->sizes[group]);777bt->used_mask[group] |= 1ull << index;778} else {779/* There's an indirect usage, we need all the surfaces. */780bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);781}782}783784static bool785skip_compacting_binding_tables(void)786{787static int skip = -1;788if (skip < 0)789skip = env_var_as_boolean("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);790return skip;791}792793/**794* Set up the binding table indices and apply to the shader.795*/796static void797crocus_setup_binding_table(const struct intel_device_info *devinfo,798struct nir_shader *nir,799struct crocus_binding_table *bt,800unsigned num_render_targets,801unsigned num_system_values,802unsigned num_cbufs,803const struct brw_sampler_prog_key_data *key)804{805const struct shader_info *info = &nir->info;806807memset(bt, 0, sizeof(*bt));808809/* Set the sizes for each surface group. For some groups, we already know810* upfront how many will be used, so mark them.811*/812if (info->stage == MESA_SHADER_FRAGMENT) {813bt->sizes[CROCUS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;814/* All render targets used. */815bt->used_mask[CROCUS_SURFACE_GROUP_RENDER_TARGET] =816BITFIELD64_MASK(num_render_targets);817818/* Setup render target read surface group in order to support non-coherent819* framebuffer fetch on Gfx7820*/821if (devinfo->ver >= 6 && info->outputs_read) {822bt->sizes[CROCUS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;823bt->used_mask[CROCUS_SURFACE_GROUP_RENDER_TARGET_READ] =824BITFIELD64_MASK(num_render_targets);825}826} else if (info->stage == MESA_SHADER_COMPUTE) {827bt->sizes[CROCUS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;828} else if (info->stage == MESA_SHADER_GEOMETRY) {829/* In gfx6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform830* feedback surfaces.831*/832if (devinfo->ver == 6) {833bt->sizes[CROCUS_SURFACE_GROUP_SOL] = BRW_MAX_SOL_BINDINGS;834bt->used_mask[CROCUS_SURFACE_GROUP_SOL] = (uint64_t)-1;835}836}837838bt->sizes[CROCUS_SURFACE_GROUP_TEXTURE] = BITSET_LAST_BIT(info->textures_used);839bt->used_mask[CROCUS_SURFACE_GROUP_TEXTURE] = info->textures_used[0];840841if (info->uses_texture_gather && devinfo->ver < 8) {842bt->sizes[CROCUS_SURFACE_GROUP_TEXTURE_GATHER] = BITSET_LAST_BIT(info->textures_used);843bt->used_mask[CROCUS_SURFACE_GROUP_TEXTURE_GATHER] = info->textures_used[0];844}845846bt->sizes[CROCUS_SURFACE_GROUP_IMAGE] = info->num_images;847848/* Allocate an extra slot in the UBO section for NIR constants.849* Binding table compaction will remove it if unnecessary.850*851* We don't include them in crocus_compiled_shader::num_cbufs because852* they are uploaded separately from shs->constbufs[], but from a shader853* point of view, they're another UBO (at the end of the section).854*/855bt->sizes[CROCUS_SURFACE_GROUP_UBO] = num_cbufs + 1;856857bt->sizes[CROCUS_SURFACE_GROUP_SSBO] = info->num_ssbos;858859for (int i = 0; i < CROCUS_SURFACE_GROUP_COUNT; i++)860assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);861862/* Mark surfaces used for the cases we don't have the information available863* upfront.864*/865nir_function_impl *impl = nir_shader_get_entrypoint(nir);866nir_foreach_block (block, impl) {867nir_foreach_instr (instr, block) {868if (instr->type != nir_instr_type_intrinsic)869continue;870871nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);872switch (intrin->intrinsic) {873case nir_intrinsic_load_num_workgroups:874bt->used_mask[CROCUS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;875break;876877case nir_intrinsic_load_output:878if (devinfo->ver >= 6) {879mark_used_with_src(bt, &intrin->src[0],880CROCUS_SURFACE_GROUP_RENDER_TARGET_READ);881}882break;883884case nir_intrinsic_image_size:885case nir_intrinsic_image_load:886case nir_intrinsic_image_store:887case nir_intrinsic_image_atomic_add:888case nir_intrinsic_image_atomic_imin:889case nir_intrinsic_image_atomic_umin:890case nir_intrinsic_image_atomic_imax:891case nir_intrinsic_image_atomic_umax:892case nir_intrinsic_image_atomic_and:893case nir_intrinsic_image_atomic_or:894case nir_intrinsic_image_atomic_xor:895case nir_intrinsic_image_atomic_exchange:896case nir_intrinsic_image_atomic_comp_swap:897case nir_intrinsic_image_load_raw_intel:898case nir_intrinsic_image_store_raw_intel:899mark_used_with_src(bt, &intrin->src[0], CROCUS_SURFACE_GROUP_IMAGE);900break;901902case nir_intrinsic_load_ubo:903mark_used_with_src(bt, &intrin->src[0], CROCUS_SURFACE_GROUP_UBO);904break;905906case nir_intrinsic_store_ssbo:907mark_used_with_src(bt, &intrin->src[1], CROCUS_SURFACE_GROUP_SSBO);908break;909910case nir_intrinsic_get_ssbo_size:911case nir_intrinsic_ssbo_atomic_add:912case nir_intrinsic_ssbo_atomic_imin:913case nir_intrinsic_ssbo_atomic_umin:914case nir_intrinsic_ssbo_atomic_imax:915case nir_intrinsic_ssbo_atomic_umax:916case nir_intrinsic_ssbo_atomic_and:917case nir_intrinsic_ssbo_atomic_or:918case nir_intrinsic_ssbo_atomic_xor:919case nir_intrinsic_ssbo_atomic_exchange:920case nir_intrinsic_ssbo_atomic_comp_swap:921case nir_intrinsic_ssbo_atomic_fmin:922case nir_intrinsic_ssbo_atomic_fmax:923case nir_intrinsic_ssbo_atomic_fcomp_swap:924case nir_intrinsic_load_ssbo:925mark_used_with_src(bt, &intrin->src[0], CROCUS_SURFACE_GROUP_SSBO);926break;927928default:929break;930}931}932}933934/* When disable we just mark everything as used. */935if (unlikely(skip_compacting_binding_tables())) {936for (int i = 0; i < CROCUS_SURFACE_GROUP_COUNT; i++)937bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);938}939940/* Calculate the offsets and the binding table size based on the used941* surfaces. After this point, the functions to go between "group indices"942* and binding table indices can be used.943*/944uint32_t next = 0;945for (int i = 0; i < CROCUS_SURFACE_GROUP_COUNT; i++) {946if (bt->used_mask[i] != 0) {947bt->offsets[i] = next;948next += util_bitcount64(bt->used_mask[i]);949}950}951bt->size_bytes = next * 4;952953if (unlikely(INTEL_DEBUG & DEBUG_BT)) {954crocus_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);955}956957/* Apply the binding table indices. The backend compiler is not expected958* to change those, as we haven't set any of the *_start entries in brw959* binding_table.960*/961nir_builder b;962nir_builder_init(&b, impl);963964nir_foreach_block (block, impl) {965nir_foreach_instr (instr, block) {966if (instr->type == nir_instr_type_tex) {967nir_tex_instr *tex = nir_instr_as_tex(instr);968bool is_gather = devinfo->ver < 8 && tex->op == nir_texop_tg4;969970/* rewrite the tg4 component from green to blue before replacing the971texture index */972if (devinfo->verx10 == 70) {973if (tex->component == 1)974if (key->gather_channel_quirk_mask & (1 << tex->texture_index))975tex->component = 2;976}977978if (is_gather && devinfo->ver == 6 && key->gfx6_gather_wa[tex->texture_index]) {979b.cursor = nir_after_instr(instr);980enum gfx6_gather_sampler_wa wa = key->gfx6_gather_wa[tex->texture_index];981int width = (wa & WA_8BIT) ? 8 : 16;982983nir_ssa_def *val = nir_fmul_imm(&b, &tex->dest.ssa, (1 << width) - 1);984val = nir_f2u32(&b, val);985if (wa & WA_SIGN) {986val = nir_ishl(&b, val, nir_imm_int(&b, 32 - width));987val = nir_ishr(&b, val, nir_imm_int(&b, 32 - width));988}989nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, val, val->parent_instr);990}991992tex->texture_index =993crocus_group_index_to_bti(bt, is_gather ? CROCUS_SURFACE_GROUP_TEXTURE_GATHER : CROCUS_SURFACE_GROUP_TEXTURE,994tex->texture_index);995continue;996}997998if (instr->type != nir_instr_type_intrinsic)999continue;10001001nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);1002switch (intrin->intrinsic) {1003case nir_intrinsic_image_size:1004case nir_intrinsic_image_load:1005case nir_intrinsic_image_store:1006case nir_intrinsic_image_atomic_add:1007case nir_intrinsic_image_atomic_imin:1008case nir_intrinsic_image_atomic_umin:1009case nir_intrinsic_image_atomic_imax:1010case nir_intrinsic_image_atomic_umax:1011case nir_intrinsic_image_atomic_and:1012case nir_intrinsic_image_atomic_or:1013case nir_intrinsic_image_atomic_xor:1014case nir_intrinsic_image_atomic_exchange:1015case nir_intrinsic_image_atomic_comp_swap:1016case nir_intrinsic_image_load_raw_intel:1017case nir_intrinsic_image_store_raw_intel:1018rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],1019CROCUS_SURFACE_GROUP_IMAGE);1020break;10211022case nir_intrinsic_load_ubo:1023rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],1024CROCUS_SURFACE_GROUP_UBO);1025break;10261027case nir_intrinsic_store_ssbo:1028rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],1029CROCUS_SURFACE_GROUP_SSBO);1030break;10311032case nir_intrinsic_load_output:1033if (devinfo->ver >= 6) {1034rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],1035CROCUS_SURFACE_GROUP_RENDER_TARGET_READ);1036}1037break;10381039case nir_intrinsic_get_ssbo_size:1040case nir_intrinsic_ssbo_atomic_add:1041case nir_intrinsic_ssbo_atomic_imin:1042case nir_intrinsic_ssbo_atomic_umin:1043case nir_intrinsic_ssbo_atomic_imax:1044case nir_intrinsic_ssbo_atomic_umax:1045case nir_intrinsic_ssbo_atomic_and:1046case nir_intrinsic_ssbo_atomic_or:1047case nir_intrinsic_ssbo_atomic_xor:1048case nir_intrinsic_ssbo_atomic_exchange:1049case nir_intrinsic_ssbo_atomic_comp_swap:1050case nir_intrinsic_ssbo_atomic_fmin:1051case nir_intrinsic_ssbo_atomic_fmax:1052case nir_intrinsic_ssbo_atomic_fcomp_swap:1053case nir_intrinsic_load_ssbo:1054rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],1055CROCUS_SURFACE_GROUP_SSBO);1056break;10571058default:1059break;1060}1061}1062}1063}10641065static void1066crocus_debug_recompile(struct crocus_context *ice,1067struct shader_info *info,1068const struct brw_base_prog_key *key)1069{1070struct crocus_screen *screen = (struct crocus_screen *) ice->ctx.screen;1071const struct brw_compiler *c = screen->compiler;10721073if (!info)1074return;10751076c->shader_perf_log(&ice->dbg, "Recompiling %s shader for program %s: %s\n",1077_mesa_shader_stage_to_string(info->stage),1078info->name ? info->name : "(no identifier)",1079info->label ? info->label : "");10801081const void *old_key =1082crocus_find_previous_compile(ice, info->stage, key->program_string_id);10831084brw_debug_key_recompile(c, &ice->dbg, info->stage, old_key, key);1085}10861087/**1088* Get the shader for the last enabled geometry stage.1089*1090* This stage is the one which will feed stream output and the rasterizer.1091*/1092static gl_shader_stage1093last_vue_stage(struct crocus_context *ice)1094{1095if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])1096return MESA_SHADER_GEOMETRY;10971098if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])1099return MESA_SHADER_TESS_EVAL;11001101return MESA_SHADER_VERTEX;1102}11031104static GLbitfield641105crocus_vs_outputs_written(struct crocus_context *ice,1106const struct brw_vs_prog_key *key,1107GLbitfield64 user_varyings)1108{1109struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;1110const struct intel_device_info *devinfo = &screen->devinfo;1111GLbitfield64 outputs_written = user_varyings;11121113if (devinfo->ver < 6) {11141115if (key->copy_edgeflag)1116outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);11171118/* Put dummy slots into the VUE for the SF to put the replaced1119* point sprite coords in. We shouldn't need these dummy slots,1120* which take up precious URB space, but it would mean that the SF1121* doesn't get nice aligned pairs of input coords into output1122* coords, which would be a pain to handle.1123*/1124for (unsigned i = 0; i < 8; i++) {1125if (key->point_coord_replace & (1 << i))1126outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);1127}11281129/* if back colors are written, allocate slots for front colors too */1130if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))1131outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);1132if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))1133outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);1134}11351136/* In order for legacy clipping to work, we need to populate the clip1137* distance varying slots whenever clipping is enabled, even if the vertex1138* shader doesn't write to gl_ClipDistance.1139*/1140if (key->nr_userclip_plane_consts > 0) {1141outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);1142outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);1143}11441145return outputs_written;1146}11471148/*1149* If no edgeflags come from the user, gen4/51150* require giving the clip shader a default edgeflag.1151*1152* This will always be 1.0.1153*/1154static void1155crocus_lower_default_edgeflags(struct nir_shader *nir)1156{1157nir_function_impl *impl = nir_shader_get_entrypoint(nir);11581159nir_builder b;1160nir_builder_init(&b, impl);11611162b.cursor = nir_after_cf_list(&b.impl->body);1163nir_variable *var = nir_variable_create(nir, nir_var_shader_out,1164glsl_float_type(),1165"edgeflag");1166var->data.location = VARYING_SLOT_EDGE;1167nir_store_var(&b, var, nir_imm_float(&b, 1.0), 0x1);1168}11691170/**1171* Compile a vertex shader, and upload the assembly.1172*/1173static struct crocus_compiled_shader *1174crocus_compile_vs(struct crocus_context *ice,1175struct crocus_uncompiled_shader *ish,1176const struct brw_vs_prog_key *key)1177{1178struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;1179const struct brw_compiler *compiler = screen->compiler;1180const struct intel_device_info *devinfo = &screen->devinfo;1181void *mem_ctx = ralloc_context(NULL);1182struct brw_vs_prog_data *vs_prog_data =1183rzalloc(mem_ctx, struct brw_vs_prog_data);1184struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;1185struct brw_stage_prog_data *prog_data = &vue_prog_data->base;1186enum brw_param_builtin *system_values;1187unsigned num_system_values;1188unsigned num_cbufs;11891190nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);11911192if (key->nr_userclip_plane_consts) {1193nir_function_impl *impl = nir_shader_get_entrypoint(nir);1194nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true,1195false, NULL);1196nir_lower_io_to_temporaries(nir, impl, true, false);1197nir_lower_global_vars_to_local(nir);1198nir_lower_vars_to_ssa(nir);1199nir_shader_gather_info(nir, impl);1200}12011202prog_data->use_alt_mode = ish->use_alt_mode;12031204crocus_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,1205&num_system_values, &num_cbufs);12061207crocus_lower_swizzles(nir, &key->base.tex);12081209if (devinfo->ver <= 5 &&1210!(nir->info.inputs_read & BITFIELD64_BIT(VERT_ATTRIB_EDGEFLAG)))1211crocus_lower_default_edgeflags(nir);12121213struct crocus_binding_table bt;1214crocus_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,1215num_system_values, num_cbufs, &key->base.tex);12161217if (can_push_ubo(devinfo))1218brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);12191220uint64_t outputs_written =1221crocus_vs_outputs_written(ice, key, nir->info.outputs_written);1222brw_compute_vue_map(devinfo,1223&vue_prog_data->vue_map, outputs_written,1224nir->info.separate_shader, /* pos slots */ 1);12251226/* Don't tell the backend about our clip plane constants, we've already1227* lowered them in NIR and we don't want it doing it again.1228*/1229struct brw_vs_prog_key key_no_ucp = *key;1230key_no_ucp.nr_userclip_plane_consts = 0;1231key_no_ucp.copy_edgeflag = false;1232crocus_sanitize_tex_key(&key_no_ucp.base.tex);12331234struct brw_compile_vs_params params = {1235.nir = nir,1236.key = &key_no_ucp,1237.prog_data = vs_prog_data,1238.edgeflag_is_last = devinfo->ver < 6,1239.log_data = &ice->dbg,1240};1241const unsigned *program =1242brw_compile_vs(compiler, mem_ctx, ¶ms);1243if (program == NULL) {1244dbg_printf("Failed to compile vertex shader: %s\n", params.error_str);1245ralloc_free(mem_ctx);1246return false;1247}12481249if (ish->compiled_once) {1250crocus_debug_recompile(ice, &nir->info, &key->base);1251} else {1252ish->compiled_once = true;1253}12541255uint32_t *so_decls = NULL;1256if (devinfo->ver > 6)1257so_decls = screen->vtbl.create_so_decl_list(&ish->stream_output,1258&vue_prog_data->vue_map);12591260struct crocus_compiled_shader *shader =1261crocus_upload_shader(ice, CROCUS_CACHE_VS, sizeof(*key), key, program,1262prog_data->program_size,1263prog_data, sizeof(*vs_prog_data), so_decls,1264system_values, num_system_values,1265num_cbufs, &bt);12661267crocus_disk_cache_store(screen->disk_cache, ish, shader,1268ice->shaders.cache_bo_map,1269key, sizeof(*key));12701271ralloc_free(mem_ctx);1272return shader;1273}12741275/**1276* Update the current vertex shader variant.1277*1278* Fill out the key, look in the cache, compile and bind if needed.1279*/1280static void1281crocus_update_compiled_vs(struct crocus_context *ice)1282{1283struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];1284struct crocus_uncompiled_shader *ish =1285ice->shaders.uncompiled[MESA_SHADER_VERTEX];1286struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;1287const struct intel_device_info *devinfo = &screen->devinfo;1288struct brw_vs_prog_key key = { KEY_INIT() };12891290if (ish->nos & (1ull << CROCUS_NOS_TEXTURES))1291crocus_populate_sampler_prog_key_data(ice, devinfo, MESA_SHADER_VERTEX, ish,1292ish->nir->info.uses_texture_gather, &key.base.tex);1293screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);12941295struct crocus_compiled_shader *old = ice->shaders.prog[CROCUS_CACHE_VS];1296struct crocus_compiled_shader *shader =1297crocus_find_cached_shader(ice, CROCUS_CACHE_VS, sizeof(key), &key);12981299if (!shader)1300shader = crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key));13011302if (!shader)1303shader = crocus_compile_vs(ice, ish, &key);13041305if (old != shader) {1306ice->shaders.prog[CROCUS_CACHE_VS] = shader;1307if (devinfo->ver == 8)1308ice->state.dirty |= CROCUS_DIRTY_GEN8_VF_SGVS;1309ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_VS |1310CROCUS_STAGE_DIRTY_BINDINGS_VS |1311CROCUS_STAGE_DIRTY_CONSTANTS_VS;1312shs->sysvals_need_upload = true;13131314const struct brw_vs_prog_data *vs_prog_data =1315(void *) shader->prog_data;1316const bool uses_draw_params = vs_prog_data->uses_firstvertex ||1317vs_prog_data->uses_baseinstance;1318const bool uses_derived_draw_params = vs_prog_data->uses_drawid ||1319vs_prog_data->uses_is_indexed_draw;1320const bool needs_sgvs_element = uses_draw_params ||1321vs_prog_data->uses_instanceid ||1322vs_prog_data->uses_vertexid;13231324if (ice->state.vs_uses_draw_params != uses_draw_params ||1325ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||1326ice->state.vs_needs_edge_flag != ish->needs_edge_flag ||1327ice->state.vs_uses_vertexid != vs_prog_data->uses_vertexid ||1328ice->state.vs_uses_instanceid != vs_prog_data->uses_instanceid) {1329ice->state.dirty |= CROCUS_DIRTY_VERTEX_BUFFERS |1330CROCUS_DIRTY_VERTEX_ELEMENTS;1331}1332ice->state.vs_uses_draw_params = uses_draw_params;1333ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;1334ice->state.vs_needs_sgvs_element = needs_sgvs_element;1335ice->state.vs_needs_edge_flag = ish->needs_edge_flag;1336ice->state.vs_uses_vertexid = vs_prog_data->uses_vertexid;1337ice->state.vs_uses_instanceid = vs_prog_data->uses_instanceid;1338}1339}13401341/**1342* Get the shader_info for a given stage, or NULL if the stage is disabled.1343*/1344const struct shader_info *1345crocus_get_shader_info(const struct crocus_context *ice, gl_shader_stage stage)1346{1347const struct crocus_uncompiled_shader *ish = ice->shaders.uncompiled[stage];13481349if (!ish)1350return NULL;13511352const nir_shader *nir = ish->nir;1353return &nir->info;1354}13551356/**1357* Get the union of TCS output and TES input slots.1358*1359* TCS and TES need to agree on a common URB entry layout. In particular,1360* the data for all patch vertices is stored in a single URB entry (unlike1361* GS which has one entry per input vertex). This means that per-vertex1362* array indexing needs a stride.1363*1364* SSO requires locations to match, but doesn't require the number of1365* outputs/inputs to match (in fact, the TCS often has extra outputs).1366* So, we need to take the extra step of unifying these on the fly.1367*/1368static void1369get_unified_tess_slots(const struct crocus_context *ice,1370uint64_t *per_vertex_slots,1371uint32_t *per_patch_slots)1372{1373const struct shader_info *tcs =1374crocus_get_shader_info(ice, MESA_SHADER_TESS_CTRL);1375const struct shader_info *tes =1376crocus_get_shader_info(ice, MESA_SHADER_TESS_EVAL);13771378*per_vertex_slots = tes->inputs_read;1379*per_patch_slots = tes->patch_inputs_read;13801381if (tcs) {1382*per_vertex_slots |= tcs->outputs_written;1383*per_patch_slots |= tcs->patch_outputs_written;1384}1385}13861387/**1388* Compile a tessellation control shader, and upload the assembly.1389*/1390static struct crocus_compiled_shader *1391crocus_compile_tcs(struct crocus_context *ice,1392struct crocus_uncompiled_shader *ish,1393const struct brw_tcs_prog_key *key)1394{1395struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;1396const struct brw_compiler *compiler = screen->compiler;1397const struct nir_shader_compiler_options *options =1398compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions;1399void *mem_ctx = ralloc_context(NULL);1400struct brw_tcs_prog_data *tcs_prog_data =1401rzalloc(mem_ctx, struct brw_tcs_prog_data);1402struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;1403struct brw_stage_prog_data *prog_data = &vue_prog_data->base;1404const struct intel_device_info *devinfo = &screen->devinfo;1405enum brw_param_builtin *system_values = NULL;1406unsigned num_system_values = 0;1407unsigned num_cbufs = 0;14081409nir_shader *nir;14101411struct crocus_binding_table bt;14121413if (ish) {1414nir = nir_shader_clone(mem_ctx, ish->nir);14151416crocus_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,1417&num_system_values, &num_cbufs);14181419crocus_lower_swizzles(nir, &key->base.tex);1420crocus_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,1421num_system_values, num_cbufs, &key->base.tex);1422if (can_push_ubo(devinfo))1423brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);1424} else {1425nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, key);14261427/* Reserve space for passing the default tess levels as constants. */1428num_cbufs = 1;1429num_system_values = 8;1430system_values =1431rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values);1432prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values);1433prog_data->nr_params = num_system_values;14341435if (key->tes_primitive_mode == GL_QUADS) {1436for (int i = 0; i < 4; i++)1437system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;14381439system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;1440system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;1441} else if (key->tes_primitive_mode == GL_TRIANGLES) {1442for (int i = 0; i < 3; i++)1443system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;14441445system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;1446} else {1447assert(key->tes_primitive_mode == GL_ISOLINES);1448system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;1449system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;1450}14511452/* Manually setup the TCS binding table. */1453memset(&bt, 0, sizeof(bt));1454bt.sizes[CROCUS_SURFACE_GROUP_UBO] = 1;1455bt.used_mask[CROCUS_SURFACE_GROUP_UBO] = 1;1456bt.size_bytes = 4;14571458prog_data->ubo_ranges[0].length = 1;1459}14601461struct brw_tcs_prog_key key_clean = *key;1462crocus_sanitize_tex_key(&key_clean.base.tex);1463char *error_str = NULL;1464const unsigned *program =1465brw_compile_tcs(compiler, &ice->dbg, mem_ctx, &key_clean, tcs_prog_data, nir,1466-1, NULL, &error_str);1467if (program == NULL) {1468dbg_printf("Failed to compile control shader: %s\n", error_str);1469ralloc_free(mem_ctx);1470return false;1471}14721473if (ish) {1474if (ish->compiled_once) {1475crocus_debug_recompile(ice, &nir->info, &key->base);1476} else {1477ish->compiled_once = true;1478}1479}14801481struct crocus_compiled_shader *shader =1482crocus_upload_shader(ice, CROCUS_CACHE_TCS, sizeof(*key), key, program,1483prog_data->program_size,1484prog_data, sizeof(*tcs_prog_data), NULL,1485system_values, num_system_values,1486num_cbufs, &bt);14871488if (ish)1489crocus_disk_cache_store(screen->disk_cache, ish, shader,1490ice->shaders.cache_bo_map,1491key, sizeof(*key));14921493ralloc_free(mem_ctx);1494return shader;1495}14961497/**1498* Update the current tessellation control shader variant.1499*1500* Fill out the key, look in the cache, compile and bind if needed.1501*/1502static void1503crocus_update_compiled_tcs(struct crocus_context *ice)1504{1505struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];1506struct crocus_uncompiled_shader *tcs =1507ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];1508struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;1509const struct intel_device_info *devinfo = &screen->devinfo;15101511const struct shader_info *tes_info =1512crocus_get_shader_info(ice, MESA_SHADER_TESS_EVAL);1513struct brw_tcs_prog_key key = {1514KEY_INIT_NO_ID(),1515.base.program_string_id = tcs ? tcs->program_id : 0,1516.tes_primitive_mode = tes_info->tess.primitive_mode,1517.input_vertices = ice->state.vertices_per_patch,1518.quads_workaround = tes_info->tess.primitive_mode == GL_QUADS &&1519tes_info->tess.spacing == TESS_SPACING_EQUAL,1520};15211522if (tcs && tcs->nos & (1ull << CROCUS_NOS_TEXTURES))1523crocus_populate_sampler_prog_key_data(ice, devinfo, MESA_SHADER_TESS_CTRL, tcs,1524tcs->nir->info.uses_texture_gather, &key.base.tex);1525get_unified_tess_slots(ice, &key.outputs_written,1526&key.patch_outputs_written);1527screen->vtbl.populate_tcs_key(ice, &key);15281529struct crocus_compiled_shader *old = ice->shaders.prog[CROCUS_CACHE_TCS];1530struct crocus_compiled_shader *shader =1531crocus_find_cached_shader(ice, CROCUS_CACHE_TCS, sizeof(key), &key);15321533if (tcs && !shader)1534shader = crocus_disk_cache_retrieve(ice, tcs, &key, sizeof(key));15351536if (!shader)1537shader = crocus_compile_tcs(ice, tcs, &key);15381539if (old != shader) {1540ice->shaders.prog[CROCUS_CACHE_TCS] = shader;1541ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_TCS |1542CROCUS_STAGE_DIRTY_BINDINGS_TCS |1543CROCUS_STAGE_DIRTY_CONSTANTS_TCS;1544shs->sysvals_need_upload = true;1545}1546}15471548/**1549* Compile a tessellation evaluation shader, and upload the assembly.1550*/1551static struct crocus_compiled_shader *1552crocus_compile_tes(struct crocus_context *ice,1553struct crocus_uncompiled_shader *ish,1554const struct brw_tes_prog_key *key)1555{1556struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;1557const struct brw_compiler *compiler = screen->compiler;1558void *mem_ctx = ralloc_context(NULL);1559struct brw_tes_prog_data *tes_prog_data =1560rzalloc(mem_ctx, struct brw_tes_prog_data);1561struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;1562struct brw_stage_prog_data *prog_data = &vue_prog_data->base;1563enum brw_param_builtin *system_values;1564const struct intel_device_info *devinfo = &screen->devinfo;1565unsigned num_system_values;1566unsigned num_cbufs;15671568nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);15691570if (key->nr_userclip_plane_consts) {1571nir_function_impl *impl = nir_shader_get_entrypoint(nir);1572nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true,1573false, NULL);1574nir_lower_io_to_temporaries(nir, impl, true, false);1575nir_lower_global_vars_to_local(nir);1576nir_lower_vars_to_ssa(nir);1577nir_shader_gather_info(nir, impl);1578}15791580crocus_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,1581&num_system_values, &num_cbufs);1582crocus_lower_swizzles(nir, &key->base.tex);1583struct crocus_binding_table bt;1584crocus_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,1585num_system_values, num_cbufs, &key->base.tex);15861587if (can_push_ubo(devinfo))1588brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);15891590struct brw_vue_map input_vue_map;1591brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,1592key->patch_inputs_read);15931594struct brw_tes_prog_key key_clean = *key;1595crocus_sanitize_tex_key(&key_clean.base.tex);1596char *error_str = NULL;1597const unsigned *program =1598brw_compile_tes(compiler, &ice->dbg, mem_ctx, &key_clean, &input_vue_map,1599tes_prog_data, nir, -1, NULL, &error_str);1600if (program == NULL) {1601dbg_printf("Failed to compile evaluation shader: %s\n", error_str);1602ralloc_free(mem_ctx);1603return false;1604}16051606if (ish->compiled_once) {1607crocus_debug_recompile(ice, &nir->info, &key->base);1608} else {1609ish->compiled_once = true;1610}16111612uint32_t *so_decls = NULL;1613if (devinfo->ver > 6)1614so_decls = screen->vtbl.create_so_decl_list(&ish->stream_output,1615&vue_prog_data->vue_map);16161617struct crocus_compiled_shader *shader =1618crocus_upload_shader(ice, CROCUS_CACHE_TES, sizeof(*key), key, program,1619prog_data->program_size,1620prog_data, sizeof(*tes_prog_data), so_decls,1621system_values, num_system_values,1622num_cbufs, &bt);16231624crocus_disk_cache_store(screen->disk_cache, ish, shader,1625ice->shaders.cache_bo_map,1626key, sizeof(*key));16271628ralloc_free(mem_ctx);1629return shader;1630}16311632/**1633* Update the current tessellation evaluation shader variant.1634*1635* Fill out the key, look in the cache, compile and bind if needed.1636*/1637static void1638crocus_update_compiled_tes(struct crocus_context *ice)1639{1640struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];1641struct crocus_uncompiled_shader *ish =1642ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];1643struct brw_tes_prog_key key = { KEY_INIT() };1644struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;1645const struct intel_device_info *devinfo = &screen->devinfo;16461647if (ish->nos & (1ull << CROCUS_NOS_TEXTURES))1648crocus_populate_sampler_prog_key_data(ice, devinfo, MESA_SHADER_TESS_EVAL, ish,1649ish->nir->info.uses_texture_gather, &key.base.tex);1650get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);1651screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);16521653struct crocus_compiled_shader *old = ice->shaders.prog[CROCUS_CACHE_TES];1654struct crocus_compiled_shader *shader =1655crocus_find_cached_shader(ice, CROCUS_CACHE_TES, sizeof(key), &key);16561657if (!shader)1658shader = crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key));16591660if (!shader)1661shader = crocus_compile_tes(ice, ish, &key);16621663if (old != shader) {1664ice->shaders.prog[CROCUS_CACHE_TES] = shader;1665ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_TES |1666CROCUS_STAGE_DIRTY_BINDINGS_TES |1667CROCUS_STAGE_DIRTY_CONSTANTS_TES;1668shs->sysvals_need_upload = true;1669}16701671/* TODO: Could compare and avoid flagging this. */1672const struct shader_info *tes_info = &ish->nir->info;1673if (BITSET_TEST(tes_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {1674ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_TES;1675ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true;1676}1677}16781679/**1680* Compile a geometry shader, and upload the assembly.1681*/1682static struct crocus_compiled_shader *1683crocus_compile_gs(struct crocus_context *ice,1684struct crocus_uncompiled_shader *ish,1685const struct brw_gs_prog_key *key)1686{1687struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;1688const struct brw_compiler *compiler = screen->compiler;1689const struct intel_device_info *devinfo = &screen->devinfo;1690void *mem_ctx = ralloc_context(NULL);1691struct brw_gs_prog_data *gs_prog_data =1692rzalloc(mem_ctx, struct brw_gs_prog_data);1693struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;1694struct brw_stage_prog_data *prog_data = &vue_prog_data->base;1695enum brw_param_builtin *system_values;1696unsigned num_system_values;1697unsigned num_cbufs;16981699nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);17001701if (key->nr_userclip_plane_consts) {1702nir_function_impl *impl = nir_shader_get_entrypoint(nir);1703nir_lower_clip_gs(nir, (1 << key->nr_userclip_plane_consts) - 1, false,1704NULL);1705nir_lower_io_to_temporaries(nir, impl, true, false);1706nir_lower_global_vars_to_local(nir);1707nir_lower_vars_to_ssa(nir);1708nir_shader_gather_info(nir, impl);1709}17101711crocus_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,1712&num_system_values, &num_cbufs);1713crocus_lower_swizzles(nir, &key->base.tex);1714struct crocus_binding_table bt;1715crocus_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,1716num_system_values, num_cbufs, &key->base.tex);17171718if (can_push_ubo(devinfo))1719brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);17201721brw_compute_vue_map(devinfo,1722&vue_prog_data->vue_map, nir->info.outputs_written,1723nir->info.separate_shader, /* pos slots */ 1);17241725if (devinfo->ver == 6)1726gfx6_gs_xfb_setup(&ish->stream_output, gs_prog_data);1727struct brw_gs_prog_key key_clean = *key;1728crocus_sanitize_tex_key(&key_clean.base.tex);17291730char *error_str = NULL;1731const unsigned *program =1732brw_compile_gs(compiler, &ice->dbg, mem_ctx, &key_clean, gs_prog_data, nir,1733-1, NULL, &error_str);1734if (program == NULL) {1735dbg_printf("Failed to compile geometry shader: %s\n", error_str);1736ralloc_free(mem_ctx);1737return false;1738}17391740if (ish->compiled_once) {1741crocus_debug_recompile(ice, &nir->info, &key->base);1742} else {1743ish->compiled_once = true;1744}17451746uint32_t *so_decls = NULL;1747if (devinfo->ver > 6)1748so_decls = screen->vtbl.create_so_decl_list(&ish->stream_output,1749&vue_prog_data->vue_map);17501751struct crocus_compiled_shader *shader =1752crocus_upload_shader(ice, CROCUS_CACHE_GS, sizeof(*key), key, program,1753prog_data->program_size,1754prog_data, sizeof(*gs_prog_data), so_decls,1755system_values, num_system_values,1756num_cbufs, &bt);17571758crocus_disk_cache_store(screen->disk_cache, ish, shader,1759ice->shaders.cache_bo_map,1760key, sizeof(*key));17611762ralloc_free(mem_ctx);1763return shader;1764}17651766/**1767* Update the current geometry shader variant.1768*1769* Fill out the key, look in the cache, compile and bind if needed.1770*/1771static void1772crocus_update_compiled_gs(struct crocus_context *ice)1773{1774struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY];1775struct crocus_uncompiled_shader *ish =1776ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];1777struct crocus_compiled_shader *old = ice->shaders.prog[CROCUS_CACHE_GS];1778struct crocus_compiled_shader *shader = NULL;17791780if (ish) {1781struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;1782const struct intel_device_info *devinfo = &screen->devinfo;1783struct brw_gs_prog_key key = { KEY_INIT() };17841785if (ish->nos & (1ull << CROCUS_NOS_TEXTURES))1786crocus_populate_sampler_prog_key_data(ice, devinfo, MESA_SHADER_GEOMETRY, ish,1787ish->nir->info.uses_texture_gather, &key.base.tex);1788screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);17891790shader =1791crocus_find_cached_shader(ice, CROCUS_CACHE_GS, sizeof(key), &key);17921793if (!shader)1794shader = crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key));17951796if (!shader)1797shader = crocus_compile_gs(ice, ish, &key);1798}17991800if (old != shader) {1801ice->shaders.prog[CROCUS_CACHE_GS] = shader;1802ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_GS |1803CROCUS_STAGE_DIRTY_BINDINGS_GS |1804CROCUS_STAGE_DIRTY_CONSTANTS_GS;1805shs->sysvals_need_upload = true;1806}1807}18081809/**1810* Compile a fragment (pixel) shader, and upload the assembly.1811*/1812static struct crocus_compiled_shader *1813crocus_compile_fs(struct crocus_context *ice,1814struct crocus_uncompiled_shader *ish,1815const struct brw_wm_prog_key *key,1816struct brw_vue_map *vue_map)1817{1818struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;1819const struct brw_compiler *compiler = screen->compiler;1820void *mem_ctx = ralloc_context(NULL);1821struct brw_wm_prog_data *fs_prog_data =1822rzalloc(mem_ctx, struct brw_wm_prog_data);1823struct brw_stage_prog_data *prog_data = &fs_prog_data->base;1824enum brw_param_builtin *system_values;1825const struct intel_device_info *devinfo = &screen->devinfo;1826unsigned num_system_values;1827unsigned num_cbufs;18281829nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);18301831prog_data->use_alt_mode = ish->use_alt_mode;18321833crocus_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,1834&num_system_values, &num_cbufs);18351836/* Lower output variables to load_output intrinsics before setting up1837* binding tables, so crocus_setup_binding_table can map any load_output1838* intrinsics to CROCUS_SURFACE_GROUP_RENDER_TARGET_READ on Gen8 for1839* non-coherent framebuffer fetches.1840*/1841brw_nir_lower_fs_outputs(nir);18421843/* lower swizzles before binding table */1844crocus_lower_swizzles(nir, &key->base.tex);1845int null_rts = 1;18461847struct crocus_binding_table bt;1848crocus_setup_binding_table(devinfo, nir, &bt,1849MAX2(key->nr_color_regions, null_rts),1850num_system_values, num_cbufs,1851&key->base.tex);18521853if (can_push_ubo(devinfo))1854brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);18551856struct brw_wm_prog_key key_clean = *key;1857crocus_sanitize_tex_key(&key_clean.base.tex);18581859struct brw_compile_fs_params params = {1860.nir = nir,1861.key = &key_clean,1862.prog_data = fs_prog_data,18631864.allow_spilling = true,1865.vue_map = vue_map,18661867.log_data = &ice->dbg,1868};1869const unsigned *program =1870brw_compile_fs(compiler, mem_ctx, ¶ms);1871if (program == NULL) {1872dbg_printf("Failed to compile fragment shader: %s\n", params.error_str);1873ralloc_free(mem_ctx);1874return false;1875}18761877if (ish->compiled_once) {1878crocus_debug_recompile(ice, &nir->info, &key->base);1879} else {1880ish->compiled_once = true;1881}18821883struct crocus_compiled_shader *shader =1884crocus_upload_shader(ice, CROCUS_CACHE_FS, sizeof(*key), key, program,1885prog_data->program_size,1886prog_data, sizeof(*fs_prog_data), NULL,1887system_values, num_system_values,1888num_cbufs, &bt);18891890crocus_disk_cache_store(screen->disk_cache, ish, shader,1891ice->shaders.cache_bo_map,1892key, sizeof(*key));18931894ralloc_free(mem_ctx);1895return shader;1896}18971898/**1899* Update the current fragment shader variant.1900*1901* Fill out the key, look in the cache, compile and bind if needed.1902*/1903static void1904crocus_update_compiled_fs(struct crocus_context *ice)1905{1906struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;1907const struct intel_device_info *devinfo = &screen->devinfo;1908struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT];1909struct crocus_uncompiled_shader *ish =1910ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];1911struct brw_wm_prog_key key = { KEY_INIT() };19121913if (ish->nos & (1ull << CROCUS_NOS_TEXTURES))1914crocus_populate_sampler_prog_key_data(ice, devinfo, MESA_SHADER_FRAGMENT, ish,1915ish->nir->info.uses_texture_gather, &key.base.tex);1916screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);19171918if (ish->nos & (1ull << CROCUS_NOS_LAST_VUE_MAP))1919key.input_slots_valid = ice->shaders.last_vue_map->slots_valid;19201921struct crocus_compiled_shader *old = ice->shaders.prog[CROCUS_CACHE_FS];1922struct crocus_compiled_shader *shader =1923crocus_find_cached_shader(ice, CROCUS_CACHE_FS, sizeof(key), &key);19241925if (!shader)1926shader = crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key));19271928if (!shader)1929shader = crocus_compile_fs(ice, ish, &key, ice->shaders.last_vue_map);19301931if (old != shader) {1932// XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE1933// toggles. might be able to avoid flagging SBE too.1934ice->shaders.prog[CROCUS_CACHE_FS] = shader;1935ice->state.dirty |= CROCUS_DIRTY_WM;1936/* gen4 clip/sf rely on fs prog_data */1937if (devinfo->ver < 6)1938ice->state.dirty |= CROCUS_DIRTY_GEN4_CLIP_PROG | CROCUS_DIRTY_GEN4_SF_PROG;1939else1940ice->state.dirty |= CROCUS_DIRTY_CLIP | CROCUS_DIRTY_GEN6_BLEND_STATE;1941if (devinfo->ver == 6)1942ice->state.dirty |= CROCUS_DIRTY_RASTER;1943if (devinfo->ver >= 7)1944ice->state.dirty |= CROCUS_DIRTY_GEN7_SBE;1945ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_FS |1946CROCUS_STAGE_DIRTY_BINDINGS_FS |1947CROCUS_STAGE_DIRTY_CONSTANTS_FS;1948shs->sysvals_need_upload = true;1949}1950}19511952/**1953* Update the last enabled stage's VUE map.1954*1955* When the shader feeding the rasterizer's output interface changes, we1956* need to re-emit various packets.1957*/1958static void1959update_last_vue_map(struct crocus_context *ice,1960struct brw_stage_prog_data *prog_data)1961{1962struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;1963const struct intel_device_info *devinfo = &screen->devinfo;1964struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;1965struct brw_vue_map *vue_map = &vue_prog_data->vue_map;1966struct brw_vue_map *old_map = ice->shaders.last_vue_map;1967const uint64_t changed_slots =1968(old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;19691970if (changed_slots & VARYING_BIT_VIEWPORT) {1971ice->state.num_viewports =1972(vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? CROCUS_MAX_VIEWPORTS : 1;1973ice->state.dirty |= CROCUS_DIRTY_SF_CL_VIEWPORT |1974CROCUS_DIRTY_CC_VIEWPORT;1975if (devinfo->ver < 6)1976ice->state.dirty |= CROCUS_DIRTY_GEN4_CLIP_PROG | CROCUS_DIRTY_GEN4_SF_PROG;19771978if (devinfo->ver <= 6)1979ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG;19801981if (devinfo->ver >= 6)1982ice->state.dirty |= CROCUS_DIRTY_CLIP |1983CROCUS_DIRTY_GEN6_SCISSOR_RECT;;1984ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_UNCOMPILED_FS |1985ice->state.stage_dirty_for_nos[CROCUS_NOS_LAST_VUE_MAP];1986}19871988if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {1989ice->state.dirty |= CROCUS_DIRTY_GEN7_SBE;1990if (devinfo->ver < 6)1991ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG;1992ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_UNCOMPILED_FS;1993}19941995ice->shaders.last_vue_map = &vue_prog_data->vue_map;1996}19971998static void1999crocus_update_pull_constant_descriptors(struct crocus_context *ice,2000gl_shader_stage stage)2001{2002struct crocus_compiled_shader *shader = ice->shaders.prog[stage];20032004if (!shader || !shader->prog_data->has_ubo_pull)2005return;20062007struct crocus_shader_state *shs = &ice->state.shaders[stage];2008bool any_new_descriptors =2009shader->num_system_values > 0 && shs->sysvals_need_upload;20102011unsigned bound_cbufs = shs->bound_cbufs;20122013while (bound_cbufs) {2014const int i = u_bit_scan(&bound_cbufs);2015struct pipe_constant_buffer *cbuf = &shs->constbufs[i];2016if (cbuf->buffer) {2017any_new_descriptors = true;2018}2019}20202021if (any_new_descriptors)2022ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_VS << stage;2023}20242025/**2026* Get the prog_data for a given stage, or NULL if the stage is disabled.2027*/2028static struct brw_vue_prog_data *2029get_vue_prog_data(struct crocus_context *ice, gl_shader_stage stage)2030{2031if (!ice->shaders.prog[stage])2032return NULL;20332034return (void *) ice->shaders.prog[stage]->prog_data;2035}20362037static struct crocus_compiled_shader *2038crocus_compile_clip(struct crocus_context *ice, struct brw_clip_prog_key *key)2039{2040struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;2041const struct brw_compiler *compiler = screen->compiler;2042void *mem_ctx;2043unsigned program_size;2044mem_ctx = ralloc_context(NULL);20452046struct brw_clip_prog_data *clip_prog_data =2047rzalloc(mem_ctx, struct brw_clip_prog_data);20482049const unsigned *program = brw_compile_clip(compiler, mem_ctx, key, clip_prog_data,2050ice->shaders.last_vue_map, &program_size);20512052if (program == NULL) {2053dbg_printf("failed to compile clip shader\n");2054ralloc_free(mem_ctx);2055return false;2056}2057struct crocus_binding_table bt;2058memset(&bt, 0, sizeof(bt));20592060struct crocus_compiled_shader *shader =2061crocus_upload_shader(ice, CROCUS_CACHE_CLIP, sizeof(*key), key, program,2062program_size,2063(struct brw_stage_prog_data *)clip_prog_data, sizeof(*clip_prog_data),2064NULL, NULL, 0, 0, &bt);2065ralloc_free(mem_ctx);2066return shader;2067}2068static void2069crocus_update_compiled_clip(struct crocus_context *ice)2070{2071struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;2072struct brw_clip_prog_key key;2073struct crocus_compiled_shader *old = ice->shaders.clip_prog;2074memset(&key, 0, sizeof(key));20752076const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data);2077if (wm_prog_data) {2078key.contains_flat_varying = wm_prog_data->contains_flat_varying;2079key.contains_noperspective_varying =2080wm_prog_data->contains_noperspective_varying;2081memcpy(key.interp_mode, wm_prog_data->interp_mode, sizeof(key.interp_mode));2082}20832084key.primitive = u_reduced_prim(ice->state.prim_mode);2085key.attrs = ice->shaders.last_vue_map->slots_valid;20862087struct pipe_rasterizer_state *rs_state = crocus_get_rast_state(ice);2088key.pv_first = rs_state->flatshade_first;20892090if (rs_state->clip_plane_enable)2091key.nr_userclip = util_logbase2(rs_state->clip_plane_enable) + 1;20922093if (screen->devinfo.ver == 5)2094key.clip_mode = BRW_CLIP_MODE_KERNEL_CLIP;2095else2096key.clip_mode = BRW_CLIP_MODE_NORMAL;20972098if (key.primitive == PIPE_PRIM_TRIANGLES) {2099if (rs_state->cull_face == PIPE_FACE_FRONT_AND_BACK)2100key.clip_mode = BRW_CLIP_MODE_REJECT_ALL;2101else {2102uint32_t fill_front = BRW_CLIP_FILL_MODE_CULL;2103uint32_t fill_back = BRW_CLIP_FILL_MODE_CULL;2104uint32_t offset_front = 0;2105uint32_t offset_back = 0;21062107if (!(rs_state->cull_face & PIPE_FACE_FRONT)) {2108switch (rs_state->fill_front) {2109case PIPE_POLYGON_MODE_FILL:2110fill_front = BRW_CLIP_FILL_MODE_FILL;2111offset_front = 0;2112break;2113case PIPE_POLYGON_MODE_LINE:2114fill_front = BRW_CLIP_FILL_MODE_LINE;2115offset_front = rs_state->offset_line;2116break;2117case PIPE_POLYGON_MODE_POINT:2118fill_front = BRW_CLIP_FILL_MODE_POINT;2119offset_front = rs_state->offset_point;2120break;2121}2122}21232124if (!(rs_state->cull_face & PIPE_FACE_BACK)) {2125switch (rs_state->fill_back) {2126case PIPE_POLYGON_MODE_FILL:2127fill_back = BRW_CLIP_FILL_MODE_FILL;2128offset_back = 0;2129break;2130case PIPE_POLYGON_MODE_LINE:2131fill_back = BRW_CLIP_FILL_MODE_LINE;2132offset_back = rs_state->offset_line;2133break;2134case PIPE_POLYGON_MODE_POINT:2135fill_back = BRW_CLIP_FILL_MODE_POINT;2136offset_back = rs_state->offset_point;2137break;2138}2139}21402141if (rs_state->fill_back != PIPE_POLYGON_MODE_FILL ||2142rs_state->fill_front != PIPE_POLYGON_MODE_FILL) {2143key.do_unfilled = 1;21442145/* Most cases the fixed function units will handle. Cases where2146* one or more polygon faces are unfilled will require help:2147*/2148key.clip_mode = BRW_CLIP_MODE_CLIP_NON_REJECTED;21492150if (offset_back || offset_front) {2151double mrd = 0.0;2152if (ice->state.framebuffer.zsbuf)2153mrd = util_get_depth_format_mrd(util_format_description(ice->state.framebuffer.zsbuf->format));2154key.offset_units = rs_state->offset_units * mrd * 2;2155key.offset_factor = rs_state->offset_scale * mrd;2156key.offset_clamp = rs_state->offset_clamp * mrd;2157}21582159if (!(rs_state->front_ccw ^ rs_state->bottom_edge_rule)) {2160key.fill_ccw = fill_front;2161key.fill_cw = fill_back;2162key.offset_ccw = offset_front;2163key.offset_cw = offset_back;2164if (rs_state->light_twoside &&2165key.fill_cw != BRW_CLIP_FILL_MODE_CULL)2166key.copy_bfc_cw = 1;2167} else {2168key.fill_cw = fill_front;2169key.fill_ccw = fill_back;2170key.offset_cw = offset_front;2171key.offset_ccw = offset_back;2172if (rs_state->light_twoside &&2173key.fill_ccw != BRW_CLIP_FILL_MODE_CULL)2174key.copy_bfc_ccw = 1;2175}2176}2177}2178}2179struct crocus_compiled_shader *shader =2180crocus_find_cached_shader(ice, CROCUS_CACHE_CLIP, sizeof(key), &key);21812182if (!shader)2183shader = crocus_compile_clip(ice, &key);21842185if (old != shader) {2186ice->state.dirty |= CROCUS_DIRTY_CLIP;2187ice->shaders.clip_prog = shader;2188}2189}21902191static struct crocus_compiled_shader *2192crocus_compile_sf(struct crocus_context *ice, struct brw_sf_prog_key *key)2193{2194struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;2195const struct brw_compiler *compiler = screen->compiler;2196void *mem_ctx;2197unsigned program_size;2198mem_ctx = ralloc_context(NULL);21992200struct brw_sf_prog_data *sf_prog_data =2201rzalloc(mem_ctx, struct brw_sf_prog_data);22022203const unsigned *program = brw_compile_sf(compiler, mem_ctx, key, sf_prog_data,2204ice->shaders.last_vue_map, &program_size);22052206if (program == NULL) {2207dbg_printf("failed to compile sf shader\n");2208ralloc_free(mem_ctx);2209return false;2210}22112212struct crocus_binding_table bt;2213memset(&bt, 0, sizeof(bt));2214struct crocus_compiled_shader *shader =2215crocus_upload_shader(ice, CROCUS_CACHE_SF, sizeof(*key), key, program,2216program_size,2217(struct brw_stage_prog_data *)sf_prog_data, sizeof(*sf_prog_data),2218NULL, NULL, 0, 0, &bt);2219ralloc_free(mem_ctx);2220return shader;2221}22222223static void2224crocus_update_compiled_sf(struct crocus_context *ice)2225{2226struct brw_sf_prog_key key;2227struct crocus_compiled_shader *old = ice->shaders.sf_prog;2228memset(&key, 0, sizeof(key));22292230key.attrs = ice->shaders.last_vue_map->slots_valid;22312232switch (u_reduced_prim(ice->state.prim_mode)) {2233case GL_TRIANGLES:2234default:2235if (key.attrs & BITFIELD64_BIT(VARYING_SLOT_EDGE))2236key.primitive = BRW_SF_PRIM_UNFILLED_TRIS;2237else2238key.primitive = BRW_SF_PRIM_TRIANGLES;2239break;2240case GL_LINES:2241key.primitive = BRW_SF_PRIM_LINES;2242break;2243case GL_POINTS:2244key.primitive = BRW_SF_PRIM_POINTS;2245break;2246}22472248struct pipe_rasterizer_state *rs_state = crocus_get_rast_state(ice);2249key.userclip_active = rs_state->clip_plane_enable != 0;2250const struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data);2251if (wm_prog_data) {2252key.contains_flat_varying = wm_prog_data->contains_flat_varying;2253memcpy(key.interp_mode, wm_prog_data->interp_mode, sizeof(key.interp_mode));2254}22552256key.do_twoside_color = rs_state->light_twoside;22572258key.do_point_sprite = rs_state->point_quad_rasterization;2259if (key.do_point_sprite) {2260key.point_sprite_coord_replace = rs_state->sprite_coord_enable & 0xff;2261if (rs_state->sprite_coord_enable & (1 << 8))2262key.do_point_coord = 1;2263if (wm_prog_data && wm_prog_data->urb_setup[VARYING_SLOT_PNTC] != -1)2264key.do_point_coord = 1;2265}22662267key.sprite_origin_lower_left = rs_state->sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT;22682269if (key.do_twoside_color) {2270key.frontface_ccw = rs_state->front_ccw;2271}2272struct crocus_compiled_shader *shader =2273crocus_find_cached_shader(ice, CROCUS_CACHE_SF, sizeof(key), &key);22742275if (!shader)2276shader = crocus_compile_sf(ice, &key);22772278if (old != shader) {2279ice->state.dirty |= CROCUS_DIRTY_RASTER;2280ice->shaders.sf_prog = shader;2281}2282}22832284static struct crocus_compiled_shader *2285crocus_compile_ff_gs(struct crocus_context *ice, struct brw_ff_gs_prog_key *key)2286{2287struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;2288struct brw_compiler *compiler = screen->compiler;2289void *mem_ctx;2290unsigned program_size;2291mem_ctx = ralloc_context(NULL);22922293struct brw_ff_gs_prog_data *ff_gs_prog_data =2294rzalloc(mem_ctx, struct brw_ff_gs_prog_data);22952296const unsigned *program = brw_compile_ff_gs_prog(compiler, mem_ctx, key, ff_gs_prog_data,2297ice->shaders.last_vue_map, &program_size);22982299if (program == NULL) {2300dbg_printf("failed to compile sf shader\n");2301ralloc_free(mem_ctx);2302return false;2303}23042305struct crocus_binding_table bt;2306memset(&bt, 0, sizeof(bt));23072308if (screen->devinfo.ver == 6) {2309bt.sizes[CROCUS_SURFACE_GROUP_SOL] = BRW_MAX_SOL_BINDINGS;2310bt.used_mask[CROCUS_SURFACE_GROUP_SOL] = (uint64_t)-1;23112312bt.size_bytes = BRW_MAX_SOL_BINDINGS * 4;2313}23142315struct crocus_compiled_shader *shader =2316crocus_upload_shader(ice, CROCUS_CACHE_FF_GS, sizeof(*key), key, program,2317program_size,2318(struct brw_stage_prog_data *)ff_gs_prog_data, sizeof(*ff_gs_prog_data),2319NULL, NULL, 0, 0, &bt);2320ralloc_free(mem_ctx);2321return shader;2322}23232324static void2325crocus_update_compiled_ff_gs(struct crocus_context *ice)2326{2327struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;2328const struct intel_device_info *devinfo = &screen->devinfo;2329struct brw_ff_gs_prog_key key;2330struct crocus_compiled_shader *old = ice->shaders.ff_gs_prog;2331memset(&key, 0, sizeof(key));23322333assert(devinfo->ver < 7);23342335key.attrs = ice->shaders.last_vue_map->slots_valid;23362337key.primitive = screen->vtbl.translate_prim_type(ice->state.prim_mode, 0);23382339struct pipe_rasterizer_state *rs_state = crocus_get_rast_state(ice);2340key.pv_first = rs_state->flatshade_first;23412342if (key.primitive == _3DPRIM_QUADLIST && !rs_state->flatshade) {2343/* Provide consistenbbbbbt primitive order with brw_set_prim's2344* optimization of single quads to trifans.2345*/2346key.pv_first = true;2347}23482349if (devinfo->ver >= 6) {2350key.need_gs_prog = ice->state.streamout_active;2351if (key.need_gs_prog) {2352struct crocus_uncompiled_shader *vs =2353ice->shaders.uncompiled[MESA_SHADER_VERTEX];2354gfx6_ff_gs_xfb_setup(&vs->stream_output,2355&key);2356}2357} else {2358key.need_gs_prog = (key.primitive == _3DPRIM_QUADLIST ||2359key.primitive == _3DPRIM_QUADSTRIP ||2360key.primitive == _3DPRIM_LINELOOP);2361}23622363struct crocus_compiled_shader *shader = NULL;2364if (key.need_gs_prog) {2365shader = crocus_find_cached_shader(ice, CROCUS_CACHE_FF_GS,2366sizeof(key), &key);2367if (!shader)2368shader = crocus_compile_ff_gs(ice, &key);2369}2370if (old != shader) {2371ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_GS;2372if (!!old != !!shader)2373ice->state.dirty |= CROCUS_DIRTY_GEN6_URB;2374ice->shaders.ff_gs_prog = shader;2375if (shader) {2376const struct brw_ff_gs_prog_data *gs_prog_data = (struct brw_ff_gs_prog_data *)ice->shaders.ff_gs_prog->prog_data;2377ice->state.last_xfb_verts_per_prim = gs_prog_data->svbi_postincrement_value;2378}2379}2380}23812382// XXX: crocus_compiled_shaders are space-leaking :(2383// XXX: do remember to unbind them if deleting them.23842385/**2386* Update the current shader variants for the given state.2387*2388* This should be called on every draw call to ensure that the correct2389* shaders are bound. It will also flag any dirty state triggered by2390* swapping out those shaders.2391*/2392bool2393crocus_update_compiled_shaders(struct crocus_context *ice)2394{2395struct crocus_screen *screen = (void *) ice->ctx.screen;2396const uint64_t stage_dirty = ice->state.stage_dirty;23972398struct brw_vue_prog_data *old_prog_datas[4];2399if (!(ice->state.dirty & CROCUS_DIRTY_GEN6_URB)) {2400for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++)2401old_prog_datas[i] = get_vue_prog_data(ice, i);2402}24032404if (stage_dirty & (CROCUS_STAGE_DIRTY_UNCOMPILED_TCS |2405CROCUS_STAGE_DIRTY_UNCOMPILED_TES)) {2406struct crocus_uncompiled_shader *tes =2407ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];2408if (tes) {2409crocus_update_compiled_tcs(ice);2410crocus_update_compiled_tes(ice);2411} else {2412ice->shaders.prog[CROCUS_CACHE_TCS] = NULL;2413ice->shaders.prog[CROCUS_CACHE_TES] = NULL;2414ice->state.stage_dirty |=2415CROCUS_STAGE_DIRTY_TCS | CROCUS_STAGE_DIRTY_TES |2416CROCUS_STAGE_DIRTY_BINDINGS_TCS | CROCUS_STAGE_DIRTY_BINDINGS_TES |2417CROCUS_STAGE_DIRTY_CONSTANTS_TCS | CROCUS_STAGE_DIRTY_CONSTANTS_TES;2418}2419}24202421if (stage_dirty & CROCUS_STAGE_DIRTY_UNCOMPILED_VS)2422crocus_update_compiled_vs(ice);2423if (stage_dirty & CROCUS_STAGE_DIRTY_UNCOMPILED_GS)2424crocus_update_compiled_gs(ice);24252426if (stage_dirty & (CROCUS_STAGE_DIRTY_UNCOMPILED_GS |2427CROCUS_STAGE_DIRTY_UNCOMPILED_TES)) {2428const struct crocus_compiled_shader *gs =2429ice->shaders.prog[MESA_SHADER_GEOMETRY];2430const struct crocus_compiled_shader *tes =2431ice->shaders.prog[MESA_SHADER_TESS_EVAL];24322433bool points_or_lines = false;24342435if (gs) {2436const struct brw_gs_prog_data *gs_prog_data = (void *) gs->prog_data;2437points_or_lines =2438gs_prog_data->output_topology == _3DPRIM_POINTLIST ||2439gs_prog_data->output_topology == _3DPRIM_LINESTRIP;2440} else if (tes) {2441const struct brw_tes_prog_data *tes_data = (void *) tes->prog_data;2442points_or_lines =2443tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_LINE ||2444tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT;2445}24462447if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {2448/* Outbound to XY Clip enables */2449ice->shaders.output_topology_is_points_or_lines = points_or_lines;2450ice->state.dirty |= CROCUS_DIRTY_CLIP;2451}2452}24532454if (!ice->shaders.prog[MESA_SHADER_VERTEX])2455return false;24562457gl_shader_stage last_stage = last_vue_stage(ice);2458struct crocus_compiled_shader *shader = ice->shaders.prog[last_stage];2459struct crocus_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];2460update_last_vue_map(ice, shader->prog_data);2461if (ice->state.streamout != shader->streamout) {2462ice->state.streamout = shader->streamout;2463ice->state.dirty |= CROCUS_DIRTY_SO_DECL_LIST | CROCUS_DIRTY_STREAMOUT;2464}24652466if (ice->state.streamout_active) {2467screen->vtbl.update_so_strides(ice, ish->stream_output.stride);2468}24692470/* use ice->state version as last_vue_map can dirty this bit */2471if (ice->state.stage_dirty & CROCUS_STAGE_DIRTY_UNCOMPILED_FS)2472crocus_update_compiled_fs(ice);24732474if (screen->devinfo.ver <= 6) {2475if (ice->state.dirty & CROCUS_DIRTY_GEN4_FF_GS_PROG &&2476!ice->shaders.prog[MESA_SHADER_GEOMETRY])2477crocus_update_compiled_ff_gs(ice);2478}24792480if (screen->devinfo.ver < 6) {2481if (ice->state.dirty & CROCUS_DIRTY_GEN4_CLIP_PROG)2482crocus_update_compiled_clip(ice);2483if (ice->state.dirty & CROCUS_DIRTY_GEN4_SF_PROG)2484crocus_update_compiled_sf(ice);2485}248624872488/* Changing shader interfaces may require a URB configuration. */2489if (!(ice->state.dirty & CROCUS_DIRTY_GEN6_URB)) {2490for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {2491struct brw_vue_prog_data *old = old_prog_datas[i];2492struct brw_vue_prog_data *new = get_vue_prog_data(ice, i);2493if (!!old != !!new ||2494(new && new->urb_entry_size != old->urb_entry_size)) {2495ice->state.dirty |= CROCUS_DIRTY_GEN6_URB;2496break;2497}2498}2499}25002501if (ice->state.stage_dirty & CROCUS_RENDER_STAGE_DIRTY_CONSTANTS) {2502for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {2503if (ice->state.stage_dirty & (CROCUS_STAGE_DIRTY_CONSTANTS_VS << i))2504crocus_update_pull_constant_descriptors(ice, i);2505}2506}2507return true;2508}25092510static struct crocus_compiled_shader *2511crocus_compile_cs(struct crocus_context *ice,2512struct crocus_uncompiled_shader *ish,2513const struct brw_cs_prog_key *key)2514{2515struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;2516const struct brw_compiler *compiler = screen->compiler;2517void *mem_ctx = ralloc_context(NULL);2518struct brw_cs_prog_data *cs_prog_data =2519rzalloc(mem_ctx, struct brw_cs_prog_data);2520struct brw_stage_prog_data *prog_data = &cs_prog_data->base;2521enum brw_param_builtin *system_values;2522const struct intel_device_info *devinfo = &screen->devinfo;2523unsigned num_system_values;2524unsigned num_cbufs;25252526nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);25272528NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics);25292530crocus_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,2531&num_system_values, &num_cbufs);2532crocus_lower_swizzles(nir, &key->base.tex);2533struct crocus_binding_table bt;2534crocus_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,2535num_system_values, num_cbufs, &key->base.tex);25362537struct brw_compile_cs_params params = {2538.nir = nir,2539.key = key,2540.prog_data = cs_prog_data,2541.log_data = &ice->dbg,2542};25432544const unsigned *program =2545brw_compile_cs(compiler, mem_ctx, ¶ms);2546if (program == NULL) {2547dbg_printf("Failed to compile compute shader: %s\n", params.error_str);2548ralloc_free(mem_ctx);2549return false;2550}25512552if (ish->compiled_once) {2553crocus_debug_recompile(ice, &nir->info, &key->base);2554} else {2555ish->compiled_once = true;2556}25572558struct crocus_compiled_shader *shader =2559crocus_upload_shader(ice, CROCUS_CACHE_CS, sizeof(*key), key, program,2560prog_data->program_size,2561prog_data, sizeof(*cs_prog_data), NULL,2562system_values, num_system_values,2563num_cbufs, &bt);25642565crocus_disk_cache_store(screen->disk_cache, ish, shader,2566ice->shaders.cache_bo_map,2567key, sizeof(*key));25682569ralloc_free(mem_ctx);2570return shader;2571}25722573static void2574crocus_update_compiled_cs(struct crocus_context *ice)2575{2576struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];2577struct crocus_uncompiled_shader *ish =2578ice->shaders.uncompiled[MESA_SHADER_COMPUTE];2579struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;2580const struct intel_device_info *devinfo = &screen->devinfo;2581struct brw_cs_prog_key key = { KEY_INIT() };25822583if (ish->nos & (1ull << CROCUS_NOS_TEXTURES))2584crocus_populate_sampler_prog_key_data(ice, devinfo, MESA_SHADER_COMPUTE, ish,2585ish->nir->info.uses_texture_gather, &key.base.tex);2586screen->vtbl.populate_cs_key(ice, &key);25872588struct crocus_compiled_shader *old = ice->shaders.prog[CROCUS_CACHE_CS];2589struct crocus_compiled_shader *shader =2590crocus_find_cached_shader(ice, CROCUS_CACHE_CS, sizeof(key), &key);25912592if (!shader)2593shader = crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key));25942595if (!shader)2596shader = crocus_compile_cs(ice, ish, &key);25972598if (old != shader) {2599ice->shaders.prog[CROCUS_CACHE_CS] = shader;2600ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CS |2601CROCUS_STAGE_DIRTY_BINDINGS_CS |2602CROCUS_STAGE_DIRTY_CONSTANTS_CS;2603shs->sysvals_need_upload = true;2604}2605}26062607void2608crocus_update_compiled_compute_shader(struct crocus_context *ice)2609{2610if (ice->state.stage_dirty & CROCUS_STAGE_DIRTY_UNCOMPILED_CS)2611crocus_update_compiled_cs(ice);26122613if (ice->state.stage_dirty & CROCUS_STAGE_DIRTY_CONSTANTS_CS)2614crocus_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE);2615}26162617void2618crocus_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,2619unsigned threads,2620uint32_t *dst)2621{2622assert(brw_cs_push_const_total_size(cs_prog_data, threads) > 0);2623assert(cs_prog_data->push.cross_thread.size == 0);2624assert(cs_prog_data->push.per_thread.dwords == 1);2625assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);2626for (unsigned t = 0; t < threads; t++)2627dst[8 * t] = t;2628}26292630/**2631* Allocate scratch BOs as needed for the given per-thread size and stage.2632*/2633struct crocus_bo *2634crocus_get_scratch_space(struct crocus_context *ice,2635unsigned per_thread_scratch,2636gl_shader_stage stage)2637{2638struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;2639struct crocus_bufmgr *bufmgr = screen->bufmgr;2640const struct intel_device_info *devinfo = &screen->devinfo;26412642unsigned encoded_size = ffs(per_thread_scratch) - 11;2643assert(encoded_size < (1 << 16));26442645struct crocus_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];26462647unsigned subslice_total = screen->subslice_total;2648subslice_total = 4 * devinfo->num_slices;2649// assert(subslice_total >= screen->subslice_total);26502651if (!*bop) {2652unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;26532654uint32_t max_threads[] = {2655[MESA_SHADER_VERTEX] = devinfo->max_vs_threads,2656[MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,2657[MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,2658[MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads,2659[MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads,2660[MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * subslice_total,2661};26622663uint32_t size = per_thread_scratch * max_threads[stage];26642665*bop = crocus_bo_alloc(bufmgr, "scratch", size);2666}26672668return *bop;2669}26702671/* ------------------------------------------------------------------- */26722673/**2674* The pipe->create_[stage]_state() driver hooks.2675*2676* Performs basic NIR preprocessing, records any state dependencies, and2677* returns an crocus_uncompiled_shader as the Gallium CSO.2678*2679* Actual shader compilation to assembly happens later, at first use.2680*/2681static void *2682crocus_create_uncompiled_shader(struct pipe_context *ctx,2683nir_shader *nir,2684const struct pipe_stream_output_info *so_info)2685{2686struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;2687const struct intel_device_info *devinfo = &screen->devinfo;2688struct crocus_uncompiled_shader *ish =2689calloc(1, sizeof(struct crocus_uncompiled_shader));2690if (!ish)2691return NULL;26922693if (devinfo->ver >= 6)2694NIR_PASS(ish->needs_edge_flag, nir, crocus_fix_edge_flags);2695else2696ish->needs_edge_flag = false;26972698brw_preprocess_nir(screen->compiler, nir, NULL);26992700NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo, false);2701NIR_PASS_V(nir, crocus_lower_storage_image_derefs);27022703nir_sweep(nir);27042705ish->program_id = get_new_program_id(screen);2706ish->nir = nir;2707if (so_info) {2708memcpy(&ish->stream_output, so_info, sizeof(*so_info));2709update_so_info(&ish->stream_output, nir->info.outputs_written);2710}27112712/* Save this now before potentially dropping nir->info.name */2713if (nir->info.name && strncmp(nir->info.name, "ARB", 3) == 0)2714ish->use_alt_mode = true;27152716if (screen->disk_cache) {2717/* Serialize the NIR to a binary blob that we can hash for the disk2718* cache. Drop unnecessary information (like variable names)2719* so the serialized NIR is smaller, and also to let us detect more2720* isomorphic shaders when hashing, increasing cache hits.2721*/2722struct blob blob;2723blob_init(&blob);2724nir_serialize(&blob, nir, true);2725_mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);2726blob_finish(&blob);2727}27282729return ish;2730}27312732static struct crocus_uncompiled_shader *2733crocus_create_shader_state(struct pipe_context *ctx,2734const struct pipe_shader_state *state)2735{2736struct nir_shader *nir;27372738if (state->type == PIPE_SHADER_IR_TGSI)2739nir = tgsi_to_nir(state->tokens, ctx->screen, false);2740else2741nir = state->ir.nir;27422743return crocus_create_uncompiled_shader(ctx, nir, &state->stream_output);2744}27452746static void *2747crocus_create_vs_state(struct pipe_context *ctx,2748const struct pipe_shader_state *state)2749{2750struct crocus_context *ice = (void *) ctx;2751struct crocus_screen *screen = (void *) ctx->screen;2752struct crocus_uncompiled_shader *ish = crocus_create_shader_state(ctx, state);27532754ish->nos |= (1ull << CROCUS_NOS_TEXTURES);2755/* User clip planes or gen5 sprite coord enable */2756if (ish->nir->info.clip_distance_array_size == 0 ||2757screen->devinfo.ver <= 5)2758ish->nos |= (1ull << CROCUS_NOS_RASTERIZER);27592760if (screen->devinfo.verx10 < 75)2761ish->nos |= (1ull << CROCUS_NOS_VERTEX_ELEMENTS);27622763if (screen->precompile) {2764struct brw_vs_prog_key key = { KEY_INIT() };27652766if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key)))2767crocus_compile_vs(ice, ish, &key);2768}27692770return ish;2771}27722773static void *2774crocus_create_tcs_state(struct pipe_context *ctx,2775const struct pipe_shader_state *state)2776{2777struct crocus_context *ice = (void *) ctx;2778struct crocus_screen *screen = (void *) ctx->screen;2779struct crocus_uncompiled_shader *ish = crocus_create_shader_state(ctx, state);2780struct shader_info *info = &ish->nir->info;27812782ish->nos |= (1ull << CROCUS_NOS_TEXTURES);2783if (screen->precompile) {2784const unsigned _GL_TRIANGLES = 0x0004;2785struct brw_tcs_prog_key key = {2786KEY_INIT(),2787// XXX: make sure the linker fills this out from the TES...2788.tes_primitive_mode =2789info->tess.primitive_mode ? info->tess.primitive_mode2790: _GL_TRIANGLES,2791.outputs_written = info->outputs_written,2792.patch_outputs_written = info->patch_outputs_written,2793};27942795key.input_vertices = info->tess.tcs_vertices_out;27962797if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key)))2798crocus_compile_tcs(ice, ish, &key);2799}28002801return ish;2802}28032804static void *2805crocus_create_tes_state(struct pipe_context *ctx,2806const struct pipe_shader_state *state)2807{2808struct crocus_context *ice = (void *) ctx;2809struct crocus_screen *screen = (void *) ctx->screen;2810struct crocus_uncompiled_shader *ish = crocus_create_shader_state(ctx, state);2811struct shader_info *info = &ish->nir->info;28122813ish->nos |= (1ull << CROCUS_NOS_TEXTURES);2814/* User clip planes */2815if (ish->nir->info.clip_distance_array_size == 0)2816ish->nos |= (1ull << CROCUS_NOS_RASTERIZER);28172818if (screen->precompile) {2819struct brw_tes_prog_key key = {2820KEY_INIT(),2821// XXX: not ideal, need TCS output/TES input unification2822.inputs_read = info->inputs_read,2823.patch_inputs_read = info->patch_inputs_read,2824};28252826if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key)))2827crocus_compile_tes(ice, ish, &key);2828}28292830return ish;2831}28322833static void *2834crocus_create_gs_state(struct pipe_context *ctx,2835const struct pipe_shader_state *state)2836{2837struct crocus_context *ice = (void *) ctx;2838struct crocus_screen *screen = (void *) ctx->screen;2839struct crocus_uncompiled_shader *ish = crocus_create_shader_state(ctx, state);28402841ish->nos |= (1ull << CROCUS_NOS_TEXTURES);2842/* User clip planes */2843if (ish->nir->info.clip_distance_array_size == 0)2844ish->nos |= (1ull << CROCUS_NOS_RASTERIZER);28452846if (screen->precompile) {2847struct brw_gs_prog_key key = { KEY_INIT() };28482849if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key)))2850crocus_compile_gs(ice, ish, &key);2851}28522853return ish;2854}28552856static void *2857crocus_create_fs_state(struct pipe_context *ctx,2858const struct pipe_shader_state *state)2859{2860struct crocus_context *ice = (void *) ctx;2861struct crocus_screen *screen = (void *) ctx->screen;2862struct crocus_uncompiled_shader *ish = crocus_create_shader_state(ctx, state);2863struct shader_info *info = &ish->nir->info;28642865ish->nos |= (1ull << CROCUS_NOS_FRAMEBUFFER) |2866(1ull << CROCUS_NOS_DEPTH_STENCIL_ALPHA) |2867(1ull << CROCUS_NOS_RASTERIZER) |2868(1ull << CROCUS_NOS_TEXTURES) |2869(1ull << CROCUS_NOS_BLEND);28702871/* The program key needs the VUE map if there are > 16 inputs or gen4/5 */2872if (screen->devinfo.ver < 6 || util_bitcount64(ish->nir->info.inputs_read &2873BRW_FS_VARYING_INPUT_MASK) > 16) {2874ish->nos |= (1ull << CROCUS_NOS_LAST_VUE_MAP);2875}28762877if (screen->precompile) {2878const uint64_t color_outputs = info->outputs_written &2879~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |2880BITFIELD64_BIT(FRAG_RESULT_STENCIL) |2881BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));28822883bool can_rearrange_varyings =2884screen->devinfo.ver > 6 && util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;28852886const struct intel_device_info *devinfo = &screen->devinfo;2887struct brw_wm_prog_key key = {2888KEY_INIT(),2889.nr_color_regions = util_bitcount(color_outputs),2890.coherent_fb_fetch = false,2891.input_slots_valid =2892can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,2893};28942895struct brw_vue_map vue_map;2896if (devinfo->ver < 6) {2897brw_compute_vue_map(devinfo, &vue_map,2898info->inputs_read | VARYING_BIT_POS,2899false, /* pos slots */ 1);2900}2901if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key)))2902crocus_compile_fs(ice, ish, &key, &vue_map);2903}29042905return ish;2906}29072908static void *2909crocus_create_compute_state(struct pipe_context *ctx,2910const struct pipe_compute_state *state)2911{2912assert(state->ir_type == PIPE_SHADER_IR_NIR);29132914struct crocus_context *ice = (void *) ctx;2915struct crocus_screen *screen = (void *) ctx->screen;2916struct crocus_uncompiled_shader *ish =2917crocus_create_uncompiled_shader(ctx, (void *) state->prog, NULL);29182919ish->nos |= (1ull << CROCUS_NOS_TEXTURES);2920// XXX: disallow more than 64KB of shared variables29212922if (screen->precompile) {2923struct brw_cs_prog_key key = { KEY_INIT() };29242925if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key)))2926crocus_compile_cs(ice, ish, &key);2927}29282929return ish;2930}29312932/**2933* The pipe->delete_[stage]_state() driver hooks.2934*2935* Frees the crocus_uncompiled_shader.2936*/2937static void2938crocus_delete_shader_state(struct pipe_context *ctx, void *state, gl_shader_stage stage)2939{2940struct crocus_uncompiled_shader *ish = state;2941struct crocus_context *ice = (void *) ctx;29422943if (ice->shaders.uncompiled[stage] == ish) {2944ice->shaders.uncompiled[stage] = NULL;2945ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_UNCOMPILED_VS << stage;2946}29472948if (ish->const_data) {2949pipe_resource_reference(&ish->const_data, NULL);2950pipe_resource_reference(&ish->const_data_state.res, NULL);2951}29522953ralloc_free(ish->nir);2954free(ish);2955}29562957static void2958crocus_delete_vs_state(struct pipe_context *ctx, void *state)2959{2960crocus_delete_shader_state(ctx, state, MESA_SHADER_VERTEX);2961}29622963static void2964crocus_delete_tcs_state(struct pipe_context *ctx, void *state)2965{2966crocus_delete_shader_state(ctx, state, MESA_SHADER_TESS_CTRL);2967}29682969static void2970crocus_delete_tes_state(struct pipe_context *ctx, void *state)2971{2972crocus_delete_shader_state(ctx, state, MESA_SHADER_TESS_EVAL);2973}29742975static void2976crocus_delete_gs_state(struct pipe_context *ctx, void *state)2977{2978crocus_delete_shader_state(ctx, state, MESA_SHADER_GEOMETRY);2979}29802981static void2982crocus_delete_fs_state(struct pipe_context *ctx, void *state)2983{2984crocus_delete_shader_state(ctx, state, MESA_SHADER_FRAGMENT);2985}29862987static void2988crocus_delete_cs_state(struct pipe_context *ctx, void *state)2989{2990crocus_delete_shader_state(ctx, state, MESA_SHADER_COMPUTE);2991}29922993/**2994* The pipe->bind_[stage]_state() driver hook.2995*2996* Binds an uncompiled shader as the current one for a particular stage.2997* Updates dirty tracking to account for the shader's NOS.2998*/2999static void3000bind_shader_state(struct crocus_context *ice,3001struct crocus_uncompiled_shader *ish,3002gl_shader_stage stage)3003{3004uint64_t dirty_bit = CROCUS_STAGE_DIRTY_UNCOMPILED_VS << stage;3005const uint64_t nos = ish ? ish->nos : 0;30063007const struct shader_info *old_info = crocus_get_shader_info(ice, stage);3008const struct shader_info *new_info = ish ? &ish->nir->info : NULL;30093010if ((old_info ? BITSET_LAST_BIT(old_info->textures_used) : 0) !=3011(new_info ? BITSET_LAST_BIT(new_info->textures_used) : 0)) {3012ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;3013}30143015ice->shaders.uncompiled[stage] = ish;3016ice->state.stage_dirty |= dirty_bit;30173018/* Record that CSOs need to mark CROCUS_DIRTY_UNCOMPILED_XS when they change3019* (or that they no longer need to do so).3020*/3021for (int i = 0; i < CROCUS_NOS_COUNT; i++) {3022if (nos & (1 << i))3023ice->state.stage_dirty_for_nos[i] |= dirty_bit;3024else3025ice->state.stage_dirty_for_nos[i] &= ~dirty_bit;3026}3027}30283029static void3030crocus_bind_vs_state(struct pipe_context *ctx, void *state)3031{3032struct crocus_context *ice = (struct crocus_context *)ctx;3033struct crocus_uncompiled_shader *new_ish = state;3034struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;3035const struct intel_device_info *devinfo = &screen->devinfo;30363037if (new_ish &&3038ice->state.window_space_position !=3039new_ish->nir->info.vs.window_space_position) {3040ice->state.window_space_position =3041new_ish->nir->info.vs.window_space_position;30423043ice->state.dirty |= CROCUS_DIRTY_CLIP |3044CROCUS_DIRTY_RASTER |3045CROCUS_DIRTY_CC_VIEWPORT;3046}30473048if (devinfo->ver == 6) {3049ice->state.stage_dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG;3050}30513052bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX);3053}30543055static void3056crocus_bind_tcs_state(struct pipe_context *ctx, void *state)3057{3058bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);3059}30603061static void3062crocus_bind_tes_state(struct pipe_context *ctx, void *state)3063{3064struct crocus_context *ice = (struct crocus_context *)ctx;30653066/* Enabling/disabling optional stages requires a URB reconfiguration. */3067if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])3068ice->state.dirty |= CROCUS_DIRTY_GEN6_URB;30693070bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);3071}30723073static void3074crocus_bind_gs_state(struct pipe_context *ctx, void *state)3075{3076struct crocus_context *ice = (struct crocus_context *)ctx;30773078/* Enabling/disabling optional stages requires a URB reconfiguration. */3079if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])3080ice->state.dirty |= CROCUS_DIRTY_GEN6_URB;30813082bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY);3083}30843085static void3086crocus_bind_fs_state(struct pipe_context *ctx, void *state)3087{3088struct crocus_context *ice = (struct crocus_context *) ctx;3089struct crocus_screen *screen = (struct crocus_screen *) ctx->screen;3090const struct intel_device_info *devinfo = &screen->devinfo;3091struct crocus_uncompiled_shader *old_ish =3092ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];3093struct crocus_uncompiled_shader *new_ish = state;30943095const unsigned color_bits =3096BITFIELD64_BIT(FRAG_RESULT_COLOR) |3097BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS);30983099/* Fragment shader outputs influence HasWriteableRT */3100if (!old_ish || !new_ish ||3101(old_ish->nir->info.outputs_written & color_bits) !=3102(new_ish->nir->info.outputs_written & color_bits)) {3103if (devinfo->ver == 8)3104ice->state.dirty |= CROCUS_DIRTY_GEN8_PS_BLEND;3105else3106ice->state.dirty |= CROCUS_DIRTY_WM;3107}31083109if (devinfo->ver == 8)3110ice->state.dirty |= CROCUS_DIRTY_GEN8_PMA_FIX;3111bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);3112}31133114static void3115crocus_bind_cs_state(struct pipe_context *ctx, void *state)3116{3117bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE);3118}31193120void3121crocus_init_program_functions(struct pipe_context *ctx)3122{3123ctx->create_vs_state = crocus_create_vs_state;3124ctx->create_tcs_state = crocus_create_tcs_state;3125ctx->create_tes_state = crocus_create_tes_state;3126ctx->create_gs_state = crocus_create_gs_state;3127ctx->create_fs_state = crocus_create_fs_state;3128ctx->create_compute_state = crocus_create_compute_state;31293130ctx->delete_vs_state = crocus_delete_vs_state;3131ctx->delete_tcs_state = crocus_delete_tcs_state;3132ctx->delete_tes_state = crocus_delete_tes_state;3133ctx->delete_gs_state = crocus_delete_gs_state;3134ctx->delete_fs_state = crocus_delete_fs_state;3135ctx->delete_compute_state = crocus_delete_cs_state;31363137ctx->bind_vs_state = crocus_bind_vs_state;3138ctx->bind_tcs_state = crocus_bind_tcs_state;3139ctx->bind_tes_state = crocus_bind_tes_state;3140ctx->bind_gs_state = crocus_bind_gs_state;3141ctx->bind_fs_state = crocus_bind_fs_state;3142ctx->bind_compute_state = crocus_bind_cs_state;3143}314431453146