Path: blob/21.2-virgl/src/gallium/drivers/iris/iris_program.c
/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_program.c
 *
 * This file contains the driver interface for compiling shaders.
 *
 * See iris_program_cache.c for the in-memory program cache where the
 * compiled shaders are stored.
 */

#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_atomic.h"
#include "util/u_upload_mgr.h"
#include "util/debug.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/compiler/brw_nir.h"
#include "iris_context.h"
#include "nir/tgsi_to_nir.h"

#define KEY_ID(prefix) .prefix.program_string_id = ish->program_id
#define BRW_KEY_INIT(gen, prog_id)                       \
   .base.program_string_id = prog_id,                    \
   .base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \
   .base.tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688,   \
   .base.tex.compressed_multisample_layout_mask = ~0,    \
   .base.tex.msaa_16 = (gen >= 9 ? ~0 : 0)

static unsigned
get_new_program_id(struct iris_screen *screen)
{
   return p_atomic_inc_return(&screen->program_id);
}

static struct brw_vs_prog_key
iris_to_brw_vs_key(const struct intel_device_info *devinfo,
                   const struct iris_vs_prog_key *key)
{
   return (struct brw_vs_prog_key) {
      BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),

      /* Don't tell the backend about our clip plane constants, we've
       * already lowered them in NIR and don't want it doing it again.
       */
      .nr_userclip_plane_consts = 0,
   };
}

static struct brw_tcs_prog_key
iris_to_brw_tcs_key(const struct intel_device_info *devinfo,
                    const struct iris_tcs_prog_key *key)
{
   return (struct brw_tcs_prog_key) {
      BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
      .tes_primitive_mode = key->tes_primitive_mode,
      .input_vertices = key->input_vertices,
      .patch_outputs_written = key->patch_outputs_written,
      .outputs_written = key->outputs_written,
      .quads_workaround = key->quads_workaround,
   };
}

static struct brw_tes_prog_key
iris_to_brw_tes_key(const struct intel_device_info *devinfo,
                    const struct iris_tes_prog_key *key)
{
   return (struct brw_tes_prog_key) {
      BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
      .patch_inputs_read = key->patch_inputs_read,
      .inputs_read = key->inputs_read,
   };
}

static struct brw_gs_prog_key
iris_to_brw_gs_key(const struct intel_device_info *devinfo,
                   const struct iris_gs_prog_key *key)
{
   return (struct brw_gs_prog_key) {
      BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
   };
}

static struct brw_wm_prog_key
iris_to_brw_fs_key(const struct intel_device_info *devinfo,
                   const struct iris_fs_prog_key *key)
{
   return (struct brw_wm_prog_key) {
      BRW_KEY_INIT(devinfo->ver, key->base.program_string_id),
      .nr_color_regions = key->nr_color_regions,
      .flat_shade = key->flat_shade,
      .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
      .alpha_to_coverage = key->alpha_to_coverage,
      .clamp_fragment_color = key->clamp_fragment_color,
      .persample_interp = key->persample_interp,
      .multisample_fbo = key->multisample_fbo,
      .force_dual_color_blend = key->force_dual_color_blend,
      .coherent_fb_fetch = key->coherent_fb_fetch,
      .color_outputs_valid = key->color_outputs_valid,
      .input_slots_valid = key->input_slots_valid,
      .ignore_sample_mask_out = !key->multisample_fbo,
   };
}

static struct brw_cs_prog_key
iris_to_brw_cs_key(const struct intel_device_info *devinfo,
                   const struct iris_cs_prog_key *key)
{
   return (struct brw_cs_prog_key) {
      BRW_KEY_INIT(devinfo->ver, key->base.program_string_id),
   };
}

static void *
upload_state(struct u_upload_mgr *uploader,
             struct iris_state_ref *ref,
             unsigned size,
             unsigned alignment)
{
   void *p = NULL;
   u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
   return p;
}

void
iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
                                struct pipe_shader_buffer *buf,
                                struct iris_state_ref *surf_state,
                                isl_surf_usage_flags_t usage)
{
   struct pipe_context *ctx = &ice->ctx;
   struct iris_screen *screen = (struct iris_screen *) ctx->screen;
   bool ssbo = usage & ISL_SURF_USAGE_STORAGE_BIT;

   void *map =
      upload_state(ice->state.surface_uploader, surf_state,
                   screen->isl_dev.ss.size, 64);
   if (!unlikely(map)) {
      surf_state->res = NULL;
      return;
   }

   struct iris_resource *res = (void *) buf->buffer;
   struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
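
   /* u_upload_alloc returned an offset relative to the upload buffer's BO,
    * but surface state pointers are expressed relative to Surface State
    * Base Address, so fold in this BO's offset from that base.
    */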
   surf_state->offset += iris_bo_offset_from_base_address(surf_bo);

   const bool dataport = ssbo || !screen->compiler->indirect_ubos_use_sampler;

   isl_buffer_fill_state(&screen->isl_dev, map,
                         .address = res->bo->gtt_offset + res->offset +
                                    buf->buffer_offset,
                         .size_B = buf->buffer_size - res->offset,
                         .format = dataport ? ISL_FORMAT_RAW
                                            : ISL_FORMAT_R32G32B32A32_FLOAT,
                         .swizzle = ISL_SWIZZLE_IDENTITY,
                         .stride_B = 1,
                         .mocs = iris_mocs(res->bo, &screen->isl_dev, usage));
}

static nir_ssa_def *
get_aoa_deref_offset(nir_builder *b,
                     nir_deref_instr *deref,
                     unsigned elem_size)
{
   unsigned array_size = elem_size;
   nir_ssa_def *offset = nir_imm_int(b, 0);

   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      /* This level's element size is the previous level's array size */
      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      assert(deref->arr.index.ssa);
      offset = nir_iadd(b, offset,
                        nir_imul(b, index, nir_imm_int(b, array_size)));

      deref = nir_deref_instr_parent(deref);
      assert(glsl_type_is_array(deref->type));
      array_size *= glsl_get_length(deref->type);
   }

   /* Accessing an invalid surface index with the dataport can result in a
    * hang. According to the spec "if the index used to select an individual
    * element is negative or greater than or equal to the size of the array,
    * the results of the operation are undefined but may not lead to
    * termination" -- which is one of the possible outcomes of the hang.
    * Clamp the index to prevent access outside of the array bounds.
    */
   return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
}

static void
iris_lower_storage_image_derefs(nir_shader *nir)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_imin:
         case nir_intrinsic_image_deref_atomic_umin:
         case nir_intrinsic_image_deref_atomic_imax:
         case nir_intrinsic_image_deref_atomic_umax:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            b.cursor = nir_before_instr(&intrin->instr);
            nir_ssa_def *index =
               nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
                        get_aoa_deref_offset(&b, deref, 1));
            nir_rewrite_image_intrinsic(intrin, index, false);
            break;
         }

         default:
            break;
         }
      }
   }
}

/**
 * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
 */
static bool
iris_fix_edge_flags(nir_shader *nir)
{
   if (nir->info.stage != MESA_SHADER_VERTEX) {
      nir_shader_preserve_all_metadata(nir);
      return false;
   }

   nir_variable *var = nir_find_variable_with_location(nir,
                                                       nir_var_shader_out,
                                                       VARYING_SLOT_EDGE);
   if (!var) {
      nir_shader_preserve_all_metadata(nir);
      return false;
   }

   var->data.mode = nir_var_shader_temp;
   nir->info.outputs_written &= ~VARYING_BIT_EDGE;
   nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
   nir_fixup_deref_modes(nir);

   nir_foreach_function(f, nir) {
      if (f->impl) {
         nir_metadata_preserve(f->impl, nir_metadata_block_index |
                                        nir_metadata_dominance |
                                        nir_metadata_live_ssa_defs |
                                        nir_metadata_loop_analysis);
      } else {
         nir_metadata_preserve(f->impl, nir_metadata_all);
      }
   }

   return true;
}

/**
 * Fix an uncompiled shader's stream output info.
 *
 * Core Gallium stores output->register_index as a "slot" number, where
 * slots are assigned consecutively to all outputs in info->outputs_written.
 * This naive packing of outputs doesn't work for us - we too have slots,
 * but the layout is defined by the VUE map, which we won't have until we
 * compile a specific shader variant. So, we remap these and simply store
 * VARYING_SLOT_* in our copy's output->register_index fields.
 *
 * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
 * components of our VUE header. See brw_vue_map.c for the layout.
 */
static void
update_so_info(struct pipe_stream_output_info *so_info,
               uint64_t outputs_written)
{
   uint8_t reverse_map[64] = {};
   unsigned slot = 0;
   while (outputs_written) {
      reverse_map[slot++] = u_bit_scan64(&outputs_written);
   }

   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      struct pipe_stream_output *output = &so_info->output[i];

      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      output->register_index = reverse_map[output->register_index];

      /* The VUE header contains three scalar fields packed together:
       * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
       * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
       * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
       */
      switch (output->register_index) {
      case VARYING_SLOT_LAYER:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 1;
         break;
      case VARYING_SLOT_VIEWPORT:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 2;
         break;
      case VARYING_SLOT_PSIZ:
         assert(output->num_components == 1);
         output->start_component = 3;
         break;
      }

      //info->outputs_written |= 1ull << output->register_index;
   }
}

static void
setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
                        unsigned offset, unsigned n)
{
   assert(offset % sizeof(uint32_t) == 0);

   for (unsigned i = 0; i < n; ++i)
      sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);

   for (unsigned i = n; i < 4; ++i)
      sysvals[i] = BRW_PARAM_BUILTIN_ZERO;
}
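
/* As an illustration: setup_vec4_image_sysval(sysvals, 2,
 * offsetof(struct brw_image_param, size), 3) fills sysvals[0..2] with
 * BRW_PARAM_IMAGE handles for the three size components of image 2, and
 * pads the remaining sysvals[3] with BRW_PARAM_BUILTIN_ZERO.
 */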

/**
 * Associate NIR uniform variables with the prog_data->param[] mechanism
 * used by the backend. Also, decide which UBOs we'd like to push in an
 * ideal situation (though the backend can reduce this).
 */
static void
iris_setup_uniforms(const struct brw_compiler *compiler,
                    void *mem_ctx,
                    nir_shader *nir,
                    struct brw_stage_prog_data *prog_data,
                    unsigned kernel_input_size,
                    enum brw_param_builtin **out_system_values,
                    unsigned *out_num_system_values,
                    unsigned *out_num_cbufs)
{
   UNUSED const struct intel_device_info *devinfo = compiler->devinfo;

   unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t));

   const unsigned IRIS_MAX_SYSTEM_VALUES =
      PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
   enum brw_param_builtin *system_values =
      rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
   unsigned num_system_values = 0;

   unsigned patch_vert_idx = -1;
   unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
   unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
   unsigned variable_group_size_idx = -1;
   unsigned work_dim_idx = -1;
   memset(ucp_idx, -1, sizeof(ucp_idx));
   memset(img_idx, -1, sizeof(img_idx));

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   b.cursor = nir_before_block(nir_start_block(impl));
   nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);

   /* Turn system value intrinsics into uniforms */
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         nir_ssa_def *offset;

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_constant: {
            unsigned load_size = intrin->dest.ssa.num_components *
                                 intrin->dest.ssa.bit_size / 8;
            unsigned load_align = intrin->dest.ssa.bit_size / 8;

            /* This one is special because it reads from the shader constant
             * data and not cbuf0 which gallium uploads for us.
             */
            b.cursor = nir_instr_remove(&intrin->instr);

            nir_ssa_def *offset =
               nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1),
                            nir_intrinsic_base(intrin));

            assert(load_size < b.shader->constant_data_size);
            unsigned max_offset = b.shader->constant_data_size - load_size;
            offset = nir_umin(&b, offset, nir_imm_int(&b, max_offset));

            nir_ssa_def *const_data_base_addr = nir_pack_64_2x32_split(&b,
               nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW),
               nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH));

            nir_ssa_def *data =
               nir_load_global(&b, nir_iadd(&b, const_data_base_addr,
                                                nir_u2u64(&b, offset)),
                               load_align,
                               intrin->dest.ssa.num_components,
                               intrin->dest.ssa.bit_size);

            nir_ssa_def_rewrite_uses(&intrin->dest.ssa, data);
            continue;
         }
         case nir_intrinsic_load_user_clip_plane: {
            unsigned ucp = nir_intrinsic_ucp_id(intrin);

            if (ucp_idx[ucp] == -1) {
               ucp_idx[ucp] = num_system_values;
               num_system_values += 4;
            }

            for (int i = 0; i < 4; i++) {
               system_values[ucp_idx[ucp] + i] =
                  BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, system_values_start +
                                     ucp_idx[ucp] * sizeof(uint32_t));
            break;
         }
         case nir_intrinsic_load_patch_vertices_in:
            if (patch_vert_idx == -1)
               patch_vert_idx = num_system_values++;

            system_values[patch_vert_idx] =
               BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, system_values_start +
                                     patch_vert_idx * sizeof(uint32_t));
            break;
         case nir_intrinsic_image_deref_load_param_intel: {
            assert(devinfo->ver < 9);
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            if (img_idx[var->data.binding] == -1) {
               /* GL only allows arrays of arrays of images. */
               assert(glsl_type_is_image(glsl_without_array(var->type)));
               unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));

               for (int i = 0; i < num_images; i++) {
                  const unsigned img = var->data.binding + i;

                  img_idx[img] = num_system_values;
                  num_system_values += BRW_IMAGE_PARAM_SIZE;

                  uint32_t *img_sv = &system_values[img_idx[img]];

                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
                     offsetof(struct brw_image_param, offset), 2);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
                     offsetof(struct brw_image_param, size), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
                     offsetof(struct brw_image_param, stride), 4);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
                     offsetof(struct brw_image_param, tiling), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
                     offsetof(struct brw_image_param, swizzling), 2);
               }
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_iadd(&b,
               get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
               nir_imm_int(&b, system_values_start +
                               img_idx[var->data.binding] * 4 +
                               nir_intrinsic_base(intrin) * 16));
            break;
         }
         case nir_intrinsic_load_workgroup_size: {
            assert(nir->info.workgroup_size_variable);
            if (variable_group_size_idx == -1) {
               variable_group_size_idx = num_system_values;
               num_system_values += 3;
               for (int i = 0; i < 3; i++) {
                  system_values[variable_group_size_idx + i] =
                     BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i;
               }
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, system_values_start +
                                     variable_group_size_idx * sizeof(uint32_t));
            break;
         }
         case nir_intrinsic_load_work_dim: {
            if (work_dim_idx == -1) {
               work_dim_idx = num_system_values++;
               system_values[work_dim_idx] = BRW_PARAM_BUILTIN_WORK_DIM;
            }
            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, system_values_start +
                                     work_dim_idx * sizeof(uint32_t));
            break;
         }
         case nir_intrinsic_load_kernel_input: {
            assert(nir_intrinsic_base(intrin) +
                   nir_intrinsic_range(intrin) <= kernel_input_size);
            b.cursor = nir_before_instr(instr);
            offset = nir_iadd_imm(&b, intrin->src[0].ssa,
                                      nir_intrinsic_base(intrin));
            break;
         }
         default:
            continue;
         }

         nir_ssa_def *load =
            nir_load_ubo(&b, intrin->dest.ssa.num_components,
                         intrin->dest.ssa.bit_size,
                         temp_ubo_name, offset,
                         .align_mul = 4,
                         .align_offset = 0,
                         .range_base = 0,
                         .range = ~0);

         nir_ssa_def_rewrite_uses(&intrin->dest.ssa, load);
         nir_instr_remove(instr);
      }
   }

   nir_validate_shader(nir, "before remapping");

   /* Uniforms are stored in constant buffer 0, so user-facing UBOs are
    * indexed by one. If any constant buffer is needed, constant buffer 0
    * will be needed too, so account for it.
    */
   unsigned num_cbufs = nir->info.num_ubos;
   if (num_cbufs || nir->num_uniforms)
      num_cbufs++;
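
   /* For example, a shader with two user UBOs plus default-block uniforms
    * ends up with num_cbufs == 3: cbuf0 holds the uniforms and cbufs 1 and
    * 2 hold the user UBOs.
    */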

   /* Place the new params in a new cbuf. */
   if (num_system_values > 0 || kernel_input_size > 0) {
      unsigned sysval_cbuf_index = num_cbufs;
      num_cbufs++;

      system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
                               num_system_values);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);

            if (load->intrinsic != nir_intrinsic_load_ubo)
               continue;

            b.cursor = nir_before_instr(instr);

            assert(load->src[0].is_ssa);

            if (load->src[0].ssa == temp_ubo_name) {
               nir_ssa_def *imm = nir_imm_int(&b, sysval_cbuf_index);
               nir_instr_rewrite_src(instr, &load->src[0],
                                     nir_src_for_ssa(imm));
            }
         }
      }

      /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
      nir_opt_constant_folding(nir);
   } else {
      ralloc_free(system_values);
      system_values = NULL;
   }

   assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
   nir_validate_shader(nir, "after remap");

   /* We don't use params[] but gallium leaves num_uniforms set. We use this
    * to detect when cbuf0 exists but we don't need it anymore when we get
    * here. Instead, zero it out so that the back-end doesn't get confused
    * when nr_params * 4 != num_uniforms != nr_params * 4.
    */
   nir->num_uniforms = 0;

   *out_system_values = system_values;
   *out_num_system_values = num_system_values;
   *out_num_cbufs = num_cbufs;
}

static const char *surface_group_names[] = {
   [IRIS_SURFACE_GROUP_RENDER_TARGET]      = "render target",
   [IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
   [IRIS_SURFACE_GROUP_CS_WORK_GROUPS]     = "CS work groups",
   [IRIS_SURFACE_GROUP_TEXTURE]            = "texture",
   [IRIS_SURFACE_GROUP_UBO]                = "ubo",
   [IRIS_SURFACE_GROUP_SSBO]               = "ssbo",
   [IRIS_SURFACE_GROUP_IMAGE]              = "image",
};

static void
iris_print_binding_table(FILE *fp, const char *name,
                         const struct iris_binding_table *bt)
{
   STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);

   uint32_t total = 0;
   uint32_t compacted = 0;

   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
      uint32_t size = bt->sizes[i];
      total += size;
      if (size)
         compacted += util_bitcount64(bt->used_mask[i]);
   }

   if (total == 0) {
      fprintf(fp, "Binding table for %s is empty\n\n", name);
      return;
   }

   if (total != compacted) {
      fprintf(fp, "Binding table for %s "
              "(compacted to %u entries from %u entries)\n",
              name, compacted, total);
   } else {
      fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
   }

   uint32_t entry = 0;
   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
      uint64_t mask = bt->used_mask[i];
      while (mask) {
         int index = u_bit_scan64(&mask);
         fprintf(fp, "  [%u] %s #%d\n", entry++, surface_group_names[i], index);
      }
   }
   fprintf(fp, "\n");
}

enum {
   /* Max elements in a surface group. */
   SURFACE_GROUP_MAX_ELEMENTS = 64,
};

/**
 * Map a <group, index> pair to a binding table index.
 *
 * For example: <UBO, 5> => binding table index 12
 */
uint32_t
iris_group_index_to_bti(const struct iris_binding_table *bt,
                        enum iris_surface_group group, uint32_t index)
{
   assert(index < bt->sizes[group]);
   uint64_t mask = bt->used_mask[group];
   uint64_t bit = 1ull << index;
   if (bit & mask) {
      return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
   } else {
      return IRIS_SURFACE_NOT_USED;
   }
}

/**
 * Map a binding table index back to a <group, index> pair.
 *
 * For example: binding table index 12 => <UBO, 5>
 */
uint32_t
iris_bti_to_group_index(const struct iris_binding_table *bt,
                        enum iris_surface_group group, uint32_t bti)
{
   uint64_t used_mask = bt->used_mask[group];
   assert(bti >= bt->offsets[group]);

   uint32_t c = bti - bt->offsets[group];
   while (used_mask) {
      int i = u_bit_scan64(&used_mask);
      if (c == 0)
         return i;
      c--;
   }

   return IRIS_SURFACE_NOT_USED;
}

static void
rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
                     nir_instr *instr, nir_src *src,
                     enum iris_surface_group group)
{
   assert(bt->sizes[group] > 0);

   b->cursor = nir_before_instr(instr);
   nir_ssa_def *bti;
   if (nir_src_is_const(*src)) {
      uint32_t index = nir_src_as_uint(*src);
      bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
                           src->ssa->bit_size);
   } else {
      /* Indirect usage makes all the surfaces of the group available,
       * so we can just add the base.
       */
      assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
      bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
   }
   nir_instr_rewrite_src(instr, src, nir_src_for_ssa(bti));
}

static void
mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
                   enum iris_surface_group group)
{
   assert(bt->sizes[group] > 0);

   if (nir_src_is_const(*src)) {
      uint64_t index = nir_src_as_uint(*src);
      assert(index < bt->sizes[group]);
      bt->used_mask[group] |= 1ull << index;
   } else {
      /* There's an indirect usage, we need all the surfaces. */
      bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
   }
}

static bool
skip_compacting_binding_tables(void)
{
   static int skip = -1;
   if (skip < 0)
      skip = env_var_as_boolean("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
   return skip;
}

/**
 * Set up the binding table indices and apply to the shader.
 */
static void
iris_setup_binding_table(const struct intel_device_info *devinfo,
                         struct nir_shader *nir,
                         struct iris_binding_table *bt,
                         unsigned num_render_targets,
                         unsigned num_system_values,
                         unsigned num_cbufs)
{
   const struct shader_info *info = &nir->info;

   memset(bt, 0, sizeof(*bt));

   /* Set the sizes for each surface group. For some groups, we already know
    * upfront how many will be used, so mark them.
    */
   if (info->stage == MESA_SHADER_FRAGMENT) {
      bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
      /* All render targets used. */
      bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
         BITFIELD64_MASK(num_render_targets);

      /* Set up the render target read surface group in order to support
       * non-coherent framebuffer fetch on Gfx8.
       */
      if (devinfo->ver == 8 && info->outputs_read) {
         bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
         bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] =
            BITFIELD64_MASK(num_render_targets);
      }
   } else if (info->stage == MESA_SHADER_COMPUTE) {
      bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
   }

   bt->sizes[IRIS_SURFACE_GROUP_TEXTURE] = BITSET_LAST_BIT(info->textures_used);
   bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE] = info->textures_used[0];

   bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = info->num_images;

   /* Allocate an extra slot in the UBO section for NIR constants.
    * Binding table compaction will remove it if unnecessary.
    *
    * We don't include them in iris_compiled_shader::num_cbufs because
    * they are uploaded separately from shs->constbuf[], but from a shader
    * point of view, they're another UBO (at the end of the section).
    */
   bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;

   bt->sizes[IRIS_SURFACE_GROUP_SSBO] = info->num_ssbos;

   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
      assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);

   /* Mark surfaces used for the cases we don't have the information available
    * upfront.
    */
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   nir_foreach_block (block, impl) {
      nir_foreach_instr (instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_num_workgroups:
            bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
            break;

         case nir_intrinsic_load_output:
            if (devinfo->ver == 8) {
               mark_used_with_src(bt, &intrin->src[0],
                                  IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
            }
            break;

         case nir_intrinsic_image_size:
         case nir_intrinsic_image_load:
         case nir_intrinsic_image_store:
         case nir_intrinsic_image_atomic_add:
         case nir_intrinsic_image_atomic_imin:
         case nir_intrinsic_image_atomic_umin:
         case nir_intrinsic_image_atomic_imax:
         case nir_intrinsic_image_atomic_umax:
         case nir_intrinsic_image_atomic_and:
         case nir_intrinsic_image_atomic_or:
         case nir_intrinsic_image_atomic_xor:
         case nir_intrinsic_image_atomic_exchange:
         case nir_intrinsic_image_atomic_comp_swap:
         case nir_intrinsic_image_load_raw_intel:
         case nir_intrinsic_image_store_raw_intel:
            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
            break;

         case nir_intrinsic_load_ubo:
            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
            break;

         case nir_intrinsic_store_ssbo:
            mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
            break;
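
         /* Note that store_ssbo takes the buffer index in src[1] (src[0] is
          * the value being stored); the SSBO intrinsics below all take it
          * in src[0].
          */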
         case nir_intrinsic_get_ssbo_size:
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_ssbo_atomic_fmin:
         case nir_intrinsic_ssbo_atomic_fmax:
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
         case nir_intrinsic_load_ssbo:
            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
            break;

         default:
            break;
         }
      }
   }

   /* When disabled, we just mark everything as used. */
   if (unlikely(skip_compacting_binding_tables())) {
      for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
         bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
   }

   /* Calculate the offsets and the binding table size based on the used
    * surfaces. After this point, the functions to go between "group indices"
    * and binding table indices can be used.
    */
   uint32_t next = 0;
   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
      if (bt->used_mask[i] != 0) {
         bt->offsets[i] = next;
         next += util_bitcount64(bt->used_mask[i]);
      }
   }
   bt->size_bytes = next * 4;

   if (INTEL_DEBUG & DEBUG_BT) {
      iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
   }

   /* Apply the binding table indices. The backend compiler is not expected
    * to change those, as we haven't set any of the *_start entries in brw
    * binding_table.
    */
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block (block, impl) {
      nir_foreach_instr (instr, block) {
         if (instr->type == nir_instr_type_tex) {
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            tex->texture_index =
               iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE,
                                       tex->texture_index);
            continue;
         }

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_size:
         case nir_intrinsic_image_load:
         case nir_intrinsic_image_store:
         case nir_intrinsic_image_atomic_add:
         case nir_intrinsic_image_atomic_imin:
         case nir_intrinsic_image_atomic_umin:
         case nir_intrinsic_image_atomic_imax:
         case nir_intrinsic_image_atomic_umax:
         case nir_intrinsic_image_atomic_and:
         case nir_intrinsic_image_atomic_or:
         case nir_intrinsic_image_atomic_xor:
         case nir_intrinsic_image_atomic_exchange:
         case nir_intrinsic_image_atomic_comp_swap:
         case nir_intrinsic_image_load_raw_intel:
         case nir_intrinsic_image_store_raw_intel:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 IRIS_SURFACE_GROUP_IMAGE);
            break;

         case nir_intrinsic_load_ubo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 IRIS_SURFACE_GROUP_UBO);
            break;

         case nir_intrinsic_store_ssbo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
                                 IRIS_SURFACE_GROUP_SSBO);
            break;

         case nir_intrinsic_load_output:
            if (devinfo->ver == 8) {
               rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                    IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
            }
            break;

         case nir_intrinsic_get_ssbo_size:
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_ssbo_atomic_fmin:
         case nir_intrinsic_ssbo_atomic_fmax:
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
         case nir_intrinsic_load_ssbo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 IRIS_SURFACE_GROUP_SSBO);
            break;

         default:
            break;
         }
      }
   }
}
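
/* A worked example of the resulting layout: a fragment shader with two
 * render targets, one texture, and three UBOs of which only #0 and #2 are
 * actually loaded ends up with the render targets at BTIs 0-1, the texture
 * at BTI 2, and the two used UBOs at BTIs 3-4; the unused UBO is compacted
 * away.
 */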

static void
iris_debug_recompile(struct iris_screen *screen,
                     struct pipe_debug_callback *dbg,
                     struct iris_uncompiled_shader *ish,
                     const struct brw_base_prog_key *key)
{
   if (!ish || list_is_empty(&ish->variants)
            || list_is_singular(&ish->variants))
      return;

   const struct intel_device_info *devinfo = &screen->devinfo;
   const struct brw_compiler *c = screen->compiler;
   const struct shader_info *info = &ish->nir->info;

   c->shader_perf_log(dbg, "Recompiling %s shader for program %s: %s\n",
                      _mesa_shader_stage_to_string(info->stage),
                      info->name ? info->name : "(no identifier)",
                      info->label ? info->label : "");

   struct iris_compiled_shader *shader =
      list_first_entry(&ish->variants, struct iris_compiled_shader, link);
   const void *old_iris_key = &shader->key;

   union brw_any_prog_key old_key;

   switch (info->stage) {
   case MESA_SHADER_VERTEX:
      old_key.vs = iris_to_brw_vs_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_TESS_CTRL:
      old_key.tcs = iris_to_brw_tcs_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_TESS_EVAL:
      old_key.tes = iris_to_brw_tes_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_GEOMETRY:
      old_key.gs = iris_to_brw_gs_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_FRAGMENT:
      old_key.wm = iris_to_brw_fs_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_COMPUTE:
      old_key.cs = iris_to_brw_cs_key(devinfo, old_iris_key);
      break;
   default:
      unreachable("invalid shader stage");
   }

   brw_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
}

static void
check_urb_size(struct iris_context *ice,
               unsigned needed_size,
               gl_shader_stage stage)
{
   unsigned last_allocated_size = ice->shaders.urb.size[stage];

   /* If the last URB allocation wasn't large enough for our needs,
    * flag it as needing to be reconfigured. Otherwise, we can use
    * the existing config. However, if the URB is constrained, and
    * we can shrink our size for this stage, we may be able to gain
    * extra concurrency by reconfiguring it to be smaller. Do so.
    */
   if (last_allocated_size < needed_size ||
       (ice->shaders.urb.constrained && last_allocated_size > needed_size)) {
      ice->state.dirty |= IRIS_DIRTY_URB;
   }
}

/**
 * Get the shader for the last enabled geometry stage.
 *
 * This stage is the one which will feed stream output and the rasterizer.
 */
static gl_shader_stage
last_vue_stage(struct iris_context *ice)
{
   if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
      return MESA_SHADER_GEOMETRY;

   if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
      return MESA_SHADER_TESS_EVAL;

   return MESA_SHADER_VERTEX;
}

static inline struct iris_compiled_shader *
find_variant(const struct iris_screen *screen,
             struct iris_uncompiled_shader *ish,
             const void *key, unsigned key_size)
{
   struct list_head *start = ish->variants.next;

   if (screen->precompile) {
      /* Check the first list entry.
       * There will always be at least one variant in the list (most likely
       * the precompile variant), and other contexts only append new
       * variants, so we can safely check it without locking, saving that
       * cost in the common case.
       */
      struct iris_compiled_shader *first =
         list_first_entry(&ish->variants, struct iris_compiled_shader, link);

      if (memcmp(&first->key, key, key_size) == 0)
         return first;

      /* Skip this one in the loop below */
      start = first->link.next;
   }

   struct iris_compiled_shader *variant = NULL;

   /* If it doesn't match, we have to walk the list; other contexts may be
    * concurrently appending shaders to it, so we need to lock here.
    */
   simple_mtx_lock(&ish->lock);

   list_for_each_entry_from(struct iris_compiled_shader, v, start,
                            &ish->variants, link) {
      if (memcmp(&v->key, key, key_size) == 0) {
         variant = v;
         break;
      }
   }

   simple_mtx_unlock(&ish->lock);

   return variant;
}

/**
 * Compile a vertex shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_vs(struct iris_screen *screen,
                struct u_upload_mgr *uploader,
                struct pipe_debug_callback *dbg,
                struct iris_uncompiled_shader *ish,
                const struct iris_vs_prog_key *key)
{
   const struct brw_compiler *compiler = screen->compiler;
   const struct intel_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_vs_prog_data *vs_prog_data =
      rzalloc(mem_ctx, struct brw_vs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   if (key->vue.nr_userclip_plane_consts) {
      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
                        true, false, NULL);
      nir_lower_io_to_temporaries(nir, impl, true, false);
      nir_lower_global_vars_to_local(nir);
      nir_lower_vars_to_ssa(nir);
      nir_shader_gather_info(nir, impl);
   }

   prog_data->use_alt_mode = ish->use_alt_mode;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
                       &num_system_values, &num_cbufs);

   struct iris_binding_table bt;
   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
                            num_system_values, num_cbufs);

   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

   brw_compute_vue_map(devinfo,
                       &vue_prog_data->vue_map, nir->info.outputs_written,
                       nir->info.separate_shader, /* pos_slots */ 1);

   struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(devinfo, key);

   struct brw_compile_vs_params params = {
      .nir = nir,
      .key = &brw_key,
      .prog_data = vs_prog_data,
      .log_data = dbg,
   };

   const unsigned *program = brw_compile_vs(compiler, mem_ctx, &params);
   if (program == NULL) {
      dbg_printf("Failed to compile vertex shader: %s\n", params.error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   iris_debug_recompile(screen, dbg, ish, &brw_key.base);

   uint32_t *so_decls =
      screen->vtbl.create_so_decl_list(&ish->stream_output,
                                       &vue_prog_data->vue_map);
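
   /* The stream output declarations depend on the VUE map, which is only
    * known per-variant, so they are built here at compile time rather than
    * when the uncompiled shader is created (see update_so_info above).
    */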

   struct iris_compiled_shader *shader =
      iris_upload_shader(screen, ish, NULL, uploader,
                         IRIS_CACHE_VS, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values,
                         0, num_cbufs, &bt);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current vertex shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_vs(struct iris_context *ice)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_VERTEX];

   struct iris_vs_prog_key key = { KEY_ID(vue.base) };
   screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
   struct iris_compiled_shader *shader =
      find_variant(screen, ish, &key, sizeof(key));

   if (!shader) {
      shader = iris_disk_cache_retrieve(screen, uploader, ish,
                                        &key, sizeof(key));
   }

   if (!shader)
      shader = iris_compile_vs(screen, uploader, &ice->dbg, ish, &key);

   if (old != shader) {
      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_VERTEX],
                                    shader);
      ice->state.dirty |= IRIS_DIRTY_VF_SGVS;
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_VS |
                                IRIS_STAGE_DIRTY_BINDINGS_VS |
                                IRIS_STAGE_DIRTY_CONSTANTS_VS;
      shs->sysvals_need_upload = true;

      const struct brw_vue_prog_data *vue_prog_data =
         (void *) shader->prog_data;
      check_urb_size(ice, vue_prog_data->urb_entry_size, MESA_SHADER_VERTEX);
   }
}

/**
 * Get the shader_info for a given stage, or NULL if the stage is disabled.
 */
const struct shader_info *
iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
{
   const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];

   if (!ish)
      return NULL;

   const nir_shader *nir = ish->nir;
   return &nir->info;
}

/**
 * Get the union of TCS output and TES input slots.
 *
 * TCS and TES need to agree on a common URB entry layout. In particular,
 * the data for all patch vertices is stored in a single URB entry (unlike
 * GS which has one entry per input vertex).
 * This means that per-vertex array indexing needs a stride.
 *
 * SSO requires locations to match, but doesn't require the number of
 * outputs/inputs to match (in fact, the TCS often has extra outputs).
 * So, we need to take the extra step of unifying these on the fly.
 */
static void
get_unified_tess_slots(const struct iris_context *ice,
                       uint64_t *per_vertex_slots,
                       uint32_t *per_patch_slots)
{
   const struct shader_info *tcs =
      iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
   const struct shader_info *tes =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);

   *per_vertex_slots = tes->inputs_read;
   *per_patch_slots = tes->patch_inputs_read;

   if (tcs) {
      *per_vertex_slots |= tcs->outputs_written;
      *per_patch_slots |= tcs->patch_outputs_written;
   }
}

/**
 * Compile a tessellation control shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_tcs(struct iris_screen *screen,
                 struct hash_table *passthrough_ht,
                 struct u_upload_mgr *uploader,
                 struct pipe_debug_callback *dbg,
                 struct iris_uncompiled_shader *ish,
                 const struct iris_tcs_prog_key *key)
{
   const struct brw_compiler *compiler = screen->compiler;
   const struct nir_shader_compiler_options *options =
      compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_tcs_prog_data *tcs_prog_data =
      rzalloc(mem_ctx, struct brw_tcs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   const struct intel_device_info *devinfo = &screen->devinfo;
   enum brw_param_builtin *system_values = NULL;
   unsigned num_system_values = 0;
   unsigned num_cbufs = 0;

   nir_shader *nir;

   struct iris_binding_table bt;

   struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(devinfo, key);

   if (ish) {
      nir = nir_shader_clone(mem_ctx, ish->nir);

      iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
                          &num_system_values, &num_cbufs);
      iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
                               num_system_values, num_cbufs);
      brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
   } else {
      nir =
         brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, &brw_key);

      /* Reserve space for passing the default tess levels as constants. */
      num_cbufs = 1;
      num_system_values = 8;
      system_values =
         rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values);
      prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values);
      prog_data->nr_params = num_system_values;

      if (key->tes_primitive_mode == GL_QUADS) {
         for (int i = 0; i < 4; i++)
            system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;

         system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
         system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
      } else if (key->tes_primitive_mode == GL_TRIANGLES) {
         for (int i = 0; i < 3; i++)
            system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;

         system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
      } else {
         assert(key->tes_primitive_mode == GL_ISOLINES);
         system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
         system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
      }
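
      /* Note the reversed layout: for GL_QUADS, slots 7..4 hold outer.x
       * through outer.w and slots 3..2 hold inner.x and inner.y, while
       * slots 0..1 stay BRW_PARAM_BUILTIN_ZERO from the rzalloc above.
       */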

      /* Manually set up the TCS binding table. */
      memset(&bt, 0, sizeof(bt));
      bt.sizes[IRIS_SURFACE_GROUP_UBO] = 1;
      bt.used_mask[IRIS_SURFACE_GROUP_UBO] = 1;
      bt.size_bytes = 4;

      prog_data->ubo_ranges[0].length = 1;
   }

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_tcs(compiler, dbg, mem_ctx, &brw_key, tcs_prog_data,
                      nir, -1, NULL, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile control shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   iris_debug_recompile(screen, dbg, ish, &brw_key.base);

   struct iris_compiled_shader *shader =
      iris_upload_shader(screen, ish, passthrough_ht, uploader,
                         IRIS_CACHE_TCS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values,
                         0, num_cbufs, &bt);

   if (ish)
      iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current tessellation control shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_tcs(struct iris_context *ice)
{
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
   struct iris_uncompiled_shader *tcs =
      ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
   const struct brw_compiler *compiler = screen->compiler;
   const struct intel_device_info *devinfo = &screen->devinfo;

   const struct shader_info *tes_info =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
   struct iris_tcs_prog_key key = {
      .vue.base.program_string_id = tcs ? tcs->program_id : 0,
      .tes_primitive_mode = tes_info->tess.primitive_mode,
      .input_vertices =
         !tcs || compiler->use_tcs_8_patch ? ice->state.vertices_per_patch : 0,
      .quads_workaround = devinfo->ver < 9 &&
                          tes_info->tess.primitive_mode == GL_QUADS &&
                          tes_info->tess.spacing == TESS_SPACING_EQUAL,
   };
   get_unified_tess_slots(ice, &key.outputs_written,
                          &key.patch_outputs_written);
   screen->vtbl.populate_tcs_key(ice, &key);
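
   /* With no TCS bound, key.vue.base.program_string_id is 0 and the
    * passthrough variant is looked up in the per-context program cache
    * below, rather than in an uncompiled shader's variant list, since
    * there is no ish to attach it to.
    */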
   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
   struct iris_compiled_shader *shader =
      tcs ? find_variant(screen, tcs, &key, sizeof(key)) :
      iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);

   if (tcs && !shader) {
      shader = iris_disk_cache_retrieve(screen, uploader, tcs,
                                        &key, sizeof(key));
   }

   if (!shader) {
      shader = iris_compile_tcs(screen, ice->shaders.cache,
                                uploader, &ice->dbg, tcs, &key);
   }

   if (old != shader) {
      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL],
                                    shader);
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TCS |
                                IRIS_STAGE_DIRTY_BINDINGS_TCS |
                                IRIS_STAGE_DIRTY_CONSTANTS_TCS;
      shs->sysvals_need_upload = true;

      const struct brw_vue_prog_data *prog_data = (void *) shader->prog_data;
      check_urb_size(ice, prog_data->urb_entry_size, MESA_SHADER_TESS_CTRL);
   }
}

/**
 * Compile a tessellation evaluation shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_tes(struct iris_screen *screen,
                 struct u_upload_mgr *uploader,
                 struct pipe_debug_callback *dbg,
                 struct iris_uncompiled_shader *ish,
                 const struct iris_tes_prog_key *key)
{
   const struct brw_compiler *compiler = screen->compiler;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_tes_prog_data *tes_prog_data =
      rzalloc(mem_ctx, struct brw_tes_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   const struct intel_device_info *devinfo = &screen->devinfo;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   if (key->vue.nr_userclip_plane_consts) {
      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
                        true, false, NULL);
      nir_lower_io_to_temporaries(nir, impl, true, false);
      nir_lower_global_vars_to_local(nir);
      nir_lower_vars_to_ssa(nir);
      nir_shader_gather_info(nir, impl);
   }

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
                       &num_system_values, &num_cbufs);

   struct iris_binding_table bt;
   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
                            num_system_values, num_cbufs);

   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

   struct brw_vue_map input_vue_map;
   brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
                            key->patch_inputs_read);

   struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(devinfo, key);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_tes(compiler, dbg, mem_ctx, &brw_key, &input_vue_map,
                      tes_prog_data, nir, -1, NULL, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile evaluation shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   iris_debug_recompile(screen, dbg, ish, &brw_key.base);

   uint32_t *so_decls =
      screen->vtbl.create_so_decl_list(&ish->stream_output,
                                       &vue_prog_data->vue_map);

   struct iris_compiled_shader *shader =
      iris_upload_shader(screen, ish, NULL, uploader,
                         IRIS_CACHE_TES, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values,
                         0, num_cbufs, &bt);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current tessellation evaluation shader variant.
 *
 * Fill out the key, look
 * in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_tes(struct iris_context *ice)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];

   struct iris_tes_prog_key key = { KEY_ID(vue.base) };
   get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
   screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
   struct iris_compiled_shader *shader =
      find_variant(screen, ish, &key, sizeof(key));

   if (!shader) {
      shader = iris_disk_cache_retrieve(screen, uploader, ish,
                                        &key, sizeof(key));
   }

   if (!shader)
      shader = iris_compile_tes(screen, uploader, &ice->dbg, ish, &key);

   if (old != shader) {
      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL],
                                    shader);
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TES |
                                IRIS_STAGE_DIRTY_BINDINGS_TES |
                                IRIS_STAGE_DIRTY_CONSTANTS_TES;
      shs->sysvals_need_upload = true;

      const struct brw_vue_prog_data *prog_data = (void *) shader->prog_data;
      check_urb_size(ice, prog_data->urb_entry_size, MESA_SHADER_TESS_EVAL);
   }

   /* TODO: Could compare and avoid flagging this. */
   const struct shader_info *tes_info = &ish->nir->info;
   if (BITSET_TEST(tes_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_TES;
      ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true;
   }
}

/**
 * Compile a geometry shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_gs(struct iris_screen *screen,
                struct u_upload_mgr *uploader,
                struct pipe_debug_callback *dbg,
                struct iris_uncompiled_shader *ish,
                const struct iris_gs_prog_key *key)
{
   const struct brw_compiler *compiler = screen->compiler;
   const struct intel_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_gs_prog_data *gs_prog_data =
      rzalloc(mem_ctx, struct brw_gs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   if (key->vue.nr_userclip_plane_consts) {
      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      nir_lower_clip_gs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
                        false, NULL);
      nir_lower_io_to_temporaries(nir, impl, true, false);
      nir_lower_global_vars_to_local(nir);
      nir_lower_vars_to_ssa(nir);
      nir_shader_gather_info(nir, impl);
   }

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
                       &num_system_values, &num_cbufs);

   struct iris_binding_table bt;
   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
                            num_system_values, num_cbufs);

   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

   brw_compute_vue_map(devinfo,
                       &vue_prog_data->vue_map, nir->info.outputs_written,
                       nir->info.separate_shader, /* pos_slots */ 1);

   struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(devinfo, key);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_gs(compiler, dbg, mem_ctx, &brw_key, gs_prog_data,
                     nir, -1, NULL, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile geometry shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   iris_debug_recompile(screen, dbg, ish, &brw_key.base);

   uint32_t *so_decls =
      screen->vtbl.create_so_decl_list(&ish->stream_output,
                                       &vue_prog_data->vue_map);

   struct iris_compiled_shader *shader =
      iris_upload_shader(screen, ish, NULL, uploader,
                         IRIS_CACHE_GS, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values,
                         0, num_cbufs, &bt);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current geometry shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_gs(struct iris_context *ice)
{
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
   struct iris_compiled_shader *shader = NULL;
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;

   if (ish) {
      struct iris_gs_prog_key key = { KEY_ID(vue.base) };
      screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);

      shader = find_variant(screen, ish, &key, sizeof(key));

      if (!shader) {
         shader = iris_disk_cache_retrieve(screen, uploader, ish,
                                           &key, sizeof(key));
      }

      if (!shader)
         shader = iris_compile_gs(screen, uploader, &ice->dbg, ish, &key);
   }

   if (old != shader) {
      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_GEOMETRY],
                                    shader);
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS |
                                IRIS_STAGE_DIRTY_BINDINGS_GS |
                                IRIS_STAGE_DIRTY_CONSTANTS_GS;
      shs->sysvals_need_upload = true;

      unsigned urb_entry_size = shader ?
         ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
      check_urb_size(ice, urb_entry_size, MESA_SHADER_GEOMETRY);
   }
}

/**
 * Compile a fragment (pixel) shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_fs(struct iris_screen *screen,
                struct u_upload_mgr *uploader,
                struct pipe_debug_callback *dbg,
                struct iris_uncompiled_shader *ish,
                const struct iris_fs_prog_key *key,
                struct brw_vue_map *vue_map)
{
   const struct brw_compiler *compiler = screen->compiler;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_wm_prog_data *fs_prog_data =
      rzalloc(mem_ctx, struct brw_wm_prog_data);
   struct brw_stage_prog_data *prog_data = &fs_prog_data->base;
   enum brw_param_builtin *system_values;
   const struct intel_device_info *devinfo = &screen->devinfo;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   prog_data->use_alt_mode = ish->use_alt_mode;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
                       &num_system_values, &num_cbufs);

   /* Lower output variables to load_output intrinsics before setting up
    * binding tables, so iris_setup_binding_table can map any load_output
    * intrinsics to

/**
 * Compile a fragment (pixel) shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_fs(struct iris_screen *screen,
                struct u_upload_mgr *uploader,
                struct pipe_debug_callback *dbg,
                struct iris_uncompiled_shader *ish,
                const struct iris_fs_prog_key *key,
                struct brw_vue_map *vue_map)
{
   const struct brw_compiler *compiler = screen->compiler;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_wm_prog_data *fs_prog_data =
      rzalloc(mem_ctx, struct brw_wm_prog_data);
   struct brw_stage_prog_data *prog_data = &fs_prog_data->base;
   enum brw_param_builtin *system_values;
   const struct intel_device_info *devinfo = &screen->devinfo;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   prog_data->use_alt_mode = ish->use_alt_mode;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
                       &num_system_values, &num_cbufs);

   /* Lower output variables to load_output intrinsics before setting up
    * binding tables, so iris_setup_binding_table can map any load_output
    * intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gfx8 for
    * non-coherent framebuffer fetches.
    */
   brw_nir_lower_fs_outputs(nir);

   /* On Gfx11+, shader RT write messages have a "Null Render Target" bit
    * and do not need a binding table entry with a null surface.  Earlier
    * generations need an entry for a null surface.
    */
   int null_rts = devinfo->ver < 11 ? 1 : 0;

   struct iris_binding_table bt;
   iris_setup_binding_table(devinfo, nir, &bt,
                            MAX2(key->nr_color_regions, null_rts),
                            num_system_values, num_cbufs);

   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

   struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(devinfo, key);

   struct brw_compile_fs_params params = {
      .nir = nir,
      .key = &brw_key,
      .prog_data = fs_prog_data,

      .allow_spilling = true,
      .vue_map = vue_map,

      .log_data = dbg,
   };

   const unsigned *program = brw_compile_fs(compiler, mem_ctx, &params);
   if (program == NULL) {
      dbg_printf("Failed to compile fragment shader: %s\n", params.error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   iris_debug_recompile(screen, dbg, ish, &brw_key.base);

   struct iris_compiled_shader *shader =
      iris_upload_shader(screen, ish, NULL, uploader,
                         IRIS_CACHE_FS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values,
                         0, num_cbufs, &bt);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
   return shader;
}
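
/* When a fragment shader reads more than 16 varyings, IRIS_NOS_LAST_VUE_MAP
 * is set at create time and iris_update_compiled_fs() below copies the last
 * VUE map's slots_valid into key.input_slots_valid.  A change in the
 * upstream stage's outputs then selects (or compiles) a different FS
 * variant instead of reusing one laid out for the old outputs.
 */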

/**
 * Update the current fragment shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_fs(struct iris_context *ice)
{
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT];
   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   struct iris_fs_prog_key key = { KEY_ID(base) };
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);

   struct brw_vue_map *last_vue_map =
      &brw_vue_prog_data(ice->shaders.last_vue_shader->prog_data)->vue_map;

   if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
      key.input_slots_valid = last_vue_map->slots_valid;

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
   struct iris_compiled_shader *shader =
      find_variant(screen, ish, &key, sizeof(key));

   if (!shader) {
      shader = iris_disk_cache_retrieve(screen, uploader, ish,
                                        &key, sizeof(key));
   }

   if (!shader) {
      shader = iris_compile_fs(screen, uploader, &ice->dbg,
                               ish, &key, last_vue_map);
   }

   if (old != shader) {
      // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE
      // toggles.  might be able to avoid flagging SBE too.
      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_FRAGMENT],
                                    shader);
      ice->state.dirty |= IRIS_DIRTY_WM |
                          IRIS_DIRTY_CLIP |
                          IRIS_DIRTY_SBE;
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_FS |
                                IRIS_STAGE_DIRTY_BINDINGS_FS |
                                IRIS_STAGE_DIRTY_CONSTANTS_FS;
      shs->sysvals_need_upload = true;
   }
}

/**
 * Update the last enabled stage's VUE map.
 *
 * When the shader feeding the rasterizer's output interface changes, we
 * need to re-emit various packets.
 */
static void
update_last_vue_map(struct iris_context *ice,
                    struct iris_compiled_shader *shader)
{
   struct brw_vue_prog_data *vue_prog_data = (void *) shader->prog_data;
   struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
   struct brw_vue_map *old_map = !ice->shaders.last_vue_shader ? NULL :
      &brw_vue_prog_data(ice->shaders.last_vue_shader->prog_data)->vue_map;
   const uint64_t changed_slots =
      (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;

   if (changed_slots & VARYING_BIT_VIEWPORT) {
      ice->state.num_viewports =
         (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
      ice->state.dirty |= IRIS_DIRTY_CLIP |
                          IRIS_DIRTY_SF_CL_VIEWPORT |
                          IRIS_DIRTY_CC_VIEWPORT |
                          IRIS_DIRTY_SCISSOR_RECT;
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_FS |
         ice->state.stage_dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
   }

   if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
      ice->state.dirty |= IRIS_DIRTY_SBE;
   }

   iris_shader_variant_reference(&ice->shaders.last_vue_shader, shader);
}

static void
iris_update_pull_constant_descriptors(struct iris_context *ice,
                                      gl_shader_stage stage)
{
   struct iris_compiled_shader *shader = ice->shaders.prog[stage];

   if (!shader || !shader->prog_data->has_ubo_pull)
      return;

   struct iris_shader_state *shs = &ice->state.shaders[stage];
   bool any_new_descriptors =
      shader->num_system_values > 0 && shs->sysvals_need_upload;

   unsigned bound_cbufs = shs->bound_cbufs;

   while (bound_cbufs) {
      const int i = u_bit_scan(&bound_cbufs);
      struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
      struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];
      if (!surf_state->res && cbuf->buffer) {
         iris_upload_ubo_ssbo_surf_state(ice, cbuf, surf_state,
                                         ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
         any_new_descriptors = true;
      }
   }

   if (any_new_descriptors)
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
}
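
/* The per-stage dirty bits are laid out so that shifting the VS bit by a
 * gl_shader_stage enum value yields the matching stage's bit, as in the
 * helper above:
 *
 *    ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
 *
 * The same trick is used with IRIS_STAGE_DIRTY_CONSTANTS_VS and
 * IRIS_STAGE_DIRTY_UNCOMPILED_VS elsewhere in this file.
 */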

/**
 * Update the current shader variants for the given state.
 *
 * This should be called on every draw call to ensure that the correct
 * shaders are bound.  It will also flag any dirty state triggered by
 * swapping out those shaders.
 */
void
iris_update_compiled_shaders(struct iris_context *ice)
{
   const uint64_t stage_dirty = ice->state.stage_dirty;

   if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
                      IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
      struct iris_uncompiled_shader *tes =
         ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
      if (tes) {
         iris_update_compiled_tcs(ice);
         iris_update_compiled_tes(ice);
      } else {
         iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL], NULL);
         iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL], NULL);
         ice->state.stage_dirty |=
            IRIS_STAGE_DIRTY_TCS | IRIS_STAGE_DIRTY_TES |
            IRIS_STAGE_DIRTY_BINDINGS_TCS | IRIS_STAGE_DIRTY_BINDINGS_TES |
            IRIS_STAGE_DIRTY_CONSTANTS_TCS | IRIS_STAGE_DIRTY_CONSTANTS_TES;

         if (ice->shaders.urb.constrained)
            ice->state.dirty |= IRIS_DIRTY_URB;
      }
   }

   if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_VS)
      iris_update_compiled_vs(ice);
   if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_GS)
      iris_update_compiled_gs(ice);

   if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_GS |
                      IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
      const struct iris_compiled_shader *gs =
         ice->shaders.prog[MESA_SHADER_GEOMETRY];
      const struct iris_compiled_shader *tes =
         ice->shaders.prog[MESA_SHADER_TESS_EVAL];

      bool points_or_lines = false;

      if (gs) {
         const struct brw_gs_prog_data *gs_prog_data = (void *) gs->prog_data;
         points_or_lines =
            gs_prog_data->output_topology == _3DPRIM_POINTLIST ||
            gs_prog_data->output_topology == _3DPRIM_LINESTRIP;
      } else if (tes) {
         const struct brw_tes_prog_data *tes_data = (void *) tes->prog_data;
         points_or_lines =
            tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_LINE ||
            tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT;
      }

      if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
         /* Outbound to XY Clip enables */
         ice->shaders.output_topology_is_points_or_lines = points_or_lines;
         ice->state.dirty |= IRIS_DIRTY_CLIP;
      }
   }

   gl_shader_stage last_stage = last_vue_stage(ice);
   struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
   struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
   update_last_vue_map(ice, shader);
   if (ice->state.streamout != shader->streamout) {
      ice->state.streamout = shader->streamout;
      ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
   }

   if (ice->state.streamout_active) {
      for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
         struct iris_stream_output_target *so =
            (void *) ice->state.so_target[i];
         if (so)
            so->stride = ish->stream_output.stride[i] * sizeof(uint32_t);
      }
   }

   if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_FS)
      iris_update_compiled_fs(ice);

   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
      if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << i))
         iris_update_pull_constant_descriptors(ice, i);
   }
}
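
/**
 * Compile a compute shader, and upload the assembly.
 */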
static struct iris_compiled_shader *
iris_compile_cs(struct iris_screen *screen,
                struct u_upload_mgr *uploader,
                struct pipe_debug_callback *dbg,
                struct iris_uncompiled_shader *ish,
                const struct iris_cs_prog_key *key)
{
   const struct brw_compiler *compiler = screen->compiler;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_cs_prog_data *cs_prog_data =
      rzalloc(mem_ctx, struct brw_cs_prog_data);
   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
   enum brw_param_builtin *system_values;
   const struct intel_device_info *devinfo = &screen->devinfo;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics);

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data,
                       ish->kernel_input_size,
                       &system_values, &num_system_values, &num_cbufs);

   struct iris_binding_table bt;
   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
                            num_system_values, num_cbufs);

   struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(devinfo, key);

   struct brw_compile_cs_params params = {
      .nir = nir,
      .key = &brw_key,
      .prog_data = cs_prog_data,
      .log_data = dbg,
   };

   const unsigned *program = brw_compile_cs(compiler, mem_ctx, &params);
   if (program == NULL) {
      dbg_printf("Failed to compile compute shader: %s\n", params.error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   iris_debug_recompile(screen, dbg, ish, &brw_key.base);

   struct iris_compiled_shader *shader =
      iris_upload_shader(screen, ish, NULL, uploader,
                         IRIS_CACHE_CS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values,
                         ish->kernel_input_size, num_cbufs, &bt);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
   return shader;
}

static void
iris_update_compiled_cs(struct iris_context *ice)
{
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
   struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_COMPUTE];

   struct iris_cs_prog_key key = { KEY_ID(base) };
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   screen->vtbl.populate_cs_key(ice, &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
   struct iris_compiled_shader *shader =
      find_variant(screen, ish, &key, sizeof(key));

   if (!shader) {
      shader = iris_disk_cache_retrieve(screen, uploader, ish,
                                        &key, sizeof(key));
   }

   if (!shader)
      shader = iris_compile_cs(screen, uploader, &ice->dbg, ish, &key);

   if (old != shader) {
      iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_COMPUTE],
                                    shader);
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CS |
                                IRIS_STAGE_DIRTY_BINDINGS_CS |
                                IRIS_STAGE_DIRTY_CONSTANTS_CS;
      shs->sysvals_need_upload = true;
   }
}

void
iris_update_compiled_compute_shader(struct iris_context *ice)
{
   if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_CS)
      iris_update_compiled_cs(ice);

   if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS)
      iris_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE);
}
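
/* iris_fill_cs_push_const_buffer() below writes one subgroup ID per thread.
 * Per-thread push constant data is register-aligned (32 bytes, i.e. 8
 * dwords), hence the destination stride of 8 dwords even though only one
 * dword per thread is actually used (per the asserts).  For example, with
 * threads == 2 the buffer ends up as:
 *
 *    dst[0] = 0;   dwords 1..7  untouched (padding)
 *    dst[8] = 1;   dwords 9..15 untouched (padding)
 */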

void
iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
                               unsigned threads,
                               uint32_t *dst)
{
   assert(brw_cs_push_const_total_size(cs_prog_data, threads) > 0);
   assert(cs_prog_data->push.cross_thread.size == 0);
   assert(cs_prog_data->push.per_thread.dwords == 1);
   assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
   for (unsigned t = 0; t < threads; t++)
      dst[8 * t] = t;
}

/**
 * Allocate scratch BOs as needed for the given per-thread size and stage.
 */
struct iris_bo *
iris_get_scratch_space(struct iris_context *ice,
                       unsigned per_thread_scratch,
                       gl_shader_stage stage)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct iris_bufmgr *bufmgr = screen->bufmgr;
   const struct intel_device_info *devinfo = &screen->devinfo;

   unsigned encoded_size = ffs(per_thread_scratch) - 11;
   assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_bos));
   assert(per_thread_scratch == 1 << (encoded_size + 10));
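
   /* encoded_size is log2(per_thread_scratch), biased so the minimum legal
    * size of 1KB encodes as 0.  For example, per_thread_scratch == 2048
    * gives ffs(2048) - 11 == 12 - 11 == 1, and 1 << (1 + 10) == 2048
    * round-trips through the assert above.
    */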

   /* On GFX version 12.5, scratch access changed to a surface-based model.
    * Instead of each shader type having its own layout based on IDs passed
    * from the relevant fixed-function unit, all scratch access is based on
    * thread IDs like it always has been for compute.
    */
   if (devinfo->verx10 >= 125)
      stage = MESA_SHADER_COMPUTE;

   struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];

   /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
    *
    *    "Scratch Space per slice is computed based on 4 sub-slices.  SW
    *     must allocate scratch space enough so that each slice has 4
    *     slices allowed."
    *
    * According to the other driver team, this applies to compute shaders
    * as well.  This is not currently documented at all.
    *
    * This hack is no longer necessary on Gfx11+.
    *
    * For Gfx11+, scratch space allocation is based on the number of threads
    * in the base configuration.
    */
   unsigned subslice_total = screen->subslice_total;
   if (devinfo->verx10 == 125)
      subslice_total = 32;
   else if (devinfo->ver == 12)
      subslice_total = (devinfo->is_dg1 || devinfo->gt == 2 ? 6 : 2);
   else if (devinfo->ver == 11)
      subslice_total = 8;
   else if (devinfo->ver < 11)
      subslice_total = 4 * devinfo->num_slices;
   assert(subslice_total >= screen->subslice_total);

   if (!*bop) {
      unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;

      if (devinfo->ver >= 12) {
         /* Same as ICL below, but with 16 EUs. */
         scratch_ids_per_subslice = 16 * 8;
      } else if (devinfo->ver == 11) {
         /* The MEDIA_VFE_STATE docs say:
          *
          *    "Starting with this configuration, the Maximum Number of
          *     Threads must be set to (#EU * 8) for GPGPU dispatches.
          *
          *     Although there are only 7 threads per EU in the
          *     configuration, the FFTID is calculated as if there are 8
          *     threads per EU, which in turn requires a larger amount of
          *     Scratch Space to be allocated by the driver."
          */
         scratch_ids_per_subslice = 8 * 8;
      }

      uint32_t max_threads[] = {
         [MESA_SHADER_VERTEX]    = devinfo->max_vs_threads,
         [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
         [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
         [MESA_SHADER_GEOMETRY]  = devinfo->max_gs_threads,
         [MESA_SHADER_FRAGMENT]  = devinfo->max_wm_threads,
         [MESA_SHADER_COMPUTE]   = scratch_ids_per_subslice * subslice_total,
      };

      uint32_t size = per_thread_scratch * max_threads[stage];

      *bop = iris_bo_alloc(bufmgr, "scratch", size, 1, IRIS_MEMZONE_SHADER, 0);
   }

   return *bop;
}

const struct iris_state_ref *
iris_get_scratch_surf(struct iris_context *ice,
                      unsigned per_thread_scratch)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   ASSERTED const struct intel_device_info *devinfo = &screen->devinfo;

   assert(devinfo->verx10 >= 125);

   unsigned encoded_size = ffs(per_thread_scratch) - 11;
   assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_surfs));
   assert(per_thread_scratch == 1 << (encoded_size + 10));

   struct iris_state_ref *ref = &ice->shaders.scratch_surfs[encoded_size];

   if (ref->res)
      return ref;

   struct iris_bo *scratch_bo =
      iris_get_scratch_space(ice, per_thread_scratch, MESA_SHADER_COMPUTE);

   void *map = upload_state(ice->state.bindless_uploader, ref,
                            screen->isl_dev.ss.size, 64);

   isl_buffer_fill_state(&screen->isl_dev, map,
                         .address = scratch_bo->gtt_offset,
                         .size_B = scratch_bo->size,
                         .format = ISL_FORMAT_RAW,
                         .swizzle = ISL_SWIZZLE_IDENTITY,
                         .mocs = iris_mocs(scratch_bo, &screen->isl_dev, 0),
                         .stride_B = per_thread_scratch,
                         .is_scratch = true);

   return ref;
}
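
/* A hypothetical calling sketch for the scratch helpers above (the real
 * callers live in the generation-specific state upload code); the per-thread
 * size comes from the compiled variant's prog_data:
 *
 *    if (prog_data->total_scratch) {
 *       if (devinfo->verx10 >= 125) {
 *          const struct iris_state_ref *ref =
 *             iris_get_scratch_surf(ice, prog_data->total_scratch);
 *          ... program the scratch surface state at ref->offset ...
 *       } else {
 *          struct iris_bo *bo =
 *             iris_get_scratch_space(ice, prog_data->total_scratch, stage);
 *          ... program the scratch space base pointer to bo ...
 *       }
 *    }
 */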

/* ------------------------------------------------------------------- */

/**
 * The pipe->create_[stage]_state() driver hooks.
 *
 * Performs basic NIR preprocessing, records any state dependencies, and
 * returns an iris_uncompiled_shader as the Gallium CSO.
 *
 * Actual shader compilation to assembly happens later, at first use.
 */
static void *
iris_create_uncompiled_shader(struct iris_screen *screen,
                              nir_shader *nir,
                              const struct pipe_stream_output_info *so_info)
{
   const struct intel_device_info *devinfo = &screen->devinfo;

   struct iris_uncompiled_shader *ish =
      calloc(1, sizeof(struct iris_uncompiled_shader));
   if (!ish)
      return NULL;

   list_inithead(&ish->variants);
   simple_mtx_init(&ish->lock, mtx_plain);

   NIR_PASS(ish->needs_edge_flag, nir, iris_fix_edge_flags);

   brw_preprocess_nir(screen->compiler, nir, NULL);

   NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo,
              &ish->uses_atomic_load_store);
   NIR_PASS_V(nir, iris_lower_storage_image_derefs);

   nir_sweep(nir);

   ish->program_id = get_new_program_id(screen);
   ish->nir = nir;
   if (so_info) {
      memcpy(&ish->stream_output, so_info, sizeof(*so_info));
      update_so_info(&ish->stream_output, nir->info.outputs_written);
   }

   /* Save this now before potentially dropping nir->info.name */
   if (nir->info.name && strncmp(nir->info.name, "ARB", 3) == 0)
      ish->use_alt_mode = true;

   if (screen->disk_cache) {
      /* Serialize the NIR to a binary blob that we can hash for the disk
       * cache.  Drop unnecessary information (like variable names)
       * so the serialized NIR is smaller, and also to let us detect more
       * isomorphic shaders when hashing, increasing cache hits.
       */
      struct blob blob;
      blob_init(&blob);
      nir_serialize(&blob, nir, true);
      _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);
      blob_finish(&blob);
   }

   return ish;
}

static struct iris_uncompiled_shader *
iris_create_shader_state(struct pipe_context *ctx,
                         const struct pipe_shader_state *state)
{
   struct iris_screen *screen = (void *) ctx->screen;
   struct nir_shader *nir;

   if (state->type == PIPE_SHADER_IR_TGSI)
      nir = tgsi_to_nir(state->tokens, ctx->screen, false);
   else
      nir = state->ir.nir;

   return iris_create_uncompiled_shader(screen, nir, &state->stream_output);
}

static void *
iris_create_vs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);

   /* User clip planes */
   if (ish->nir->info.clip_distance_array_size == 0)
      ish->nos |= (1ull << IRIS_NOS_RASTERIZER);

   if (screen->precompile) {
      struct iris_vs_prog_key key = { KEY_ID(vue.base) };

      if (!iris_disk_cache_retrieve(screen, uploader, ish, &key, sizeof(key)))
         iris_compile_vs(screen, uploader, &ice->dbg, ish, &key);
   }

   return ish;
}
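
/* The precompile paths below sometimes have to guess at key fields that
 * depend on other shader stages (see the XXX notes).  The guessed keys cover
 * the common case; if a guess turns out wrong, the draw-time
 * iris_update_compiled_*() path simply compiles another variant with the
 * real key.
 */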

static void *
iris_create_tcs_state(struct pipe_context *ctx,
                      const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   const struct brw_compiler *compiler = screen->compiler;
   struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   if (screen->precompile) {
      const unsigned _GL_TRIANGLES = 0x0004;
      struct iris_tcs_prog_key key = {
         KEY_ID(vue.base),
         // XXX: make sure the linker fills this out from the TES...
         .tes_primitive_mode =
            info->tess.primitive_mode ? info->tess.primitive_mode
                                      : _GL_TRIANGLES,
         .outputs_written = info->outputs_written,
         .patch_outputs_written = info->patch_outputs_written,
      };

      /* 8_PATCH mode needs the key to contain the input patch dimensionality.
       * We don't have that information, so we randomly guess that the input
       * and output patches are the same size.  This is a bad guess, but we
       * can't do much better.
       */
      if (compiler->use_tcs_8_patch)
         key.input_vertices = info->tess.tcs_vertices_out;

      if (!iris_disk_cache_retrieve(screen, uploader, ish, &key, sizeof(key)))
         iris_compile_tcs(screen, NULL, uploader, &ice->dbg, ish, &key);
   }

   return ish;
}

static void *
iris_create_tes_state(struct pipe_context *ctx,
                      const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   /* User clip planes */
   if (ish->nir->info.clip_distance_array_size == 0)
      ish->nos |= (1ull << IRIS_NOS_RASTERIZER);

   if (screen->precompile) {
      struct iris_tes_prog_key key = {
         KEY_ID(vue.base),
         // XXX: not ideal, need TCS output/TES input unification
         .inputs_read = info->inputs_read,
         .patch_inputs_read = info->patch_inputs_read,
      };

      if (!iris_disk_cache_retrieve(screen, uploader, ish, &key, sizeof(key)))
         iris_compile_tes(screen, uploader, &ice->dbg, ish, &key);
   }

   return ish;
}

static void *
iris_create_gs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);

   /* User clip planes */
   if (ish->nir->info.clip_distance_array_size == 0)
      ish->nos |= (1ull << IRIS_NOS_RASTERIZER);

   if (screen->precompile) {
      struct iris_gs_prog_key key = { KEY_ID(vue.base) };

      if (!iris_disk_cache_retrieve(screen, uploader, ish, &key, sizeof(key)))
         iris_compile_gs(screen, uploader, &ice->dbg, ish, &key);
   }

   return ish;
}

static void *
iris_create_fs_state(struct pipe_context *ctx,
                     const struct pipe_shader_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
   struct shader_info *info = &ish->nir->info;

   ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
               (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
               (1ull << IRIS_NOS_RASTERIZER) |
               (1ull << IRIS_NOS_BLEND);

   /* The program key needs the VUE map if there are > 16 inputs */
   if (util_bitcount64(ish->nir->info.inputs_read &
                       BRW_FS_VARYING_INPUT_MASK) > 16) {
      ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
   }

   if (screen->precompile) {
      const uint64_t color_outputs = info->outputs_written &
         ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
           BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
           BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));

      bool can_rearrange_varyings =
         util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;

      const struct intel_device_info *devinfo = &screen->devinfo;
      struct iris_fs_prog_key key = {
         KEY_ID(base),
         .nr_color_regions = util_bitcount(color_outputs),
         .coherent_fb_fetch = devinfo->ver >= 9,
         .input_slots_valid =
            can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
      };

      if (!iris_disk_cache_retrieve(screen, uploader, ish, &key, sizeof(key)))
         iris_compile_fs(screen, uploader, &ice->dbg, ish, &key, NULL);
   }

   return ish;
}
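
/* For iris_create_compute_state() below: Gallium hands over the compute CSO
 * either as a NIR shader or as serialized NIR (PIPE_SHADER_IR_NIR_SERIALIZED,
 * used e.g. by compute-only frontends); the serialized form arrives wrapped
 * in a pipe_binary_program_header carrying the blob size.
 */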

static void *
iris_create_compute_state(struct pipe_context *ctx,
                          const struct pipe_compute_state *state)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_screen *screen = (void *) ctx->screen;
   struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
   const nir_shader_compiler_options *options =
      screen->compiler->glsl_compiler_options[MESA_SHADER_COMPUTE].NirOptions;

   nir_shader *nir;
   switch (state->ir_type) {
   case PIPE_SHADER_IR_NIR:
      nir = (void *)state->prog;
      break;

   case PIPE_SHADER_IR_NIR_SERIALIZED: {
      struct blob_reader reader;
      const struct pipe_binary_program_header *hdr = state->prog;
      blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
      nir = nir_deserialize(NULL, options, &reader);
      break;
   }

   default:
      unreachable("Unsupported IR");
   }

   /* Most of iris doesn't really care about the difference between compute
    * shaders and kernels.  We also tend to hard-code COMPUTE everywhere so
    * it's way easier if we just normalize to COMPUTE here.
    */
   assert(nir->info.stage == MESA_SHADER_COMPUTE ||
          nir->info.stage == MESA_SHADER_KERNEL);
   nir->info.stage = MESA_SHADER_COMPUTE;

   struct iris_uncompiled_shader *ish =
      iris_create_uncompiled_shader(screen, nir, NULL);
   ish->kernel_input_size = state->req_input_mem;
   ish->kernel_shared_size = state->req_local_mem;

   // XXX: disallow more than 64KB of shared variables

   if (screen->precompile) {
      struct iris_cs_prog_key key = { KEY_ID(base) };

      if (!iris_disk_cache_retrieve(screen, uploader, ish, &key, sizeof(key)))
         iris_compile_cs(screen, uploader, &ice->dbg, ish, &key);
   }

   return ish;
}

/**
 * The pipe->delete_[stage]_state() driver hooks.
 *
 * Frees the iris_uncompiled_shader.
 */
static void
iris_delete_shader_state(struct pipe_context *ctx, void *state, gl_shader_stage stage)
{
   struct iris_uncompiled_shader *ish = state;
   struct iris_context *ice = (void *) ctx;

   if (ice->shaders.uncompiled[stage] == ish) {
      ice->shaders.uncompiled[stage] = NULL;
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
   }

   /* No need to take ish->lock; we hold the last reference to ish */
   list_for_each_entry_safe(struct iris_compiled_shader, shader,
                            &ish->variants, link) {
      list_del(&shader->link);

      iris_shader_variant_reference(&shader, NULL);
   }

   simple_mtx_destroy(&ish->lock);

   ralloc_free(ish->nir);
   free(ish);
}

static void
iris_delete_vs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_VERTEX);
}

static void
iris_delete_tcs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_TESS_CTRL);
}

static void
iris_delete_tes_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_TESS_EVAL);
}

static void
iris_delete_gs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_GEOMETRY);
}

static void
iris_delete_fs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_FRAGMENT);
}

static void
iris_delete_cs_state(struct pipe_context *ctx, void *state)
{
   iris_delete_shader_state(ctx, state, MESA_SHADER_COMPUTE);
}

/**
 * The pipe->bind_[stage]_state() driver hook.
 *
 * Binds an uncompiled shader as the current one for a particular stage.
 * Updates dirty tracking to account for the shader's NOS.
 */
static void
bind_shader_state(struct iris_context *ice,
                  struct iris_uncompiled_shader *ish,
                  gl_shader_stage stage)
{
   uint64_t stage_dirty_bit = IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
   const uint64_t nos = ish ? ish->nos : 0;

   const struct shader_info *old_info = iris_get_shader_info(ice, stage);
   const struct shader_info *new_info = ish ? &ish->nir->info : NULL;

   if ((old_info ? BITSET_LAST_BIT(old_info->textures_used) : 0) !=
       (new_info ? BITSET_LAST_BIT(new_info->textures_used) : 0)) {
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
   }

   ice->shaders.uncompiled[stage] = ish;
   ice->state.stage_dirty |= stage_dirty_bit;

   /* Record that CSOs need to mark IRIS_STAGE_DIRTY_UNCOMPILED_XS when they
    * change (or that they no longer need to do so).
    */
   for (int i = 0; i < IRIS_NOS_COUNT; i++) {
      if (nos & (1 << i))
         ice->state.stage_dirty_for_nos[i] |= stage_dirty_bit;
      else
         ice->state.stage_dirty_for_nos[i] &= ~stage_dirty_bit;
   }
}
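
/* An example of the NOS bookkeeping set up above: iris_create_fs_state()
 * sets (1ull << IRIS_NOS_FRAMEBUFFER) in ish->nos, so once that shader is
 * bound, a framebuffer change ORs stage_dirty_for_nos[IRIS_NOS_FRAMEBUFFER]
 * into stage_dirty.  That flags IRIS_STAGE_DIRTY_UNCOMPILED_FS and makes the
 * next draw re-check (and, if the key changed, recompile) the FS variant.
 */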

static void
iris_bind_vs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;
   struct iris_uncompiled_shader *ish = state;

   if (ish) {
      const struct shader_info *info = &ish->nir->info;
      if (ice->state.window_space_position != info->vs.window_space_position) {
         ice->state.window_space_position = info->vs.window_space_position;

         ice->state.dirty |= IRIS_DIRTY_CLIP |
                             IRIS_DIRTY_RASTER |
                             IRIS_DIRTY_CC_VIEWPORT;
      }

      const bool uses_draw_params =
         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_FIRST_VERTEX) ||
         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
      const bool uses_derived_draw_params =
         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_DRAW_ID) ||
         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW);
      const bool needs_sgvs_element = uses_draw_params ||
         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_INSTANCE_ID) ||
         BITSET_TEST(info->system_values_read,
                     SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);

      if (ice->state.vs_uses_draw_params != uses_draw_params ||
          ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
          ice->state.vs_needs_edge_flag != ish->needs_edge_flag) {
         ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
                             IRIS_DIRTY_VERTEX_ELEMENTS;
      }

      ice->state.vs_uses_draw_params = uses_draw_params;
      ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
      ice->state.vs_needs_sgvs_element = needs_sgvs_element;
      ice->state.vs_needs_edge_flag = ish->needs_edge_flag;
   }

   bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX);
}

static void
iris_bind_tcs_state(struct pipe_context *ctx, void *state)
{
   bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
}

static void
iris_bind_tes_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
}

static void
iris_bind_gs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
}

static void
iris_bind_fs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_screen *screen = (struct iris_screen *) ctx->screen;
   const struct intel_device_info *devinfo = &screen->devinfo;
   struct iris_uncompiled_shader *old_ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   struct iris_uncompiled_shader *new_ish = state;

   const unsigned color_bits =
      BITFIELD64_BIT(FRAG_RESULT_COLOR) |
      BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS);

   /* Fragment shader outputs influence HasWriteableRT */
   if (!old_ish || !new_ish ||
       (old_ish->nir->info.outputs_written & color_bits) !=
       (new_ish->nir->info.outputs_written & color_bits))
      ice->state.dirty |= IRIS_DIRTY_PS_BLEND;

   if (devinfo->ver == 8)
      ice->state.dirty |= IRIS_DIRTY_PMA_FIX;

   bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
}

static void
iris_bind_cs_state(struct pipe_context *ctx, void *state)
{
   bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE);
}

void
iris_init_program_functions(struct pipe_context *ctx)
{
   ctx->create_vs_state = iris_create_vs_state;
   ctx->create_tcs_state = iris_create_tcs_state;
   ctx->create_tes_state = iris_create_tes_state;
   ctx->create_gs_state = iris_create_gs_state;
   ctx->create_fs_state = iris_create_fs_state;
   ctx->create_compute_state = iris_create_compute_state;

   ctx->delete_vs_state = iris_delete_vs_state;
   ctx->delete_tcs_state = iris_delete_tcs_state;
   ctx->delete_tes_state = iris_delete_tes_state;
   ctx->delete_gs_state = iris_delete_gs_state;
   ctx->delete_fs_state = iris_delete_fs_state;
   ctx->delete_compute_state = iris_delete_cs_state;

   ctx->bind_vs_state = iris_bind_vs_state;
   ctx->bind_tcs_state = iris_bind_tcs_state;
   ctx->bind_tes_state = iris_bind_tes_state;
   ctx->bind_gs_state = iris_bind_gs_state;
   ctx->bind_fs_state = iris_bind_fs_state;
   ctx->bind_compute_state = iris_bind_cs_state;
}