/* Path: blob/21.2-virgl/src/freedreno/ir3/ir3_nir_lower_tess.c */
/*1* Copyright © 2019 Google, Inc.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*/2223#include "compiler/nir/nir_builder.h"24#include "ir3_compiler.h"25#include "ir3_nir.h"2627struct state {28uint32_t topology;2930struct primitive_map {31unsigned loc[32 + 4]; /* +POSITION +PSIZE +CLIP_DIST0 +CLIP_DIST1 */32unsigned stride;33} map;3435nir_ssa_def *header;3637nir_variable *vertex_count_var;38nir_variable *emitted_vertex_var;39nir_variable *vertex_flags_out;4041struct exec_list old_outputs;42struct exec_list new_outputs;43struct exec_list emit_outputs;4445/* tess ctrl shader on a650 gets the local primitive id at different bits: */46unsigned local_primitive_id_start;47};4849static nir_ssa_def *50bitfield_extract(nir_builder *b, nir_ssa_def *v, uint32_t start, uint32_t mask)51{52return nir_iand(b, nir_ushr(b, v, nir_imm_int(b, start)),53nir_imm_int(b, mask));54}5556static nir_ssa_def *57build_invocation_id(nir_builder *b, struct 
state *state)58{59return bitfield_extract(b, state->header, 11, 31);60}6162static nir_ssa_def *63build_vertex_id(nir_builder *b, struct state *state)64{65return bitfield_extract(b, state->header, 6, 31);66}6768static nir_ssa_def *69build_local_primitive_id(nir_builder *b, struct state *state)70{71return bitfield_extract(b, state->header, state->local_primitive_id_start,7263);73}7475static bool76is_tess_levels(gl_varying_slot slot)77{78return (slot == VARYING_SLOT_TESS_LEVEL_OUTER ||79slot == VARYING_SLOT_TESS_LEVEL_INNER);80}8182/* Return a deterministic index for varyings. We can't rely on driver_location83* to be correct without linking the different stages first, so we create84* "primitive maps" where the producer decides on the location of each varying85* slot and then exports a per-slot array to the consumer. This compacts the86* gl_varying_slot space down a bit so that the primitive maps aren't too87* large.88*89* Note: per-patch varyings are currently handled separately, without any90* compacting.91*92* TODO: We could probably use the driver_location's directly in the non-SSO93* (Vulkan) case.94*/9596static unsigned97shader_io_get_unique_index(gl_varying_slot slot)98{99if (slot == VARYING_SLOT_POS)100return 0;101if (slot == VARYING_SLOT_PSIZ)102return 1;103if (slot == VARYING_SLOT_CLIP_DIST0)104return 2;105if (slot == VARYING_SLOT_CLIP_DIST1)106return 3;107if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)108return 4 + (slot - VARYING_SLOT_VAR0);109unreachable("illegal slot in get unique index\n");110}111112static nir_ssa_def *113build_local_offset(nir_builder *b, struct state *state, nir_ssa_def *vertex,114uint32_t location, uint32_t comp, nir_ssa_def *offset)115{116nir_ssa_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b);117nir_ssa_def *primitive_offset =118nir_imul24(b, build_local_primitive_id(b, state), primitive_stride);119nir_ssa_def *attr_offset;120nir_ssa_def *vertex_stride;121unsigned index = 
shader_io_get_unique_index(location);122123switch (b->shader->info.stage) {124case MESA_SHADER_VERTEX:125case MESA_SHADER_TESS_EVAL:126vertex_stride = nir_imm_int(b, state->map.stride * 4);127attr_offset = nir_imm_int(b, state->map.loc[index] + 4 * comp);128break;129case MESA_SHADER_TESS_CTRL:130case MESA_SHADER_GEOMETRY:131vertex_stride = nir_load_vs_vertex_stride_ir3(b);132attr_offset = nir_iadd(b, nir_load_primitive_location_ir3(b, index),133nir_imm_int(b, comp * 4));134break;135default:136unreachable("bad shader stage");137}138139nir_ssa_def *vertex_offset = nir_imul24(b, vertex, vertex_stride);140141return nir_iadd(142b, nir_iadd(b, primitive_offset, vertex_offset),143nir_iadd(b, attr_offset, nir_ishl(b, offset, nir_imm_int(b, 4))));144}145146static nir_intrinsic_instr *147replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,148nir_intrinsic_op op, nir_ssa_def *src0, nir_ssa_def *src1,149nir_ssa_def *src2)150{151nir_intrinsic_instr *new_intr = nir_intrinsic_instr_create(b->shader, op);152153new_intr->src[0] = nir_src_for_ssa(src0);154if (src1)155new_intr->src[1] = nir_src_for_ssa(src1);156if (src2)157new_intr->src[2] = nir_src_for_ssa(src2);158159new_intr->num_components = intr->num_components;160161if (nir_intrinsic_infos[op].has_dest)162nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, intr->num_components,16332, NULL);164165nir_builder_instr_insert(b, &new_intr->instr);166167if (nir_intrinsic_infos[op].has_dest)168nir_ssa_def_rewrite_uses(&intr->dest.ssa, &new_intr->dest.ssa);169170nir_instr_remove(&intr->instr);171172return new_intr;173}174175static void176build_primitive_map(nir_shader *shader, struct primitive_map *map)177{178/* All interfaces except the TCS <-> TES interface use ldlw, which takes179* an offset in bytes, so each vec4 slot is 16 bytes. 
TCS <-> TES uses180* ldg, which takes an offset in dwords, but each per-vertex slot has181* space for every vertex, and there's space at the beginning for182* per-patch varyings.183*/184unsigned slot_size = 16, start = 0;185if (shader->info.stage == MESA_SHADER_TESS_CTRL) {186slot_size = shader->info.tess.tcs_vertices_out * 4;187start = util_last_bit(shader->info.patch_outputs_written) * 4;188}189190uint64_t mask = shader->info.outputs_written;191unsigned loc = start;192while (mask) {193int location = u_bit_scan64(&mask);194if (is_tess_levels(location))195continue;196197unsigned index = shader_io_get_unique_index(location);198map->loc[index] = loc;199loc += slot_size;200}201202map->stride = loc;203/* Use units of dwords for the stride. */204if (shader->info.stage != MESA_SHADER_TESS_CTRL)205map->stride /= 4;206}207208/* For shader stages that receive a primitive map, calculate how big it should209* be.210*/211212static unsigned213calc_primitive_map_size(nir_shader *shader)214{215uint64_t mask = shader->info.inputs_read;216unsigned max_index = 0;217while (mask) {218int location = u_bit_scan64(&mask);219220if (is_tess_levels(location))221continue;222223unsigned index = shader_io_get_unique_index(location);224max_index = MAX2(max_index, index + 1);225}226227return max_index;228}229230static void231lower_block_to_explicit_output(nir_block *block, nir_builder *b,232struct state *state)233{234nir_foreach_instr_safe (instr, block) {235if (instr->type != nir_instr_type_intrinsic)236continue;237238nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);239240switch (intr->intrinsic) {241case nir_intrinsic_store_output: {242// src[] = { value, offset }.243244/* nir_lower_io_to_temporaries replaces all access to output245* variables with temp variables and then emits a nir_copy_var at246* the end of the shader. 
Thus, we should always get a full wrmask247* here.248*/249assert(250util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));251252b->cursor = nir_instr_remove(&intr->instr);253254nir_ssa_def *vertex_id = build_vertex_id(b, state);255nir_ssa_def *offset = build_local_offset(256b, state, vertex_id, nir_intrinsic_io_semantics(intr).location,257nir_intrinsic_component(intr), intr->src[1].ssa);258259nir_store_shared_ir3(b, intr->src[0].ssa, offset);260break;261}262263default:264break;265}266}267}268269static nir_ssa_def *270local_thread_id(nir_builder *b)271{272return bitfield_extract(b, nir_load_gs_header_ir3(b), 16, 1023);273}274275void276ir3_nir_lower_to_explicit_output(nir_shader *shader,277struct ir3_shader_variant *v,278unsigned topology)279{280struct state state = {};281282build_primitive_map(shader, &state.map);283memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));284285nir_function_impl *impl = nir_shader_get_entrypoint(shader);286assert(impl);287288nir_builder b;289nir_builder_init(&b, impl);290b.cursor = nir_before_cf_list(&impl->body);291292if (v->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)293state.header = nir_load_tcs_header_ir3(&b);294else295state.header = nir_load_gs_header_ir3(&b);296297nir_foreach_block_safe (block, impl)298lower_block_to_explicit_output(block, &b, &state);299300nir_metadata_preserve(impl,301nir_metadata_block_index | nir_metadata_dominance);302303v->output_size = state.map.stride;304}305306static void307lower_block_to_explicit_input(nir_block *block, nir_builder *b,308struct state *state)309{310nir_foreach_instr_safe (instr, block) {311if (instr->type != nir_instr_type_intrinsic)312continue;313314nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);315316switch (intr->intrinsic) {317case nir_intrinsic_load_per_vertex_input: {318// src[] = { vertex, offset }.319320b->cursor = nir_before_instr(&intr->instr);321322nir_ssa_def *offset = build_local_offset(323b, state,324intr->src[0].ssa, // this is 
typically gl_InvocationID325nir_intrinsic_io_semantics(intr).location,326nir_intrinsic_component(intr), intr->src[1].ssa);327328replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL,329NULL);330break;331}332333case nir_intrinsic_load_invocation_id: {334b->cursor = nir_before_instr(&intr->instr);335336nir_ssa_def *iid = build_invocation_id(b, state);337nir_ssa_def_rewrite_uses(&intr->dest.ssa, iid);338nir_instr_remove(&intr->instr);339break;340}341342default:343break;344}345}346}347348void349ir3_nir_lower_to_explicit_input(nir_shader *shader,350struct ir3_shader_variant *v)351{352struct state state = {};353354/* when using stl/ldl (instead of stlw/ldlw) for linking VS and HS,355* HS uses a different primitive id, which starts at bit 16 in the header356*/357if (shader->info.stage == MESA_SHADER_TESS_CTRL &&358v->shader->compiler->tess_use_shared)359state.local_primitive_id_start = 16;360361nir_function_impl *impl = nir_shader_get_entrypoint(shader);362assert(impl);363364nir_builder b;365nir_builder_init(&b, impl);366b.cursor = nir_before_cf_list(&impl->body);367368if (shader->info.stage == MESA_SHADER_GEOMETRY)369state.header = nir_load_gs_header_ir3(&b);370else371state.header = nir_load_tcs_header_ir3(&b);372373nir_foreach_block_safe (block, impl)374lower_block_to_explicit_input(block, &b, &state);375376v->input_size = calc_primitive_map_size(shader);377}378379static nir_ssa_def *380build_tcs_out_vertices(nir_builder *b)381{382if (b->shader->info.stage == MESA_SHADER_TESS_CTRL)383return nir_imm_int(b, b->shader->info.tess.tcs_vertices_out);384else385return nir_load_patch_vertices_in(b);386}387388static nir_ssa_def *389build_per_vertex_offset(nir_builder *b, struct state *state,390nir_ssa_def *vertex, uint32_t location, uint32_t comp,391nir_ssa_def *offset)392{393nir_ssa_def *primitive_id = nir_load_primitive_id(b);394nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b);395nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, 
patch_stride);396nir_ssa_def *attr_offset;397398if (nir_src_is_const(nir_src_for_ssa(offset))) {399location += nir_src_as_uint(nir_src_for_ssa(offset));400offset = nir_imm_int(b, 0);401} else {402/* Offset is in vec4's, but we need it in unit of components for the403* load/store_global_ir3 offset.404*/405offset = nir_ishl(b, offset, nir_imm_int(b, 2));406}407408nir_ssa_def *vertex_offset;409if (vertex) {410unsigned index = shader_io_get_unique_index(location);411switch (b->shader->info.stage) {412case MESA_SHADER_TESS_CTRL:413attr_offset = nir_imm_int(b, state->map.loc[index] + comp);414break;415case MESA_SHADER_TESS_EVAL:416attr_offset = nir_iadd(b, nir_load_primitive_location_ir3(b, index),417nir_imm_int(b, comp));418break;419default:420unreachable("bad shader state");421}422423attr_offset = nir_iadd(b, attr_offset,424nir_imul24(b, offset, build_tcs_out_vertices(b)));425vertex_offset = nir_ishl(b, vertex, nir_imm_int(b, 2));426} else {427assert(location >= VARYING_SLOT_PATCH0 &&428location <= VARYING_SLOT_TESS_MAX);429unsigned index = location - VARYING_SLOT_PATCH0;430attr_offset = nir_iadd(b, nir_imm_int(b, index * 4 + comp), offset);431vertex_offset = nir_imm_int(b, 0);432}433434return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset), vertex_offset);435}436437static nir_ssa_def *438build_patch_offset(nir_builder *b, struct state *state, uint32_t base,439uint32_t comp, nir_ssa_def *offset)440{441return build_per_vertex_offset(b, state, NULL, base, comp, offset);442}443444static void445tess_level_components(struct state *state, uint32_t *inner, uint32_t *outer)446{447switch (state->topology) {448case IR3_TESS_TRIANGLES:449*inner = 1;450*outer = 3;451break;452case IR3_TESS_QUADS:453*inner = 2;454*outer = 4;455break;456case IR3_TESS_ISOLINES:457*inner = 0;458*outer = 2;459break;460default:461unreachable("bad");462}463}464465static nir_ssa_def *466build_tessfactor_base(nir_builder *b, gl_varying_slot slot, struct state *state)467{468uint32_t inner_levels, 
outer_levels;469tess_level_components(state, &inner_levels, &outer_levels);470471const uint32_t patch_stride = 1 + inner_levels + outer_levels;472473nir_ssa_def *primitive_id = nir_load_primitive_id(b);474475nir_ssa_def *patch_offset =476nir_imul24(b, primitive_id, nir_imm_int(b, patch_stride));477478uint32_t offset;479switch (slot) {480case VARYING_SLOT_TESS_LEVEL_OUTER:481/* There's some kind of header dword, tess levels start at index 1. */482offset = 1;483break;484case VARYING_SLOT_TESS_LEVEL_INNER:485offset = 1 + outer_levels;486break;487default:488unreachable("bad");489}490491return nir_iadd(b, patch_offset, nir_imm_int(b, offset));492}493494static void495lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)496{497nir_foreach_instr_safe (instr, block) {498if (instr->type != nir_instr_type_intrinsic)499continue;500501nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);502503switch (intr->intrinsic) {504case nir_intrinsic_load_per_vertex_output: {505// src[] = { vertex, offset }.506507b->cursor = nir_before_instr(&intr->instr);508509nir_ssa_def *address = nir_load_tess_param_base_ir3(b);510nir_ssa_def *offset = build_per_vertex_offset(511b, state, intr->src[0].ssa,512nir_intrinsic_io_semantics(intr).location,513nir_intrinsic_component(intr), intr->src[1].ssa);514515replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,516offset, NULL);517break;518}519520case nir_intrinsic_store_per_vertex_output: {521// src[] = { value, vertex, offset }.522523b->cursor = nir_before_instr(&intr->instr);524525/* sparse writemask not supported */526assert(527util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));528529nir_ssa_def *value = intr->src[0].ssa;530nir_ssa_def *address = nir_load_tess_param_base_ir3(b);531nir_ssa_def *offset = build_per_vertex_offset(532b, state, intr->src[1].ssa,533nir_intrinsic_io_semantics(intr).location,534nir_intrinsic_component(intr), intr->src[2].ssa);535536replace_intrinsic(b, intr, 
nir_intrinsic_store_global_ir3, value,537address, offset);538539break;540}541542case nir_intrinsic_load_output: {543// src[] = { offset }.544545b->cursor = nir_before_instr(&intr->instr);546547nir_ssa_def *address, *offset;548549/* note if vectorization of the tess level loads ever happens:550* "ldg" across 16-byte boundaries can behave incorrectly if results551* are never used. most likely some issue with (sy) not properly552* syncing with values coming from a second memory transaction.553*/554gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;555if (is_tess_levels(location)) {556assert(intr->dest.ssa.num_components == 1);557address = nir_load_tess_factor_base_ir3(b);558offset = build_tessfactor_base(b, location, state);559} else {560address = nir_load_tess_param_base_ir3(b);561offset = build_patch_offset(b, state, location,562nir_intrinsic_component(intr),563intr->src[0].ssa);564}565566replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,567offset, NULL);568break;569}570571case nir_intrinsic_store_output: {572// src[] = { value, offset }.573574/* write patch output to bo */575576b->cursor = nir_before_instr(&intr->instr);577578/* sparse writemask not supported */579assert(580util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));581582gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;583if (is_tess_levels(location)) {584/* with tess levels are defined as float[4] and float[2],585* but tess factor BO has smaller sizes for tris/isolines,586* so we have to discard any writes beyond the number of587* components for inner/outer levels */588uint32_t inner_levels, outer_levels, levels;589tess_level_components(state, &inner_levels, &outer_levels);590591if (location == VARYING_SLOT_TESS_LEVEL_OUTER)592levels = outer_levels;593else594levels = inner_levels;595596assert(intr->src[0].ssa->num_components == 1);597598nir_ssa_def *offset =599nir_iadd_imm(b, intr->src[1].ssa, nir_intrinsic_component(intr));600601nir_if 
*nif =602nir_push_if(b, nir_ult(b, offset, nir_imm_int(b, levels)));603604replace_intrinsic(605b, intr, nir_intrinsic_store_global_ir3, intr->src[0].ssa,606nir_load_tess_factor_base_ir3(b),607nir_iadd(b, offset, build_tessfactor_base(b, location, state)));608609nir_pop_if(b, nif);610} else {611nir_ssa_def *address = nir_load_tess_param_base_ir3(b);612nir_ssa_def *offset = build_patch_offset(613b, state, location, nir_intrinsic_component(intr),614intr->src[1].ssa);615616debug_assert(nir_intrinsic_component(intr) == 0);617618replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,619intr->src[0].ssa, address, offset);620}621break;622}623624default:625break;626}627}628}629630static void631emit_tess_epilouge(nir_builder *b, struct state *state)632{633/* Insert endpatch instruction:634*635* TODO we should re-work this to use normal flow control.636*/637638nir_end_patch_ir3(b);639}640641void642ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v,643unsigned topology)644{645struct state state = {.topology = topology};646647if (shader_debug_enabled(shader->info.stage)) {648mesa_logi("NIR (before tess lowering) for %s shader:",649_mesa_shader_stage_to_string(shader->info.stage));650nir_log_shaderi(shader);651}652653build_primitive_map(shader, &state.map);654memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));655v->output_size = state.map.stride;656657nir_function_impl *impl = nir_shader_get_entrypoint(shader);658assert(impl);659660nir_builder b;661nir_builder_init(&b, impl);662b.cursor = nir_before_cf_list(&impl->body);663664state.header = nir_load_tcs_header_ir3(&b);665666nir_foreach_block_safe (block, impl)667lower_tess_ctrl_block(block, &b, &state);668669/* Now move the body of the TCS into a conditional:670*671* if (gl_InvocationID < num_vertices)672* // body673*674*/675676nir_cf_list body;677nir_cf_extract(&body, nir_before_cf_list(&impl->body),678nir_after_cf_list(&impl->body));679680b.cursor = nir_after_cf_list(&impl->body);681682/* 
Re-emit the header, since the old one got moved into the if branch */683state.header = nir_load_tcs_header_ir3(&b);684nir_ssa_def *iid = build_invocation_id(&b, &state);685686const uint32_t nvertices = shader->info.tess.tcs_vertices_out;687nir_ssa_def *cond = nir_ult(&b, iid, nir_imm_int(&b, nvertices));688689nir_if *nif = nir_push_if(&b, cond);690691nir_cf_reinsert(&body, b.cursor);692693b.cursor = nir_after_cf_list(&nif->then_list);694695/* Insert conditional exit for threads invocation id != 0 */696nir_ssa_def *iid0_cond = nir_ieq_imm(&b, iid, 0);697nir_cond_end_ir3(&b, iid0_cond);698699emit_tess_epilouge(&b, &state);700701nir_pop_if(&b, nif);702703nir_metadata_preserve(impl, 0);704}705706static void707lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)708{709nir_foreach_instr_safe (instr, block) {710if (instr->type != nir_instr_type_intrinsic)711continue;712713nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);714715switch (intr->intrinsic) {716case nir_intrinsic_load_tess_coord: {717b->cursor = nir_after_instr(&intr->instr);718nir_ssa_def *x = nir_channel(b, &intr->dest.ssa, 0);719nir_ssa_def *y = nir_channel(b, &intr->dest.ssa, 1);720nir_ssa_def *z;721722if (state->topology == IR3_TESS_TRIANGLES)723z = nir_fsub(b, nir_fsub(b, nir_imm_float(b, 1.0f), y), x);724else725z = nir_imm_float(b, 0.0f);726727nir_ssa_def *coord = nir_vec3(b, x, y, z);728729nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, coord,730b->cursor.instr);731break;732}733734case nir_intrinsic_load_per_vertex_input: {735// src[] = { vertex, offset }.736737b->cursor = nir_before_instr(&intr->instr);738739nir_ssa_def *address = nir_load_tess_param_base_ir3(b);740nir_ssa_def *offset = build_per_vertex_offset(741b, state, intr->src[0].ssa,742nir_intrinsic_io_semantics(intr).location,743nir_intrinsic_component(intr), intr->src[1].ssa);744745replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,746offset, NULL);747break;748}749750case nir_intrinsic_load_input: 
{751// src[] = { offset }.752753b->cursor = nir_before_instr(&intr->instr);754755nir_ssa_def *address, *offset;756757/* note if vectorization of the tess level loads ever happens:758* "ldg" across 16-byte boundaries can behave incorrectly if results759* are never used. most likely some issue with (sy) not properly760* syncing with values coming from a second memory transaction.761*/762gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;763if (is_tess_levels(location)) {764assert(intr->dest.ssa.num_components == 1);765address = nir_load_tess_factor_base_ir3(b);766offset = build_tessfactor_base(b, location, state);767} else {768address = nir_load_tess_param_base_ir3(b);769offset = build_patch_offset(b, state, location,770nir_intrinsic_component(intr),771intr->src[0].ssa);772}773774offset =775nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr)));776777replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,778offset, NULL);779break;780}781782default:783break;784}785}786}787788void789ir3_nir_lower_tess_eval(nir_shader *shader, struct ir3_shader_variant *v,790unsigned topology)791{792struct state state = {.topology = topology};793794if (shader_debug_enabled(shader->info.stage)) {795mesa_logi("NIR (before tess lowering) for %s shader:",796_mesa_shader_stage_to_string(shader->info.stage));797nir_log_shaderi(shader);798}799800nir_function_impl *impl = nir_shader_get_entrypoint(shader);801assert(impl);802803nir_builder b;804nir_builder_init(&b, impl);805806nir_foreach_block_safe (block, impl)807lower_tess_eval_block(block, &b, &state);808809v->input_size = calc_primitive_map_size(shader);810811nir_metadata_preserve(impl, 0);812}813814static void815lower_gs_block(nir_block *block, nir_builder *b, struct state *state)816{817nir_foreach_instr_safe (instr, block) {818if (instr->type != nir_instr_type_intrinsic)819continue;820821nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);822823switch (intr->intrinsic) {824case 
nir_intrinsic_end_primitive: {825/* Note: This ignores the stream, which seems to match the blob826* behavior. I'm guessing the HW ignores any extraneous cut827* signals from an EndPrimitive() that doesn't correspond to the828* rasterized stream.829*/830b->cursor = nir_before_instr(&intr->instr);831nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 4), 0x1);832nir_instr_remove(&intr->instr);833break;834}835836case nir_intrinsic_emit_vertex: {837/* Load the vertex count */838b->cursor = nir_before_instr(&intr->instr);839nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);840841nir_push_if(b, nir_ieq(b, count, local_thread_id(b)));842843unsigned stream = nir_intrinsic_stream_id(intr);844/* vertex_flags_out |= stream */845nir_store_var(b, state->vertex_flags_out,846nir_ior(b, nir_load_var(b, state->vertex_flags_out),847nir_imm_int(b, stream)),8480x1 /* .x */);849850foreach_two_lists (dest_node, &state->emit_outputs, src_node,851&state->old_outputs) {852nir_variable *dest = exec_node_data(nir_variable, dest_node, node);853nir_variable *src = exec_node_data(nir_variable, src_node, node);854nir_copy_var(b, dest, src);855}856857nir_instr_remove(&intr->instr);858859nir_store_var(b, state->emitted_vertex_var,860nir_iadd(b, nir_load_var(b, state->emitted_vertex_var),861nir_imm_int(b, 1)),8620x1);863864nir_pop_if(b, NULL);865866/* Increment the vertex count by 1 */867nir_store_var(b, state->vertex_count_var,868nir_iadd(b, count, nir_imm_int(b, 1)), 0x1); /* .x */869nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 0), 0x1);870871break;872}873874default:875break;876}877}878}879880void881ir3_nir_lower_gs(nir_shader *shader)882{883struct state state = {};884885if (shader_debug_enabled(shader->info.stage)) {886mesa_logi("NIR (before gs lowering):");887nir_log_shaderi(shader);888}889890/* Create an output var for vertex_flags. 
This will be shadowed below,891* same way regular outputs get shadowed, and this variable will become a892* temporary.893*/894state.vertex_flags_out = nir_variable_create(895shader, nir_var_shader_out, glsl_uint_type(), "vertex_flags");896state.vertex_flags_out->data.driver_location = shader->num_outputs++;897state.vertex_flags_out->data.location = VARYING_SLOT_GS_VERTEX_FLAGS_IR3;898state.vertex_flags_out->data.interpolation = INTERP_MODE_NONE;899900nir_function_impl *impl = nir_shader_get_entrypoint(shader);901assert(impl);902903nir_builder b;904nir_builder_init(&b, impl);905b.cursor = nir_before_cf_list(&impl->body);906907state.header = nir_load_gs_header_ir3(&b);908909/* Generate two set of shadow vars for the output variables. The first910* set replaces the real outputs and the second set (emit_outputs) we'll911* assign in the emit_vertex conditionals. Then at the end of the shader912* we copy the emit_outputs to the real outputs, so that we get913* store_output in uniform control flow.914*/915exec_list_make_empty(&state.old_outputs);916nir_foreach_shader_out_variable_safe (var, shader) {917exec_node_remove(&var->node);918exec_list_push_tail(&state.old_outputs, &var->node);919}920exec_list_make_empty(&state.new_outputs);921exec_list_make_empty(&state.emit_outputs);922nir_foreach_variable_in_list (var, &state.old_outputs) {923/* Create a new output var by cloning the original output var and924* stealing the name.925*/926nir_variable *output = nir_variable_clone(var, shader);927exec_list_push_tail(&state.new_outputs, &output->node);928929/* Rewrite the original output to be a shadow variable. 
*/930var->name = ralloc_asprintf(var, "%s@gs-temp", output->name);931var->data.mode = nir_var_shader_temp;932933/* Clone the shadow variable to create the emit shadow variable that934* we'll assign in the emit conditionals.935*/936nir_variable *emit_output = nir_variable_clone(var, shader);937emit_output->name = ralloc_asprintf(var, "%s@emit-temp", output->name);938exec_list_push_tail(&state.emit_outputs, &emit_output->node);939}940941/* During the shader we'll keep track of which vertex we're currently942* emitting for the EmitVertex test and how many vertices we emitted so we943* know to discard if didn't emit any. In most simple shaders, this can944* all be statically determined and gets optimized away.945*/946state.vertex_count_var =947nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");948state.emitted_vertex_var =949nir_local_variable_create(impl, glsl_uint_type(), "emitted_vertex");950951/* Initialize to 0. */952b.cursor = nir_before_cf_list(&impl->body);953nir_store_var(&b, state.vertex_count_var, nir_imm_int(&b, 0), 0x1);954nir_store_var(&b, state.emitted_vertex_var, nir_imm_int(&b, 0), 0x1);955nir_store_var(&b, state.vertex_flags_out, nir_imm_int(&b, 4), 0x1);956957nir_foreach_block_safe (block, impl)958lower_gs_block(block, &b, &state);959960set_foreach (impl->end_block->predecessors, block_entry) {961struct nir_block *block = (void *)block_entry->key;962b.cursor = nir_after_block_before_jump(block);963964nir_ssa_def *cond =965nir_ieq_imm(&b, nir_load_var(&b, state.emitted_vertex_var), 0);966967nir_discard_if(&b, cond);968969foreach_two_lists (dest_node, &state.new_outputs, src_node,970&state.emit_outputs) {971nir_variable *dest = exec_node_data(nir_variable, dest_node, node);972nir_variable *src = exec_node_data(nir_variable, src_node, node);973nir_copy_var(&b, dest, src);974}975}976977exec_list_append(&shader->variables, &state.old_outputs);978exec_list_append(&shader->variables, 
&state.emit_outputs);979exec_list_append(&shader->variables, &state.new_outputs);980981nir_metadata_preserve(impl, 0);982983nir_lower_global_vars_to_local(shader);984nir_split_var_copies(shader);985nir_lower_var_copies(shader);986987nir_fixup_deref_modes(shader);988989if (shader_debug_enabled(shader->info.stage)) {990mesa_logi("NIR (after gs lowering):");991nir_log_shaderi(shader);992}993}994995996