Path: blob/21.2-virgl/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
4570 views
/*1* Copyright 2020 Advanced Micro Devices, Inc.2* All Rights Reserved.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* on the rights to use, copy, modify, merge, publish, distribute, sub8* license, and/or sell copies of the Software, and to permit persons to whom9* the Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL18* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,19* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR20* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE21* USE OR OTHER DEALINGS IN THE SOFTWARE.22*/2324#include "si_pipe.h"25#include "si_shader_internal.h"26#include "sid.h"2728static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx)29{30switch (ctx->stage) {31case MESA_SHADER_TESS_CTRL:32return si_unpack_param(ctx, ctx->args.tcs_rel_ids, 0, 8);3334case MESA_SHADER_TESS_EVAL:35return ac_get_arg(&ctx->ac, ctx->args.tes_rel_patch_id);3637default:38assert(0);39return NULL;40}41}4243/* Tessellation shaders pass outputs to the next shader using LDS.44*45* LS outputs = TCS inputs46* TCS outputs = TES inputs47*48* The LDS layout is:49* - TCS inputs for patch 050* - TCS inputs for patch 151* - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2)52* - ...53* - TCS outputs for patch 0 = get_tcs_out_patch0_offset54* - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset55* - TCS outputs for patch 156* - Per-patch TCS outputs for patch 157* - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2)58* - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2)59* - ...60*61* All three shaders VS(LS), TCS, TES share the same LDS space.62*/6364static LLVMValueRef get_tcs_in_patch_stride(struct si_shader_context *ctx)65{66return si_unpack_param(ctx, ctx->vs_state_bits, 11, 13);67}6869static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *ctx)70{71assert(ctx->stage == MESA_SHADER_TESS_CTRL);7273if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)74return util_last_bit64(ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) * 4;7576return util_last_bit64(ctx->shader->selector->outputs_written) * 4;77}7879static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx)80{81unsigned stride = get_tcs_out_vertex_dw_stride_constant(ctx);8283return LLVMConstInt(ctx->ac.i32, stride, 0);84}8586static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)87{88if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)89return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 0, 13);9091const struct si_shader_info *info = &ctx->shader->selector->info;92unsigned tcs_out_vertices = info->base.tess.tcs_vertices_out;93unsigned vertex_dw_stride = get_tcs_out_vertex_dw_stride_constant(ctx);94unsigned num_patch_outputs = util_last_bit64(ctx->shader->selector->patch_outputs_written);95unsigned patch_dw_stride = tcs_out_vertices * vertex_dw_stride + num_patch_outputs * 4;96return LLVMConstInt(ctx->ac.i32, patch_dw_stride, 0);97}9899static LLVMValueRef get_tcs_out_patch0_offset(struct si_shader_context *ctx)100{101return LLVMBuildMul(ctx->ac.builder, si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 0, 16),102LLVMConstInt(ctx->ac.i32, 4, 0), "");103}104105static LLVMValueRef get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx)106{107return LLVMBuildMul(ctx->ac.builder, si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 16, 16),108LLVMConstInt(ctx->ac.i32, 4, 0), "");109}110111static LLVMValueRef get_tcs_in_current_patch_offset(struct si_shader_context *ctx)112{113LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx);114LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);115116return LLVMBuildMul(ctx->ac.builder, patch_stride, rel_patch_id, "");117}118119static LLVMValueRef get_tcs_out_current_patch_offset(struct si_shader_context *ctx)120{121LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx);122LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);123LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);124125return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_offset);126}127128static LLVMValueRef get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)129{130LLVMValueRef patch0_patch_data_offset = get_tcs_out_patch0_patch_data_offset(ctx);131LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);132LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);133134return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_patch_data_offset);135}136137static LLVMValueRef get_num_tcs_out_vertices(struct si_shader_context *ctx)138{139unsigned tcs_out_vertices =140ctx->shader->selector ? ctx->shader->selector->info.base.tess.tcs_vertices_out141: 0;142143/* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */144if (ctx->stage == MESA_SHADER_TESS_CTRL && tcs_out_vertices)145return LLVMConstInt(ctx->ac.i32, tcs_out_vertices, 0);146147return LLVMBuildAdd(ctx->ac.builder,148si_unpack_param(ctx, ctx->tcs_offchip_layout, 6, 5), ctx->ac.i32_1, "");149}150151static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)152{153unsigned stride;154155switch (ctx->stage) {156case MESA_SHADER_VERTEX:157stride = ctx->shader->selector->lshs_vertex_stride / 4;158return LLVMConstInt(ctx->ac.i32, stride, 0);159160case MESA_SHADER_TESS_CTRL:161if (ctx->screen->info.chip_class >= GFX9 && ctx->shader->is_monolithic) {162stride = ctx->shader->key.part.tcs.ls->lshs_vertex_stride / 4;163return LLVMConstInt(ctx->ac.i32, stride, 0);164}165return si_unpack_param(ctx, ctx->vs_state_bits, 24, 8);166167default:168assert(0);169return NULL;170}171}172173static LLVMValueRef174get_dw_address_from_generic_indices(struct si_shader_context *ctx, LLVMValueRef vertex_dw_stride,175LLVMValueRef base_addr, LLVMValueRef vertex_index,176LLVMValueRef param_index, ubyte name)177{178if (vertex_dw_stride) {179base_addr = ac_build_imad(&ctx->ac, vertex_index, vertex_dw_stride, base_addr);180}181182if (param_index) {183base_addr = ac_build_imad(&ctx->ac, param_index, LLVMConstInt(ctx->ac.i32, 4, 0), base_addr);184}185186int param = name >= VARYING_SLOT_PATCH0 ||187name == VARYING_SLOT_TESS_LEVEL_INNER ||188name == VARYING_SLOT_TESS_LEVEL_OUTER189? si_shader_io_get_unique_index_patch(name)190: si_shader_io_get_unique_index(name, false);191192/* Add the base address of the element. */193return LLVMBuildAdd(ctx->ac.builder, base_addr, LLVMConstInt(ctx->ac.i32, param * 4, 0), "");194}195196/* The offchip buffer layout for TCS->TES is197*198* - attribute 0 of patch 0 vertex 0199* - attribute 0 of patch 0 vertex 1200* - attribute 0 of patch 0 vertex 2201* ...202* - attribute 0 of patch 1 vertex 0203* - attribute 0 of patch 1 vertex 1204* ...205* - attribute 1 of patch 0 vertex 0206* - attribute 1 of patch 0 vertex 1207* ...208* - per patch attribute 0 of patch 0209* - per patch attribute 0 of patch 1210* ...211*212* Note that every attribute has 4 components.213*/214static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx,215LLVMValueRef rel_patch_id, LLVMValueRef vertex_index,216LLVMValueRef param_index)217{218LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices;219LLVMValueRef param_stride, constant16;220221vertices_per_patch = get_num_tcs_out_vertices(ctx);222num_patches = si_unpack_param(ctx, ctx->tcs_offchip_layout, 0, 6);223num_patches = LLVMBuildAdd(ctx->ac.builder, num_patches, ctx->ac.i32_1, "");224total_vertices = LLVMBuildMul(ctx->ac.builder, vertices_per_patch, num_patches, "");225226constant16 = LLVMConstInt(ctx->ac.i32, 16, 0);227if (vertex_index) {228base_addr = ac_build_imad(&ctx->ac, rel_patch_id, vertices_per_patch, vertex_index);229param_stride = total_vertices;230} else {231base_addr = rel_patch_id;232param_stride = num_patches;233}234235base_addr = ac_build_imad(&ctx->ac, param_index, param_stride, base_addr);236base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, "");237238if (!vertex_index) {239LLVMValueRef patch_data_offset = si_unpack_param(ctx, ctx->tcs_offchip_layout, 11, 21);240241base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, patch_data_offset, "");242}243return base_addr;244}245246static LLVMValueRef get_tcs_tes_buffer_address_from_generic_indices(struct si_shader_context *ctx,247LLVMValueRef vertex_index,248LLVMValueRef param_index,249ubyte name)250{251unsigned param_index_base;252253param_index_base = name >= VARYING_SLOT_PATCH0 ||254name == VARYING_SLOT_TESS_LEVEL_INNER ||255name == VARYING_SLOT_TESS_LEVEL_OUTER256? si_shader_io_get_unique_index_patch(name)257: si_shader_io_get_unique_index(name, false);258259if (param_index) {260param_index = LLVMBuildAdd(ctx->ac.builder, param_index,261LLVMConstInt(ctx->ac.i32, param_index_base, 0), "");262} else {263param_index = LLVMConstInt(ctx->ac.i32, param_index_base, 0);264}265266return get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), vertex_index, param_index);267}268269static LLVMValueRef buffer_load(struct si_shader_context *ctx, LLVMTypeRef type, unsigned swizzle,270LLVMValueRef buffer, LLVMValueRef offset, LLVMValueRef base,271bool can_speculate)272{273LLVMValueRef value;274LLVMTypeRef vec_type = LLVMVectorType(type, 4);275276if (swizzle == ~0) {277value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset, 0, type, ac_glc,278can_speculate, false);279280return LLVMBuildBitCast(ctx->ac.builder, value, vec_type, "");281}282283value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset, 0, type, ac_glc,284can_speculate, false);285286value = LLVMBuildBitCast(ctx->ac.builder, value, vec_type, "");287return LLVMBuildExtractElement(ctx->ac.builder, value, LLVMConstInt(ctx->ac.i32, swizzle, 0),288"");289}290291/**292* Load from LSHS LDS storage.293*294* \param type output value type295* \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4296* \param dw_addr address in dwords297*/298static LLVMValueRef lshs_lds_load(struct si_shader_context *ctx, LLVMTypeRef type, unsigned swizzle,299LLVMValueRef dw_addr)300{301LLVMValueRef value;302303if (swizzle == ~0) {304LLVMValueRef values[4];305306for (unsigned chan = 0; chan < 4; chan++)307values[chan] = lshs_lds_load(ctx, type, chan, dw_addr);308309return ac_build_gather_values(&ctx->ac, values, 4);310}311312dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, LLVMConstInt(ctx->ac.i32, swizzle, 0), "");313value = ac_lds_load(&ctx->ac, dw_addr);314return LLVMBuildBitCast(ctx->ac.builder, value, type, "");315}316317/**318* Store to LSHS LDS storage.319*320* \param swizzle offset (typically 0..3)321* \param dw_addr address in dwords322* \param value value to store323*/324static void lshs_lds_store(struct si_shader_context *ctx, unsigned dw_offset_imm,325LLVMValueRef dw_addr, LLVMValueRef value)326{327dw_addr =328LLVMBuildAdd(ctx->ac.builder, dw_addr, LLVMConstInt(ctx->ac.i32, dw_offset_imm, 0), "");329330ac_lds_store(&ctx->ac, dw_addr, value);331}332333enum si_tess_ring334{335TCS_FACTOR_RING,336TESS_OFFCHIP_RING_TCS,337TESS_OFFCHIP_RING_TES,338};339340static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx, enum si_tess_ring ring)341{342LLVMBuilderRef builder = ctx->ac.builder;343LLVMValueRef addr = ac_get_arg(344&ctx->ac, ring == TESS_OFFCHIP_RING_TES ? ctx->tes_offchip_addr : ctx->tcs_out_lds_layout);345346/* TCS only receives high 13 bits of the address. */347if (ring == TESS_OFFCHIP_RING_TCS || ring == TCS_FACTOR_RING) {348addr = LLVMBuildAnd(builder, addr, LLVMConstInt(ctx->ac.i32, 0xfff80000, 0), "");349}350351if (ring == TCS_FACTOR_RING) {352unsigned tf_offset = ctx->screen->tess_offchip_ring_size;353addr = LLVMBuildAdd(builder, addr, LLVMConstInt(ctx->ac.i32, tf_offset, 0), "");354}355356uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |357S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);358359if (ctx->screen->info.chip_class >= GFX10)360rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |361S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);362else363rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |364S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);365366LLVMValueRef desc[4];367desc[0] = addr;368desc[1] = LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);369desc[2] = LLVMConstInt(ctx->ac.i32, 0xffffffff, 0);370desc[3] = LLVMConstInt(ctx->ac.i32, rsrc3, false);371372return ac_build_gather_values(&ctx->ac, desc, 4);373}374375void si_llvm_preload_tes_rings(struct si_shader_context *ctx)376{377ctx->tess_offchip_ring = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TES);378}379380static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMTypeRef type,381LLVMValueRef vertex_index, LLVMValueRef param_index,382unsigned driver_location, unsigned component,383unsigned num_components, bool load_input,384bool vertex_index_is_invoc_id)385{386struct si_shader_context *ctx = si_shader_context_from_abi(abi);387struct si_shader_info *info = &ctx->shader->selector->info;388LLVMValueRef dw_addr, stride;389ubyte semantic;390391if (load_input) {392semantic = info->input_semantic[driver_location];393} else {394semantic = info->output_semantic[driver_location];395}396397/* Load the TCS input from a VGPR if possible. */398if (ctx->shader->key.opt.same_patch_vertices &&399load_input && vertex_index_is_invoc_id && !param_index) {400unsigned func_param = ctx->args.tcs_rel_ids.arg_index + 1 +401si_shader_io_get_unique_index(semantic, false) * 4;402LLVMValueRef value[4];403404for (unsigned i = component; i < component + num_components; i++) {405value[i] = LLVMGetParam(ctx->main_fn, func_param + i);406value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, "");407}408409return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);410}411412bool is_patch = vertex_index == NULL;413assert((semantic >= VARYING_SLOT_PATCH0 ||414semantic == VARYING_SLOT_TESS_LEVEL_INNER ||415semantic == VARYING_SLOT_TESS_LEVEL_OUTER) == is_patch);416417if (load_input) {418stride = get_tcs_in_vertex_dw_stride(ctx);419dw_addr = get_tcs_in_current_patch_offset(ctx);420} else {421if (is_patch) {422stride = NULL;423dw_addr = get_tcs_out_current_patch_data_offset(ctx);424} else {425stride = get_tcs_out_vertex_dw_stride(ctx);426dw_addr = get_tcs_out_current_patch_offset(ctx);427}428}429430dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, param_index,431semantic);432433LLVMValueRef value[4];434for (unsigned i = component; i < component + num_components; i++)435value[i] = lshs_lds_load(ctx, type, i, dw_addr);436437return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);438}439440static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef type,441LLVMValueRef vertex_index, LLVMValueRef param_index,442unsigned driver_location, unsigned component,443unsigned num_components,444bool load_input, bool vertex_index_is_invoc_id)445{446struct si_shader_context *ctx = si_shader_context_from_abi(abi);447struct si_shader_info *info = &ctx->shader->selector->info;448LLVMValueRef base, addr;449450ubyte semantic = info->input_semantic[driver_location];451452assert((semantic >= VARYING_SLOT_PATCH0 ||453semantic == VARYING_SLOT_TESS_LEVEL_INNER ||454semantic == VARYING_SLOT_TESS_LEVEL_OUTER) == (vertex_index == NULL));455456base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);457458addr =459get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, semantic);460461/* TODO: This will generate rather ordinary llvm code, although it462* should be easy for the optimizer to fix up. In future we might want463* to refactor buffer_load().464*/465LLVMValueRef value[4];466for (unsigned i = component; i < component + num_components; i++)467value[i] = buffer_load(ctx, type, i, ctx->tess_offchip_ring, base, addr, true);468469return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);470}471472static void si_nir_store_output_tcs(struct ac_shader_abi *abi,473LLVMValueRef vertex_index, LLVMValueRef param_index,474LLVMValueRef src, unsigned writemask,475unsigned component, unsigned location, unsigned driver_location)476{477struct si_shader_context *ctx = si_shader_context_from_abi(abi);478struct si_shader_info *info = &ctx->shader->selector->info;479LLVMValueRef dw_addr, stride;480LLVMValueRef buffer, base, addr;481LLVMValueRef values[8];482bool is_tess_factor = false, is_tess_inner = false;483484ubyte semantic = info->output_semantic[driver_location];485486const bool is_const = !param_index;487const bool is_patch = vertex_index == NULL;488489/* Invalid SPIR-V can cause this. */490if ((semantic >= VARYING_SLOT_PATCH0 || semantic == VARYING_SLOT_TESS_LEVEL_INNER ||491semantic == VARYING_SLOT_TESS_LEVEL_OUTER) != is_patch)492return;493494if (!is_patch) {495stride = get_tcs_out_vertex_dw_stride(ctx);496dw_addr = get_tcs_out_current_patch_offset(ctx);497dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, param_index,498semantic);499} else {500dw_addr = get_tcs_out_current_patch_data_offset(ctx);501dw_addr = get_dw_address_from_generic_indices(ctx, NULL, dw_addr, vertex_index, param_index,502semantic);503504if (is_const) {505int semantic = info->output_semantic[driver_location];506507/* Always write tess factors into LDS for the TCS epilog. */508if (semantic == VARYING_SLOT_TESS_LEVEL_INNER ||509semantic == VARYING_SLOT_TESS_LEVEL_OUTER) {510is_tess_factor = true;511is_tess_inner = semantic == VARYING_SLOT_TESS_LEVEL_INNER;512}513}514}515516buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);517518base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);519520addr =521get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, semantic);522523for (unsigned chan = component; chan < 4; chan++) {524if (!(writemask & (1 << chan)))525continue;526LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);527528/* Skip LDS stores if there is no LDS read of this output. */529if (info->output_readmask[driver_location] & (1 << chan) ||530/* The epilog reads LDS if invocation 0 doesn't define tess factors. */531(is_tess_factor &&532!ctx->shader->selector->info.tessfactors_are_def_in_all_invocs))533lshs_lds_store(ctx, chan, dw_addr, value);534535value = ac_to_integer(&ctx->ac, value);536values[chan] = value;537538if (writemask != 0xF && !is_tess_factor) {539ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1, addr, base,5404 * chan, ac_glc);541}542543/* Write tess factors into VGPRs for the epilog. */544if (is_tess_factor && ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) {545if (!is_tess_inner) {546LLVMBuildStore(ctx->ac.builder, value, /* outer */547ctx->invoc0_tess_factors[chan]);548} else if (chan < 2) {549LLVMBuildStore(ctx->ac.builder, value, /* inner */550ctx->invoc0_tess_factors[4 + chan]);551}552}553}554555if (writemask == 0xF && !is_tess_factor) {556LLVMValueRef value = ac_build_gather_values(&ctx->ac, values, 4);557ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, addr, base, 0, ac_glc);558}559}560561static LLVMValueRef si_load_tess_coord(struct ac_shader_abi *abi)562{563struct si_shader_context *ctx = si_shader_context_from_abi(abi);564LLVMValueRef coord[4] = {ac_get_arg(&ctx->ac, ctx->args.tes_u),565ac_get_arg(&ctx->ac, ctx->args.tes_v),566ctx->ac.f32_0, ctx->ac.f32_0};567568/* For triangles, the vector should be (u, v, 1-u-v). */569if (ctx->shader->selector->info.base.tess.primitive_mode == GL_TRIANGLES) {570coord[2] = LLVMBuildFSub(ctx->ac.builder, ctx->ac.f32_1,571LLVMBuildFAdd(ctx->ac.builder, coord[0], coord[1], ""), "");572}573return ac_build_gather_values(&ctx->ac, coord, 4);574}575576static LLVMValueRef load_tess_level(struct si_shader_context *ctx, unsigned semantic)577{578LLVMValueRef base, addr;579580int param = si_shader_io_get_unique_index_patch(semantic);581582base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);583addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL,584LLVMConstInt(ctx->ac.i32, param, 0));585586return buffer_load(ctx, ctx->ac.f32, ~0, ctx->tess_offchip_ring, base, addr, true);587}588589static LLVMValueRef load_tess_level_default(struct si_shader_context *ctx, unsigned sysval)590{591LLVMValueRef buf, slot, val[4];592int i, offset;593594slot = LLVMConstInt(ctx->ac.i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0);595buf = ac_get_arg(&ctx->ac, ctx->internal_bindings);596buf = ac_build_load_to_sgpr(&ctx->ac, buf, slot);597offset = sysval == SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT ? 4 : 0;598599for (i = 0; i < 4; i++)600val[i] = si_buffer_load_const(ctx, buf, LLVMConstInt(ctx->ac.i32, (offset + i) * 4, 0));601return ac_build_gather_values(&ctx->ac, val, 4);602}603604static LLVMValueRef si_load_tess_level(struct ac_shader_abi *abi, unsigned varying_id,605bool load_default_state)606{607struct si_shader_context *ctx = si_shader_context_from_abi(abi);608unsigned semantic;609610if (load_default_state) {611switch (varying_id) {612case VARYING_SLOT_TESS_LEVEL_INNER:613semantic = SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT;614break;615case VARYING_SLOT_TESS_LEVEL_OUTER:616semantic = SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT;617break;618default:619unreachable("unknown tess level");620}621return load_tess_level_default(ctx, semantic);622}623624switch (varying_id) {625case VARYING_SLOT_TESS_LEVEL_INNER:626semantic = VARYING_SLOT_TESS_LEVEL_INNER;627break;628case VARYING_SLOT_TESS_LEVEL_OUTER:629semantic = VARYING_SLOT_TESS_LEVEL_OUTER;630break;631default:632unreachable("unknown tess level");633}634635return load_tess_level(ctx, semantic);636}637638static LLVMValueRef si_load_patch_vertices_in(struct ac_shader_abi *abi)639{640struct si_shader_context *ctx = si_shader_context_from_abi(abi);641if (ctx->stage == MESA_SHADER_TESS_CTRL)642return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 13, 6);643else if (ctx->stage == MESA_SHADER_TESS_EVAL)644return get_num_tcs_out_vertices(ctx);645else646unreachable("invalid shader stage for VERTICESIN");647}648649/**650* Forward all outputs from the vertex shader to the TES. This is only used651* for the fixed function TCS.652*/653static void si_copy_tcs_inputs(struct si_shader_context *ctx)654{655LLVMValueRef invocation_id, buffer, buffer_offset;656LLVMValueRef lds_vertex_stride, lds_base;657uint64_t inputs;658659invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);660buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);661buffer_offset = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);662663lds_vertex_stride = get_tcs_in_vertex_dw_stride(ctx);664lds_base = get_tcs_in_current_patch_offset(ctx);665lds_base = ac_build_imad(&ctx->ac, invocation_id, lds_vertex_stride, lds_base);666667inputs = ctx->shader->key.mono.u.ff_tcs_inputs_to_copy;668while (inputs) {669unsigned i = u_bit_scan64(&inputs);670671LLVMValueRef lds_ptr =672LLVMBuildAdd(ctx->ac.builder, lds_base, LLVMConstInt(ctx->ac.i32, 4 * i, 0), "");673674LLVMValueRef buffer_addr = get_tcs_tes_buffer_address(675ctx, get_rel_patch_id(ctx), invocation_id, LLVMConstInt(ctx->ac.i32, i, 0));676677LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr);678679ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr, buffer_offset, 0,680ac_glc);681}682}683684static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef rel_patch_id,685LLVMValueRef invocation_id,686LLVMValueRef tcs_out_current_patch_data_offset,687LLVMValueRef invoc0_tf_outer[4], LLVMValueRef invoc0_tf_inner[2])688{689struct si_shader *shader = ctx->shader;690unsigned tess_inner_index, tess_outer_index;691LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;692LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];693unsigned stride, outer_comps, inner_comps, i, offset;694695/* Add a barrier before loading tess factors from LDS. */696if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def)697si_llvm_emit_barrier(ctx);698699/* Do this only for invocation 0, because the tess levels are per-patch,700* not per-vertex.701*702* This can't jump, because invocation 0 executes this. It should703* at least mask out the loads and stores for other invocations.704*/705ac_build_ifcc(&ctx->ac,706LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, invocation_id, ctx->ac.i32_0, ""), 6503);707708/* Determine the layout of one tess factor element in the buffer. */709switch (shader->key.part.tcs.epilog.prim_mode) {710case GL_LINES:711stride = 2; /* 2 dwords, 1 vec2 store */712outer_comps = 2;713inner_comps = 0;714break;715case GL_TRIANGLES:716stride = 4; /* 4 dwords, 1 vec4 store */717outer_comps = 3;718inner_comps = 1;719break;720case GL_QUADS:721stride = 6; /* 6 dwords, 2 stores (vec4 + vec2) */722outer_comps = 4;723inner_comps = 2;724break;725default:726assert(0);727return;728}729730for (i = 0; i < 4; i++) {731inner[i] = LLVMGetUndef(ctx->ac.i32);732outer[i] = LLVMGetUndef(ctx->ac.i32);733}734735if (shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) {736/* Tess factors are in VGPRs. */737for (i = 0; i < outer_comps; i++)738outer[i] = out[i] = invoc0_tf_outer[i];739for (i = 0; i < inner_comps; i++)740inner[i] = out[outer_comps + i] = invoc0_tf_inner[i];741} else {742/* Load tess_inner and tess_outer from LDS.743* Any invocation can write them, so we can't get them from a temporary.744*/745tess_inner_index = si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER);746tess_outer_index = si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER);747748lds_base = tcs_out_current_patch_data_offset;749lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base,750LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, 0), "");751lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base,752LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, 0), "");753754for (i = 0; i < outer_comps; i++) {755outer[i] = out[i] = lshs_lds_load(ctx, ctx->ac.i32, i, lds_outer);756}757for (i = 0; i < inner_comps; i++) {758inner[i] = out[outer_comps + i] = lshs_lds_load(ctx, ctx->ac.i32, i, lds_inner);759}760}761762if (shader->key.part.tcs.epilog.prim_mode == GL_LINES) {763/* For isolines, the hardware expects tess factors in the764* reverse order from what NIR specifies.765*/766LLVMValueRef tmp = out[0];767out[0] = out[1];768out[1] = tmp;769}770771/* Convert the outputs to vectors for stores. */772vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4));773vec1 = NULL;774775if (stride > 4)776vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4);777778/* Get the buffer. */779buffer = get_tess_ring_descriptor(ctx, TCS_FACTOR_RING);780781/* Get the offset. */782tf_base = ac_get_arg(&ctx->ac, ctx->args.tcs_factor_offset);783byteoffset =784LLVMBuildMul(ctx->ac.builder, rel_patch_id, LLVMConstInt(ctx->ac.i32, 4 * stride, 0), "");785offset = 0;786787/* Store the dynamic HS control word. */788if (ctx->screen->info.chip_class <= GFX8) {789ac_build_ifcc(&ctx->ac,790LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, rel_patch_id, ctx->ac.i32_0, ""), 6504);791ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->ac.i32, 0x80000000, 0), 1,792ctx->ac.i32_0, tf_base, offset, ac_glc);793ac_build_endif(&ctx->ac, 6504);794offset += 4;795}796797/* Store the tessellation factors. */798ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, MIN2(stride, 4), byteoffset, tf_base, offset,799ac_glc);800offset += 16;801if (vec1)802ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, stride - 4, byteoffset, tf_base, offset,803ac_glc);804805/* Store the tess factors into the offchip buffer if TES reads them. */806if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {807LLVMValueRef buf, base, inner_vec, outer_vec, tf_outer_offset;808LLVMValueRef tf_inner_offset;809unsigned param_outer, param_inner;810811buf = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);812base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);813814param_outer = si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER);815tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,816LLVMConstInt(ctx->ac.i32, param_outer, 0));817818unsigned outer_vec_size = ac_has_vec3_support(ctx->screen->info.chip_class, false)819? outer_comps820: util_next_power_of_two(outer_comps);821outer_vec = ac_build_gather_values(&ctx->ac, outer, outer_vec_size);822823ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec, outer_comps, tf_outer_offset, base, 0,824ac_glc);825if (inner_comps) {826param_inner = si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER);827tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,828LLVMConstInt(ctx->ac.i32, param_inner, 0));829830inner_vec =831inner_comps == 1 ? inner[0] : ac_build_gather_values(&ctx->ac, inner, inner_comps);832ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec, inner_comps, tf_inner_offset, base,8330, ac_glc);834}835}836837ac_build_endif(&ctx->ac, 6503);838}839840/* This only writes the tessellation factor levels. */841static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi, unsigned max_outputs,842LLVMValueRef *addrs)843{844struct si_shader_context *ctx = si_shader_context_from_abi(abi);845LLVMBuilderRef builder = ctx->ac.builder;846LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;847848si_copy_tcs_inputs(ctx);849850rel_patch_id = get_rel_patch_id(ctx);851invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);852tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);853854if (ctx->screen->info.chip_class >= GFX9 && !ctx->shader->is_monolithic) {855LLVMBasicBlockRef blocks[2] = {LLVMGetInsertBlock(builder), ctx->merged_wrap_if_entry_block};856LLVMValueRef values[2];857858ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);859860values[0] = rel_patch_id;861values[1] = LLVMGetUndef(ctx->ac.i32);862rel_patch_id = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);863864values[0] = tf_lds_offset;865values[1] = LLVMGetUndef(ctx->ac.i32);866tf_lds_offset = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);867868values[0] = invocation_id;869values[1] = ctx->ac.i32_1; /* cause the epilog to skip threads */870invocation_id = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);871}872873/* Return epilog parameters from this function. */874LLVMValueRef ret = ctx->return_value;875unsigned vgpr;876877if (ctx->screen->info.chip_class >= GFX9) {878ret =879si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);880ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT);881/* Tess offchip and tess factor offsets are at the beginning. */882ret = si_insert_input_ret(ctx, ret, ctx->args.tess_offchip_offset, 2);883ret = si_insert_input_ret(ctx, ret, ctx->args.tcs_factor_offset, 4);884vgpr = 8 + GFX9_SGPR_TCS_OUT_LAYOUT + 1;885} else {886ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, GFX6_SGPR_TCS_OFFCHIP_LAYOUT);887ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, GFX6_SGPR_TCS_OUT_LAYOUT);888/* Tess offchip and tess factor offsets are after user SGPRs. */889ret = si_insert_input_ret(ctx, ret, ctx->args.tess_offchip_offset, GFX6_TCS_NUM_USER_SGPR);890ret = si_insert_input_ret(ctx, ret, ctx->args.tcs_factor_offset, GFX6_TCS_NUM_USER_SGPR + 1);891vgpr = GFX6_TCS_NUM_USER_SGPR + 2;892}893894/* VGPRs */895rel_patch_id = ac_to_float(&ctx->ac, rel_patch_id);896invocation_id = ac_to_float(&ctx->ac, invocation_id);897tf_lds_offset = ac_to_float(&ctx->ac, tf_lds_offset);898899/* Leave a hole corresponding to the two input VGPRs. This ensures that900* the invocation_id output does not alias the tcs_rel_ids input,901* which saves a V_MOV on gfx9.902*/903vgpr += 2;904905ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");906ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");907908if (ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) {909vgpr++; /* skip the tess factor LDS offset */910for (unsigned i = 0; i < 6; i++) {911LLVMValueRef value = LLVMBuildLoad(builder, ctx->invoc0_tess_factors[i], "");912value = ac_to_float(&ctx->ac, value);913ret = LLVMBuildInsertValue(builder, ret, value, vgpr++, "");914}915} else {916ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");917}918ctx->return_value = ret;919}920921/* Pass TCS inputs from LS to TCS on GFX9. */922static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)923{924if (!ctx->shader->is_monolithic)925ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);926927LLVMValueRef ret = ctx->return_value;928929ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0);930ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1);931ret = si_insert_input_ret(ctx, ret, ctx->args.tess_offchip_offset, 2);932ret = si_insert_input_ret(ctx, ret, ctx->args.merged_wave_info, 3);933ret = si_insert_input_ret(ctx, ret, ctx->args.tcs_factor_offset, 4);934ret = si_insert_input_ret(ctx, ret, ctx->args.scratch_offset, 5);935936ret = si_insert_input_ptr(ctx, ret, ctx->internal_bindings, 8 + SI_SGPR_INTERNAL_BINDINGS);937ret = si_insert_input_ptr(ctx, ret, ctx->bindless_samplers_and_images,9388 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);939940ret = si_insert_input_ret(ctx, ret, ctx->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS);941942ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);943ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_offsets, 8 + GFX9_SGPR_TCS_OUT_OFFSETS);944ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT);945946unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;947ret = LLVMBuildInsertValue(ctx->ac.builder, ret,948ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.tcs_patch_id)),949vgpr++, "");950ret = LLVMBuildInsertValue(ctx->ac.builder, ret,951ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.tcs_rel_ids)),952vgpr++, "");953ctx->return_value = ret;954}955956void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs)957{958struct si_shader_context *ctx = si_shader_context_from_abi(abi);959struct si_shader *shader = ctx->shader;960struct si_shader_info *info = &shader->selector->info;961unsigned i, chan;962LLVMValueRef vertex_id = ac_get_arg(&ctx->ac, ctx->args.vs_rel_patch_id);963LLVMValueRef vertex_dw_stride = get_tcs_in_vertex_dw_stride(ctx);964LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id, vertex_dw_stride, "");965unsigned ret_offset = 8 + GFX9_TCS_NUM_USER_SGPR + 2;966967/* Write outputs to LDS. The next shader (TCS aka HS) will read968* its inputs from it. */969for (i = 0; i < info->num_outputs; i++) {970unsigned semantic = info->output_semantic[i];971972/* The ARB_shader_viewport_layer_array spec contains the973* following issue:974*975* 2) What happens if gl_ViewportIndex or gl_Layer is976* written in the vertex shader and a geometry shader is977* present?978*979* RESOLVED: The value written by the last vertex processing980* stage is used. If the last vertex processing stage981* (vertex, tessellation evaluation or geometry) does not982* statically assign to gl_ViewportIndex or gl_Layer, index983* or layer zero is assumed.984*985* So writes to those outputs in VS-as-LS are simply ignored.986*/987if (semantic == VARYING_SLOT_LAYER || semantic == VARYING_SLOT_VIEWPORT)988continue;989990int param = si_shader_io_get_unique_index(semantic, false);991LLVMValueRef dw_addr =992LLVMBuildAdd(ctx->ac.builder, base_dw_addr, LLVMConstInt(ctx->ac.i32, param * 4, 0), "");993994for (chan = 0; chan < 4; chan++) {995if (!(info->output_usagemask[i] & (1 << chan)))996continue;997998LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");9991000if (!shader->key.opt.same_patch_vertices ||1001!(ctx->next_shader_sel->tcs_vgpr_only_inputs & (1ull << semantic)))1002lshs_lds_store(ctx, chan, dw_addr, value);10031004if (shader->key.opt.same_patch_vertices) {1005ctx->return_value = LLVMBuildInsertValue(ctx->ac.builder, ctx->return_value,1006value, ret_offset + param * 4 + chan, "");1007}1008}1009}10101011if (ctx->screen->info.chip_class >= GFX9)1012si_set_ls_return_value_for_tcs(ctx);1013}10141015/**1016* Compile the TCS epilog function. This writes tesselation factors to memory1017* based on the output primitive type of the tesselator (determined by TES).1018*/1019void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_part_key *key)1020{1021memset(&ctx->args, 0, sizeof(ctx->args));10221023if (ctx->screen->info.chip_class >= GFX9) {1024ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1025ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1026ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);1027ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* wave info */1028ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tcs_factor_offset);1029ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1030ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1031ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1032ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1033ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1034ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1035ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1036ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1037ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1038ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1039ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1040ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);1041ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1042ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);1043} else {1044ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1045ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1046ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1047ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1048ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);1049ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1050ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);1051ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);1052ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);1053ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tcs_factor_offset);1054}10551056ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */1057ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */1058struct ac_arg rel_patch_id; /* patch index within the wave (REL_PATCH_ID) */1059ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &rel_patch_id);1060struct ac_arg invocation_id; /* invocation ID within the patch */1061ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &invocation_id);1062struct ac_arg1063tcs_out_current_patch_data_offset; /* LDS offset where tess factors should be loaded from */1064ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &tcs_out_current_patch_data_offset);10651066struct ac_arg tess_factors[6];1067for (unsigned i = 0; i < 6; i++)1068ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &tess_factors[i]);10691070/* Create the function. */1071si_llvm_create_func(ctx, "tcs_epilog", NULL, 0, ctx->screen->info.chip_class >= GFX7 ? 128 : 0);1072ac_declare_lds_as_pointer(&ctx->ac);10731074LLVMValueRef invoc0_tess_factors[6];1075for (unsigned i = 0; i < 6; i++)1076invoc0_tess_factors[i] = ac_get_arg(&ctx->ac, tess_factors[i]);10771078si_write_tess_factors(ctx, ac_get_arg(&ctx->ac, rel_patch_id),1079ac_get_arg(&ctx->ac, invocation_id),1080ac_get_arg(&ctx->ac, tcs_out_current_patch_data_offset),1081invoc0_tess_factors, invoc0_tess_factors + 4);10821083LLVMBuildRetVoid(ctx->ac.builder);1084}10851086void si_llvm_init_tcs_callbacks(struct si_shader_context *ctx)1087{1088ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings;1089ctx->abi.load_tess_level = si_load_tess_level;1090ctx->abi.store_tcs_outputs = si_nir_store_output_tcs;1091ctx->abi.emit_outputs = si_llvm_emit_tcs_epilogue;1092ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;1093}10941095void si_llvm_init_tes_callbacks(struct si_shader_context *ctx, bool ngg_cull_shader)1096{1097ctx->abi.load_tess_varyings = si_nir_load_input_tes;1098ctx->abi.load_tess_coord = si_load_tess_coord;1099ctx->abi.load_tess_level = si_load_tess_level;1100ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;11011102if (ctx->shader->key.as_es)1103ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;1104else if (ngg_cull_shader)1105ctx->abi.emit_outputs = gfx10_emit_ngg_culling_epilogue;1106else if (ctx->shader->key.as_ngg)1107ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue;1108else1109ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;1110}111111121113