/* Path: blob/21.2-virgl/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c (4570 views) */
/*1* Copyright 2020 Advanced Micro Devices, Inc.2* All Rights Reserved.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* on the rights to use, copy, modify, merge, publish, distribute, sub8* license, and/or sell copies of the Software, and to permit persons to whom9* the Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL18* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,19* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR20* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE21* USE OR OTHER DEALINGS IN THE SOFTWARE.22*/2324#include "si_pipe.h"25#include "si_shader_internal.h"26#include "sid.h"27#include "util/u_memory.h"2829LLVMValueRef si_is_es_thread(struct si_shader_context *ctx)30{31/* Return true if the current thread should execute an ES thread. */32return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac),33si_unpack_param(ctx, ctx->args.merged_wave_info, 0, 8), "");34}3536LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx)37{38/* Return true if the current thread should execute a GS thread. 
*/39return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac),40si_unpack_param(ctx, ctx->args.merged_wave_info, 8, 8), "");41}4243static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, unsigned input_index,44unsigned vtx_offset_param, LLVMTypeRef type,45unsigned swizzle)46{47struct si_shader_context *ctx = si_shader_context_from_abi(abi);48struct si_shader *shader = ctx->shader;49LLVMValueRef vtx_offset, soffset;50struct si_shader_info *info = &shader->selector->info;51unsigned param;52LLVMValueRef value;5354param = si_shader_io_get_unique_index(info->input_semantic[input_index], false);5556/* GFX9 has the ESGS ring in LDS. */57if (ctx->screen->info.chip_class >= GFX9) {58unsigned index = vtx_offset_param;5960switch (index / 2) {61case 0:62vtx_offset = si_unpack_param(ctx, ctx->gs_vtx01_offset, index % 2 ? 16 : 0, 16);63break;64case 1:65vtx_offset = si_unpack_param(ctx, ctx->gs_vtx23_offset, index % 2 ? 16 : 0, 16);66break;67case 2:68vtx_offset = si_unpack_param(ctx, ctx->gs_vtx45_offset, index % 2 ? 16 : 0, 16);69break;70default:71assert(0);72return NULL;73}7475unsigned offset = param * 4 + swizzle;76vtx_offset =77LLVMBuildAdd(ctx->ac.builder, vtx_offset, LLVMConstInt(ctx->ac.i32, offset, false), "");7879LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->esgs_ring, vtx_offset);80LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, ptr, "");81return LLVMBuildBitCast(ctx->ac.builder, value, type, "");82}8384/* GFX6: input load from the ESGS ring in memory. */85/* Get the vertex offset parameter on GFX6. 
*/86LLVMValueRef gs_vtx_offset = ac_get_arg(&ctx->ac, ctx->args.gs_vtx_offset[vtx_offset_param]);8788vtx_offset = LLVMBuildMul(ctx->ac.builder, gs_vtx_offset, LLVMConstInt(ctx->ac.i32, 4, 0), "");8990soffset = LLVMConstInt(ctx->ac.i32, (param * 4 + swizzle) * 256, 0);9192value = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, ctx->ac.i32_0, vtx_offset, soffset, 0,93ctx->ac.f32, ac_glc, true, false);94return LLVMBuildBitCast(ctx->ac.builder, value, type, "");95}9697static LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,98unsigned driver_location, unsigned component,99unsigned num_components, unsigned vertex_index,100LLVMTypeRef type)101{102struct si_shader_context *ctx = si_shader_context_from_abi(abi);103104LLVMValueRef value[4];105for (unsigned i = component; i < component + num_components; i++) {106value[i] = si_llvm_load_input_gs(&ctx->abi, driver_location,107vertex_index, type, i);108}109110return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);111}112113/* Pass GS inputs from ES to GS on GFX9. 
*/114static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)115{116if (!ctx->shader->is_monolithic)117ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);118119LLVMValueRef ret = ctx->return_value;120121ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0);122ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1);123if (ctx->shader->key.as_ngg)124ret = si_insert_input_ptr(ctx, ret, ctx->args.gs_tg_info, 2);125else126ret = si_insert_input_ret(ctx, ret, ctx->args.gs2vs_offset, 2);127ret = si_insert_input_ret(ctx, ret, ctx->args.merged_wave_info, 3);128ret = si_insert_input_ret(ctx, ret, ctx->args.scratch_offset, 5);129130ret = si_insert_input_ptr(ctx, ret, ctx->internal_bindings, 8 + SI_SGPR_INTERNAL_BINDINGS);131ret = si_insert_input_ptr(ctx, ret, ctx->bindless_samplers_and_images,1328 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);133if (ctx->screen->use_ngg) {134ret = si_insert_input_ptr(ctx, ret, ctx->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS);135}136137unsigned vgpr = 8 + SI_NUM_VS_STATE_RESOURCE_SGPRS;138139ret = si_insert_input_ret_float(ctx, ret, ctx->gs_vtx01_offset, vgpr++);140ret = si_insert_input_ret_float(ctx, ret, ctx->gs_vtx23_offset, vgpr++);141ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_prim_id, vgpr++);142ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_invocation_id, vgpr++);143ret = si_insert_input_ret_float(ctx, ret, ctx->gs_vtx45_offset, vgpr++);144ctx->return_value = ret;145}146147void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs)148{149struct si_shader_context *ctx = si_shader_context_from_abi(abi);150struct si_shader *es = ctx->shader;151struct si_shader_info *info = &es->selector->info;152LLVMValueRef lds_base = NULL;153unsigned chan;154int i;155156if (ctx->screen->info.chip_class >= GFX9 && info->num_outputs) {157unsigned itemsize_dw = es->selector->esgs_itemsize / 4;158LLVMValueRef vertex_idx = 
ac_get_thread_id(&ctx->ac);159LLVMValueRef wave_idx = si_unpack_param(ctx, ctx->args.merged_wave_info, 24, 4);160vertex_idx =161LLVMBuildOr(ctx->ac.builder, vertex_idx,162LLVMBuildMul(ctx->ac.builder, wave_idx,163LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, false), ""),164"");165lds_base =166LLVMBuildMul(ctx->ac.builder, vertex_idx, LLVMConstInt(ctx->ac.i32, itemsize_dw, 0), "");167}168169for (i = 0; i < info->num_outputs; i++) {170int param;171172if (info->output_semantic[i] == VARYING_SLOT_VIEWPORT ||173info->output_semantic[i] == VARYING_SLOT_LAYER)174continue;175176param = si_shader_io_get_unique_index(info->output_semantic[i], false);177178for (chan = 0; chan < 4; chan++) {179if (!(info->output_usagemask[i] & (1 << chan)))180continue;181182LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");183out_val = ac_to_integer(&ctx->ac, out_val);184185/* GFX9 has the ESGS ring in LDS. */186if (ctx->screen->info.chip_class >= GFX9) {187LLVMValueRef idx = LLVMConstInt(ctx->ac.i32, param * 4 + chan, false);188idx = LLVMBuildAdd(ctx->ac.builder, lds_base, idx, "");189ac_build_indexed_store(&ctx->ac, ctx->esgs_ring, idx, out_val);190continue;191}192193ac_build_buffer_store_dword(&ctx->ac, ctx->esgs_ring, out_val, 1, NULL,194ac_get_arg(&ctx->ac, ctx->args.es2gs_offset),195(4 * param + chan) * 4, ac_glc | ac_slc | ac_swizzled);196}197}198199if (ctx->screen->info.chip_class >= GFX9)200si_set_es_return_value_for_gs(ctx);201}202203static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx)204{205if (ctx->screen->info.chip_class >= GFX9)206return si_unpack_param(ctx, ctx->args.merged_wave_info, 16, 8);207else208return ac_get_arg(&ctx->ac, ctx->args.gs_wave_id);209}210211static void emit_gs_epilogue(struct si_shader_context *ctx)212{213if (ctx->shader->key.as_ngg) {214gfx10_ngg_gs_emit_epilogue(ctx);215return;216}217218if (ctx->screen->info.chip_class >= GFX10)219LLVMBuildFence(ctx->ac.builder, LLVMAtomicOrderingRelease, false, 
"");220221ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, si_get_gs_wave_id(ctx));222223if (ctx->screen->info.chip_class >= GFX9)224ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);225}226227static void si_llvm_emit_gs_epilogue(struct ac_shader_abi *abi, unsigned max_outputs,228LLVMValueRef *addrs)229{230struct si_shader_context *ctx = si_shader_context_from_abi(abi);231struct si_shader_info UNUSED *info = &ctx->shader->selector->info;232233assert(info->num_outputs <= max_outputs);234235emit_gs_epilogue(ctx);236}237238/* Emit one vertex from the geometry shader */239static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs)240{241struct si_shader_context *ctx = si_shader_context_from_abi(abi);242243if (ctx->shader->key.as_ngg) {244gfx10_ngg_gs_emit_vertex(ctx, stream, addrs);245return;246}247248struct si_shader_info *info = &ctx->shader->selector->info;249struct si_shader *shader = ctx->shader;250LLVMValueRef soffset = ac_get_arg(&ctx->ac, ctx->args.gs2vs_offset);251LLVMValueRef gs_next_vertex;252LLVMValueRef can_emit;253unsigned chan, offset;254int i;255256/* Write vertex attribute values to GSVS ring */257gs_next_vertex = LLVMBuildLoad(ctx->ac.builder, ctx->gs_next_vertex[stream], "");258259/* If this thread has already emitted the declared maximum number of260* vertices, skip the write: excessive vertex emissions are not261* supposed to have any effect.262*263* If the shader has no writes to memory, kill it instead. 
This skips264* further memory loads and may allow LLVM to skip to the end265* altogether.266*/267can_emit =268LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex,269LLVMConstInt(ctx->ac.i32, shader->selector->info.base.gs.vertices_out, 0), "");270271bool use_kill = !info->base.writes_memory;272if (use_kill) {273ac_build_kill_if_false(&ctx->ac, can_emit);274} else {275ac_build_ifcc(&ctx->ac, can_emit, 6505);276}277278offset = 0;279for (i = 0; i < info->num_outputs; i++) {280for (chan = 0; chan < 4; chan++) {281if (!(info->output_usagemask[i] & (1 << chan)) ||282((info->output_streams[i] >> (2 * chan)) & 3) != stream)283continue;284285LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");286LLVMValueRef voffset =287LLVMConstInt(ctx->ac.i32, offset * shader->selector->info.base.gs.vertices_out, 0);288offset++;289290voffset = LLVMBuildAdd(ctx->ac.builder, voffset, gs_next_vertex, "");291voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, 0), "");292293out_val = ac_to_integer(&ctx->ac, out_val);294295ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring[stream], out_val, 1, voffset, soffset,2960, ac_glc | ac_slc | ac_swizzled);297}298}299300gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex, ctx->ac.i32_1, "");301LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]);302303/* Signal vertex emission if vertex data was written. 
*/304if (offset) {305ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),306si_get_gs_wave_id(ctx));307}308309if (!use_kill)310ac_build_endif(&ctx->ac, 6505);311}312313/* Cut one primitive from the geometry shader */314static void si_llvm_emit_primitive(struct ac_shader_abi *abi, unsigned stream)315{316struct si_shader_context *ctx = si_shader_context_from_abi(abi);317318if (ctx->shader->key.as_ngg) {319LLVMBuildStore(ctx->ac.builder, ctx->ac.i32_0, ctx->gs_curprim_verts[stream]);320return;321}322323/* Signal primitive cut */324ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8),325si_get_gs_wave_id(ctx));326}327328void si_preload_esgs_ring(struct si_shader_context *ctx)329{330if (ctx->screen->info.chip_class <= GFX8) {331unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? SI_GS_RING_ESGS : SI_ES_RING_ESGS;332LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, ring, 0);333LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->internal_bindings);334335ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);336} else {337if (USE_LDS_SYMBOLS) {338/* Declare the ESGS ring as an explicit LDS symbol. */339si_llvm_declare_esgs_ring(ctx);340} else {341ac_declare_lds_as_pointer(&ctx->ac);342ctx->esgs_ring = ctx->ac.lds;343}344}345}346347void si_preload_gs_rings(struct si_shader_context *ctx)348{349const struct si_shader_selector *sel = ctx->shader->selector;350LLVMBuilderRef builder = ctx->ac.builder;351LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, SI_RING_GSVS, 0);352LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->internal_bindings);353LLVMValueRef base_ring = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);354355/* The conceptual layout of the GSVS ring is356* v0c0 .. vLv0 v0c1 .. vLc1 ..357* but the real memory layout is swizzled across358* threads:359* t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... 
t15vLcL360* t16v0c0 ..361* Override the buffer descriptor accordingly.362*/363LLVMTypeRef v2i64 = LLVMVectorType(ctx->ac.i64, 2);364uint64_t stream_offset = 0;365366for (unsigned stream = 0; stream < 4; ++stream) {367unsigned num_components;368unsigned stride;369unsigned num_records;370LLVMValueRef ring, tmp;371372num_components = sel->info.num_stream_output_components[stream];373if (!num_components)374continue;375376stride = 4 * num_components * sel->info.base.gs.vertices_out;377378/* Limit on the stride field for <= GFX7. */379assert(stride < (1 << 14));380381num_records = ctx->ac.wave_size;382383ring = LLVMBuildBitCast(builder, base_ring, v2i64, "");384tmp = LLVMBuildExtractElement(builder, ring, ctx->ac.i32_0, "");385tmp = LLVMBuildAdd(builder, tmp, LLVMConstInt(ctx->ac.i64, stream_offset, 0), "");386stream_offset += stride * ctx->ac.wave_size;387388ring = LLVMBuildInsertElement(builder, ring, tmp, ctx->ac.i32_0, "");389ring = LLVMBuildBitCast(builder, ring, ctx->ac.v4i32, "");390tmp = LLVMBuildExtractElement(builder, ring, ctx->ac.i32_1, "");391tmp = LLVMBuildOr(392builder, tmp,393LLVMConstInt(ctx->ac.i32, S_008F04_STRIDE(stride) | S_008F04_SWIZZLE_ENABLE(1), 0), "");394ring = LLVMBuildInsertElement(builder, ring, tmp, ctx->ac.i32_1, "");395ring = LLVMBuildInsertElement(builder, ring, LLVMConstInt(ctx->ac.i32, num_records, 0),396LLVMConstInt(ctx->ac.i32, 2, 0), "");397398uint32_t rsrc3 =399S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |400S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |401S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */402S_008F0C_ADD_TID_ENABLE(1);403404if (ctx->ac.chip_class >= GFX10) {405rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |406S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);407} else {408rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |409S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) 
|410S_008F0C_ELEMENT_SIZE(1); /* element_size = 4 (bytes) */411}412413ring = LLVMBuildInsertElement(builder, ring, LLVMConstInt(ctx->ac.i32, rsrc3, false),414LLVMConstInt(ctx->ac.i32, 3, 0), "");415416ctx->gsvs_ring[stream] = ring;417}418}419420/* Generate code for the hardware VS shader stage to go with a geometry shader */421struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,422struct ac_llvm_compiler *compiler,423struct si_shader_selector *gs_selector,424struct pipe_debug_callback *debug)425{426struct si_shader_context ctx;427struct si_shader *shader;428LLVMBuilderRef builder;429struct si_shader_output_values outputs[SI_MAX_VS_OUTPUTS];430struct si_shader_info *gsinfo = &gs_selector->info;431int i;432433shader = CALLOC_STRUCT(si_shader);434if (!shader)435return NULL;436437/* We can leave the fence as permanently signaled because the GS copy438* shader only becomes visible globally after it has been compiled. */439util_queue_fence_init(&shader->ready);440441shader->selector = gs_selector;442shader->is_gs_copy_shader = true;443444si_llvm_context_init(&ctx, sscreen, compiler,445si_get_wave_size(sscreen, MESA_SHADER_VERTEX,446false, false, false, false));447ctx.shader = shader;448ctx.stage = MESA_SHADER_VERTEX;449450builder = ctx.ac.builder;451452si_llvm_create_main_func(&ctx, false);453454LLVMValueRef buf_ptr = ac_get_arg(&ctx.ac, ctx.internal_bindings);455ctx.gsvs_ring[0] =456ac_build_load_to_sgpr(&ctx.ac, buf_ptr, LLVMConstInt(ctx.ac.i32, SI_RING_GSVS, 0));457458LLVMValueRef voffset =459LLVMBuildMul(ctx.ac.builder, ctx.abi.vertex_id, LLVMConstInt(ctx.ac.i32, 4, 0), "");460461/* Fetch the vertex stream ID.*/462LLVMValueRef stream_id;463464if (!sscreen->use_ngg_streamout && gs_selector->so.num_outputs)465stream_id = si_unpack_param(&ctx, ctx.args.streamout_config, 24, 2);466else467stream_id = ctx.ac.i32_0;468469/* Fill in output information. 
*/470for (i = 0; i < gsinfo->num_outputs; ++i) {471outputs[i].semantic = gsinfo->output_semantic[i];472473for (int chan = 0; chan < 4; chan++) {474outputs[i].vertex_stream[chan] = (gsinfo->output_streams[i] >> (2 * chan)) & 3;475}476}477478LLVMBasicBlockRef end_bb;479LLVMValueRef switch_inst;480481end_bb = LLVMAppendBasicBlockInContext(ctx.ac.context, ctx.main_fn, "end");482switch_inst = LLVMBuildSwitch(builder, stream_id, end_bb, 4);483484for (int stream = 0; stream < 4; stream++) {485LLVMBasicBlockRef bb;486unsigned offset;487488if (!gsinfo->num_stream_output_components[stream])489continue;490491if (stream > 0 && !gs_selector->so.num_outputs)492continue;493494bb = LLVMInsertBasicBlockInContext(ctx.ac.context, end_bb, "out");495LLVMAddCase(switch_inst, LLVMConstInt(ctx.ac.i32, stream, 0), bb);496LLVMPositionBuilderAtEnd(builder, bb);497498/* Fetch vertex data from GSVS ring */499offset = 0;500for (i = 0; i < gsinfo->num_outputs; ++i) {501for (unsigned chan = 0; chan < 4; chan++) {502if (!(gsinfo->output_usagemask[i] & (1 << chan)) ||503outputs[i].vertex_stream[chan] != stream) {504outputs[i].values[chan] = LLVMGetUndef(ctx.ac.f32);505continue;506}507508LLVMValueRef soffset =509LLVMConstInt(ctx.ac.i32, offset * gs_selector->info.base.gs.vertices_out * 16 * 4, 0);510offset++;511512outputs[i].values[chan] =513ac_build_buffer_load(&ctx.ac, ctx.gsvs_ring[0], 1, ctx.ac.i32_0, voffset, soffset, 0,514ctx.ac.f32, ac_glc | ac_slc, true, false);515}516}517518/* Streamout and exports. 
*/519if (!sscreen->use_ngg_streamout && gs_selector->so.num_outputs) {520si_llvm_emit_streamout(&ctx, outputs, gsinfo->num_outputs, stream);521}522523if (stream == 0)524si_llvm_build_vs_exports(&ctx, outputs, gsinfo->num_outputs);525526LLVMBuildBr(builder, end_bb);527}528529LLVMPositionBuilderAtEnd(builder, end_bb);530531LLVMBuildRetVoid(ctx.ac.builder);532533ctx.stage = MESA_SHADER_GEOMETRY; /* override for shader dumping */534si_llvm_optimize_module(&ctx);535536bool ok = false;537if (si_compile_llvm(sscreen, &ctx.shader->binary, &ctx.shader->config, ctx.compiler, &ctx.ac,538debug, MESA_SHADER_GEOMETRY, "GS Copy Shader", false)) {539if (si_can_dump_shader(sscreen, MESA_SHADER_GEOMETRY))540fprintf(stderr, "GS Copy Shader:\n");541si_shader_dump(sscreen, ctx.shader, debug, stderr, true);542543if (!ctx.shader->config.scratch_bytes_per_wave)544ok = si_shader_binary_upload(sscreen, ctx.shader, 0);545else546ok = true;547}548549si_llvm_dispose(&ctx);550551if (!ok) {552FREE(shader);553shader = NULL;554} else {555si_fix_resource_usage(sscreen, shader);556}557return shader;558}559560/**561* Build the GS prolog function. Rotate the input vertices for triangle strips562* with adjacency.563*/564void si_llvm_build_gs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key)565{566unsigned num_sgprs, num_vgprs;567LLVMBuilderRef builder = ctx->ac.builder;568LLVMTypeRef returns[AC_MAX_ARGS];569LLVMValueRef func, ret;570571memset(&ctx->args, 0, sizeof(ctx->args));572573if (ctx->screen->info.chip_class >= GFX9) {574/* Other user SGPRs are not needed by GS. 
*/575num_sgprs = 8 + SI_NUM_VS_STATE_RESOURCE_SGPRS;576num_vgprs = 5; /* ES inputs are not needed by GS */577} else {578num_sgprs = GFX6_GS_NUM_USER_SGPR + 2;579num_vgprs = 8;580}581582for (unsigned i = 0; i < num_sgprs; ++i) {583ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);584returns[i] = ctx->ac.i32;585}586587for (unsigned i = 0; i < num_vgprs; ++i) {588ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL);589returns[num_sgprs + i] = ctx->ac.f32;590}591592/* Create the function. */593si_llvm_create_func(ctx, "gs_prolog", returns, num_sgprs + num_vgprs, 0);594func = ctx->main_fn;595596/* Copy inputs to outputs. This should be no-op, as the registers match,597* but it will prevent the compiler from overwriting them unintentionally.598*/599ret = ctx->return_value;600for (unsigned i = 0; i < num_sgprs; i++) {601LLVMValueRef p = LLVMGetParam(func, i);602ret = LLVMBuildInsertValue(builder, ret, p, i, "");603}604for (unsigned i = 0; i < num_vgprs; i++) {605LLVMValueRef p = LLVMGetParam(func, num_sgprs + i);606p = ac_to_float(&ctx->ac, p);607ret = LLVMBuildInsertValue(builder, ret, p, num_sgprs + i, "");608}609610if (key->gs_prolog.states.tri_strip_adj_fix) {611/* Remap the input vertices for every other primitive. 
*/612const struct ac_arg gfx6_vtx_params[6] = {613{.used = true, .arg_index = num_sgprs}, {.used = true, .arg_index = num_sgprs + 1},614{.used = true, .arg_index = num_sgprs + 3}, {.used = true, .arg_index = num_sgprs + 4},615{.used = true, .arg_index = num_sgprs + 5}, {.used = true, .arg_index = num_sgprs + 6},616};617const struct ac_arg gfx9_vtx_params[3] = {618{.used = true, .arg_index = num_sgprs},619{.used = true, .arg_index = num_sgprs + 1},620{.used = true, .arg_index = num_sgprs + 4},621};622LLVMValueRef vtx_in[6], vtx_out[6];623LLVMValueRef prim_id, rotate;624625if (ctx->screen->info.chip_class >= GFX9) {626for (unsigned i = 0; i < 3; i++) {627vtx_in[i * 2] = si_unpack_param(ctx, gfx9_vtx_params[i], 0, 16);628vtx_in[i * 2 + 1] = si_unpack_param(ctx, gfx9_vtx_params[i], 16, 16);629}630} else {631for (unsigned i = 0; i < 6; i++)632vtx_in[i] = ac_get_arg(&ctx->ac, gfx6_vtx_params[i]);633}634635prim_id = LLVMGetParam(func, num_sgprs + 2);636rotate = LLVMBuildTrunc(builder, prim_id, ctx->ac.i1, "");637638for (unsigned i = 0; i < 6; ++i) {639LLVMValueRef base, rotated;640base = vtx_in[i];641rotated = vtx_in[(i + 4) % 6];642vtx_out[i] = LLVMBuildSelect(builder, rotate, rotated, base, "");643}644645if (ctx->screen->info.chip_class >= GFX9) {646for (unsigned i = 0; i < 3; i++) {647LLVMValueRef hi, out;648649hi = LLVMBuildShl(builder, vtx_out[i * 2 + 1], LLVMConstInt(ctx->ac.i32, 16, 0), "");650out = LLVMBuildOr(builder, vtx_out[i * 2], hi, "");651out = ac_to_float(&ctx->ac, out);652ret = LLVMBuildInsertValue(builder, ret, out, gfx9_vtx_params[i].arg_index, "");653}654} else {655for (unsigned i = 0; i < 6; i++) {656LLVMValueRef out;657658out = ac_to_float(&ctx->ac, vtx_out[i]);659ret = LLVMBuildInsertValue(builder, ret, out, gfx6_vtx_params[i].arg_index, "");660}661}662}663664LLVMBuildRet(builder, ret);665}666667void si_llvm_init_gs_callbacks(struct si_shader_context *ctx)668{669ctx->abi.load_inputs = si_nir_load_input_gs;670ctx->abi.emit_vertex = 
si_llvm_emit_vertex;671ctx->abi.emit_primitive = si_llvm_emit_primitive;672ctx->abi.emit_outputs = si_llvm_emit_gs_epilogue;673}674675676